/*	$NetBSD: vfs_subr.c,v 1.398 2010/02/11 23:16:35 haad Exp $	*/

/*-
 * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
|
1994-06-29 10:29:24 +04:00
|
|
|
|
1994-05-17 08:21:49 +04:00
|
|
|
/*
|
1994-06-08 15:28:29 +04:00
|
|
|
* Copyright (c) 1989, 1993
|
|
|
|
* The Regents of the University of California. All rights reserved.
|
1994-05-17 08:21:49 +04:00
|
|
|
* (c) UNIX System Laboratories, Inc.
|
|
|
|
* All or some portions of this file are derived from material licensed
|
|
|
|
* to the University of California by American Telephone and Telegraph
|
|
|
|
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
|
|
|
|
* the permission of UNIX System Laboratories, Inc.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
2003-08-07 20:26:28 +04:00
|
|
|
* 3. Neither the name of the University nor the names of its contributors
|
1994-05-17 08:21:49 +04:00
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
1994-06-29 10:29:24 +04:00
|
|
|
* @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
|
1994-05-17 08:21:49 +04:00
|
|
|
*/

/*
 * Note on v_usecount and locking:
 *
 * At nearly all points where it is known that v_usecount could be zero,
 * the vnode interlock will be held.
 *
 * To change v_usecount away from zero, the interlock must be held.  To
 * change from a non-zero value to zero, again the interlock must be
 * held.
 *
 * There's a flag bit, VC_XLOCK, embedded in v_usecount.  To raise
 * v_usecount while the VC_XLOCK bit is set in it, the interlock must be
 * held.  To modify the VC_XLOCK bit, the interlock must be held.  We
 * always keep the usecount (v_usecount & VC_MASK) non-zero while the
 * VC_XLOCK bit is set.
 *
 * Unless the VC_XLOCK bit is set, changing the usecount from a non-zero
 * value to another non-zero value can safely be done using atomic
 * operations, without the interlock held.  Even if the VC_XLOCK bit is
 * set, decreasing the usecount to a non-zero value can be done using
 * atomic operations, without the interlock held.
 */
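
/*
 * Illustrative sketch, not part of the original file: one way a caller
 * could try to take a new reference without the interlock, falling back
 * to the locked path when the count is zero or VC_XLOCK is set.  The
 * helper name is hypothetical; the kernel's real fast path is in
 * vtryget()/vget().
 */
#if 0
static bool
vref_fastpath_sketch(vnode_t *vp)
{
	u_int use, next;

	for (use = vp->v_usecount;; use = next) {
		if (use == 0 || (use & VC_XLOCK) != 0) {
			/* Zero or exclusive-locked: need vp->v_interlock. */
			return false;
		}
		/* Non-zero -> non-zero transition: atomics alone suffice. */
		next = atomic_cas_uint(&vp->v_usecount, use, use + 1);
		if (next == use)
			return true;
	}
}
#endif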

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.398 2010/02/11 23:16:35 haad Exp $");

#include "opt_ddb.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>
#include <sys/atomic.h>
#include <sys/kthread.h>
#include <sys/wapbl.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>
#include <miscfs/syncfs/syncfs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_readahead.h>
#include <uvm/uvm_ddb.h>

#include <sys/sysctl.h>

const enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
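
/*
 * Illustrative sketch, not part of the original file: the tables above
 * back the IFTOVT()/VTTOIF() conversions between stat-style S_IF* mode
 * bits and vnode types.  The helper below is hypothetical and only shows
 * how an index into iftovt_tab is formed.
 */
#if 0
static enum vtype
mode_to_vtype_sketch(mode_t mode)
{
	/* The file-type bits occupy the top nibble of the mode. */
	return iftovt_tab[(mode & S_IFMT) >> 12];
}
#endif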

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {						\
	LIST_REMOVE(bp, b_vnbufs);				\
	(bp)->b_vnbufs.le_next = NOLIST;			\
}

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

static vnodelst_t vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
static vnodelst_t vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
static vnodelst_t vrele_list = TAILQ_HEAD_INITIALIZER(vrele_list);

struct mntlist mountlist =		/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);

u_int numvnodes;
static specificdata_domain_t mount_specificdata_domain;

static int vrele_pending;
static int vrele_gen;
static kmutex_t vrele_lock;
static kcondvar_t vrele_cv;
static lwp_t *vrele_lwp;

static uint64_t mountgen = 0;
static kmutex_t mountgen_lock;

kmutex_t mountlist_lock;
kmutex_t mntid_lock;
kmutex_t mntvnode_lock;
kmutex_t vnode_free_list_lock;
kmutex_t vfs_list_lock;

static pool_cache_t vnode_cache;

/*
 * These define the root filesystem and device.
 */
struct vnode *rootvnode;
struct device *root_device;		/* root device */

/*
 * Local declarations.
 */

static void vrele_thread(void *);
static void insmntque(vnode_t *, struct mount *);
static int getdevvp(dev_t, vnode_t **, enum vtype);
static vnode_t *getcleanvnode(void);
void vpanic(vnode_t *, const char *);
static void vfs_shutdown1(struct lwp *);

#ifdef DEBUG
void printlockedvnodes(void);
#endif

#ifdef DIAGNOSTIC
void
vpanic(vnode_t *vp, const char *msg)
{

	vprint(NULL, vp);
	panic("%s\n", msg);
}
#else
#define	vpanic(vp, msg)	/* nothing */
#endif

void
vn_init1(void)
{

	vnode_cache = pool_cache_init(sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    NULL, IPL_NONE, NULL, NULL, NULL);
	KASSERT(vnode_cache != NULL);

	/* Create deferred release thread. */
	mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&vrele_cv, "vrele");
	if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
	    NULL, &vrele_lwp, "vrele"))
		panic("fork vrele");
}

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit(void)
{

	mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);

	mount_specificdata_domain = specificdata_domain_create();

	/* Initialize the filesystem syncer. */
	vn_initialize_syncerd();
	vn_init1();
}

int
vfs_drainvnodes(long target, struct lwp *l)
{

	while (numvnodes > target) {
		vnode_t *vp;

		mutex_enter(&vnode_free_list_lock);
		vp = getcleanvnode();
		if (vp == NULL)
			return EBUSY; /* give up */
		ungetnewvnode(vp);
	}

	return 0;
}

/*
 * Lookup a mount point by filesystem identifier.
 *
 * XXX Needs to add a reference to the mount point.
 */
struct mount *
vfs_getvfs(fsid_t *fsid)
{
	struct mount *mp;

	mutex_enter(&mountlist_lock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
		    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
			mutex_exit(&mountlist_lock);
			return (mp);
		}
	}
	mutex_exit(&mountlist_lock);
	return ((struct mount *)0);
}

/*
 * Drop a reference to a mount structure, freeing if the last reference.
 */
void
vfs_destroy(struct mount *mp)
{

	if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
		return;
	}

	/*
	 * Nothing else has visibility of the mount: we can now
	 * free the data structures.
	 */
	KASSERT(mp->mnt_refcnt == 0);
	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
	rw_destroy(&mp->mnt_unmounting);
	mutex_destroy(&mp->mnt_updating);
	mutex_destroy(&mp->mnt_renamelock);
	if (mp->mnt_op != NULL) {
		vfs_delref(mp->mnt_op);
	}
	kmem_free(mp, sizeof(*mp));
}

/*
 * Grab a vnode from the freelist and clean it.
 */
vnode_t *
getcleanvnode(void)
{
	vnode_t *vp;
	vnodelst_t *listhd;

	KASSERT(mutex_owned(&vnode_free_list_lock));

retry:
	listhd = &vnode_free_list;
try_nextlist:
	TAILQ_FOREACH(vp, listhd, v_freelist) {
		/*
		 * It's safe to test v_usecount and v_iflag
		 * without holding the interlock here, since
		 * these vnodes should never appear on the
		 * lists.
		 */
		if (vp->v_usecount != 0) {
			vpanic(vp, "free vnode isn't");
		}
		if ((vp->v_iflag & VI_CLEAN) != 0) {
			vpanic(vp, "clean vnode on freelist");
		}
		if (vp->v_freelisthd != listhd) {
			printf("vnode sez %p, listhd %p\n", vp->v_freelisthd, listhd);
			vpanic(vp, "list head mismatch");
		}
		if (!mutex_tryenter(&vp->v_interlock))
			continue;
		/*
		 * Our lwp might hold the underlying vnode
		 * locked, so don't try to reclaim a VI_LAYER
		 * node if it's locked.
		 */
		if ((vp->v_iflag & VI_XLOCK) == 0 &&
		    ((vp->v_iflag & VI_LAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
			break;
		}
		mutex_exit(&vp->v_interlock);
	}

	if (vp == NULL) {
		if (listhd == &vnode_free_list) {
			listhd = &vnode_hold_list;
			goto try_nextlist;
		}
		mutex_exit(&vnode_free_list_lock);
		return NULL;
	}

	/* Remove it from the freelist. */
	TAILQ_REMOVE(listhd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);

	if (vp->v_usecount != 0) {
		/*
		 * The vnode was referenced again before we got the
		 * interlock.  Don't return it to the freelist - the
		 * holder of the last reference will destroy it.
		 */
		mutex_exit(&vp->v_interlock);
		mutex_enter(&vnode_free_list_lock);
		goto retry;
	}

	/*
	 * The vnode is still associated with a file system, so we must
	 * clean it out before reusing it.  We need to add a reference
	 * before doing this.  If the vnode gains another reference while
	 * being cleaned out then we lose - retry.
	 */
	atomic_add_int(&vp->v_usecount, 1 + VC_XLOCK);
	vclean(vp, DOCLOSE);
	KASSERT(vp->v_usecount >= 1 + VC_XLOCK);
	atomic_add_int(&vp->v_usecount, -VC_XLOCK);
	if (vp->v_usecount == 1) {
		/* We're about to dirty it. */
		vp->v_iflag &= ~VI_CLEAN;
		mutex_exit(&vp->v_interlock);
		if (vp->v_type == VBLK || vp->v_type == VCHR) {
			spec_node_destroy(vp);
		}
		vp->v_type = VNON;
	} else {
		/*
		 * Don't return to freelist - the holder of the last
		 * reference will destroy it.
		 */
		vrelel(vp, 0); /* releases vp->v_interlock */
		mutex_enter(&vnode_free_list_lock);
		goto retry;
	}

	if (vp->v_data != NULL || vp->v_uobj.uo_npages != 0 ||
	    !TAILQ_EMPTY(&vp->v_uobj.memq)) {
		vpanic(vp, "cleaned vnode isn't");
	}
	if (vp->v_numoutput != 0) {
		vpanic(vp, "clean vnode has pending I/O's");
	}
	if ((vp->v_iflag & VI_ONWORKLST) != 0) {
		vpanic(vp, "clean vnode on syncer list");
	}

	return vp;
}

/*
 * Mark a mount point as busy, and gain a new reference to it.  Used to
 * prevent the file system from being unmounted during critical sections.
 *
 * => The caller must hold a pre-existing reference to the mount.
 * => Will fail if the file system is being unmounted, or is unmounted.
 */
int
vfs_busy(struct mount *mp, struct mount **nextp)
{

	KASSERT(mp->mnt_refcnt > 0);

	if (__predict_false(!rw_tryenter(&mp->mnt_unmounting, RW_READER))) {
		if (nextp != NULL) {
			KASSERT(mutex_owned(&mountlist_lock));
			*nextp = CIRCLEQ_NEXT(mp, mnt_list);
		}
		return EBUSY;
	}
	if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
		rw_exit(&mp->mnt_unmounting);
		if (nextp != NULL) {
			KASSERT(mutex_owned(&mountlist_lock));
			*nextp = CIRCLEQ_NEXT(mp, mnt_list);
		}
		return ENOENT;
	}
	if (nextp != NULL) {
		mutex_exit(&mountlist_lock);
	}
	atomic_inc_uint(&mp->mnt_refcnt);
	return 0;
}

/*
 * Unbusy a busy filesystem.
 *
 * => If keepref is true, preserve reference added by vfs_busy().
 * => If nextp != NULL, acquire mountlist_lock.
 */
void
vfs_unbusy(struct mount *mp, bool keepref, struct mount **nextp)
{

	KASSERT(mp->mnt_refcnt > 0);

	if (nextp != NULL) {
		mutex_enter(&mountlist_lock);
	}
	rw_exit(&mp->mnt_unmounting);
	if (!keepref) {
		vfs_destroy(mp);
	}
	if (nextp != NULL) {
		KASSERT(mutex_owned(&mountlist_lock));
		*nextp = CIRCLEQ_NEXT(mp, mnt_list);
	}
}
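
/*
 * Illustrative sketch, not part of the original file: the usual pairing
 * of vfs_busy()/vfs_unbusy() around an operation that must not race with
 * unmount.  The helper and callback names are hypothetical.
 */
#if 0
static int
with_mount_busy_sketch(struct mount *mp, int (*op)(struct mount *))
{
	int error;

	/*
	 * The caller already holds a reference; vfs_busy() adds another
	 * and takes a read hold on mnt_unmounting, failing if an unmount
	 * is in progress.
	 */
	error = vfs_busy(mp, NULL);
	if (error != 0)
		return error;
	error = (*op)(mp);
	/* Drop the read hold and the reference gained by vfs_busy(). */
	vfs_unbusy(mp, false, NULL);
	return error;
}
#endif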

struct mount *
vfs_mountalloc(struct vfsops *vfsops, struct vnode *vp)
{
	int error;
	struct mount *mp;

	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
	if (mp == NULL)
		return NULL;

	mp->mnt_op = vfsops;
	mp->mnt_refcnt = 1;
	TAILQ_INIT(&mp->mnt_vnodelist);
	rw_init(&mp->mnt_unmounting);
	mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
	error = vfs_busy(mp, NULL);
	KASSERT(error == 0);
	mp->mnt_vnodecovered = vp;
	mount_initspecific(mp);

	mutex_enter(&mountgen_lock);
	mp->mnt_gen = mountgen++;
	mutex_exit(&mountgen_lock);

	return mp;
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(const char *fstypename, const char *devname,
    struct mount **mpp)
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename,
		    sizeof(mp->mnt_stat.f_fstypename)))
			break;
	if (vfsp == NULL) {
		mutex_exit(&vfs_list_lock);
		return (ENODEV);
	}
	vfsp->vfs_refcount++;
	mutex_exit(&vfs_list_lock);

	if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
		return ENOMEM;
	mp->mnt_flag = MNT_RDONLY;
	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
	    sizeof(mp->mnt_stat.f_fstypename));
	mp->mnt_stat.f_mntonname[0] = '/';
	mp->mnt_stat.f_mntonname[1] = '\0';
	mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
	    '\0';
	(void)copystr(devname, mp->mnt_stat.f_mntfromname,
	    sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)(void *);

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
    vnode_t **vpp)
{
	struct uvm_object *uobj;
	static int toggle;
	vnode_t *vp;
	int error = 0, tryalloc;

try_again:
	if (mp != NULL) {
		/*
		 * Mark filesystem busy while we're creating a
		 * vnode.  If unmount is in progress, this will
		 * fail.
		 */
		error = vfs_busy(mp, NULL);
		if (error)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one.  The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list.  Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list.  If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list.  The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size.  We are reticent to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */

	vp = NULL;

	mutex_enter(&vnode_free_list_lock);

	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));

	if (tryalloc) {
		numvnodes++;
		mutex_exit(&vnode_free_list_lock);
		if ((vp = vnalloc(NULL)) == NULL) {
			mutex_enter(&vnode_free_list_lock);
			numvnodes--;
		} else
			vp->v_usecount = 1;
	}

	if (vp == NULL) {
		vp = getcleanvnode();
		if (vp == NULL) {
			if (mp != NULL) {
				vfs_unbusy(mp, false, NULL);
			}
			if (tryalloc) {
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				kpause("newvn", false, hz, NULL);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		vp->v_iflag = 0;
		vp->v_vflag = 0;
		vp->v_uflag = 0;
		vp->v_socket = NULL;
	}

	KASSERT(vp->v_usecount == 1);
	KASSERT(vp->v_freelisthd == NULL);
	KASSERT(LIST_EMPTY(&vp->v_nclist));
	KASSERT(LIST_EMPTY(&vp->v_dnclist));

	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_data = 0;

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
	vp->v_size = vp->v_writesize = VSIZENOTSET;

	if (mp != NULL) {
		if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
			vp->v_vflag |= VV_MPSAFE;
		vfs_unbusy(mp, true, NULL);
	}

	return (0);
}
|
|
|
|
|
2000-06-28 03:34:45 +04:00
|
|
|
/*
|
|
|
|
* This is really just the reverse of getnewvnode(). Needed for
|
|
|
|
 * VFS_VGET functions that may need to push back a vnode in case
|
|
|
|
* of a locking race.
|
|
|
|
*/
|
|
|
|
void
|
2008-01-02 14:48:20 +03:00
|
|
|
ungetnewvnode(vnode_t *vp)
|
2000-06-28 03:34:45 +04:00
|
|
|
{
|
|
|
|
|
2010-01-15 22:28:26 +03:00
|
|
|
KASSERT(vp->v_usecount == 1);
|
2008-01-02 14:48:20 +03:00
|
|
|
KASSERT(vp->v_data == NULL);
|
|
|
|
KASSERT(vp->v_freelisthd == NULL);
|
|
|
|
|
|
|
|
mutex_enter(&vp->v_interlock);
|
|
|
|
vp->v_iflag |= VI_CLEAN;
|
2008-01-28 01:47:31 +03:00
|
|
|
vrelel(vp, 0);
|
2008-01-02 14:48:20 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Allocate a new, uninitialized vnode. If 'mp' is non-NULL, this is a
|
|
|
|
* marker vnode and we are prepared to wait for the allocation.
|
|
|
|
*/
|
|
|
|
vnode_t *
|
2008-01-03 04:26:28 +03:00
|
|
|
vnalloc(struct mount *mp)
|
2008-01-02 14:48:20 +03:00
|
|
|
{
|
|
|
|
vnode_t *vp;
|
|
|
|
|
|
|
|
vp = pool_cache_get(vnode_cache, (mp != NULL ? PR_WAITOK : PR_NOWAIT));
|
|
|
|
if (vp == NULL) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
memset(vp, 0, sizeof(*vp));
|
|
|
|
UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 0);
|
|
|
|
cv_init(&vp->v_cv, "vnode");
|
2004-03-23 16:22:32 +03:00
|
|
|
/*
|
2008-01-02 14:48:20 +03:00
|
|
|
* done by memset() above.
|
|
|
|
* LIST_INIT(&vp->v_nclist);
|
|
|
|
* LIST_INIT(&vp->v_dnclist);
|
2000-06-28 03:34:45 +04:00
|
|
|
*/
|
2008-01-02 14:48:20 +03:00
|
|
|
|
|
|
|
if (mp != NULL) {
|
|
|
|
vp->v_mount = mp;
|
|
|
|
vp->v_type = VBAD;
|
|
|
|
vp->v_iflag = VI_MARKER;
|
|
|
|
} else {
|
2008-01-30 12:50:19 +03:00
|
|
|
rw_init(&vp->v_lock.vl_lock);
|
2008-01-02 14:48:20 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return vp;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Free an unused, unreferenced vnode.
|
|
|
|
*/
|
|
|
|
void
|
2008-01-03 04:26:28 +03:00
|
|
|
vnfree(vnode_t *vp)
|
2008-01-02 14:48:20 +03:00
|
|
|
{
|
|
|
|
|
|
|
|
KASSERT(vp->v_usecount == 0);
|
|
|
|
|
|
|
|
if ((vp->v_iflag & VI_MARKER) == 0) {
|
2008-01-30 12:50:19 +03:00
|
|
|
rw_destroy(&vp->v_lock.vl_lock);
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_enter(&vnode_free_list_lock);
|
|
|
|
numvnodes--;
|
|
|
|
mutex_exit(&vnode_free_list_lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
UVM_OBJ_DESTROY(&vp->v_uobj);
|
|
|
|
cv_destroy(&vp->v_cv);
|
|
|
|
pool_cache_put(vnode_cache, vp);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Remove a vnode from its freelist.
|
|
|
|
*/
|
|
|
|
static inline void
|
|
|
|
vremfree(vnode_t *vp)
|
|
|
|
{
|
|
|
|
|
|
|
|
KASSERT(mutex_owned(&vp->v_interlock));
|
2008-06-05 16:32:57 +04:00
|
|
|
KASSERT(vp->v_usecount == 0);
|
2008-01-02 14:48:20 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Note that the reference count must not change until
|
|
|
|
* the vnode is removed.
|
|
|
|
*/
|
|
|
|
mutex_enter(&vnode_free_list_lock);
|
|
|
|
if (vp->v_holdcnt > 0) {
|
|
|
|
KASSERT(vp->v_freelisthd == &vnode_hold_list);
|
|
|
|
} else {
|
|
|
|
KASSERT(vp->v_freelisthd == &vnode_free_list);
|
|
|
|
}
|
|
|
|
TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
|
|
|
|
vp->v_freelisthd = NULL;
|
|
|
|
mutex_exit(&vnode_free_list_lock);
|
2000-06-28 03:34:45 +04:00
|
|
|
}
|
|
|
|
|
1994-05-17 08:21:49 +04:00
|
|
|
/*
|
|
|
|
* Move a vnode from one mount queue to another.
|
|
|
|
*/
|
2006-01-17 00:44:46 +03:00
|
|
|
static void
|
2008-01-02 14:48:20 +03:00
|
|
|
insmntque(vnode_t *vp, struct mount *mp)
|
1994-05-17 08:21:49 +04:00
|
|
|
{
|
2008-01-30 14:46:59 +03:00
|
|
|
struct mount *omp;
|
1994-05-17 08:21:49 +04:00
|
|
|
|
1999-07-04 20:20:12 +04:00
|
|
|
#ifdef DIAGNOSTIC
|
|
|
|
if ((mp != NULL) &&
|
2003-10-14 18:02:56 +04:00
|
|
|
(mp->mnt_iflag & IMNT_UNMOUNT) &&
|
1999-11-15 21:49:07 +03:00
|
|
|
vp->v_tag != VT_VFS) {
|
1999-07-04 20:20:12 +04:00
|
|
|
panic("insmntque into dying filesystem");
|
|
|
|
}
|
|
|
|
#endif
|
2004-03-23 16:22:32 +03:00
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_enter(&mntvnode_lock);
|
1994-05-17 08:21:49 +04:00
|
|
|
/*
|
|
|
|
* Delete from old mount point vnode list, if on one.
|
|
|
|
*/
|
2008-01-30 14:46:59 +03:00
|
|
|
if ((omp = vp->v_mount) != NULL)
|
2006-10-20 22:58:12 +04:00
|
|
|
TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes);
|
1994-05-17 08:21:49 +04:00
|
|
|
/*
|
2008-01-30 14:46:59 +03:00
|
|
|
* Insert into list of vnodes for the new mount point, if
|
|
|
|
* available. The caller must take a reference on the mount
|
|
|
|
 * structure and donate it to the vnode.
|
1994-05-17 08:21:49 +04:00
|
|
|
*/
|
2007-01-15 22:13:30 +03:00
|
|
|
if ((vp->v_mount = mp) != NULL)
|
|
|
|
TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_exit(&mntvnode_lock);
|
2008-01-30 14:46:59 +03:00
|
|
|
|
|
|
|
if (omp != NULL) {
|
|
|
|
/* Release reference to old mount. */
|
PR kern/38141 lookup/vfs_busy acquire rwlock recursively
Simplify the mount locking. Remove all the crud to deal with recursion on
the mount lock, and crud to deal with unmount as another weirdo lock.
Hopefully this will once and for all fix the deadlocks with this. With this
commit there are two locks on each mount:
- krwlock_t mnt_unmounting. This is used to prevent unmount across critical
sections like getnewvnode(). It's only ever read locked with rw_tryenter(),
and is only ever write locked in dounmount(). A write hold can't be taken
on this lock if the current LWP could hold a vnode lock.
- kmutex_t mnt_updating. This is taken by threads updating the mount, for
example when going r/o -> r/w, and is only present to serialize updates.
In order to take this lock, a read hold must first be taken on
mnt_unmounting, and the two need to be held across the operation.
One effect of this change: previously if an unmount failed, we would make a
half hearted attempt to back out of it gracefully, but that was unlikely to
work in a lot of cases. Now while an unmount that will be aborted is in
progress, new file operations within the mount will fail instead of being
delayed. That is unlikely to be a problem though, because if the admin
requests unmount of a file system then s(he) has made a decision to deny
access to the resource.
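A minimal sketch of the update-side protocol described above, assuming the mnt_unmounting (krwlock_t) and mnt_updating (kmutex_t) members named in the log message; the function name is illustrative and is not code from this file.

/*
 * Sketch only: serialize a mount update (for example r/o -> r/w).  The
 * read hold on mnt_unmounting keeps dounmount() out; mnt_updating
 * serializes concurrent updaters.  Both are held across the operation.
 */
static int
examplefs_update(struct mount *mp)
{
	int error = 0;

	if (!rw_tryenter(&mp->mnt_unmounting, RW_READER))
		return EBUSY;			/* unmount in progress */
	mutex_enter(&mp->mnt_updating);

	/* ... perform the update while both holds are kept ... */

	mutex_exit(&mp->mnt_updating);
	rw_exit(&mp->mnt_unmounting);
	return error;
}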
2008-05-06 22:43:44 +04:00
|
|
|
vfs_destroy(omp);
|
2008-01-30 14:46:59 +03:00
|
|
|
}
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
|
|
|
|
2008-07-17 00:06:19 +04:00
|
|
|
/*
|
|
|
|
* Wait for a vnode (typically with VI_XLOCK set) to be cleaned or
|
|
|
|
* recycled.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
vwait(vnode_t *vp, int flags)
|
|
|
|
{
|
|
|
|
|
|
|
|
KASSERT(mutex_owned(&vp->v_interlock));
|
|
|
|
KASSERT(vp->v_usecount != 0);
|
|
|
|
|
|
|
|
while ((vp->v_iflag & flags) != 0)
|
|
|
|
cv_wait(&vp->v_cv, &vp->v_interlock);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Insert a marker vnode into a mount's vnode list, after the
|
|
|
|
* specified vnode. mntvnode_lock must be held.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
vmark(vnode_t *mvp, vnode_t *vp)
|
|
|
|
{
|
|
|
|
struct mount *mp;
|
|
|
|
|
|
|
|
mp = mvp->v_mount;
|
|
|
|
|
|
|
|
KASSERT(mutex_owned(&mntvnode_lock));
|
|
|
|
KASSERT((mvp->v_iflag & VI_MARKER) != 0);
|
|
|
|
KASSERT(vp->v_mount == mp);
|
|
|
|
|
|
|
|
TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vp, mvp, v_mntvnodes);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Remove a marker vnode from a mount's vnode list, and return
|
|
|
|
* a pointer to the next vnode in the list. mntvnode_lock must
|
|
|
|
* be held.
|
|
|
|
*/
|
|
|
|
vnode_t *
|
|
|
|
vunmark(vnode_t *mvp)
|
|
|
|
{
|
|
|
|
vnode_t *vp;
|
|
|
|
struct mount *mp;
|
|
|
|
|
|
|
|
mp = mvp->v_mount;
|
|
|
|
|
|
|
|
KASSERT(mutex_owned(&mntvnode_lock));
|
|
|
|
KASSERT((mvp->v_iflag & VI_MARKER) != 0);
|
|
|
|
|
|
|
|
vp = TAILQ_NEXT(mvp, v_mntvnodes);
|
|
|
|
TAILQ_REMOVE(&mp->mnt_vnodelist, mvp, v_mntvnodes);
|
|
|
|
|
|
|
|
KASSERT(vp == NULL || vp->v_mount == mp);
|
|
|
|
|
|
|
|
return vp;
|
|
|
|
}
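The marker interface above is what lets a traversal drop mntvnode_lock between vnodes; a sketch of that pattern follows. The visit callback and the function name are made up, and vflush() later in this file uses the same idiom.

/*
 * Sketch: walk every vnode on a mount with a marker vnode, so that
 * mntvnode_lock can be released while each vnode is examined.  The
 * callback must take its own reference or interlock on vp if it needs
 * the vnode to stay around; the marker only protects the list position.
 */
static void
example_walk_mount(struct mount *mp, void (*visit)(vnode_t *))
{
	vnode_t *vp, *mvp;

	if ((mvp = vnalloc(mp)) == NULL)
		return;				/* no memory for the marker */
	mutex_enter(&mntvnode_lock);
	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL;
	    vp = vunmark(mvp)) {
		vmark(mvp, vp);			/* marker remembers our place */
		if (vp->v_mount != mp || vismarker(vp))
			continue;
		mutex_exit(&mntvnode_lock);
		(*visit)(vp);
		mutex_enter(&mntvnode_lock);
	}
	mutex_exit(&mntvnode_lock);
	vnfree(mvp);
}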
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Update outstanding I/O count and do wakeup if requested.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
vwakeup(struct buf *bp)
|
|
|
|
{
|
|
|
|
struct vnode *vp;
|
|
|
|
|
|
|
|
if ((vp = bp->b_vp) == NULL)
|
|
|
|
return;
|
|
|
|
|
|
|
|
KASSERT(bp->b_objlock == &vp->v_interlock);
|
|
|
|
KASSERT(mutex_owned(bp->b_objlock));
|
|
|
|
|
|
|
|
if (--vp->v_numoutput < 0)
|
|
|
|
panic("vwakeup: neg numoutput, vp %p", vp);
|
|
|
|
if (vp->v_numoutput == 0)
|
|
|
|
cv_broadcast(&vp->v_cv);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Flush out and invalidate all buffers associated with a vnode.
|
|
|
|
* Called with the underlying vnode locked, which should prevent new dirty
|
|
|
|
* buffers from being queued.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l,
|
|
|
|
bool catch, int slptimeo)
|
|
|
|
{
|
|
|
|
struct buf *bp, *nbp;
|
|
|
|
int error;
|
|
|
|
int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
|
|
|
|
(flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0);
|
|
|
|
|
|
|
|
/* XXXUBC this doesn't look at flags or slp* */
|
|
|
|
mutex_enter(&vp->v_interlock);
|
|
|
|
error = VOP_PUTPAGES(vp, 0, 0, flushflags);
|
|
|
|
if (error) {
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (flags & V_SAVE) {
|
|
|
|
error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0);
|
|
|
|
if (error)
|
|
|
|
return (error);
|
|
|
|
KASSERT(LIST_EMPTY(&vp->v_dirtyblkhd));
|
|
|
|
}
|
|
|
|
|
|
|
|
mutex_enter(&bufcache_lock);
|
|
|
|
restart:
|
|
|
|
for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
|
|
|
|
nbp = LIST_NEXT(bp, b_vnbufs);
|
|
|
|
error = bbusy(bp, catch, slptimeo, NULL);
|
|
|
|
if (error != 0) {
|
|
|
|
if (error == EPASSTHROUGH)
|
|
|
|
goto restart;
|
|
|
|
mutex_exit(&bufcache_lock);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
brelsel(bp, BC_INVAL | BC_VFLUSH);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
|
|
|
|
nbp = LIST_NEXT(bp, b_vnbufs);
|
|
|
|
error = bbusy(bp, catch, slptimeo, NULL);
|
|
|
|
if (error != 0) {
|
|
|
|
if (error == EPASSTHROUGH)
|
|
|
|
goto restart;
|
|
|
|
mutex_exit(&bufcache_lock);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* XXX Since there are no node locks for NFS, I believe
|
|
|
|
* there is a slight chance that a delayed write will
|
|
|
|
* occur while sleeping just above, so check for it.
|
|
|
|
*/
|
|
|
|
if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) {
|
|
|
|
#ifdef DEBUG
|
|
|
|
printf("buffer still DELWRI\n");
|
|
|
|
#endif
|
|
|
|
bp->b_cflags |= BC_BUSY | BC_VFLUSH;
|
|
|
|
mutex_exit(&bufcache_lock);
|
|
|
|
VOP_BWRITE(bp);
|
|
|
|
mutex_enter(&bufcache_lock);
|
|
|
|
goto restart;
|
|
|
|
}
|
|
|
|
brelsel(bp, BC_INVAL | BC_VFLUSH);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef DIAGNOSTIC
|
|
|
|
if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
|
|
|
|
panic("vinvalbuf: flush failed, vp %p", vp);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
mutex_exit(&bufcache_lock);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Destroy any in core blocks past the truncation length.
|
|
|
|
* Called with the underlying vnode locked, which should prevent new dirty
|
|
|
|
* buffers from being queued.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo)
|
|
|
|
{
|
|
|
|
struct buf *bp, *nbp;
|
|
|
|
int error;
|
|
|
|
voff_t off;
|
|
|
|
|
|
|
|
off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
|
|
|
|
mutex_enter(&vp->v_interlock);
|
|
|
|
error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
|
|
|
|
if (error) {
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
mutex_enter(&bufcache_lock);
|
|
|
|
restart:
|
|
|
|
for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
|
|
|
|
nbp = LIST_NEXT(bp, b_vnbufs);
|
|
|
|
if (bp->b_lblkno < lbn)
|
|
|
|
continue;
|
|
|
|
error = bbusy(bp, catch, slptimeo, NULL);
|
|
|
|
if (error != 0) {
|
|
|
|
if (error == EPASSTHROUGH)
|
|
|
|
goto restart;
|
|
|
|
mutex_exit(&bufcache_lock);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
brelsel(bp, BC_INVAL | BC_VFLUSH);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
|
|
|
|
nbp = LIST_NEXT(bp, b_vnbufs);
|
|
|
|
if (bp->b_lblkno < lbn)
|
|
|
|
continue;
|
|
|
|
error = bbusy(bp, catch, slptimeo, NULL);
|
|
|
|
if (error != 0) {
|
|
|
|
if (error == EPASSTHROUGH)
|
|
|
|
goto restart;
|
|
|
|
mutex_exit(&bufcache_lock);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
brelsel(bp, BC_INVAL | BC_VFLUSH);
|
|
|
|
}
|
|
|
|
mutex_exit(&bufcache_lock);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Flush all dirty buffers from a vnode.
|
|
|
|
* Called with the underlying vnode locked, which should prevent new dirty
|
|
|
|
* buffers from being queued.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
vflushbuf(struct vnode *vp, int sync)
|
|
|
|
{
|
|
|
|
struct buf *bp, *nbp;
|
|
|
|
int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
|
|
|
|
bool dirty;
|
|
|
|
|
|
|
|
mutex_enter(&vp->v_interlock);
|
|
|
|
(void) VOP_PUTPAGES(vp, 0, 0, flags);
|
|
|
|
|
|
|
|
loop:
|
|
|
|
mutex_enter(&bufcache_lock);
|
|
|
|
for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
|
|
|
|
nbp = LIST_NEXT(bp, b_vnbufs);
|
|
|
|
if ((bp->b_cflags & BC_BUSY))
|
|
|
|
continue;
|
|
|
|
if ((bp->b_oflags & BO_DELWRI) == 0)
|
|
|
|
panic("vflushbuf: not dirty, bp %p", bp);
|
|
|
|
bp->b_cflags |= BC_BUSY | BC_VFLUSH;
|
|
|
|
mutex_exit(&bufcache_lock);
|
|
|
|
/*
|
|
|
|
* Wait for I/O associated with indirect blocks to complete,
|
|
|
|
* since there is no way to quickly wait for them below.
|
|
|
|
*/
|
|
|
|
if (bp->b_vp == vp || sync == 0)
|
|
|
|
(void) bawrite(bp);
|
|
|
|
else
|
|
|
|
(void) bwrite(bp);
|
|
|
|
goto loop;
|
|
|
|
}
|
|
|
|
mutex_exit(&bufcache_lock);
|
|
|
|
|
|
|
|
if (sync == 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
mutex_enter(&vp->v_interlock);
|
|
|
|
while (vp->v_numoutput != 0)
|
|
|
|
cv_wait(&vp->v_cv, &vp->v_interlock);
|
|
|
|
dirty = !LIST_EMPTY(&vp->v_dirtyblkhd);
|
|
|
|
mutex_exit(&vp->v_interlock);
|
|
|
|
|
|
|
|
if (dirty) {
|
|
|
|
vprint("vflushbuf: dirty", vp);
|
|
|
|
goto loop;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
1994-05-17 08:21:49 +04:00
|
|
|
/*
|
|
|
|
* Create a vnode for a block device.
|
1997-01-31 22:10:27 +03:00
|
|
|
* Used for root filesystem and swap areas.
|
1994-05-17 08:21:49 +04:00
|
|
|
* Also used for memory file system special devices.
|
|
|
|
*/
|
1996-02-04 05:17:43 +03:00
|
|
|
int
|
2008-01-02 14:48:20 +03:00
|
|
|
bdevvp(dev_t dev, vnode_t **vpp)
|
1994-05-17 08:21:49 +04:00
|
|
|
{
|
1994-06-08 15:28:29 +04:00
|
|
|
|
|
|
|
return (getdevvp(dev, vpp, VBLK));
|
1994-05-17 08:21:49 +04:00
|
|
|
}
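A small usage sketch for bdevvp(): obtaining a vnode for the root device at mount-root time, assuming the usual rootdev global; the wrapper name is made up and error handling is abbreviated.

/* Sketch: get a vnode for the root block device (rootdev is a dev_t global). */
static vnode_t *
example_get_rootvp(void)
{
	vnode_t *vp;

	if (bdevvp(rootdev, &vp) != 0)
		panic("example_get_rootvp: bdevvp failed");
	return vp;
}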
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Create a vnode for a character device.
|
|
|
|
* Used for kernfs and some console handling.
|
|
|
|
*/
|
1996-02-04 05:17:43 +03:00
|
|
|
int
|
2008-01-02 14:48:20 +03:00
|
|
|
cdevvp(dev_t dev, vnode_t **vpp)
|
1994-05-17 08:21:49 +04:00
|
|
|
{
|
1994-06-08 15:28:29 +04:00
|
|
|
|
|
|
|
return (getdevvp(dev, vpp, VCHR));
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
|
|
|
|
2008-07-17 00:06:19 +04:00
|
|
|
/*
|
|
|
|
* Associate a buffer with a vnode. There must already be a hold on
|
|
|
|
* the vnode.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
bgetvp(struct vnode *vp, struct buf *bp)
|
|
|
|
{
|
|
|
|
|
|
|
|
KASSERT(bp->b_vp == NULL);
|
|
|
|
KASSERT(bp->b_objlock == &buffer_lock);
|
|
|
|
KASSERT(mutex_owned(&vp->v_interlock));
|
|
|
|
KASSERT(mutex_owned(&bufcache_lock));
|
|
|
|
KASSERT((bp->b_cflags & BC_BUSY) != 0);
|
|
|
|
KASSERT(!cv_has_waiters(&bp->b_done));
|
|
|
|
|
|
|
|
vholdl(vp);
|
|
|
|
bp->b_vp = vp;
|
|
|
|
if (vp->v_type == VBLK || vp->v_type == VCHR)
|
|
|
|
bp->b_dev = vp->v_rdev;
|
|
|
|
else
|
|
|
|
bp->b_dev = NODEV;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Insert onto list for new vnode.
|
|
|
|
*/
|
|
|
|
bufinsvn(bp, &vp->v_cleanblkhd);
|
|
|
|
bp->b_objlock = &vp->v_interlock;
|
|
|
|
}
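A sketch of what a bgetvp() caller is expected to hold, per the assertions above. The lock ordering shown (bufcache_lock before the vnode interlock) and the function name are assumptions of this sketch, and bp is assumed to be a fresh BC_BUSY buffer whose b_objlock is still &buffer_lock.

/* Sketch only: attach a freshly allocated, busy buffer to a vnode. */
static void
example_attach_buf(vnode_t *vp, struct buf *bp)
{
	mutex_enter(&bufcache_lock);
	mutex_enter(&vp->v_interlock);
	bgetvp(vp, bp);			/* puts bp on vp's clean list */
	mutex_exit(&vp->v_interlock);	/* bp->b_objlock is now this lock */
	mutex_exit(&bufcache_lock);
}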
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Disassociate a buffer from a vnode.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
brelvp(struct buf *bp)
|
|
|
|
{
|
|
|
|
struct vnode *vp = bp->b_vp;
|
|
|
|
|
|
|
|
KASSERT(vp != NULL);
|
|
|
|
KASSERT(bp->b_objlock == &vp->v_interlock);
|
|
|
|
KASSERT(mutex_owned(&vp->v_interlock));
|
|
|
|
KASSERT(mutex_owned(&bufcache_lock));
|
|
|
|
KASSERT((bp->b_cflags & BC_BUSY) != 0);
|
|
|
|
KASSERT(!cv_has_waiters(&bp->b_done));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Delete from old vnode list, if on one.
|
|
|
|
*/
|
|
|
|
if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
|
|
|
|
bufremvn(bp);
|
|
|
|
|
|
|
|
if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_iflag & VI_ONWORKLST) &&
|
|
|
|
LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
|
|
|
|
vp->v_iflag &= ~VI_WRMAPDIRTY;
|
|
|
|
vn_syncer_remove_from_worklist(vp);
|
|
|
|
}
|
|
|
|
|
|
|
|
bp->b_objlock = &buffer_lock;
|
|
|
|
bp->b_vp = NULL;
|
|
|
|
holdrelel(vp);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Reassign a buffer from one vnode list to another.
|
|
|
|
* The list reassignment must be within the same vnode.
|
|
|
|
* Used to assign file specific control information
|
|
|
|
* (indirect blocks) to the list to which they belong.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
reassignbuf(struct buf *bp, struct vnode *vp)
|
|
|
|
{
|
|
|
|
struct buflists *listheadp;
|
|
|
|
int delayx;
|
|
|
|
|
|
|
|
KASSERT(mutex_owned(&bufcache_lock));
|
|
|
|
KASSERT(bp->b_objlock == &vp->v_interlock);
|
|
|
|
KASSERT(mutex_owned(&vp->v_interlock));
|
|
|
|
KASSERT((bp->b_cflags & BC_BUSY) != 0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Delete from old vnode list, if on one.
|
|
|
|
*/
|
|
|
|
if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
|
|
|
|
bufremvn(bp);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If dirty, put on list of dirty buffers;
|
|
|
|
* otherwise insert onto list of clean buffers.
|
|
|
|
*/
|
|
|
|
if ((bp->b_oflags & BO_DELWRI) == 0) {
|
|
|
|
listheadp = &vp->v_cleanblkhd;
|
|
|
|
if (TAILQ_EMPTY(&vp->v_uobj.memq) &&
|
|
|
|
(vp->v_iflag & VI_ONWORKLST) &&
|
|
|
|
LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
|
|
|
|
vp->v_iflag &= ~VI_WRMAPDIRTY;
|
|
|
|
vn_syncer_remove_from_worklist(vp);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
listheadp = &vp->v_dirtyblkhd;
|
|
|
|
if ((vp->v_iflag & VI_ONWORKLST) == 0) {
|
|
|
|
switch (vp->v_type) {
|
|
|
|
case VDIR:
|
|
|
|
delayx = dirdelay;
|
|
|
|
break;
|
|
|
|
case VBLK:
|
|
|
|
if (vp->v_specmountpoint != NULL) {
|
|
|
|
delayx = metadelay;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* fall through */
|
|
|
|
default:
|
|
|
|
delayx = filedelay;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (!vp->v_mount ||
|
|
|
|
(vp->v_mount->mnt_flag & MNT_ASYNC) == 0)
|
|
|
|
vn_syncer_add_to_worklist(vp, delayx);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
bufinsvn(bp, listheadp);
|
|
|
|
}
|
|
|
|
|
1994-05-17 08:21:49 +04:00
|
|
|
/*
|
|
|
|
* Create a vnode for a device.
|
|
|
|
* Used by bdevvp (block device) for root file system etc.,
|
|
|
|
* and by cdevvp (character device) for console and kernfs.
|
|
|
|
*/
|
2006-01-17 00:44:46 +03:00
|
|
|
static int
|
2008-01-02 14:48:20 +03:00
|
|
|
getdevvp(dev_t dev, vnode_t **vpp, enum vtype type)
|
1994-05-17 08:21:49 +04:00
|
|
|
{
|
2008-01-02 14:48:20 +03:00
|
|
|
vnode_t *vp;
|
|
|
|
vnode_t *nvp;
|
1994-05-17 08:21:49 +04:00
|
|
|
int error;
|
|
|
|
|
1998-03-01 05:20:01 +03:00
|
|
|
if (dev == NODEV) {
|
2007-10-11 00:42:20 +04:00
|
|
|
*vpp = NULL;
|
1994-05-17 08:21:49 +04:00
|
|
|
return (0);
|
1998-03-01 05:20:01 +03:00
|
|
|
}
|
1996-02-04 05:17:43 +03:00
|
|
|
error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
|
1994-05-17 08:21:49 +04:00
|
|
|
if (error) {
|
2007-10-11 00:42:20 +04:00
|
|
|
*vpp = NULL;
|
1994-05-17 08:21:49 +04:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
vp = nvp;
|
|
|
|
vp->v_type = type;
|
2008-01-02 14:48:20 +03:00
|
|
|
vp->v_vflag |= VV_MPSAFE;
|
2007-08-06 21:09:11 +04:00
|
|
|
uvm_vnp_setsize(vp, 0);
|
2008-01-24 20:32:52 +03:00
|
|
|
spec_node_init(vp, dev);
|
1994-05-17 08:21:49 +04:00
|
|
|
*vpp = vp;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2008-06-03 18:54:12 +04:00
|
|
|
/*
|
|
|
|
* Try to gain a reference to a vnode, without acquiring its interlock.
|
|
|
|
* The caller must hold a lock that will prevent the vnode from being
|
|
|
|
* recycled or freed.
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
vtryget(vnode_t *vp)
|
|
|
|
{
|
|
|
|
u_int use, next;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the vnode is being freed, don't make life any harder
|
|
|
|
* for vclean() by adding another reference without waiting.
|
|
|
|
* This is not strictly necessary, but we'll do it anyway.
|
|
|
|
*/
|
|
|
|
if (__predict_false((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
for (use = vp->v_usecount;; use = next) {
|
2009-05-16 12:29:53 +04:00
|
|
|
if (use == 0 || __predict_false((use & VC_XLOCK) != 0)) {
|
2008-06-03 18:54:12 +04:00
|
|
|
/* Need interlock held if first reference. */
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
next = atomic_cas_uint(&vp->v_usecount, use, use + 1);
|
|
|
|
if (__predict_true(next == use)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
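A sketch of the calling pattern vtryget() is meant for: try the lock-free reference first and fall back to vget() with the interlock only when that fails. The cache lock parameter is illustrative; it stands for whatever lock currently keeps the vnode from being recycled or freed.

/* Sketch: take a reference on a vnode found while holding a cache lock. */
static int
example_ref_vnode(vnode_t *vp, kmutex_t *cache_lock)
{
	int error;

	if (vtryget(vp)) {
		mutex_exit(cache_lock);		/* fast path, no interlock */
		return 0;
	}
	/* Slow path: first reference, or the vnode is being cleaned. */
	mutex_enter(&vp->v_interlock);
	mutex_exit(cache_lock);
	error = vget(vp, LK_INTERLOCK | LK_NOWAIT);
	return error;
}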
|
|
|
|
|
1994-05-17 08:21:49 +04:00
|
|
|
/*
|
|
|
|
* Grab a particular vnode from the free list, increment its
|
1998-03-04 12:13:48 +03:00
|
|
|
* reference count and lock it. If the vnode lock bit is set the
|
|
|
|
* vnode is being eliminated in vgone. In that case, we can not
|
|
|
|
* grab the vnode, so the process is awakened when the transition is
|
|
|
|
* completed, and an error returned to indicate that the vnode is no
|
|
|
|
* longer usable (possibly having been changed to a new file system type).
|
1994-05-17 08:21:49 +04:00
|
|
|
*/
|
1994-06-08 15:28:29 +04:00
|
|
|
int
|
2008-01-02 14:48:20 +03:00
|
|
|
vget(vnode_t *vp, int flags)
|
1994-05-17 08:21:49 +04:00
|
|
|
{
|
2002-05-24 03:05:25 +04:00
|
|
|
int error;
|
1994-05-17 08:21:49 +04:00
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
KASSERT((vp->v_iflag & VI_MARKER) == 0);
|
|
|
|
|
|
|
|
if ((flags & LK_INTERLOCK) == 0)
|
|
|
|
mutex_enter(&vp->v_interlock);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Before adding a reference, we must remove the vnode
|
|
|
|
* from its freelist.
|
|
|
|
*/
|
|
|
|
if (vp->v_usecount == 0) {
|
|
|
|
vremfree(vp);
|
2008-06-05 16:32:57 +04:00
|
|
|
vp->v_usecount = 1;
|
2008-06-02 20:25:34 +04:00
|
|
|
} else {
|
|
|
|
atomic_inc_uint(&vp->v_usecount);
|
2008-01-02 14:48:20 +03:00
|
|
|
}
|
|
|
|
|
1994-06-08 15:28:29 +04:00
|
|
|
/*
|
|
|
|
* If the vnode is in the process of being cleaned out for
|
|
|
|
* another use, we wait for the cleaning to finish and then
|
2008-01-09 19:15:22 +03:00
|
|
|
* return failure. Cleaning is determined by checking if
|
|
|
|
* the VI_XLOCK or VI_FREEING flags are set.
|
1994-06-08 15:28:29 +04:00
|
|
|
*/
|
2008-01-09 19:15:22 +03:00
|
|
|
if ((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0) {
|
2008-01-10 00:29:38 +03:00
|
|
|
if ((flags & LK_NOWAIT) != 0) {
|
2008-01-28 01:47:31 +03:00
|
|
|
vrelel(vp, 0);
|
2000-11-27 11:39:39 +03:00
|
|
|
return EBUSY;
|
|
|
|
}
|
2008-01-09 19:15:22 +03:00
|
|
|
vwait(vp, VI_XLOCK | VI_FREEING);
|
2008-01-28 01:47:31 +03:00
|
|
|
vrelel(vp, 0);
|
2008-01-10 00:29:38 +03:00
|
|
|
return ENOENT;
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
2009-11-05 11:18:02 +03:00
|
|
|
|
|
|
|
if ((vp->v_iflag & VI_INACTNOW) != 0) {
|
|
|
|
/*
|
|
|
|
 * If it's being deactivated, wait for it to complete.
|
|
|
|
* Make sure to not return a clean vnode.
|
|
|
|
*/
|
|
|
|
if ((flags & LK_NOWAIT) != 0) {
|
|
|
|
vrelel(vp, 0);
|
|
|
|
return EBUSY;
|
|
|
|
}
|
|
|
|
vwait(vp, VI_INACTNOW);
|
|
|
|
if ((vp->v_iflag & VI_CLEAN) != 0) {
|
|
|
|
vrelel(vp, 0);
|
|
|
|
return ENOENT;
|
|
|
|
}
|
|
|
|
}
|
1998-03-01 05:20:01 +03:00
|
|
|
if (flags & LK_TYPE_MASK) {
|
2008-01-10 00:29:38 +03:00
|
|
|
error = vn_lock(vp, flags | LK_INTERLOCK);
|
|
|
|
if (error != 0) {
|
2005-12-23 18:31:40 +03:00
|
|
|
vrele(vp);
|
1999-11-15 21:49:07 +03:00
|
|
|
}
|
2008-01-10 00:29:38 +03:00
|
|
|
return error;
|
1998-03-01 05:20:01 +03:00
|
|
|
}
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_exit(&vp->v_interlock);
|
2008-01-10 00:29:38 +03:00
|
|
|
return 0;
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* vput(), just unlock and vrele()
|
|
|
|
*/
|
|
|
|
void
|
2008-01-02 14:48:20 +03:00
|
|
|
vput(vnode_t *vp)
|
1994-05-17 08:21:49 +04:00
|
|
|
{
|
1994-06-08 15:28:29 +04:00
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
KASSERT((vp->v_iflag & VI_MARKER) == 0);
|
|
|
|
|
|
|
|
VOP_UNLOCK(vp, 0);
|
|
|
|
vrele(vp);
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
|
|
|
|
2008-06-02 20:25:34 +04:00
|
|
|
/*
|
|
|
|
* Try to drop reference on a vnode. Abort if we are releasing the
|
2008-12-14 14:13:36 +03:00
|
|
|
* last reference. Note: this _must_ succeed if not the last reference.
|
2008-06-02 20:25:34 +04:00
|
|
|
*/
|
|
|
|
static inline bool
|
|
|
|
vtryrele(vnode_t *vp)
|
|
|
|
{
|
|
|
|
u_int use, next;
|
|
|
|
|
|
|
|
for (use = vp->v_usecount;; use = next) {
|
2009-05-16 12:29:53 +04:00
|
|
|
if (use == 1) {
|
2008-06-02 20:25:34 +04:00
|
|
|
return false;
|
|
|
|
}
|
2009-05-16 12:29:53 +04:00
|
|
|
KASSERT((use & VC_MASK) > 1);
|
2008-06-02 20:25:34 +04:00
|
|
|
next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
|
|
|
|
if (__predict_true(next == use)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
1994-05-17 08:21:49 +04:00
|
|
|
/*
|
2008-01-02 14:48:20 +03:00
|
|
|
* Vnode release. If reference count drops to zero, call inactive
|
|
|
|
* routine and either return to freelist or free to the pool.
|
1994-05-17 08:21:49 +04:00
|
|
|
*/
|
2008-01-02 14:48:20 +03:00
|
|
|
void
|
2008-01-28 01:47:31 +03:00
|
|
|
vrelel(vnode_t *vp, int flags)
|
1994-05-17 08:21:49 +04:00
|
|
|
{
|
2008-01-02 14:48:20 +03:00
|
|
|
bool recycle, defer;
|
|
|
|
int error;
|
1994-05-17 08:21:49 +04:00
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
KASSERT(mutex_owned(&vp->v_interlock));
|
|
|
|
KASSERT((vp->v_iflag & VI_MARKER) == 0);
|
2008-01-17 16:06:04 +03:00
|
|
|
KASSERT(vp->v_freelisthd == NULL);
|
2008-01-02 14:48:20 +03:00
|
|
|
|
2008-12-14 14:13:36 +03:00
|
|
|
if (__predict_false(vp->v_op == dead_vnodeop_p &&
|
|
|
|
(vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) {
|
2008-01-02 14:48:20 +03:00
|
|
|
vpanic(vp, "dead but not clean");
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If not the last reference, just drop the reference count
|
|
|
|
* and unlock.
|
|
|
|
*/
|
2008-06-02 20:25:34 +04:00
|
|
|
if (vtryrele(vp)) {
|
2008-01-02 14:48:20 +03:00
|
|
|
vp->v_iflag |= VI_INACTREDO;
|
|
|
|
mutex_exit(&vp->v_interlock);
|
1994-05-17 08:21:49 +04:00
|
|
|
return;
|
1998-03-01 05:20:01 +03:00
|
|
|
}
|
2008-01-02 14:48:20 +03:00
|
|
|
if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
|
2008-12-14 14:13:36 +03:00
|
|
|
vpanic(vp, "vrelel: bad ref count");
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
2008-01-02 14:48:20 +03:00
|
|
|
|
2008-12-14 14:13:36 +03:00
|
|
|
KASSERT((vp->v_iflag & VI_XLOCK) == 0);
|
|
|
|
|
1994-06-08 15:28:29 +04:00
|
|
|
/*
|
2008-01-02 14:48:20 +03:00
|
|
|
* If not clean, deactivate the vnode, but preserve
|
|
|
|
* our reference across the call to VOP_INACTIVE().
|
1994-06-08 15:28:29 +04:00
|
|
|
*/
|
2008-01-02 14:48:20 +03:00
|
|
|
retry:
|
|
|
|
if ((vp->v_iflag & VI_CLEAN) == 0) {
|
|
|
|
recycle = false;
|
2008-06-02 20:25:34 +04:00
|
|
|
vp->v_iflag |= VI_INACTNOW;
|
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
/*
|
|
|
|
* XXX This ugly block can be largely eliminated if
|
|
|
|
* locking is pushed down into the file systems.
|
2010-02-12 02:16:35 +03:00
|
|
|
*
|
|
|
|
* Defer vnode release to vrele_thread if caller
|
|
|
|
* requests it explicitly.
|
2008-01-02 14:48:20 +03:00
|
|
|
*/
|
2010-02-12 02:16:35 +03:00
|
|
|
if ((curlwp == uvm.pagedaemon_lwp) ||
|
|
|
|
(flags & VRELEL_ASYNC_RELE) != 0) {
|
2008-01-02 14:48:20 +03:00
|
|
|
/* The pagedaemon can't wait around; defer. */
|
|
|
|
defer = true;
|
|
|
|
} else if (curlwp == vrele_lwp) {
|
2009-11-28 13:10:17 +03:00
|
|
|
/*
|
|
|
|
* We have to try harder. But we can't sleep
|
|
|
|
* with VI_INACTNOW as vget() may be waiting on it.
|
|
|
|
*/
|
|
|
|
vp->v_iflag &= ~(VI_INACTREDO|VI_INACTNOW);
|
|
|
|
cv_broadcast(&vp->v_cv);
|
2008-01-02 14:48:20 +03:00
|
|
|
error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
|
|
|
|
LK_RETRY);
|
|
|
|
if (error != 0) {
|
|
|
|
/* XXX */
|
|
|
|
vpanic(vp, "vrele: unable to lock %p");
|
|
|
|
}
|
2009-11-28 13:10:17 +03:00
|
|
|
mutex_enter(&vp->v_interlock);
|
|
|
|
/*
|
|
|
|
* if we did get another reference while
|
|
|
|
* sleeping, don't try to inactivate it yet.
|
|
|
|
*/
|
|
|
|
if (__predict_false(vtryrele(vp))) {
|
|
|
|
VOP_UNLOCK(vp, 0);
|
|
|
|
mutex_exit(&vp->v_interlock);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
vp->v_iflag |= VI_INACTNOW;
|
|
|
|
mutex_exit(&vp->v_interlock);
|
2008-01-02 14:48:20 +03:00
|
|
|
defer = false;
|
|
|
|
} else if ((vp->v_iflag & VI_LAYER) != 0) {
|
|
|
|
/*
|
|
|
|
* Acquiring the stack's lock in vclean() even
|
|
|
|
* for an honest vput/vrele is dangerous because
|
|
|
|
* our caller may hold other vnode locks; defer.
|
|
|
|
*/
|
|
|
|
defer = true;
|
|
|
|
} else {
|
|
|
|
/* If we can't acquire the lock, then defer. */
|
|
|
|
vp->v_iflag &= ~VI_INACTREDO;
|
|
|
|
error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
|
|
|
|
LK_NOWAIT);
|
|
|
|
if (error != 0) {
|
|
|
|
defer = true;
|
|
|
|
mutex_enter(&vp->v_interlock);
|
|
|
|
} else {
|
|
|
|
defer = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (defer) {
|
|
|
|
/*
|
|
|
|
* Defer reclaim to the kthread; it's not safe to
|
|
|
|
* clean it here. We donate it our last reference.
|
|
|
|
*/
|
|
|
|
KASSERT(mutex_owned(&vp->v_interlock));
|
|
|
|
KASSERT((vp->v_iflag & VI_INACTPEND) == 0);
|
2008-06-02 20:25:34 +04:00
|
|
|
vp->v_iflag &= ~VI_INACTNOW;
|
2008-01-02 14:48:20 +03:00
|
|
|
vp->v_iflag |= VI_INACTPEND;
|
|
|
|
mutex_enter(&vrele_lock);
|
|
|
|
TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
|
|
|
|
if (++vrele_pending > (desiredvnodes >> 8))
|
|
|
|
cv_signal(&vrele_cv);
|
|
|
|
mutex_exit(&vrele_lock);
|
2009-11-05 11:18:02 +03:00
|
|
|
cv_broadcast(&vp->v_cv);
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_exit(&vp->v_interlock);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2008-01-24 20:32:52 +03:00
|
|
|
#ifdef DIAGNOSTIC
|
2008-01-25 00:04:12 +03:00
|
|
|
if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
|
|
|
|
vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
|
2008-01-24 20:32:52 +03:00
|
|
|
vprint("vrelel: missing VOP_CLOSE()", vp);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
/*
|
2008-01-09 19:15:22 +03:00
|
|
|
* The vnode can gain another reference while being
|
|
|
|
* deactivated. If VOP_INACTIVE() indicates that
|
|
|
|
* the described file has been deleted, then recycle
|
|
|
|
* the vnode irrespective of additional references.
|
|
|
|
* Another thread may be waiting to re-use the on-disk
|
|
|
|
* inode.
|
|
|
|
*
|
|
|
|
* Note that VOP_INACTIVE() will drop the vnode lock.
|
2008-01-02 14:48:20 +03:00
|
|
|
*/
|
|
|
|
VOP_INACTIVE(vp, &recycle);
|
|
|
|
mutex_enter(&vp->v_interlock);
|
2008-06-02 20:25:34 +04:00
|
|
|
vp->v_iflag &= ~VI_INACTNOW;
|
2009-11-05 11:18:02 +03:00
|
|
|
cv_broadcast(&vp->v_cv);
|
2008-01-09 19:15:22 +03:00
|
|
|
if (!recycle) {
|
2008-06-02 20:25:34 +04:00
|
|
|
if (vtryrele(vp)) {
|
2008-01-09 19:15:22 +03:00
|
|
|
mutex_exit(&vp->v_interlock);
|
|
|
|
return;
|
|
|
|
}
|
2008-01-02 14:48:20 +03:00
|
|
|
|
2008-01-09 19:15:22 +03:00
|
|
|
/*
|
|
|
|
* If we grew another reference while
|
|
|
|
* VOP_INACTIVE() was underway, retry.
|
|
|
|
*/
|
|
|
|
if ((vp->v_iflag & VI_INACTREDO) != 0) {
|
|
|
|
goto retry;
|
|
|
|
}
|
2008-01-02 14:48:20 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Take care of space accounting. */
|
|
|
|
if (vp->v_iflag & VI_EXECMAP) {
|
|
|
|
atomic_add_int(&uvmexp.execpages,
|
|
|
|
-vp->v_uobj.uo_npages);
|
|
|
|
atomic_add_int(&uvmexp.filepages,
|
|
|
|
vp->v_uobj.uo_npages);
|
|
|
|
}
|
2008-06-02 20:25:34 +04:00
|
|
|
vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP);
|
2008-01-02 14:48:20 +03:00
|
|
|
vp->v_vflag &= ~VV_MAPPED;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Recycle the vnode if the file is now unused (unlinked),
|
|
|
|
* otherwise just free it.
|
|
|
|
*/
|
|
|
|
if (recycle) {
|
|
|
|
vclean(vp, DOCLOSE);
|
|
|
|
}
|
|
|
|
KASSERT(vp->v_usecount > 0);
|
2007-08-09 12:51:21 +04:00
|
|
|
}
|
2008-01-02 14:48:20 +03:00
|
|
|
|
2008-06-02 20:25:34 +04:00
|
|
|
if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
|
2008-01-02 14:48:20 +03:00
|
|
|
/* Gained another reference while being reclaimed. */
|
|
|
|
mutex_exit(&vp->v_interlock);
|
|
|
|
return;
|
2001-03-09 04:02:10 +03:00
|
|
|
}
|
2007-08-09 12:51:21 +04:00
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
if ((vp->v_iflag & VI_CLEAN) != 0) {
|
|
|
|
/*
|
|
|
|
* It's clean so destroy it. It isn't referenced
|
|
|
|
* anywhere since it has been reclaimed.
|
|
|
|
*/
|
|
|
|
KASSERT(vp->v_holdcnt == 0);
|
|
|
|
KASSERT(vp->v_writecount == 0);
|
|
|
|
mutex_exit(&vp->v_interlock);
|
|
|
|
insmntque(vp, NULL);
|
2008-01-24 20:32:52 +03:00
|
|
|
if (vp->v_type == VBLK || vp->v_type == VCHR) {
|
|
|
|
spec_node_destroy(vp);
|
|
|
|
}
|
2008-01-03 04:26:28 +03:00
|
|
|
vnfree(vp);
|
2007-08-09 12:51:21 +04:00
|
|
|
} else {
|
2008-01-02 14:48:20 +03:00
|
|
|
/*
|
|
|
|
* Otherwise, put it back onto the freelist. It
|
|
|
|
* can't be destroyed while still associated with
|
|
|
|
* a file system.
|
|
|
|
*/
|
|
|
|
mutex_enter(&vnode_free_list_lock);
|
|
|
|
if (vp->v_holdcnt > 0) {
|
|
|
|
vp->v_freelisthd = &vnode_hold_list;
|
|
|
|
} else {
|
|
|
|
vp->v_freelisthd = &vnode_free_list;
|
|
|
|
}
|
|
|
|
TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
|
|
|
|
mutex_exit(&vnode_free_list_lock);
|
|
|
|
mutex_exit(&vp->v_interlock);
|
2007-08-09 12:51:21 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2008-01-02 14:48:20 +03:00
|
|
|
vrele(vnode_t *vp)
|
2007-08-09 12:51:21 +04:00
|
|
|
{
|
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
KASSERT((vp->v_iflag & VI_MARKER) == 0);
|
|
|
|
|
2008-06-02 20:25:34 +04:00
|
|
|
if ((vp->v_iflag & VI_INACTNOW) == 0 && vtryrele(vp)) {
|
|
|
|
return;
|
|
|
|
}
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_enter(&vp->v_interlock);
|
2008-01-28 01:47:31 +03:00
|
|
|
vrelel(vp, 0);
|
2007-08-09 12:51:21 +04:00
|
|
|
}
|
|
|
|
|
2010-02-12 02:16:35 +03:00
|
|
|
/*
|
|
|
|
 * Asynchronous vnode release: the vnode is released in a different context.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
vrele_async(vnode_t *vp)
|
|
|
|
{
|
|
|
|
|
|
|
|
KASSERT((vp->v_iflag & VI_MARKER) == 0);
|
|
|
|
|
|
|
|
if ((vp->v_iflag & VI_INACTNOW) == 0 && vtryrele(vp)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
mutex_enter(&vp->v_interlock);
|
|
|
|
vrelel(vp, VRELEL_ASYNC_RELE);
|
|
|
|
}
|
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
static void
|
|
|
|
vrele_thread(void *cookie)
|
2007-08-09 12:51:21 +04:00
|
|
|
{
|
2008-01-02 14:48:20 +03:00
|
|
|
vnode_t *vp;
|
2007-08-09 12:51:21 +04:00
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
for (;;) {
|
|
|
|
mutex_enter(&vrele_lock);
|
|
|
|
while (TAILQ_EMPTY(&vrele_list)) {
|
2008-06-23 15:23:39 +04:00
|
|
|
vrele_gen++;
|
|
|
|
cv_broadcast(&vrele_cv);
|
2008-01-02 14:48:20 +03:00
|
|
|
cv_timedwait(&vrele_cv, &vrele_lock, hz);
|
|
|
|
}
|
|
|
|
vp = TAILQ_FIRST(&vrele_list);
|
|
|
|
TAILQ_REMOVE(&vrele_list, vp, v_freelist);
|
|
|
|
vrele_pending--;
|
|
|
|
mutex_exit(&vrele_lock);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If not the last reference, then ignore the vnode
|
|
|
|
* and look for more work.
|
|
|
|
*/
|
|
|
|
mutex_enter(&vp->v_interlock);
|
|
|
|
KASSERT((vp->v_iflag & VI_INACTPEND) != 0);
|
|
|
|
vp->v_iflag &= ~VI_INACTPEND;
|
2008-01-28 01:47:31 +03:00
|
|
|
vrelel(vp, 0);
|
2008-01-02 14:48:20 +03:00
|
|
|
}
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Page or buffer structure gets a reference.
|
2005-12-27 07:06:45 +03:00
|
|
|
* Called with v_interlock held.
|
1994-05-17 08:21:49 +04:00
|
|
|
*/
|
1994-06-08 15:28:29 +04:00
|
|
|
void
|
2008-01-02 14:48:20 +03:00
|
|
|
vholdl(vnode_t *vp)
|
1994-05-17 08:21:49 +04:00
|
|
|
{
|
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
KASSERT(mutex_owned(&vp->v_interlock));
|
|
|
|
KASSERT((vp->v_iflag & VI_MARKER) == 0);
|
|
|
|
|
|
|
|
if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
|
|
|
|
mutex_enter(&vnode_free_list_lock);
|
|
|
|
KASSERT(vp->v_freelisthd == &vnode_free_list);
|
|
|
|
TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
|
|
|
|
vp->v_freelisthd = &vnode_hold_list;
|
|
|
|
TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
|
|
|
|
mutex_exit(&vnode_free_list_lock);
|
1999-11-15 21:49:07 +03:00
|
|
|
}
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Page or buffer structure frees a reference.
|
2005-12-27 07:06:45 +03:00
|
|
|
* Called with v_interlock held.
|
1994-05-17 08:21:49 +04:00
|
|
|
*/
|
1994-06-08 15:28:29 +04:00
|
|
|
void
|
2008-01-02 14:48:20 +03:00
|
|
|
holdrelel(vnode_t *vp)
|
1994-05-17 08:21:49 +04:00
|
|
|
{
|
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
KASSERT(mutex_owned(&vp->v_interlock));
|
|
|
|
KASSERT((vp->v_iflag & VI_MARKER) == 0);
|
2000-11-27 11:39:39 +03:00
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
if (vp->v_holdcnt <= 0) {
|
|
|
|
vpanic(vp, "holdrelel: holdcnt vp %p");
|
|
|
|
}
|
2000-11-27 11:39:39 +03:00
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
vp->v_holdcnt--;
|
|
|
|
if (vp->v_holdcnt == 0 && vp->v_usecount == 0) {
|
|
|
|
mutex_enter(&vnode_free_list_lock);
|
|
|
|
KASSERT(vp->v_freelisthd == &vnode_hold_list);
|
|
|
|
TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
|
|
|
|
vp->v_freelisthd = &vnode_free_list;
|
|
|
|
TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
|
|
|
|
mutex_exit(&vnode_free_list_lock);
|
1999-11-15 21:49:07 +03:00
|
|
|
}
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
1998-03-01 12:51:29 +03:00
|
|
|
|
|
|
|
/*
|
2008-01-02 14:48:20 +03:00
|
|
|
* Vnode reference, where a reference is already held by some other
|
|
|
|
* object (for example, a file structure).
|
1998-03-01 12:51:29 +03:00
|
|
|
*/
|
|
|
|
void
|
2008-01-02 14:48:20 +03:00
|
|
|
vref(vnode_t *vp)
|
1998-03-01 12:51:29 +03:00
|
|
|
{
|
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
KASSERT((vp->v_iflag & VI_MARKER) == 0);
|
2008-06-02 20:25:34 +04:00
|
|
|
KASSERT(vp->v_usecount != 0);
|
2008-01-02 14:48:20 +03:00
|
|
|
|
2008-06-02 20:25:34 +04:00
|
|
|
atomic_inc_uint(&vp->v_usecount);
|
1998-03-01 12:51:29 +03:00
|
|
|
}
|
1994-05-17 08:21:49 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Remove any vnodes in the vnode table belonging to mount point mp.
|
|
|
|
*
|
2002-12-29 09:47:57 +03:00
|
|
|
* If FORCECLOSE is not specified, there should not be any active ones,
|
1994-05-17 08:21:49 +04:00
|
|
|
* return error if any are found (nb: this is a user error, not a
|
2002-12-29 09:47:57 +03:00
|
|
|
* system error). If FORCECLOSE is specified, detach any active vnodes
|
1994-05-17 08:21:49 +04:00
|
|
|
* that are found.
|
2002-12-29 09:47:57 +03:00
|
|
|
*
|
|
|
|
* If WRITECLOSE is set, only flush out regular file vnodes open for
|
|
|
|
* writing.
|
|
|
|
*
|
|
|
|
* SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
|
1994-05-17 08:21:49 +04:00
|
|
|
*/
|
1994-06-08 15:28:29 +04:00
|
|
|
#ifdef DEBUG
|
|
|
|
int busyprt = 0; /* print out busy vnodes */
|
|
|
|
struct ctldebug debug1 = { "busyprt", &busyprt };
|
|
|
|
#endif
|
1994-05-17 08:21:49 +04:00
|
|
|
|
2008-02-15 16:06:02 +03:00
|
|
|
static vnode_t *
|
|
|
|
vflushnext(vnode_t *mvp, int *when)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (hardclock_ticks > *when) {
|
|
|
|
mutex_exit(&mntvnode_lock);
|
|
|
|
yield();
|
|
|
|
mutex_enter(&mntvnode_lock);
|
|
|
|
*when = hardclock_ticks + hz / 10;
|
|
|
|
}
|
|
|
|
|
|
|
|
return vunmark(mvp);
|
|
|
|
}
|
|
|
|
|
1996-02-04 05:17:43 +03:00
|
|
|
int
|
2008-01-02 14:48:20 +03:00
|
|
|
vflush(struct mount *mp, vnode_t *skipvp, int flags)
|
1994-05-17 08:21:49 +04:00
|
|
|
{
|
2008-01-02 14:48:20 +03:00
|
|
|
vnode_t *vp, *mvp;
|
2008-06-23 15:23:39 +04:00
|
|
|
int busy = 0, when = 0, gen;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* First, flush out any vnode references from vrele_list.
|
|
|
|
*/
|
|
|
|
mutex_enter(&vrele_lock);
|
|
|
|
gen = vrele_gen;
|
2008-07-07 18:15:41 +04:00
|
|
|
while (vrele_pending && gen == vrele_gen) {
|
2008-06-23 15:23:39 +04:00
|
|
|
cv_broadcast(&vrele_cv);
|
|
|
|
cv_wait(&vrele_cv, &vrele_lock);
|
2008-07-07 18:15:41 +04:00
|
|
|
}
|
2008-06-23 15:23:39 +04:00
|
|
|
mutex_exit(&vrele_lock);
|
1994-05-17 08:21:49 +04:00
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
/* Allocate a marker vnode. */
|
2008-01-03 04:26:28 +03:00
|
|
|
if ((mvp = vnalloc(mp)) == NULL)
|
2008-01-02 14:48:20 +03:00
|
|
|
return (ENOMEM);
|
|
|
|
|
2006-10-21 00:29:52 +04:00
|
|
|
/*
|
|
|
|
 * NOTE: not using TAILQ_FOREACH here since vgone() and vclean()
|
|
|
|
 * are called in this loop.
|
|
|
|
*/
|
2008-06-23 15:23:39 +04:00
|
|
|
mutex_enter(&mntvnode_lock);
|
2008-02-15 16:06:02 +03:00
|
|
|
for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL;
|
|
|
|
vp = vflushnext(mvp, &when)) {
|
2008-01-02 14:48:20 +03:00
|
|
|
vmark(mvp, vp);
|
|
|
|
if (vp->v_mount != mp || vismarker(vp))
|
|
|
|
continue;
|
1994-05-17 08:21:49 +04:00
|
|
|
/*
|
|
|
|
* Skip over a selected vnode.
|
|
|
|
*/
|
|
|
|
if (vp == skipvp)
|
|
|
|
continue;
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_enter(&vp->v_interlock);
|
2008-01-17 16:06:04 +03:00
|
|
|
/*
|
|
|
|
* Ignore clean but still referenced vnodes.
|
|
|
|
*/
|
|
|
|
if ((vp->v_iflag & VI_CLEAN) != 0) {
|
|
|
|
mutex_exit(&vp->v_interlock);
|
|
|
|
continue;
|
|
|
|
}
|
1994-05-17 08:21:49 +04:00
|
|
|
/*
|
2008-01-02 14:48:20 +03:00
|
|
|
 * Skip over vnodes marked VV_SYSTEM.
|
1994-05-17 08:21:49 +04:00
|
|
|
*/
|
2007-10-11 00:42:20 +04:00
|
|
|
if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) {
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_exit(&vp->v_interlock);
|
1994-05-17 08:21:49 +04:00
|
|
|
continue;
|
1998-03-01 05:20:01 +03:00
|
|
|
}
|
1994-06-08 15:28:29 +04:00
|
|
|
/*
|
|
|
|
* If WRITECLOSE is set, only flush out regular file
|
|
|
|
* vnodes open for writing.
|
|
|
|
*/
|
|
|
|
if ((flags & WRITECLOSE) &&
|
1998-08-17 21:29:20 +04:00
|
|
|
(vp->v_writecount == 0 || vp->v_type != VREG)) {
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_exit(&vp->v_interlock);
|
1994-06-08 15:28:29 +04:00
|
|
|
continue;
|
1998-08-17 21:29:20 +04:00
|
|
|
}
|
1994-05-17 08:21:49 +04:00
|
|
|
/*
|
|
|
|
* With v_usecount == 0, all we need to do is clear
|
|
|
|
* out the vnode data structures and we are done.
|
|
|
|
*/
|
|
|
|
if (vp->v_usecount == 0) {
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_exit(&mntvnode_lock);
|
2008-06-03 02:56:09 +04:00
|
|
|
vremfree(vp);
|
2008-06-05 16:32:57 +04:00
|
|
|
vp->v_usecount = 1;
|
2008-01-02 14:48:20 +03:00
|
|
|
vclean(vp, DOCLOSE);
|
2008-01-28 01:47:31 +03:00
|
|
|
vrelel(vp, 0);
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_enter(&mntvnode_lock);
|
1994-05-17 08:21:49 +04:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
/*
|
1994-06-08 15:28:29 +04:00
|
|
|
* If FORCECLOSE is set, forcibly close the vnode.
|
1994-05-17 08:21:49 +04:00
|
|
|
* For block or character devices, revert to an
|
2008-01-24 20:32:52 +03:00
|
|
|
* anonymous device. For all other files, just
|
|
|
|
* kill them.
|
1994-05-17 08:21:49 +04:00
|
|
|
*/
|
|
|
|
if (flags & FORCECLOSE) {
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_exit(&mntvnode_lock);
|
2008-06-02 20:25:34 +04:00
|
|
|
atomic_inc_uint(&vp->v_usecount);
|
1994-05-17 08:21:49 +04:00
|
|
|
if (vp->v_type != VBLK && vp->v_type != VCHR) {
|
2008-01-02 14:48:20 +03:00
|
|
|
vclean(vp, DOCLOSE);
|
2008-01-28 01:47:31 +03:00
|
|
|
vrelel(vp, 0);
|
1994-05-17 08:21:49 +04:00
|
|
|
} else {
|
2008-01-02 14:48:20 +03:00
|
|
|
vclean(vp, 0);
|
2008-01-24 20:32:52 +03:00
|
|
|
vp->v_op = spec_vnodeop_p; /* XXXSMP */
|
2008-01-24 21:31:52 +03:00
|
|
|
mutex_exit(&vp->v_interlock);
|
|
|
|
/*
|
|
|
|
* The vnode isn't clean, but still resides
|
|
|
|
* on the mount list. Remove it. XXX This
|
|
|
|
* is a bit dodgy.
|
|
|
|
*/
|
|
|
|
insmntque(vp, NULL);
|
|
|
|
vrele(vp);
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_enter(&mntvnode_lock);
|
1994-05-17 08:21:49 +04:00
|
|
|
continue;
|
|
|
|
}
|
1994-06-08 15:28:29 +04:00
|
|
|
#ifdef DEBUG
|
1994-05-17 08:21:49 +04:00
|
|
|
if (busyprt)
|
|
|
|
vprint("vflush: busy vnode", vp);
|
1994-06-08 15:28:29 +04:00
|
|
|
#endif
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_exit(&vp->v_interlock);
|
1994-05-17 08:21:49 +04:00
|
|
|
busy++;
|
|
|
|
}
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_exit(&mntvnode_lock);
|
2008-01-03 04:26:28 +03:00
|
|
|
vnfree(mvp);
|
1994-05-17 08:21:49 +04:00
|
|
|
if (busy)
|
|
|
|
return (EBUSY);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Disassociate the underlying file system from a vnode.
|
2008-01-02 14:48:20 +03:00
|
|
|
*
|
|
|
|
* Must be called with the interlock held, and will return with it held.
|
1994-05-17 08:21:49 +04:00
|
|
|
*/
|
2008-01-02 14:48:20 +03:00
|
|
|
void
|
|
|
|
vclean(vnode_t *vp, int flags)
|
1994-05-17 08:21:49 +04:00
|
|
|
{
|
2008-01-02 14:48:20 +03:00
|
|
|
lwp_t *l = curlwp;
|
|
|
|
bool recycle, active;
|
2008-01-24 20:32:52 +03:00
|
|
|
int error;
|
1994-05-17 08:21:49 +04:00
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
KASSERT(mutex_owned(&vp->v_interlock));
|
|
|
|
KASSERT((vp->v_iflag & VI_MARKER) == 0);
|
|
|
|
KASSERT(vp->v_usecount != 0);
|
2001-12-06 07:34:33 +03:00
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
/* If cleaning is already in progress wait until done and return. */
|
|
|
|
if (vp->v_iflag & VI_XLOCK) {
|
|
|
|
vwait(vp, VI_XLOCK);
|
|
|
|
return;
|
|
|
|
}
|
2001-12-06 07:34:33 +03:00
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
/* If already clean, nothing to do. */
|
|
|
|
if ((vp->v_iflag & VI_CLEAN) != 0) {
|
|
|
|
return;
|
1999-10-02 02:03:17 +04:00
|
|
|
}
|
1998-05-18 18:59:49 +04:00
|
|
|
|
1994-05-17 08:21:49 +04:00
|
|
|
/*
|
2008-01-02 14:48:20 +03:00
|
|
|
* Prevent the vnode from being recycled or brought into use
|
|
|
|
* while we clean it out.
|
1994-05-17 08:21:49 +04:00
|
|
|
*/
|
2007-10-11 00:42:20 +04:00
|
|
|
vp->v_iflag |= VI_XLOCK;
|
|
|
|
if (vp->v_iflag & VI_EXECMAP) {
|
2007-11-29 21:07:11 +03:00
|
|
|
atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
|
|
|
|
atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
|
2001-03-09 04:02:10 +03:00
|
|
|
}
|
2007-10-11 00:42:20 +04:00
|
|
|
vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
|
2008-01-02 14:48:20 +03:00
|
|
|
active = (vp->v_usecount > 1);
|
2000-11-27 11:39:39 +03:00
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
/* XXXAD should not lock vnode under layer */
|
|
|
|
VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK);
|
1998-03-01 05:20:01 +03:00
|
|
|
|
1999-02-09 04:57:05 +03:00
|
|
|
/*
|
2000-11-27 11:39:39 +03:00
|
|
|
* Clean out any cached data associated with the vnode.
|
2008-01-24 20:32:52 +03:00
|
|
|
* If purging an active vnode, it must be closed and
|
|
|
|
* deactivated before being reclaimed. Note that the
|
|
|
|
* VOP_INACTIVE will unlock the vnode.
|
1994-05-17 08:21:49 +04:00
|
|
|
*/
|
2001-12-06 07:34:33 +03:00
|
|
|
if (flags & DOCLOSE) {
|
2005-12-11 15:16:03 +03:00
|
|
|
error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
|
2008-07-31 09:38:04 +04:00
|
|
|
if (error != 0) {
|
|
|
|
/* XXX, fix vn_start_write's grab of mp and use that. */
|
|
|
|
|
|
|
|
if (wapbl_vphaswapbl(vp))
|
|
|
|
WAPBL_DISCARD(wapbl_vptomp(vp));
|
2005-12-11 15:16:03 +03:00
|
|
|
error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
|
2008-07-31 09:38:04 +04:00
|
|
|
}
|
2003-12-01 21:53:10 +03:00
|
|
|
KASSERT(error == 0);
|
2007-10-11 00:42:20 +04:00
|
|
|
KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
|
2008-01-24 20:32:52 +03:00
|
|
|
if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
|
|
|
|
spec_node_revoke(vp);
|
There is an annoying deadlock that goes like this:
* Process A is closing one file descriptor belonging to a device. In doing so,
ffs_update() is called and starts writing a block synchronously. (Note: This
leaves the vnode locked. It also has other instances -- stdin, et al -- of
the same device open, so v_usecount is definitely non-zero.)
* Process B does a revoke() on the device. The revoke() has to wait for the
vnode to be unlocked because ffs_update() is still in progress.
* Process C tries to open() the device. It wedges in checkalias() repeatedly
calling vget() because it returns EBUSY immediately.
To fix, this:
* checkalias() now uses LK_SLEEPFAIL rather than LK_NOWAIT. Therefore it will
wait for the vnode to become unlocked, but it will recheck that it is on the
hash list, in case it was in the process of being revoke()d or was revoke()d
again before we were woken up.
* Since we're relying on the vnode lock to tell us that the vnode hasn't been
removed from the hash list *anyway*, I have moved the code to remove it into
the DOCLOSE section of vclean(), inside the vnode lock.
In the example at hand, process A was sh(1), process B was a child of init(8),
and process C was syslogd(8).
2004-08-14 02:48:06 +04:00
|
|
|
}
|
2001-12-06 07:34:33 +03:00
|
|
|
}
|
1994-05-17 08:21:49 +04:00
|
|
|
if (active) {
|
2008-01-02 14:48:20 +03:00
|
|
|
VOP_INACTIVE(vp, &recycle);
|
1998-03-01 05:20:01 +03:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Any other processes trying to obtain this lock must first
|
2007-10-11 00:42:20 +04:00
|
|
|
* wait for VI_XLOCK to clear, then call the new lock operation.
|
1998-03-01 05:20:01 +03:00
|
|
|
*/
|
|
|
|
VOP_UNLOCK(vp, 0);
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
2000-11-27 11:39:39 +03:00
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
/* Disassociate the underlying file system from the vnode. */
|
|
|
|
if (VOP_RECLAIM(vp)) {
|
|
|
|
vpanic(vp, "vclean: cannot reclaim");
|
1998-05-18 18:59:49 +04:00
|
|
|
}
|
1994-06-08 15:28:29 +04:00
|
|
|
|
2002-02-05 10:50:58 +03:00
|
|
|
KASSERT(vp->v_uobj.uo_npages == 0);
|
2005-11-30 01:52:02 +03:00
|
|
|
if (vp->v_type == VREG && vp->v_ractx != NULL) {
|
|
|
|
uvm_ra_freectx(vp->v_ractx);
|
|
|
|
vp->v_ractx = NULL;
|
|
|
|
}
|
1998-03-01 05:20:01 +03:00
|
|
|
cache_purge(vp);
|
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
/* Done with purge, notify sleepers of the grim news. */
|
2008-12-14 14:15:59 +03:00
|
|
|
mutex_enter(&vp->v_interlock);
|
1994-06-08 15:28:29 +04:00
|
|
|
vp->v_op = dead_vnodeop_p;
|
|
|
|
vp->v_tag = VT_NON;
|
2008-01-02 14:48:20 +03:00
|
|
|
vp->v_vnlock = &vp->v_lock;
|
2008-02-05 17:19:52 +03:00
|
|
|
KNOTE(&vp->v_klist, NOTE_REVOKE);
|
2008-01-09 19:15:22 +03:00
|
|
|
vp->v_iflag &= ~(VI_XLOCK | VI_FREEING);
|
2007-10-23 23:40:53 +04:00
|
|
|
vp->v_vflag &= ~VV_LOCKSWORK;
|
2008-01-24 20:57:14 +03:00
|
|
|
if ((flags & DOCLOSE) != 0) {
|
2008-01-24 20:32:52 +03:00
|
|
|
vp->v_iflag |= VI_CLEAN;
|
|
|
|
}
|
2008-01-02 14:48:20 +03:00
|
|
|
cv_broadcast(&vp->v_cv);
|
|
|
|
|
|
|
|
KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
1998-03-01 05:20:01 +03:00
|
|
|
* Recycle an unused vnode to the front of the free list.
|
|
|
|
* Release the passed interlock if the vnode will be recycled.
|
1994-05-17 08:21:49 +04:00
|
|
|
*/
|
1998-03-01 05:20:01 +03:00
|
|
|
int
|
2008-01-02 14:48:20 +03:00
|
|
|
vrecycle(vnode_t *vp, kmutex_t *inter_lkp, struct lwp *l)
|
2004-03-23 16:22:32 +03:00
|
|
|
{
|
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
KASSERT((vp->v_iflag & VI_MARKER) == 0);
|
|
|
|
|
|
|
|
mutex_enter(&vp->v_interlock);
|
|
|
|
if (vp->v_usecount != 0) {
|
|
|
|
mutex_exit(&vp->v_interlock);
|
|
|
|
return (0);
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
2008-01-02 14:48:20 +03:00
|
|
|
if (inter_lkp)
|
|
|
|
mutex_exit(inter_lkp);
|
2008-06-03 02:56:09 +04:00
|
|
|
vremfree(vp);
|
2008-06-05 16:32:57 +04:00
|
|
|
vp->v_usecount = 1;
|
2008-01-02 14:48:20 +03:00
|
|
|
vclean(vp, DOCLOSE);
|
2008-01-28 01:47:31 +03:00
|
|
|
vrelel(vp, 0);
|
2008-01-02 14:48:20 +03:00
|
|
|
return (1);
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2008-01-02 14:48:20 +03:00
|
|
|
* Eliminate all activity associated with a vnode in preparation for
|
|
|
|
* reuse. Drops a reference from the vnode.
|
1994-05-17 08:21:49 +04:00
|
|
|
*/
|
|
|
|
void
|
2008-01-02 14:48:20 +03:00
|
|
|
vgone(vnode_t *vp)
|
1998-03-01 05:20:01 +03:00
|
|
|
{
|
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_enter(&vp->v_interlock);
|
|
|
|
vclean(vp, DOCLOSE);
|
2008-01-28 01:47:31 +03:00
|
|
|
vrelel(vp, 0);
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Lookup a vnode by device number.
|
|
|
|
*/
|
1996-02-04 05:17:43 +03:00
|
|
|
int
|
2008-01-02 14:48:20 +03:00
|
|
|
vfinddev(dev_t dev, enum vtype type, vnode_t **vpp)
|
1994-05-17 08:21:49 +04:00
|
|
|
{
|
2008-01-02 14:48:20 +03:00
|
|
|
vnode_t *vp;
|
1998-03-01 05:20:01 +03:00
|
|
|
int rc = 0;
|
1994-05-17 08:21:49 +04:00
|
|
|
|
2008-12-29 20:41:18 +03:00
|
|
|
mutex_enter(&device_lock);
|
2008-01-24 20:32:52 +03:00
|
|
|
for (vp = specfs_hash[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
|
1994-05-17 08:21:49 +04:00
|
|
|
if (dev != vp->v_rdev || type != vp->v_type)
|
|
|
|
continue;
|
|
|
|
*vpp = vp;
|
1998-03-01 05:20:01 +03:00
|
|
|
rc = 1;
|
|
|
|
break;
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
2008-12-29 20:41:18 +03:00
|
|
|
mutex_exit(&device_lock);
|
1998-03-01 05:20:01 +03:00
|
|
|
return (rc);
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
|
|
|
|
1998-11-18 23:24:59 +03:00
|
|
|
/*
|
|
|
|
* Revoke all the vnodes corresponding to the specified minor number
|
|
|
|
* range (endpoints inclusive) of the specified major.
|
|
|
|
*/
|
|
|
|
void
|
2005-06-06 03:47:48 +04:00
|
|
|
vdevgone(int maj, int minl, int minh, enum vtype type)
|
1998-11-18 23:24:59 +03:00
|
|
|
{
|
2008-01-17 20:28:54 +03:00
|
|
|
vnode_t *vp, **vpp;
|
|
|
|
dev_t dev;
|
1998-11-18 23:24:59 +03:00
|
|
|
int mn;
|
|
|
|
|
2006-10-22 04:48:14 +04:00
|
|
|
vp = NULL; /* XXX gcc */
|
|
|
|
|
2008-12-29 20:41:18 +03:00
|
|
|
mutex_enter(&device_lock);
|
2008-01-17 20:28:54 +03:00
|
|
|
for (mn = minl; mn <= minh; mn++) {
|
|
|
|
dev = makedev(maj, mn);
|
2008-01-24 20:32:52 +03:00
|
|
|
vpp = &specfs_hash[SPECHASH(dev)];
|
2008-01-17 20:28:54 +03:00
|
|
|
for (vp = *vpp; vp != NULL;) {
|
|
|
|
mutex_enter(&vp->v_interlock);
|
|
|
|
if ((vp->v_iflag & VI_CLEAN) != 0 ||
|
|
|
|
dev != vp->v_rdev || type != vp->v_type) {
|
|
|
|
mutex_exit(&vp->v_interlock);
|
|
|
|
vp = vp->v_specnext;
|
|
|
|
continue;
|
|
|
|
}
|
2008-12-29 20:41:18 +03:00
|
|
|
mutex_exit(&device_lock);
|
2008-01-17 20:28:54 +03:00
|
|
|
if (vget(vp, LK_INTERLOCK) == 0) {
|
|
|
|
VOP_REVOKE(vp, REVOKEALL);
|
|
|
|
vrele(vp);
|
|
|
|
}
|
2008-12-29 20:41:18 +03:00
|
|
|
mutex_enter(&device_lock);
|
2008-01-17 20:28:54 +03:00
|
|
|
vp = *vpp;
|
|
|
|
}
|
|
|
|
}
|
2008-12-29 20:41:18 +03:00
|
|
|
mutex_exit(&device_lock);
|
1998-11-18 23:24:59 +03:00
|
|
|
}
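/*
 * Example (illustrative sketch): a disk driver's detach routine would
 * typically revoke both of its device aliases before freeing the softc:
 *
 *	mn = unit * MAXPARTITIONS;
 *	vdevgone(bmaj, mn, mn + MAXPARTITIONS - 1, VBLK);
 *	vdevgone(cmaj, mn, mn + MAXPARTITIONS - 1, VCHR);
 *
 * where bmaj and cmaj are the driver's block and character majors and
 * unit is the instance being detached.
 */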
|
|
|
|
|
2008-01-17 20:28:54 +03:00
|
|
|
/*
|
|
|
|
* Eliminate all activity associated with the requested vnode
|
|
|
|
* and with all vnodes aliased to the requested vnode.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
vrevoke(vnode_t *vp)
|
|
|
|
{
|
|
|
|
vnode_t *vq, **vpp;
|
|
|
|
enum vtype type;
|
|
|
|
dev_t dev;
|
|
|
|
|
|
|
|
KASSERT(vp->v_usecount > 0);
|
|
|
|
|
|
|
|
mutex_enter(&vp->v_interlock);
|
|
|
|
if ((vp->v_iflag & VI_CLEAN) != 0) {
|
|
|
|
mutex_exit(&vp->v_interlock);
|
|
|
|
return;
|
2009-02-05 16:37:24 +03:00
|
|
|
} else if (vp->v_type != VBLK && vp->v_type != VCHR) {
|
|
|
|
atomic_inc_uint(&vp->v_usecount);
|
|
|
|
vclean(vp, DOCLOSE);
|
|
|
|
vrelel(vp, 0);
|
|
|
|
return;
|
2008-01-17 20:28:54 +03:00
|
|
|
} else {
|
|
|
|
dev = vp->v_rdev;
|
|
|
|
type = vp->v_type;
|
|
|
|
mutex_exit(&vp->v_interlock);
|
|
|
|
}
|
|
|
|
|
2008-01-24 20:32:52 +03:00
|
|
|
vpp = &specfs_hash[SPECHASH(dev)];
|
2008-12-29 20:41:18 +03:00
|
|
|
mutex_enter(&device_lock);
|
2008-01-17 20:28:54 +03:00
|
|
|
for (vq = *vpp; vq != NULL;) {
|
2008-02-05 18:13:25 +03:00
|
|
|
/* If clean or being cleaned, then ignore it. */
|
|
|
|
mutex_enter(&vq->v_interlock);
|
|
|
|
if ((vq->v_iflag & (VI_CLEAN | VI_XLOCK)) != 0 ||
|
2008-01-17 22:23:13 +03:00
|
|
|
vq->v_rdev != dev || vq->v_type != type) {
|
2008-02-05 18:13:25 +03:00
|
|
|
mutex_exit(&vq->v_interlock);
|
2008-01-17 20:28:54 +03:00
|
|
|
vq = vq->v_specnext;
|
|
|
|
continue;
|
|
|
|
}
|
2008-12-29 20:41:18 +03:00
|
|
|
mutex_exit(&device_lock);
|
2008-06-05 16:32:57 +04:00
|
|
|
if (vq->v_usecount == 0) {
|
2008-01-17 22:23:13 +03:00
|
|
|
vremfree(vq);
|
2008-06-05 16:32:57 +04:00
|
|
|
vq->v_usecount = 1;
|
|
|
|
} else {
|
|
|
|
atomic_inc_uint(&vq->v_usecount);
|
2008-01-17 20:28:54 +03:00
|
|
|
}
|
|
|
|
vclean(vq, DOCLOSE);
|
2008-01-28 01:47:31 +03:00
|
|
|
vrelel(vq, 0);
|
2008-12-29 20:41:18 +03:00
|
|
|
mutex_enter(&device_lock);
|
2008-01-17 20:28:54 +03:00
|
|
|
vq = *vpp;
|
|
|
|
}
|
2008-12-29 20:41:18 +03:00
|
|
|
mutex_exit(&device_lock);
|
2008-01-17 20:28:54 +03:00
|
|
|
}
|
|
|
|
|
2004-04-19 04:15:55 +04:00
|
|
|
/*
|
|
|
|
* sysctl helper routine to return list of supported fstypes
|
|
|
|
*/
|
2008-12-07 23:58:46 +03:00
|
|
|
int
|
2004-04-19 04:15:55 +04:00
|
|
|
sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
|
|
|
|
{
|
2007-07-18 01:14:05 +04:00
|
|
|
char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)];
|
2004-04-19 04:15:55 +04:00
|
|
|
char *where = oldp;
|
|
|
|
struct vfsops *v;
|
|
|
|
size_t needed, left, slen;
|
|
|
|
int error, first;
|
|
|
|
|
|
|
|
if (newp != NULL)
|
|
|
|
return (EPERM);
|
|
|
|
if (namelen != 0)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
first = 1;
|
|
|
|
error = 0;
|
|
|
|
needed = 0;
|
|
|
|
left = *oldlenp;
|
|
|
|
|
2008-01-07 19:12:52 +03:00
|
|
|
sysctl_unlock();
|
2007-10-11 00:42:20 +04:00
|
|
|
mutex_enter(&vfs_list_lock);
|
2004-04-19 04:15:55 +04:00
|
|
|
LIST_FOREACH(v, &vfs_list, vfs_list) {
|
|
|
|
if (where == NULL)
|
|
|
|
needed += strlen(v->vfs_name) + 1;
|
|
|
|
else {
|
2005-05-30 02:24:14 +04:00
|
|
|
memset(bf, 0, sizeof(bf));
|
2004-04-19 04:15:55 +04:00
|
|
|
if (first) {
|
2005-05-30 02:24:14 +04:00
|
|
|
strncpy(bf, v->vfs_name, sizeof(bf));
|
2004-04-19 04:15:55 +04:00
|
|
|
first = 0;
|
|
|
|
} else {
|
2005-05-30 02:24:14 +04:00
|
|
|
bf[0] = ' ';
|
|
|
|
strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
|
2004-04-19 04:15:55 +04:00
|
|
|
}
|
2005-05-30 02:24:14 +04:00
|
|
|
bf[sizeof(bf)-1] = '\0';
|
|
|
|
slen = strlen(bf);
|
2004-04-19 04:15:55 +04:00
|
|
|
if (left < slen + 1)
|
|
|
|
break;
|
2007-10-11 00:42:20 +04:00
|
|
|
v->vfs_refcount++;
|
|
|
|
mutex_exit(&vfs_list_lock);
|
2008-07-27 19:08:37 +04:00
|
|
|
/* +1 to copy out the trailing NUL byte */
|
2005-05-30 02:24:14 +04:00
|
|
|
error = copyout(bf, where, slen + 1);
|
2007-10-11 00:42:20 +04:00
|
|
|
mutex_enter(&vfs_list_lock);
|
|
|
|
v->vfs_refcount--;
|
2004-04-19 04:15:55 +04:00
|
|
|
if (error)
|
|
|
|
break;
|
|
|
|
where += slen;
|
|
|
|
needed += slen;
|
|
|
|
left -= slen;
|
|
|
|
}
|
|
|
|
}
|
2007-10-11 00:42:20 +04:00
|
|
|
mutex_exit(&vfs_list_lock);
|
2008-01-07 19:12:52 +03:00
|
|
|
sysctl_relock();
|
2004-04-19 04:15:55 +04:00
|
|
|
*oldlenp = needed;
|
|
|
|
return (error);
|
|
|
|
}
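/*
 * Example (illustrative): this node is normally read from userland as
 * vfs.generic.fstypes, e.g.
 *
 *	$ sysctl vfs.generic.fstypes
 *	vfs.generic.fstypes = ffs nfs ...
 *
 * The names are space separated; the exact list depends on which file
 * system modules are configured in or loaded.
 */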
|
|
|
|
|
2003-12-04 22:38:21 +03:00
|
|
|
|
1994-05-17 08:21:49 +04:00
|
|
|
int kinfo_vdebug = 1;
|
|
|
|
int kinfo_vgetfailed;
|
|
|
|
#define KINFO_VNODESLOP 10
|
|
|
|
/*
|
|
|
|
* Dump vnode list (via sysctl).
|
|
|
|
* Copyout address of vnode followed by vnode.
|
|
|
|
*/
|
|
|
|
/* ARGSUSED */
|
1996-02-04 05:17:43 +03:00
|
|
|
int
|
2003-12-04 22:38:21 +03:00
|
|
|
sysctl_kern_vnode(SYSCTLFN_ARGS)
|
1994-05-17 08:21:49 +04:00
|
|
|
{
|
2003-12-04 22:38:21 +03:00
|
|
|
char *where = oldp;
|
|
|
|
size_t *sizep = oldlenp;
|
1998-03-01 05:20:01 +03:00
|
|
|
struct mount *mp, *nmp;
|
2008-01-07 19:12:52 +03:00
|
|
|
vnode_t *vp, *mvp, vbuf;
|
2009-11-19 06:01:05 +03:00
|
|
|
char *bp = where;
|
1994-05-17 08:21:49 +04:00
|
|
|
char *ewhere;
|
|
|
|
int error;
|
|
|
|
|
2003-12-04 22:38:21 +03:00
|
|
|
if (namelen != 0)
|
|
|
|
return (EOPNOTSUPP);
|
|
|
|
if (newp != NULL)
|
|
|
|
return (EPERM);
|
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
#define VPTRSZ sizeof(vnode_t *)
|
|
|
|
#define VNODESZ sizeof(vnode_t)
|
1994-05-17 08:21:49 +04:00
|
|
|
if (where == NULL) {
|
|
|
|
*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
ewhere = where + *sizep;
|
1998-03-01 05:20:01 +03:00
|
|
|
|
2008-01-07 19:12:52 +03:00
|
|
|
sysctl_unlock();
|
2007-10-11 00:42:20 +04:00
|
|
|
mutex_enter(&mountlist_lock);
|
2002-09-04 05:32:31 +04:00
|
|
|
for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
|
2009-11-19 06:01:05 +03:00
|
|
|
mp = nmp) {
|
2008-05-06 22:43:44 +04:00
|
|
|
if (vfs_busy(mp, &nmp)) {
|
1994-05-17 08:21:49 +04:00
|
|
|
continue;
|
1998-03-01 05:20:01 +03:00
|
|
|
}
|
2008-01-02 14:48:20 +03:00
|
|
|
/* Allocate a marker vnode. */
|
2009-04-25 22:53:43 +04:00
|
|
|
mvp = vnalloc(mp);
|
|
|
|
/* Should never fail for mp != NULL */
|
|
|
|
KASSERT(mvp != NULL);
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_enter(&mntvnode_lock);
|
2009-11-19 06:01:05 +03:00
|
|
|
for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp;
|
|
|
|
vp = vunmark(mvp)) {
|
2008-01-02 14:48:20 +03:00
|
|
|
vmark(mvp, vp);
|
1994-05-17 08:21:49 +04:00
|
|
|
/*
|
|
|
|
* Check that the vp is still associated with
|
|
|
|
* this filesystem. RACE: could have been
|
|
|
|
* recycled onto the same filesystem.
|
|
|
|
*/
|
2008-01-02 14:48:20 +03:00
|
|
|
if (vp->v_mount != mp || vismarker(vp))
|
|
|
|
continue;
|
1994-05-17 08:21:49 +04:00
|
|
|
if (bp + VPTRSZ + VNODESZ > ewhere) {
|
2008-01-02 14:48:20 +03:00
|
|
|
(void)vunmark(mvp);
|
|
|
|
mutex_exit(&mntvnode_lock);
|
2008-01-03 04:26:28 +03:00
|
|
|
vnfree(mvp);
|
2009-11-19 05:59:33 +03:00
|
|
|
vfs_unbusy(mp, false, NULL);
|
2008-01-07 19:12:52 +03:00
|
|
|
sysctl_relock();
|
1994-05-17 08:21:49 +04:00
|
|
|
*sizep = bp - where;
|
|
|
|
return (ENOMEM);
|
|
|
|
}
|
2008-01-07 19:12:52 +03:00
|
|
|
memcpy(&vbuf, vp, VNODESZ);
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_exit(&mntvnode_lock);
|
2009-01-21 03:54:05 +03:00
|
|
|
if ((error = copyout(&vp, bp, VPTRSZ)) ||
|
2009-11-19 06:01:05 +03:00
|
|
|
(error = copyout(&vbuf, bp + VPTRSZ, VNODESZ))) {
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_enter(&mntvnode_lock);
|
|
|
|
(void)vunmark(mvp);
|
|
|
|
mutex_exit(&mntvnode_lock);
|
2008-01-03 04:26:28 +03:00
|
|
|
vnfree(mvp);
|
2009-11-19 05:59:33 +03:00
|
|
|
vfs_unbusy(mp, false, NULL);
|
2008-01-07 19:12:52 +03:00
|
|
|
sysctl_relock();
|
1994-05-17 08:21:49 +04:00
|
|
|
return (error);
|
2008-01-02 14:48:20 +03:00
|
|
|
}
|
1994-05-17 08:21:49 +04:00
|
|
|
bp += VPTRSZ + VNODESZ;
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_enter(&mntvnode_lock);
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_exit(&mntvnode_lock);
|
2008-01-03 04:26:28 +03:00
|
|
|
vnfree(mvp);
|
2008-05-06 22:43:44 +04:00
|
|
|
vfs_unbusy(mp, false, &nmp);
|
1994-05-17 08:21:49 +04:00
|
|
|
}
|
2007-10-11 00:42:20 +04:00
|
|
|
mutex_exit(&mountlist_lock);
|
2008-01-07 19:12:52 +03:00
|
|
|
sysctl_relock();
|
1994-05-17 08:21:49 +04:00
|
|
|
|
|
|
|
*sizep = bp - where;
|
|
|
|
return (0);
|
|
|
|
}
|
1994-06-08 15:28:29 +04:00
|
|
|
|
2008-01-02 14:48:20 +03:00
|
|
|
/*
|
|
|
|
* Remove clean vnodes from a mountpoint's vnode list.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
vfs_scrubvnlist(struct mount *mp)
|
|
|
|
{
|
|
|
|
vnode_t *vp, *nvp;
|
|
|
|
|
2008-01-30 14:46:59 +03:00
|
|
|
retry:
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_enter(&mntvnode_lock);
|
|
|
|
for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
|
|
|
|
nvp = TAILQ_NEXT(vp, v_mntvnodes);
|
|
|
|
mutex_enter(&vp->v_interlock);
|
2008-01-17 16:06:04 +03:00
|
|
|
if ((vp->v_iflag & VI_CLEAN) != 0) {
|
2008-01-02 14:48:20 +03:00
|
|
|
TAILQ_REMOVE(&mp->mnt_vnodelist, vp, v_mntvnodes);
|
2008-01-17 16:06:04 +03:00
|
|
|
vp->v_mount = NULL;
|
2008-01-30 14:46:59 +03:00
|
|
|
mutex_exit(&mntvnode_lock);
|
|
|
|
mutex_exit(&vp->v_interlock);
|
2008-05-06 22:43:44 +04:00
|
|
|
vfs_destroy(mp);
|
2008-01-30 14:46:59 +03:00
|
|
|
goto retry;
|
2008-01-17 16:06:04 +03:00
|
|
|
}
|
2008-01-02 14:48:20 +03:00
|
|
|
mutex_exit(&vp->v_interlock);
|
|
|
|
}
|
|
|
|
mutex_exit(&mntvnode_lock);
|
|
|
|
}
|
|
|
|
|
1994-06-08 15:28:29 +04:00
|
|
|
/*
|
|
|
|
* Check to see if a filesystem is mounted on a block device.
|
|
|
|
*/
|
|
|
|
int
|
2008-01-02 14:48:20 +03:00
|
|
|
vfs_mountedon(vnode_t *vp)
|
1994-06-08 15:28:29 +04:00
|
|
|
{
|
2008-01-02 14:48:20 +03:00
|
|
|
vnode_t *vq;
|
1998-03-01 05:20:01 +03:00
|
|
|
int error = 0;
|
1994-06-08 15:28:29 +04:00
|
|
|
|
2006-01-18 17:26:55 +03:00
|
|
|
if (vp->v_type != VBLK)
|
|
|
|
return ENOTBLK;
|
1999-11-15 21:49:07 +03:00
|
|
|
if (vp->v_specmountpoint != NULL)
|
1994-06-08 15:28:29 +04:00
|
|
|
return (EBUSY);
|
2008-12-29 20:41:18 +03:00
|
|
|
mutex_enter(&device_lock);
|
2008-01-24 20:32:52 +03:00
|
|
|
for (vq = specfs_hash[SPECHASH(vp->v_rdev)]; vq != NULL;
|
|
|
|
vq = vq->v_specnext) {
|
|
|
|
if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
|
|
|
|
continue;
|
|
|
|
if (vq->v_specmountpoint != NULL) {
|
|
|
|
error = EBUSY;
|
|
|
|
break;
|
1994-06-08 15:28:29 +04:00
|
|
|
}
|
|
|
|
}
|
2008-12-29 20:41:18 +03:00
|
|
|
mutex_exit(&device_lock);
|
1998-03-01 05:20:01 +03:00
|
|
|
return (error);
|
1994-06-08 15:28:29 +04:00
|
|
|
}
|
|
|
|
|
1995-04-10 23:46:56 +04:00
|
|
|
/*
|
|
|
|
* Unmount all file systems.
|
|
|
|
* We traverse the list in reverse order under the assumption that doing so
|
|
|
|
* will avoid needing to worry about dependencies.
|
|
|
|
*/
|
2009-04-18 00:22:52 +04:00
|
|
|
bool
|
2005-12-11 15:16:03 +03:00
|
|
|
vfs_unmountall(struct lwp *l)
|
2009-04-29 19:44:55 +04:00
|
|
|
{
|
|
|
|
printf("unmounting file systems...");
|
|
|
|
return vfs_unmountall1(l, true, true);
|
|
|
|
}
|
|
|
|
|
2009-06-26 22:53:07 +04:00
|
|
|
static void
|
|
|
|
vfs_unmount_print(struct mount *mp, const char *pfx)
|
|
|
|
{
|
|
|
|
printf("%sunmounted %s on %s type %s\n", pfx,
|
|
|
|
mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname,
|
|
|
|
mp->mnt_stat.f_fstypename);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
vfs_unmount_forceone(struct lwp *l)
|
|
|
|
{
|
|
|
|
struct mount *mp, *nmp = NULL;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
CIRCLEQ_FOREACH_REVERSE(mp, &mountlist, mnt_list) {
|
|
|
|
if (nmp == NULL || mp->mnt_gen > nmp->mnt_gen)
|
|
|
|
nmp = mp;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nmp == NULL)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
#ifdef DEBUG
|
|
|
|
printf("\nforcefully unmounting %s (%s)...",
|
|
|
|
nmp->mnt_stat.f_mntonname, nmp->mnt_stat.f_mntfromname);
|
|
|
|
#endif
|
|
|
|
atomic_inc_uint(&nmp->mnt_refcnt);
|
|
|
|
if ((error = dounmount(nmp, MNT_FORCE, l)) == 0) {
|
|
|
|
vfs_unmount_print(nmp, "forcefully ");
|
|
|
|
return true;
|
|
|
|
} else
|
|
|
|
atomic_dec_uint(&nmp->mnt_refcnt);
|
|
|
|
|
|
|
|
#ifdef DEBUG
|
|
|
|
printf("forceful unmount of %s failed with error %d\n",
|
|
|
|
nmp->mnt_stat.f_mntonname, error);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2009-04-29 19:44:55 +04:00
|
|
|
bool
|
|
|
|
vfs_unmountall1(struct lwp *l, bool force, bool verbose)
|
1995-04-10 23:46:56 +04:00
|
|
|
{
|
2000-03-30 13:27:11 +04:00
|
|
|
struct mount *mp, *nmp;
|
2009-06-26 22:53:07 +04:00
|
|
|
bool any_error = false, progress = false;
|
2009-04-18 00:22:52 +04:00
|
|
|
int error;
|
1998-04-26 23:10:33 +04:00
|
|
|
|
2009-06-26 22:53:07 +04:00
|
|
|
for (mp = CIRCLEQ_LAST(&mountlist);
|
|
|
|
mp != (void *)&mountlist;
|
2008-01-28 21:24:05 +03:00
|
|
|
mp = nmp) {
|
|
|
|
nmp = CIRCLEQ_PREV(mp, mnt_list);
|
1996-06-02 00:24:05 +04:00
|
|
|
#ifdef DEBUG
|
2009-06-26 22:53:07 +04:00
|
|
|
printf("\nunmounting %p %s (%s)...",
|
|
|
|
(void *)mp, mp->mnt_stat.f_mntonname,
|
|
|
|
mp->mnt_stat.f_mntfromname);
|
1996-06-02 00:24:05 +04:00
|
|
|
#endif
|
2008-05-06 22:43:44 +04:00
|
|
|
atomic_inc_uint(&mp->mnt_refcnt);
|
2009-06-26 22:53:07 +04:00
|
|
|
if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) {
|
|
|
|
vfs_unmount_print(mp, "");
|
2009-04-18 00:22:52 +04:00
|
|
|
progress = true;
|
2009-06-26 22:53:07 +04:00
|
|
|
} else {
|
|
|
|
atomic_dec_uint(&mp->mnt_refcnt);
|
2009-04-29 19:44:55 +04:00
|
|
|
if (verbose) {
|
|
|
|
printf("unmount of %s failed with error %d\n",
|
|
|
|
mp->mnt_stat.f_mntonname, error);
|
|
|
|
}
|
2009-04-18 00:22:52 +04:00
|
|
|
any_error = true;
|
1995-04-22 01:55:11 +04:00
|
|
|
}
|
1995-04-10 23:46:56 +04:00
|
|
|
}
|
2009-04-29 19:44:55 +04:00
|
|
|
if (verbose)
|
|
|
|
printf(" done\n");
|
|
|
|
if (any_error && verbose)
|
1996-10-13 06:32:29 +04:00
|
|
|
printf("WARNING: some file systems would not unmount\n");
|
2009-04-18 00:22:52 +04:00
|
|
|
return progress;
|
1995-04-22 01:55:11 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Sync and unmount file systems before shutting down.
|
|
|
|
*/
|
|
|
|
void
|
2005-06-06 03:47:48 +04:00
|
|
|
vfs_shutdown(void)
|
1995-04-22 01:55:11 +04:00
|
|
|
{
|
2006-02-25 10:11:31 +03:00
|
|
|
struct lwp *l;
|
1995-04-22 01:55:11 +04:00
|
|
|
|
2006-02-25 10:11:31 +03:00
|
|
|
/* XXX we're certainly not running in lwp0's context! */
|
2009-04-29 19:44:55 +04:00
|
|
|
l = (curlwp == NULL) ? &lwp0 : curlwp;
|
2003-01-21 02:59:14 +03:00
|
|
|
|
2009-06-26 22:53:07 +04:00
|
|
|
vfs_shutdown1(l);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
vfs_sync_all(struct lwp *l)
|
|
|
|
{
|
1997-06-07 21:27:57 +04:00
|
|
|
printf("syncing disks... ");
|
|
|
|
|
2007-11-04 20:31:16 +03:00
|
|
|
/* remove user processes from run queue */
|
2000-08-31 18:41:35 +04:00
|
|
|
suspendsched();
|
1995-04-22 01:55:11 +04:00
|
|
|
(void) spl0();
|
|
|
|
|
2000-06-10 22:44:43 +04:00
|
|
|
/* avoid coming back this way again if we panic. */
|
|
|
|
doing_shutdown = 1;
|
|
|
|
|
2003-01-18 13:06:22 +03:00
|
|
|
sys_sync(l, NULL, NULL);
|
1995-04-22 01:55:11 +04:00
|
|
|
|
|
|
|
/* Wait for sync to finish. */
|
2003-12-30 15:33:13 +03:00
|
|
|
if (buf_syncwait() != 0) {
|
2000-03-30 13:32:25 +04:00
|
|
|
#if defined(DDB) && defined(DEBUG_HALT_BUSY)
|
|
|
|
Debugger();
|
1999-08-19 17:54:06 +04:00
|
|
|
#endif
|
2000-03-30 13:32:25 +04:00
|
|
|
printf("giving up\n");
|
1998-04-26 22:58:54 +04:00
|
|
|
return;
|
1997-09-25 01:40:55 +04:00
|
|
|
} else
|
1996-10-13 06:32:29 +04:00
|
|
|
printf("done\n");
|
2009-06-26 22:53:07 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
vfs_shutdown1(struct lwp *l)
|
|
|
|
{
|
|
|
|
|
|
|
|
vfs_sync_all(l);
|
1997-09-25 01:40:55 +04:00
|
|
|
|
1998-04-26 22:58:54 +04:00
|
|
|
/*
|
|
|
|
* If we've panic'd, don't make the situation potentially
|
|
|
|
* worse by unmounting the file systems.
|
|
|
|
*/
|
|
|
|
if (panicstr != NULL)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* Release inodes held by texts before update. */
|
1997-09-25 01:40:55 +04:00
|
|
|
#ifdef notdef
|
1998-04-26 22:58:54 +04:00
|
|
|
vnshutdown();
|
1997-09-25 01:40:55 +04:00
|
|
|
#endif
|
1998-04-26 22:58:54 +04:00
|
|
|
/* Unmount file systems. */
|
2005-12-11 15:16:03 +03:00
|
|
|
vfs_unmountall(l);
|
1995-04-10 23:46:56 +04:00
|
|
|
}
|
1997-01-31 05:50:36 +03:00
|
|
|
|
2009-09-19 20:20:41 +04:00
|
|
|
/*
|
|
|
|
* Print a list of supported file system types (used by vfs_mountroot)
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
vfs_print_fstypes(void)
|
|
|
|
{
|
|
|
|
struct vfsops *v;
|
|
|
|
int cnt = 0;
|
|
|
|
|
|
|
|
mutex_enter(&vfs_list_lock);
|
|
|
|
LIST_FOREACH(v, &vfs_list, vfs_list)
|
|
|
|
++cnt;
|
|
|
|
mutex_exit(&vfs_list_lock);
|
|
|
|
|
|
|
|
if (cnt == 0) {
|
|
|
|
printf("WARNING: No file system modules have been loaded.\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
printf("Supported file systems:");
|
|
|
|
mutex_enter(&vfs_list_lock);
|
|
|
|
LIST_FOREACH(v, &vfs_list, vfs_list) {
|
|
|
|
printf(" %s", v->vfs_name);
|
|
|
|
}
|
|
|
|
mutex_exit(&vfs_list_lock);
|
|
|
|
printf("\n");
|
|
|
|
}
|
|
|
|
|
1997-01-31 05:50:36 +03:00
|
|
|
/*
|
|
|
|
* Mount the root file system. If the operator didn't specify a
|
|
|
|
* file system to use, try all possible file systems until one
|
|
|
|
* succeeds.
|
|
|
|
*/
|
|
|
|
int
|
2005-06-06 03:47:48 +04:00
|
|
|
vfs_mountroot(void)
|
1997-01-31 05:50:36 +03:00
|
|
|
{
|
1998-02-18 10:16:41 +03:00
|
|
|
struct vfsops *v;
|
2005-01-09 06:11:48 +03:00
|
|
|
int error = ENODEV;
|
1997-01-31 05:50:36 +03:00
|
|
|
|
|
|
|
if (root_device == NULL)
|
|
|
|
panic("vfs_mountroot: root device unknown");
|
|
|
|
|
2006-02-21 07:32:38 +03:00
|
|
|
switch (device_class(root_device)) {
|
1997-01-31 05:50:36 +03:00
|
|
|
case DV_IFNET:
|
|
|
|
if (rootdev != NODEV)
|
2002-04-04 05:44:30 +04:00
|
|
|
panic("vfs_mountroot: rootdev set for DV_IFNET "
|
2009-01-11 05:45:45 +03:00
|
|
|
"(0x%llx -> %llu,%llu)",
|
|
|
|
(unsigned long long)rootdev,
|
|
|
|
(unsigned long long)major(rootdev),
|
|
|
|
(unsigned long long)minor(rootdev));
|
1997-01-31 05:50:36 +03:00
|
|
|
break;
|
|
|
|
|
|
|
|
case DV_DISK:
|
|
|
|
if (rootdev == NODEV)
|
|
|
|
panic("vfs_mountroot: rootdev not set for DV_DISK");
|
2005-01-09 06:11:48 +03:00
|
|
|
if (bdevvp(rootdev, &rootvp))
|
|
|
|
panic("vfs_mountroot: can't get vnode for rootdev");
|
2007-11-26 22:01:26 +03:00
|
|
|
error = VOP_OPEN(rootvp, FREAD, FSCRED);
|
2005-01-09 06:11:48 +03:00
|
|
|
if (error) {
|
|
|
|
printf("vfs_mountroot: can't open root device\n");
|
|
|
|
return (error);
|
|
|
|
}
|
1997-01-31 05:50:36 +03:00
|
|
|
break;
|
|
|
|
|
2009-11-26 23:52:19 +03:00
|
|
|
case DV_VIRTUAL:
|
|
|
|
break;
|
|
|
|
|
1997-01-31 05:50:36 +03:00
|
|
|
default:
|
|
|
|
printf("%s: inappropriate for root file system\n",
|
2008-04-05 00:13:18 +04:00
|
|
|
device_xname(root_device));
|
1997-01-31 05:50:36 +03:00
|
|
|
return (ENODEV);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2008-12-19 20:11:57 +03:00
|
|
|
* If user specified a root fs type, use it. Make sure the
|
|
|
|
	 * specified type exists and has a vfs_mountroot() entry point.
|
1997-01-31 05:50:36 +03:00
|
|
|
*/
|
2008-12-19 20:11:57 +03:00
|
|
|
if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
|
|
|
|
v = vfs_getopsbyname(rootfstype);
|
|
|
|
error = EFTYPE;
|
|
|
|
if (v != NULL) {
|
|
|
|
if (v->vfs_mountroot != NULL) {
|
|
|
|
error = (v->vfs_mountroot)();
|
|
|
|
}
|
|
|
|
v->vfs_refcount--;
|
|
|
|
}
|
2005-01-09 06:11:48 +03:00
|
|
|
goto done;
|
|
|
|
}
|
1997-01-31 05:50:36 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Try each file system currently configured into the kernel.
|
|
|
|
*/
|
2007-10-11 00:42:20 +04:00
|
|
|
mutex_enter(&vfs_list_lock);
|
2004-04-19 04:15:55 +04:00
|
|
|
LIST_FOREACH(v, &vfs_list, vfs_list) {
|
1998-02-18 10:16:41 +03:00
|
|
|
if (v->vfs_mountroot == NULL)
|
1997-01-31 05:50:36 +03:00
|
|
|
continue;
|
|
|
|
#ifdef DEBUG
|
2003-05-18 02:22:41 +04:00
|
|
|
aprint_normal("mountroot: trying %s...\n", v->vfs_name);
|
1997-01-31 05:50:36 +03:00
|
|
|
#endif
|
2007-10-11 00:42:20 +04:00
|
|
|
v->vfs_refcount++;
|
|
|
|
mutex_exit(&vfs_list_lock);
|
2005-01-09 06:11:48 +03:00
|
|
|
error = (*v->vfs_mountroot)();
|
2007-10-11 00:42:20 +04:00
|
|
|
mutex_enter(&vfs_list_lock);
|
|
|
|
v->vfs_refcount--;
|
2005-01-09 06:11:48 +03:00
|
|
|
if (!error) {
|
2003-05-18 02:22:41 +04:00
|
|
|
aprint_normal("root file system type: %s\n",
|
|
|
|
v->vfs_name);
|
1998-02-18 10:16:41 +03:00
|
|
|
break;
|
1997-01-31 05:50:36 +03:00
|
|
|
}
|
|
|
|
}
|
2007-10-11 00:42:20 +04:00
|
|
|
mutex_exit(&vfs_list_lock);
|
1997-01-31 05:50:36 +03:00
|
|
|
|
1998-02-18 10:16:41 +03:00
|
|
|
if (v == NULL) {
|
2009-09-19 20:20:41 +04:00
|
|
|
vfs_print_fstypes();
|
2008-04-05 00:13:18 +04:00
|
|
|
printf("no file system for %s", device_xname(root_device));
|
2006-02-21 07:32:38 +03:00
|
|
|
if (device_class(root_device) == DV_DISK)
|
2009-01-11 05:45:45 +03:00
|
|
|
printf(" (dev 0x%llx)", (unsigned long long)rootdev);
|
1998-02-18 10:16:41 +03:00
|
|
|
printf("\n");
|
2005-01-09 06:11:48 +03:00
|
|
|
error = EFTYPE;
|
1998-02-18 10:16:41 +03:00
|
|
|
}
|
2005-01-09 06:11:48 +03:00
|
|
|
|
|
|
|
done:
|
2006-02-21 07:32:38 +03:00
|
|
|
if (error && device_class(root_device) == DV_DISK) {
|
2007-11-26 22:01:26 +03:00
|
|
|
VOP_CLOSE(rootvp, FREAD, FSCRED);
|
2005-01-09 06:11:48 +03:00
|
|
|
vrele(rootvp);
|
|
|
|
}
|
2009-11-27 19:43:51 +03:00
|
|
|
if (error == 0) {
|
|
|
|
extern struct cwdinfo cwdi0;
|
|
|
|
|
|
|
|
CIRCLEQ_FIRST(&mountlist)->mnt_flag |= MNT_ROOTFS;
|
|
|
|
CIRCLEQ_FIRST(&mountlist)->mnt_op->vfs_refcount++;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the vnode for '/'. Set cwdi0.cwdi_cdir to
|
|
|
|
* reference it.
|
|
|
|
*/
|
|
|
|
error = VFS_ROOT(CIRCLEQ_FIRST(&mountlist), &rootvnode);
|
|
|
|
if (error)
|
|
|
|
panic("cannot find root vnode, error=%d", error);
|
|
|
|
cwdi0.cwdi_cdir = rootvnode;
|
2010-01-08 14:35:07 +03:00
|
|
|
vref(cwdi0.cwdi_cdir);
|
2009-11-27 19:43:51 +03:00
|
|
|
VOP_UNLOCK(rootvnode, 0);
|
|
|
|
cwdi0.cwdi_rdir = NULL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now that root is mounted, we can fixup initproc's CWD
|
|
|
|
* info. All other processes are kthreads, which merely
|
|
|
|
* share proc0's CWD info.
|
|
|
|
*/
|
|
|
|
initproc->p_cwdi->cwdi_cdir = rootvnode;
|
2010-01-08 14:35:07 +03:00
|
|
|
vref(initproc->p_cwdi->cwdi_cdir);
|
2009-11-27 19:43:51 +03:00
|
|
|
initproc->p_cwdi->cwdi_rdir = NULL;
|
|
|
|
}
|
2005-01-09 06:11:48 +03:00
|
|
|
return (error);
|
1997-01-31 05:50:36 +03:00
|
|
|
}
|
2008-01-30 12:50:19 +03:00
|
|
|
|
|
|
|
/*
|
2008-07-17 00:06:19 +04:00
|
|
|
* Get a new unique fsid
|
2008-01-30 12:50:19 +03:00
|
|
|
*/
|
2008-07-17 00:06:19 +04:00
|
|
|
void
|
|
|
|
vfs_getnewfsid(struct mount *mp)
|
2008-01-30 12:50:19 +03:00
|
|
|
{
|
2008-07-17 00:06:19 +04:00
|
|
|
static u_short xxxfs_mntid;
|
|
|
|
fsid_t tfsid;
|
|
|
|
int mtype;
|
|
|
|
|
|
|
|
mutex_enter(&mntid_lock);
|
|
|
|
mtype = makefstype(mp->mnt_op->vfs_name);
|
|
|
|
mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
|
|
|
|
mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
|
|
|
|
mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
|
|
|
|
if (xxxfs_mntid == 0)
|
|
|
|
++xxxfs_mntid;
|
|
|
|
tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
|
|
|
|
tfsid.__fsid_val[1] = mtype;
|
|
|
|
if (!CIRCLEQ_EMPTY(&mountlist)) {
|
|
|
|
while (vfs_getvfs(&tfsid)) {
|
|
|
|
tfsid.__fsid_val[0]++;
|
|
|
|
xxxfs_mntid++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
|
|
|
|
mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
|
|
|
|
mutex_exit(&mntid_lock);
|
|
|
|
}
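/*
 * Illustrative sketch: the fsid combines the file system type with the
 * small per-boot counter xxxfs_mntid, so an ffs mount taken while the
 * counter is 3 ends up with roughly
 *
 *	f_fsidx.__fsid_val[0] = makedev(makefstype("ffs") & 0xff, 3);
 *	f_fsidx.__fsid_val[1] = makefstype("ffs");
 *
 * the loop above advancing the counter until vfs_getvfs() no longer
 * finds an existing mount using the candidate fsid.
 */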
|
2008-01-30 12:50:19 +03:00
|
|
|
|
2008-07-17 00:06:19 +04:00
|
|
|
/*
|
|
|
|
* Make a 'unique' number from a mount type name.
|
|
|
|
*/
|
|
|
|
long
|
|
|
|
makefstype(const char *type)
|
|
|
|
{
|
|
|
|
long rv;
|
2008-01-30 12:50:19 +03:00
|
|
|
|
2008-07-17 00:06:19 +04:00
|
|
|
for (rv = 0; *type; type++) {
|
|
|
|
rv <<= 2;
|
|
|
|
rv ^= *type;
|
|
|
|
}
|
|
|
|
return rv;
|
|
|
|
}
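/*
 * Example (illustrative): the result is an order-dependent fold of the
 * name, shifting two bits per character, so "ffs" yields
 *
 *	((('f' << 2) ^ 'f') << 2) ^ 's'
 *
 * Distinct short names usually give distinct values, but "unique" is
 * only best effort.
 */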
|
2008-01-30 12:50:19 +03:00
|
|
|
|
2008-07-17 00:06:19 +04:00
|
|
|
/*
|
|
|
|
* Set vnode attributes to VNOVAL
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
vattr_null(struct vattr *vap)
|
|
|
|
{
|
|
|
|
|
2010-01-07 22:54:40 +03:00
|
|
|
memset(vap, 0, sizeof(*vap));
|
|
|
|
|
2008-07-17 00:06:19 +04:00
|
|
|
vap->va_type = VNON;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Assign individually so that it is safe even if size and
|
|
|
|
	 * sign of each member vary.
|
|
|
|
*/
|
|
|
|
vap->va_mode = VNOVAL;
|
|
|
|
vap->va_nlink = VNOVAL;
|
|
|
|
vap->va_uid = VNOVAL;
|
|
|
|
vap->va_gid = VNOVAL;
|
|
|
|
vap->va_fsid = VNOVAL;
|
|
|
|
vap->va_fileid = VNOVAL;
|
|
|
|
vap->va_size = VNOVAL;
|
|
|
|
vap->va_blocksize = VNOVAL;
|
|
|
|
vap->va_atime.tv_sec =
|
|
|
|
vap->va_mtime.tv_sec =
|
|
|
|
vap->va_ctime.tv_sec =
|
|
|
|
vap->va_birthtime.tv_sec = VNOVAL;
|
|
|
|
vap->va_atime.tv_nsec =
|
|
|
|
vap->va_mtime.tv_nsec =
|
|
|
|
vap->va_ctime.tv_nsec =
|
|
|
|
vap->va_birthtime.tv_nsec = VNOVAL;
|
|
|
|
vap->va_gen = VNOVAL;
|
|
|
|
vap->va_flags = VNOVAL;
|
|
|
|
vap->va_rdev = VNOVAL;
|
|
|
|
vap->va_bytes = VNOVAL;
|
|
|
|
}
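/*
 * Typical use (illustrative sketch): a caller preparing a VOP_SETATTR()
 * request clears the structure first and fills in only the fields it
 * wants changed:
 *
 *	struct vattr va;
 *
 *	vattr_null(&va);
 *	va.va_size = newsize;
 *	error = VOP_SETATTR(vp, &va, cred);
 *
 * Fields still holding VNOVAL are ignored by the file system.
 */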
|
|
|
|
|
|
|
|
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
|
|
|
|
#define ARRAY_PRINT(idx, arr) \
|
2009-03-30 20:38:05 +04:00
|
|
|
((unsigned int)(idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")
|
2008-07-17 00:06:19 +04:00
|
|
|
|
|
|
|
const char * const vnode_tags[] = { VNODE_TAGS };
|
|
|
|
const char * const vnode_types[] = { VNODE_TYPES };
|
|
|
|
const char vnode_flagbits[] = VNODE_FLAGBITS;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Print out a description of a vnode.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
vprint(const char *label, struct vnode *vp)
|
|
|
|
{
|
|
|
|
struct vnlock *vl;
|
|
|
|
char bf[96];
|
|
|
|
int flag;
|
|
|
|
|
|
|
|
vl = (vp->v_vnlock != NULL ? vp->v_vnlock : &vp->v_lock);
|
|
|
|
flag = vp->v_iflag | vp->v_vflag | vp->v_uflag;
|
2008-12-17 01:35:21 +03:00
|
|
|
snprintb(bf, sizeof(bf), vnode_flagbits, flag);
|
2008-07-17 00:06:19 +04:00
|
|
|
|
|
|
|
if (label != NULL)
|
|
|
|
printf("%s: ", label);
|
|
|
|
printf("vnode @ %p, flags (%s)\n\ttag %s(%d), type %s(%d), "
|
|
|
|
"usecount %d, writecount %d, holdcount %d\n"
|
|
|
|
"\tfreelisthd %p, mount %p, data %p lock %p recursecnt %d\n",
|
|
|
|
vp, bf, ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
|
|
|
|
ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
|
|
|
|
vp->v_usecount, vp->v_writecount, vp->v_holdcnt,
|
|
|
|
vp->v_freelisthd, vp->v_mount, vp->v_data, vl, vl->vl_recursecnt);
|
|
|
|
if (vp->v_data != NULL) {
|
|
|
|
printf("\t");
|
|
|
|
VOP_PRINT(vp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef DEBUG
|
|
|
|
/*
|
|
|
|
* List all of the locked vnodes in the system.
|
|
|
|
* Called when debugging the kernel.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
printlockedvnodes(void)
|
|
|
|
{
|
|
|
|
struct mount *mp, *nmp;
|
|
|
|
struct vnode *vp;
|
|
|
|
|
|
|
|
printf("Locked vnodes\n");
|
|
|
|
mutex_enter(&mountlist_lock);
|
|
|
|
for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
|
|
|
|
mp = nmp) {
|
|
|
|
if (vfs_busy(mp, &nmp)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
|
|
|
|
if (VOP_ISLOCKED(vp))
|
|
|
|
vprint(NULL, vp);
|
|
|
|
}
|
|
|
|
mutex_enter(&mountlist_lock);
|
|
|
|
vfs_unbusy(mp, false, &nmp);
|
|
|
|
}
|
|
|
|
mutex_exit(&mountlist_lock);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2009-06-23 23:36:38 +04:00
|
|
|
/* Deprecated. Kept for KPI compatibility. */
|
2008-07-17 00:06:19 +04:00
|
|
|
int
|
|
|
|
vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
|
|
|
|
mode_t acc_mode, kauth_cred_t cred)
|
|
|
|
{
|
|
|
|
|
2009-06-23 23:36:38 +04:00
|
|
|
#ifdef DIAGNOSTIC
|
2009-06-24 03:04:11 +04:00
|
|
|
printf("vaccess: deprecated interface used.\n");
|
2009-06-23 23:36:38 +04:00
|
|
|
#endif /* DIAGNOSTIC */
|
2008-07-17 00:06:19 +04:00
|
|
|
|
2009-06-23 23:36:38 +04:00
|
|
|
return genfs_can_access(type, file_mode, uid, gid, acc_mode, cred);
|
2008-07-17 00:06:19 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Given a file system name, look up the vfsops for that
|
|
|
|
* file system, or return NULL if file system isn't present
|
|
|
|
* in the kernel.
|
|
|
|
*/
|
|
|
|
struct vfsops *
|
|
|
|
vfs_getopsbyname(const char *name)
|
|
|
|
{
|
|
|
|
struct vfsops *v;
|
|
|
|
|
|
|
|
mutex_enter(&vfs_list_lock);
|
|
|
|
LIST_FOREACH(v, &vfs_list, vfs_list) {
|
|
|
|
if (strcmp(v->vfs_name, name) == 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (v != NULL)
|
|
|
|
v->vfs_refcount++;
|
|
|
|
mutex_exit(&vfs_list_lock);
|
|
|
|
|
|
|
|
return (v);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
|
|
|
|
{
|
|
|
|
const struct statvfs *mbp;
|
|
|
|
|
|
|
|
if (sbp == (mbp = &mp->mnt_stat))
|
|
|
|
return;
|
|
|
|
|
|
|
|
(void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
|
|
|
|
sbp->f_fsid = mbp->f_fsid;
|
|
|
|
sbp->f_owner = mbp->f_owner;
|
|
|
|
sbp->f_flag = mbp->f_flag;
|
|
|
|
sbp->f_syncwrites = mbp->f_syncwrites;
|
|
|
|
sbp->f_asyncwrites = mbp->f_asyncwrites;
|
|
|
|
sbp->f_syncreads = mbp->f_syncreads;
|
|
|
|
sbp->f_asyncreads = mbp->f_asyncreads;
|
|
|
|
(void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
|
|
|
|
(void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
|
|
|
|
sizeof(sbp->f_fstypename));
|
|
|
|
(void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
|
|
|
|
sizeof(sbp->f_mntonname));
|
|
|
|
(void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
|
|
|
|
sizeof(sbp->f_mntfromname));
|
|
|
|
sbp->f_namemax = mbp->f_namemax;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
|
|
|
|
const char *vfsname, struct mount *mp, struct lwp *l)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
size_t size;
|
|
|
|
struct statvfs *sfs = &mp->mnt_stat;
|
|
|
|
int (*fun)(const void *, void *, size_t, size_t *);
|
|
|
|
|
|
|
|
(void)strlcpy(mp->mnt_stat.f_fstypename, vfsname,
|
|
|
|
sizeof(mp->mnt_stat.f_fstypename));
|
|
|
|
|
|
|
|
if (onp) {
|
|
|
|
struct cwdinfo *cwdi = l->l_proc->p_cwdi;
|
|
|
|
fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
|
|
|
|
if (cwdi->cwdi_rdir != NULL) {
|
|
|
|
size_t len;
|
|
|
|
char *bp;
|
|
|
|
char *path = PNBUF_GET();
|
|
|
|
|
|
|
|
bp = path + MAXPATHLEN;
|
|
|
|
*--bp = '\0';
|
|
|
|
rw_enter(&cwdi->cwdi_lock, RW_READER);
|
|
|
|
error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
|
|
|
|
path, MAXPATHLEN / 2, 0, l);
|
|
|
|
rw_exit(&cwdi->cwdi_lock);
|
|
|
|
if (error) {
|
|
|
|
PNBUF_PUT(path);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
len = strlen(bp);
|
|
|
|
if (len > sizeof(sfs->f_mntonname) - 1)
|
|
|
|
len = sizeof(sfs->f_mntonname) - 1;
|
|
|
|
(void)strncpy(sfs->f_mntonname, bp, len);
|
|
|
|
PNBUF_PUT(path);
|
|
|
|
|
|
|
|
if (len < sizeof(sfs->f_mntonname) - 1) {
|
|
|
|
error = (*fun)(onp, &sfs->f_mntonname[len],
|
|
|
|
sizeof(sfs->f_mntonname) - len - 1, &size);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
size += len;
|
|
|
|
} else {
|
|
|
|
size = len;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
error = (*fun)(onp, &sfs->f_mntonname,
|
|
|
|
sizeof(sfs->f_mntonname) - 1, &size);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
(void)memset(sfs->f_mntonname + size, 0,
|
|
|
|
sizeof(sfs->f_mntonname) - size);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (fromp) {
|
|
|
|
fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
|
|
|
|
error = (*fun)(fromp, sfs->f_mntfromname,
|
|
|
|
sizeof(sfs->f_mntfromname) - 1, &size);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
(void)memset(sfs->f_mntfromname + size, 0,
|
|
|
|
sizeof(sfs->f_mntfromname) - size);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
vfs_timestamp(struct timespec *ts)
|
|
|
|
{
|
|
|
|
|
|
|
|
nanotime(ts);
|
|
|
|
}
|
|
|
|
|
|
|
|
time_t rootfstime; /* recorded root fs time, if known */
|
|
|
|
void
|
|
|
|
setrootfstime(time_t t)
|
|
|
|
{
|
|
|
|
rootfstime = t;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Sham lock manager for vnodes. This is a temporary measure.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
vlockmgr(struct vnlock *vl, int flags)
|
|
|
|
{
|
|
|
|
|
|
|
|
KASSERT((flags & ~(LK_CANRECURSE | LK_NOWAIT | LK_TYPE_MASK)) == 0);
|
|
|
|
|
|
|
|
switch (flags & LK_TYPE_MASK) {
|
|
|
|
case LK_SHARED:
|
|
|
|
if (rw_tryenter(&vl->vl_lock, RW_READER)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if ((flags & LK_NOWAIT) != 0) {
|
|
|
|
return EBUSY;
|
|
|
|
}
|
|
|
|
rw_enter(&vl->vl_lock, RW_READER);
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
case LK_EXCLUSIVE:
|
|
|
|
if (rw_tryenter(&vl->vl_lock, RW_WRITER)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if ((vl->vl_canrecurse || (flags & LK_CANRECURSE) != 0) &&
|
|
|
|
rw_write_held(&vl->vl_lock)) {
|
|
|
|
vl->vl_recursecnt++;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if ((flags & LK_NOWAIT) != 0) {
|
|
|
|
return EBUSY;
|
2008-01-30 12:50:19 +03:00
|
|
|
}
|
|
|
|
rw_enter(&vl->vl_lock, RW_WRITER);
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
case LK_RELEASE:
|
|
|
|
if (vl->vl_recursecnt != 0) {
|
|
|
|
KASSERT(rw_write_held(&vl->vl_lock));
|
|
|
|
vl->vl_recursecnt--;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
rw_exit(&vl->vl_lock);
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
default:
|
|
|
|
panic("vlockmgr: flags %x", flags);
|
|
|
|
}
|
|
|
|
}
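/*
 * Illustrative mapping (sketch): vlockmgr() layers the old lockmgr()
 * style requests onto a plain krwlock, roughly
 *
 *	LK_SHARED	-> rw_enter(&vl->vl_lock, RW_READER)
 *	LK_EXCLUSIVE	-> rw_enter(&vl->vl_lock, RW_WRITER)
 *	LK_RELEASE	-> rw_exit(&vl->vl_lock)
 *
 * with recursive exclusive holds counted in vl_recursecnt rather than
 * in the rwlock itself.
 */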
|
|
|
|
|
|
|
|
int
|
|
|
|
vlockstatus(struct vnlock *vl)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (rw_write_held(&vl->vl_lock)) {
|
|
|
|
return LK_EXCLUSIVE;
|
|
|
|
}
|
|
|
|
if (rw_read_held(&vl->vl_lock)) {
|
|
|
|
return LK_SHARED;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2008-07-17 00:06:19 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* mount_specific_key_create --
|
|
|
|
* Create a key for subsystem mount-specific data.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (specificdata_key_create(mount_specificdata_domain, keyp, dtor));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* mount_specific_key_delete --
|
|
|
|
* Delete a key for subsystem mount-specific data.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
mount_specific_key_delete(specificdata_key_t key)
|
|
|
|
{
|
|
|
|
|
|
|
|
specificdata_key_delete(mount_specificdata_domain, key);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* mount_initspecific --
|
|
|
|
* Initialize a mount's specificdata container.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
mount_initspecific(struct mount *mp)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
error = specificdata_init(mount_specificdata_domain,
|
|
|
|
&mp->mnt_specdataref);
|
|
|
|
KASSERT(error == 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* mount_finispecific --
|
|
|
|
* Finalize a mount's specificdata container.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
mount_finispecific(struct mount *mp)
|
|
|
|
{
|
|
|
|
|
|
|
|
specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* mount_getspecific --
|
|
|
|
* Return mount-specific data corresponding to the specified key.
|
|
|
|
*/
|
|
|
|
void *
|
|
|
|
mount_getspecific(struct mount *mp, specificdata_key_t key)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (specificdata_getspecific(mount_specificdata_domain,
|
|
|
|
&mp->mnt_specdataref, key));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* mount_setspecific --
|
|
|
|
* Set mount-specific data corresponding to the specified key.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
|
|
|
|
{
|
|
|
|
|
|
|
|
specificdata_setspecific(mount_specificdata_domain,
|
|
|
|
&mp->mnt_specdataref, key, data);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
VFS_MOUNT(struct mount *mp, const char *a, void *b, size_t *c)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
KERNEL_LOCK(1, NULL);
|
|
|
|
error = (*(mp->mnt_op->vfs_mount))(mp, a, b, c);
|
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
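/*
 * Illustrative note on the wrappers that follow: most bracket the file
 * system call with the big kernel lock unless the mount is flagged
 * IMNT_MPSAFE, roughly
 *
 *	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0)
 *		KERNEL_LOCK(1, NULL);
 *	error = (*(mp->mnt_op->vfs_<op>))(mp, ...);
 *	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0)
 *		KERNEL_UNLOCK_ONE(NULL);
 *
 * where "<op>" stands for the operation being wrapped.  VFS_MOUNT()
 * above, VFS_UNMOUNT() and VFS_EXTATTRCTL() take the lock
 * unconditionally, and VFS_VPTOFH() keys off VV_MPSAFE on the vnode.
 */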
|
|
|
|
|
|
|
|
int
|
|
|
|
VFS_START(struct mount *mp, int a)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
|
|
|
|
KERNEL_LOCK(1, NULL);
|
|
|
|
}
|
|
|
|
error = (*(mp->mnt_op->vfs_start))(mp, a);
|
|
|
|
if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
|
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
VFS_UNMOUNT(struct mount *mp, int a)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
KERNEL_LOCK(1, NULL);
|
|
|
|
error = (*(mp->mnt_op->vfs_unmount))(mp, a);
|
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
VFS_ROOT(struct mount *mp, struct vnode **a)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
|
|
|
|
KERNEL_LOCK(1, NULL);
|
|
|
|
}
|
|
|
|
error = (*(mp->mnt_op->vfs_root))(mp, a);
|
|
|
|
if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
|
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
VFS_QUOTACTL(struct mount *mp, int a, uid_t b, void *c)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
|
|
|
|
KERNEL_LOCK(1, NULL);
|
|
|
|
}
|
|
|
|
error = (*(mp->mnt_op->vfs_quotactl))(mp, a, b, c);
|
|
|
|
if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
|
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
VFS_STATVFS(struct mount *mp, struct statvfs *a)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
|
|
|
|
KERNEL_LOCK(1, NULL);
|
|
|
|
}
|
|
|
|
error = (*(mp->mnt_op->vfs_statvfs))(mp, a);
|
|
|
|
if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
|
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
VFS_SYNC(struct mount *mp, int a, struct kauth_cred *b)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
|
|
|
|
KERNEL_LOCK(1, NULL);
|
|
|
|
}
|
|
|
|
error = (*(mp->mnt_op->vfs_sync))(mp, a, b);
|
|
|
|
if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
|
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
VFS_FHTOVP(struct mount *mp, struct fid *a, struct vnode **b)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
|
|
|
|
KERNEL_LOCK(1, NULL);
|
|
|
|
}
|
|
|
|
error = (*(mp->mnt_op->vfs_fhtovp))(mp, a, b);
|
|
|
|
if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
|
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
VFS_VPTOFH(struct vnode *vp, struct fid *a, size_t *b)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
if ((vp->v_vflag & VV_MPSAFE) == 0) {
|
|
|
|
KERNEL_LOCK(1, NULL);
|
|
|
|
}
|
|
|
|
error = (*(vp->v_mount->mnt_op->vfs_vptofh))(vp, a, b);
|
|
|
|
if ((vp->v_vflag & VV_MPSAFE) == 0) {
|
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
VFS_SNAPSHOT(struct mount *mp, struct vnode *a, struct timespec *b)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
|
|
|
|
KERNEL_LOCK(1, NULL);
|
|
|
|
}
|
|
|
|
error = (*(mp->mnt_op->vfs_snapshot))(mp, a, b);
|
|
|
|
if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
|
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
VFS_EXTATTRCTL(struct mount *mp, int a, struct vnode *b, int c, const char *d)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
KERNEL_LOCK(1, NULL); /* XXXSMP check ffs */
|
|
|
|
error = (*(mp->mnt_op->vfs_extattrctl))(mp, a, b, c, d);
|
|
|
|
KERNEL_UNLOCK_ONE(NULL); /* XXX */
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
VFS_SUSPENDCTL(struct mount *mp, int a)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
|
|
|
|
KERNEL_LOCK(1, NULL);
|
|
|
|
}
|
|
|
|
error = (*(mp->mnt_op->vfs_suspendctl))(mp, a);
|
|
|
|
if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
|
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2009-05-03 20:52:54 +04:00
|
|
|
#if defined(DDB) || defined(DEBUGPRINT)
|
2008-07-17 00:06:19 +04:00
|
|
|
static const char buf_flagbits[] = BUF_FLAGBITS;
|
|
|
|
|
|
|
|
void
|
|
|
|
vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...))
|
|
|
|
{
|
|
|
|
char bf[1024];
|
|
|
|
|
|
|
|
(*pr)(" vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" rawblkno 0x%"
|
|
|
|
PRIx64 " dev 0x%x\n",
|
|
|
|
bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev);
|
|
|
|
|
2008-12-17 01:35:21 +03:00
|
|
|
snprintb(bf, sizeof(bf),
|
|
|
|
buf_flagbits, bp->b_flags | bp->b_oflags | bp->b_cflags);
|
2008-07-17 00:06:19 +04:00
|
|
|
(*pr)(" error %d flags 0x%s\n", bp->b_error, bf);
|
|
|
|
|
|
|
|
(*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
|
|
|
|
bp->b_bufsize, bp->b_bcount, bp->b_resid);
|
2009-02-22 23:28:05 +03:00
|
|
|
(*pr)(" data %p saveaddr %p\n",
|
|
|
|
bp->b_data, bp->b_saveaddr);
|
2008-07-17 00:06:19 +04:00
|
|
|
(*pr)(" iodone %p objlock %p\n", bp->b_iodone, bp->b_objlock);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...))
|
|
|
|
{
|
|
|
|
char bf[256];
|
|
|
|
|
|
|
|
uvm_object_printit(&vp->v_uobj, full, pr);
|
2008-12-17 01:35:21 +03:00
|
|
|
snprintb(bf, sizeof(bf),
|
|
|
|
vnode_flagbits, vp->v_iflag | vp->v_vflag | vp->v_uflag);
|
2008-07-17 00:06:19 +04:00
|
|
|
(*pr)("\nVNODE flags %s\n", bf);
|
|
|
|
(*pr)("mp %p numoutput %d size 0x%llx writesize 0x%llx\n",
|
|
|
|
vp->v_mount, vp->v_numoutput, vp->v_size, vp->v_writesize);
|
|
|
|
|
|
|
|
(*pr)("data %p writecount %ld holdcnt %ld\n",
|
|
|
|
vp->v_data, vp->v_writecount, vp->v_holdcnt);
|
|
|
|
|
|
|
|
(*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
|
|
|
|
ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
|
|
|
|
ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
|
|
|
|
vp->v_mount, vp->v_mountedhere);
|
|
|
|
|
|
|
|
(*pr)("v_lock %p v_vnlock %p\n", &vp->v_lock, vp->v_vnlock);
|
|
|
|
|
|
|
|
if (full) {
|
|
|
|
struct buf *bp;
|
|
|
|
|
|
|
|
(*pr)("clean bufs:\n");
|
|
|
|
LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
|
|
|
|
(*pr)(" bp %p\n", bp);
|
|
|
|
vfs_buf_print(bp, full, pr);
|
|
|
|
}
|
|
|
|
|
|
|
|
(*pr)("dirty bufs:\n");
|
|
|
|
LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
|
|
|
|
(*pr)(" bp %p\n", bp);
|
|
|
|
vfs_buf_print(bp, full, pr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...))
|
|
|
|
{
|
|
|
|
char sbuf[256];
|
|
|
|
|
|
|
|
(*pr)("vnodecovered = %p syncer = %p data = %p\n",
|
|
|
|
mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data);
|
|
|
|
|
|
|
|
(*pr)("fs_bshift %d dev_bshift = %d\n",
|
|
|
|
mp->mnt_fs_bshift,mp->mnt_dev_bshift);
|
|
|
|
|
2008-12-17 01:35:21 +03:00
|
|
|
snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_flag);
|
2008-07-17 00:06:19 +04:00
|
|
|
(*pr)("flag = %s\n", sbuf);
|
|
|
|
|
2008-12-17 01:35:21 +03:00
|
|
|
snprintb(sbuf, sizeof(sbuf), __IMNT_FLAG_BITS, mp->mnt_iflag);
|
2008-07-17 00:06:19 +04:00
|
|
|
(*pr)("iflag = %s\n", sbuf);
|
|
|
|
|
|
|
|
(*pr)("refcnt = %d unmounting @ %p updating @ %p\n", mp->mnt_refcnt,
|
|
|
|
&mp->mnt_unmounting, &mp->mnt_updating);
|
|
|
|
|
|
|
|
(*pr)("statvfs cache:\n");
|
|
|
|
(*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize);
|
|
|
|
(*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize);
|
|
|
|
(*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize);
|
|
|
|
|
|
|
|
(*pr)("\tblocks = %"PRIu64"\n",mp->mnt_stat.f_blocks);
|
|
|
|
(*pr)("\tbfree = %"PRIu64"\n",mp->mnt_stat.f_bfree);
|
|
|
|
(*pr)("\tbavail = %"PRIu64"\n",mp->mnt_stat.f_bavail);
|
|
|
|
(*pr)("\tbresvd = %"PRIu64"\n",mp->mnt_stat.f_bresvd);
|
|
|
|
|
|
|
|
(*pr)("\tfiles = %"PRIu64"\n",mp->mnt_stat.f_files);
|
|
|
|
(*pr)("\tffree = %"PRIu64"\n",mp->mnt_stat.f_ffree);
|
|
|
|
(*pr)("\tfavail = %"PRIu64"\n",mp->mnt_stat.f_favail);
|
|
|
|
(*pr)("\tfresvd = %"PRIu64"\n",mp->mnt_stat.f_fresvd);
|
|
|
|
|
|
|
|
(*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
|
|
|
|
mp->mnt_stat.f_fsidx.__fsid_val[0],
|
|
|
|
mp->mnt_stat.f_fsidx.__fsid_val[1]);
|
|
|
|
|
|
|
|
(*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner);
|
|
|
|
(*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax);
|
|
|
|
|
2008-12-17 01:35:21 +03:00
|
|
|
snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_stat.f_flag);
|
|
|
|
|
2008-07-17 00:06:19 +04:00
|
|
|
(*pr)("\tflag = %s\n",sbuf);
|
|
|
|
(*pr)("\tsyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_syncwrites);
|
|
|
|
(*pr)("\tasyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_asyncwrites);
|
|
|
|
(*pr)("\tsyncreads = %" PRIu64 "\n",mp->mnt_stat.f_syncreads);
|
|
|
|
(*pr)("\tasyncreads = %" PRIu64 "\n",mp->mnt_stat.f_asyncreads);
|
|
|
|
(*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename);
|
|
|
|
(*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname);
|
|
|
|
(*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname);
|
|
|
|
|
|
|
|
{
|
|
|
|
int cnt = 0;
|
|
|
|
struct vnode *vp;
|
|
|
|
(*pr)("locked vnodes =");
|
|
|
|
TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
|
|
|
|
if (VOP_ISLOCKED(vp)) {
|
|
|
|
if ((++cnt % 6) == 0) {
|
|
|
|
(*pr)(" %p,\n\t", vp);
|
|
|
|
} else {
|
|
|
|
(*pr)(" %p,", vp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
(*pr)("\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (full) {
|
|
|
|
int cnt = 0;
|
|
|
|
struct vnode *vp;
|
|
|
|
(*pr)("all vnodes =");
|
|
|
|
TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
|
|
|
|
if (!TAILQ_NEXT(vp, v_mntvnodes)) {
|
|
|
|
(*pr)(" %p", vp);
|
|
|
|
} else if ((++cnt % 6) == 0) {
|
|
|
|
(*pr)(" %p,\n\t", vp);
|
|
|
|
} else {
|
|
|
|
(*pr)(" %p,", vp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
		(*pr)("\n");
|
|
|
|
}
|
|
|
|
}
|
2009-05-03 20:52:54 +04:00
|
|
|
#endif /* DDB || DEBUGPRINT */
|
2009-04-20 22:06:26 +04:00
|
|
|
|
2009-10-06 08:28:10 +04:00
|
|
|
/*
|
|
|
|
* Check if a device pointed to by vp is mounted.
|
|
|
|
*
|
|
|
|
* Returns:
|
|
|
|
* EINVAL if it's not a disk
|
|
|
|
* EBUSY if it's a disk and mounted
|
|
|
|
* 0 if it's a disk and not mounted
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
rawdev_mounted(struct vnode *vp, struct vnode **bvpp)
|
|
|
|
{
|
|
|
|
struct vnode *bvp;
|
|
|
|
dev_t dev;
|
|
|
|
int d_type;
|
|
|
|
|
|
|
|
bvp = NULL;
|
|
|
|
dev = vp->v_rdev;
|
|
|
|
d_type = D_OTHER;
|
|
|
|
|
|
|
|
if (iskmemvp(vp))
|
|
|
|
return EINVAL;
|
|
|
|
|
|
|
|
switch (vp->v_type) {
|
|
|
|
case VCHR: {
|
|
|
|
const struct cdevsw *cdev;
|
|
|
|
|
|
|
|
cdev = cdevsw_lookup(dev);
|
|
|
|
if (cdev != NULL) {
|
|
|
|
dev_t blkdev;
|
|
|
|
|
|
|
|
blkdev = devsw_chr2blk(dev);
|
|
|
|
if (blkdev != NODEV) {
|
|
|
|
vfinddev(blkdev, VBLK, &bvp);
|
|
|
|
if (bvp != NULL)
|
|
|
|
d_type = (cdev->d_flag & D_TYPEMASK);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case VBLK: {
|
|
|
|
const struct bdevsw *bdev;
|
|
|
|
|
|
|
|
bdev = bdevsw_lookup(dev);
|
|
|
|
if (bdev != NULL)
|
|
|
|
d_type = (bdev->d_flag & D_TYPEMASK);
|
|
|
|
|
|
|
|
bvp = vp;
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (d_type != D_DISK)
|
|
|
|
return EINVAL;
|
|
|
|
|
|
|
|
if (bvpp != NULL)
|
|
|
|
*bvpp = bvp;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* XXX: This is bogus. We should be failing the request
|
|
|
|
* XXX: not only if this specific slice is mounted, but
|
|
|
|
* XXX: if it's on a disk with any other mounted slice.
|
|
|
|
*/
|
|
|
|
if (vfs_mountedon(bvp))
|
|
|
|
return EBUSY;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|