/* $NetBSD: mfs_vfsops.c,v 1.102 2010/03/02 17:20:02 pooka Exp $ */
/*
* Copyright (c) 1989, 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)mfs_vfsops.c 8.11 (Berkeley) 6/19/95
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: mfs_vfsops.c,v 1.102 2010/03/02 17:20:02 pooka Exp $");
#if defined(_KERNEL_OPT)
#include "opt_compat_netbsd.h"
#endif
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/mount.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
#include <ufs/mfs/mfsnode.h>
#include <ufs/mfs/mfs_extern.h>
2008-11-13 14:10:41 +03:00
MODULE(MODULE_CLASS_VFS, mfs, "ffs");
kmutex_t mfs_lock; /* global lock */
/* used for building internal dev_t, minor == 0 reserved for miniroot */
static int mfs_minor = 1;
static int mfs_initcnt;
extern int (**mfs_vnodeop_p)(void *);
static struct sysctllog *mfs_sysctl_log;
/*
 * mfs vfs operations.
 *
 * NULL-terminated list of the vnode operation vector descriptions that
 * belong to mfs; it is handed to the VFS layer through mfs_vfsops.
 */
extern const struct vnodeopv_desc mfs_vnodeop_opv_desc;

const struct vnodeopv_desc * const mfs_vnodeopv_descs[] = {
	&mfs_vnodeop_opv_desc,
	NULL,
};
struct vfsops mfs_vfsops = {
MOUNT_MFS,
sizeof (struct mfs_args),
mfs_mount,
mfs_start,
ffs_unmount,
ufs_root,
ufs_quotactl,
mfs_statvfs,
ffs_sync,
ffs_vget,
ffs_fhtovp,
ffs_vptofh,
mfs_init,
mfs_reinit,
mfs_done,
NULL,
(int (*)(struct mount *, struct vnode *, struct timespec *)) eopnotsupp,
vfs_stdextattrctl,
(void *)eopnotsupp, /* vfs_suspendctl */
genfs_renamelock_enter,
genfs_renamelock_exit,
(void *)eopnotsupp,
mfs_vnodeopv_descs,
2006-09-02 11:04:01 +04:00
0,
{ NULL, NULL },
};
/*
 * Module control entry point.
 *
 * MODULE_CMD_INIT attaches mfs_vfsops and, if that worked, creates the
 * "vfs" and "mfs" sysctl nodes (the results of sysctl_createv() are
 * deliberately not checked).  MODULE_CMD_FINI detaches the vfsops and
 * tears the sysctl nodes down only when the detach succeeded.  Every
 * other command is rejected with ENOTTY.
 *
 * Returns 0 on success or the error from vfs_attach()/vfs_detach().
 */
static int
mfs_modcmd(modcmd_t cmd, void *arg)
{
	int rv;

	switch (cmd) {
	case MODULE_CMD_INIT:
		rv = vfs_attach(&mfs_vfsops);
		if (rv == 0) {
			sysctl_createv(&mfs_sysctl_log, 0, NULL, NULL,
			    CTLFLAG_PERMANENT,
			    CTLTYPE_NODE, "vfs", NULL,
			    NULL, 0, NULL, 0,
			    CTL_VFS, CTL_EOL);
			sysctl_createv(&mfs_sysctl_log, 0, NULL, NULL,
			    CTLFLAG_PERMANENT|CTLFLAG_ALIAS,
			    CTLTYPE_NODE, "mfs",
			    SYSCTL_DESCR("Memory based file system"),
			    NULL, 1, NULL, 0,
			    CTL_VFS, 3, CTL_EOL);
			/*
			 * XXX the "1" and the "3" above could be dynamic,
			 * thereby eliminating one more instance of the
			 * "number to vfs" mapping problem, but they are in
			 * order as taken from sys/mount.h
			 */
		}
		return rv;

	case MODULE_CMD_FINI:
		rv = vfs_detach(&mfs_vfsops);
		if (rv == 0)
			sysctl_teardown(&mfs_sysctl_log);
		return rv;

	default:
		return ENOTTY;
	}
}
2005-02-27 01:31:44 +03:00
/*
1998-03-01 05:20:01 +03:00
* Memory based filesystem initialization.
2005-02-27 01:31:44 +03:00
*/
1998-03-01 05:20:01 +03:00
void
mfs_init(void)
1998-03-01 05:20:01 +03:00
{
if (mfs_initcnt++ == 0) {
mutex_init(&mfs_lock, MUTEX_DEFAULT, IPL_NONE);
ffs_init();
}
1998-03-01 05:20:01 +03:00
}
/*
 * Re-initialisation hook: mfs keeps no state of its own to rebuild, so
 * the request is passed straight on to ffs_reinit().
 */
void
mfs_reinit(void)
{

	ffs_reinit();
}
/*
 * Counterpart of mfs_init(): drop one initialisation reference and,
 * when the last one goes away, call ffs_done() and destroy mfs_lock.
 */
void
mfs_done(void)
{

	mfs_initcnt--;
	if (mfs_initcnt != 0)
		return;		/* still in use by another initialiser */

	ffs_done();
	mutex_destroy(&mfs_lock);
}
1998-03-01 05:20:01 +03:00
/*
* Called by main() when mfs is going to be mounted as root.
*/
1996-02-10 01:31:27 +03:00
int
mfs_mountroot(void)
{
1998-03-01 05:20:01 +03:00
struct fs *fs;
struct mount *mp;
2005-12-11 15:16:03 +03:00
struct lwp *l = curlwp; /* XXX */
struct ufsmount *ump;
struct mfsnode *mfsp;
1998-03-01 05:20:01 +03:00
int error = 0;
if ((error = vfs_rootmountalloc(MOUNT_MFS, "mfs_root", &mp))) {
vrele(rootvp);
1998-03-01 05:20:01 +03:00
return (error);
}
mfsp = kmem_alloc(sizeof(*mfsp), KM_SLEEP);
rootvp->v_data = mfsp;
rootvp->v_op = mfs_vnodeop_p;
rootvp->v_tag = VT_MFS;
mfsp->mfs_baseoff = mfs_rootbase;
mfsp->mfs_size = mfs_rootsize;
mfsp->mfs_vnode = rootvp;
mfsp->mfs_proc = NULL; /* indicate kernel space */
mfsp->mfs_shutdown = 0;
cv_init(&mfsp->mfs_cv, "mfs");
mfsp->mfs_refcnt = 1;
bufq_alloc(&mfsp->mfs_buflist, "fcfs", 0);
2005-12-11 15:16:03 +03:00
if ((error = ffs_mountfs(rootvp, mp, l)) != 0) {
vfs_unbusy(mp, false, NULL);
bufq_free(mfsp->mfs_buflist);
vfs_destroy(mp);
kmem_free(mfsp, sizeof(*mfsp));
return (error);
2005-02-27 01:31:44 +03:00
}
mutex_enter(&mountlist_lock);
CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
mutex_exit(&mountlist_lock);
mp->mnt_vnodecovered = NULLVP;
ump = VFSTOUFS(mp);
fs = ump->um_fs;
1998-03-01 05:20:01 +03:00
(void) copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
(void)ffs_statvfs(mp, &mp->mnt_stat);
vfs_unbusy(mp, false, NULL);
return (0);
}
/*
 * VFS Operations.
 *
 * mount system call
 *
 * mp       - mount structure being created, updated or queried
 * path     - mount point path (a user-space string, passed on to
 *            set_statvfs_info())
 * data     - struct mfs_args supplied by the caller
 * data_len - in: size of *data; out (MNT_GETARGS only): sizeof(*args)
 *
 * Three cases are handled, selected by mp->mnt_flag:
 *   MNT_GETARGS - report base and size of the existing mount in *args;
 *   MNT_UPDATE  - handle read-only <-> read/write transitions only;
 *   otherwise   - create a new device vnode plus mfsnode for the memory
 *                 region described by args and mount it via ffs_mountfs().
 *
 * Returns 0 on success, EINVAL for missing or short arguments, EIO if
 * MNT_GETARGS finds the mount incompletely set up, or the error of the
 * failing helper.
 */
/* ARGSUSED */
int
mfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
{
	struct lwp *l = curlwp;
	struct vnode *devvp;
	struct mfs_args *args = data;
	struct ufsmount *ump;
	struct fs *fs;
	struct mfsnode *mfsp;
	struct proc *p;
	int flags, error = 0;

	/* args is dereferenced on every path below; refuse a missing one. */
	if (args == NULL)
		return EINVAL;
	if (*data_len < sizeof *args)
		return EINVAL;

	p = l->l_proc;

	if (mp->mnt_flag & MNT_GETARGS) {
		struct vnode *vp;

		ump = VFSTOUFS(mp);
		if (ump == NULL)
			return EIO;

		vp = ump->um_devvp;
		if (vp == NULL)
			return EIO;

		mfsp = VTOMFS(vp);
		if (mfsp == NULL)
			return EIO;

		args->fspec = NULL;
		args->base = mfsp->mfs_baseoff;
		args->size = mfsp->mfs_size;
		*data_len = sizeof *args;
		return 0;
	}

	/*
	 * XXX turn off async to avoid hangs when writing lots of data.
	 * the problem is that MFS needs to allocate pages to clean pages,
	 * so if we wait until the last minute to clean pages then there
	 * may not be any pages available to do the cleaning.
	 * ... and since the default partially-synchronous mode turns out
	 * to not be sufficient under heavy load, make it full synchronous.
	 */
	mp->mnt_flag &= ~MNT_ASYNC;
	mp->mnt_flag |= MNT_SYNCHRONOUS;

	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			/* Going read-only: flush files open for writing. */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			error = ffs_flushfiles(mp, flags, l);
			if (error)
				return (error);
		}
		if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR))
			fs->fs_ronly = 0;
		if (args->fspec == NULL)
			return EINVAL;
		return (0);
	}

	/* New mount: build a block device vnode with a unique minor. */
	error = getnewvnode(VT_MFS, (struct mount *)0, mfs_vnodeop_p, &devvp);
	if (error)
		return (error);
	devvp->v_vflag |= VV_MPSAFE;
	devvp->v_type = VBLK;
	spec_node_init(devvp, makedev(255, mfs_minor));
	mfs_minor++;

	/* Describe the memory region of the mounting process. */
	mfsp = kmem_alloc(sizeof(*mfsp), KM_SLEEP);
	devvp->v_data = mfsp;
	mfsp->mfs_baseoff = args->base;
	mfsp->mfs_size = args->size;
	mfsp->mfs_vnode = devvp;
	mfsp->mfs_proc = p;
	mfsp->mfs_shutdown = 0;
	cv_init(&mfsp->mfs_cv, "mfsidl");
	mfsp->mfs_refcnt = 1;
	bufq_alloc(&mfsp->mfs_buflist, "fcfs", 0);

	if ((error = ffs_mountfs(devvp, mp, l)) != 0) {
		mfsp->mfs_shutdown = 1;
		vrele(devvp);
		return (error);
	}

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
	    UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
	if (error) {
		/* XXX: cleanup on error */
		return error;
	}

	/* Keep the mount point name in the superblock, always terminated. */
	(void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
	    sizeof(fs->fs_fsmnt));
	fs->fs_fsmnt[sizeof(fs->fs_fsmnt) - 1] = '\0';
	return 0;
}
/*
* Used to grab the process and keep it in the kernel to service
* memory filesystem I/O requests.
*
* Loop servicing I/O requests.
* Copy the requested data into or out of the memory filesystem
* address space.
*/
/* ARGSUSED */
int
mfs_start(struct mount *mp, int flags)
{
struct vnode *vp;
struct mfsnode *mfsp;
2007-02-10 00:55:00 +03:00
struct proc *p;
struct buf *bp;
void *base;
int sleepreturn = 0, refcnt, error;
2007-02-10 00:55:00 +03:00
ksiginfoq_t kq;
/*
* Ensure that file system is still mounted when getting mfsnode.
* Add a reference to the mfsnode to prevent it disappearing in
* this routine.
*/
if ((error = vfs_busy(mp, NULL)) != 0)
return error;
vp = VFSTOUFS(mp)->um_devvp;
mfsp = VTOMFS(vp);
mutex_enter(&mfs_lock);
mfsp->mfs_refcnt++;
mutex_exit(&mfs_lock);
vfs_unbusy(mp, false, NULL);
base = mfsp->mfs_baseoff;
mutex_enter(&mfs_lock);
while (mfsp->mfs_shutdown != 1) {
while ((bp = bufq_get(mfsp->mfs_buflist)) != NULL) {
mutex_exit(&mfs_lock);
mfs_doio(bp, base);
mutex_enter(&mfs_lock);
}
/*
* If a non-ignored signal is received, try to unmount.
1998-03-01 05:20:01 +03:00
* If that fails, or the filesystem is already in the
* process of being unmounted, clear the signal (it has been
* "processed"), otherwise we will loop here, as tsleep
* will always return EINTR/ERESTART.
*/
1998-03-01 05:20:01 +03:00
if (sleepreturn != 0) {
mutex_exit(&mfs_lock);
if (dounmount(mp, 0, curlwp) != 0) {
p = curproc;
2007-02-10 00:55:00 +03:00
ksiginfo_queue_init(&kq);
mutex_enter(p->p_lock);
2007-02-10 00:55:00 +03:00
sigclearall(p, NULL, &kq);
mutex_exit(p->p_lock);
2007-02-10 00:55:00 +03:00
ksiginfo_queue_drain(&kq);
}
1998-03-01 05:20:01 +03:00
sleepreturn = 0;
mutex_enter(&mfs_lock);
continue;
}
sleepreturn = cv_wait_sig(&mfsp->mfs_cv, &mfs_lock);
}
KASSERT(bufq_peek(mfsp->mfs_buflist) == NULL);
refcnt = --mfsp->mfs_refcnt;
mutex_exit(&mfs_lock);
if (refcnt == 0) {
bufq_free(mfsp->mfs_buflist);
cv_destroy(&mfsp->mfs_cv);
kmem_free(mfsp, sizeof(*mfsp));
}
1998-03-01 05:20:01 +03:00
return (sleepreturn);
}
/*
* Get file system statistics.
*/
1996-02-10 01:31:27 +03:00
int
mfs_statvfs(struct mount *mp, struct statvfs *sbp)
{
int error;
error = ffs_statvfs(mp, sbp);
if (error)
return error;
(void)strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name,
sizeof(sbp->f_fstypename));
sbp->f_fstypename[sizeof(sbp->f_fstypename) - 1] = '\0';
return 0;
}