NetBSD/sys/compat/common/vfs_syscalls_20.c
ad 42d0626726 PR kern/38141 lookup/vfs_busy acquire rwlock recursively
Simplify the mount locking. Remove all the crud to deal with recursion on
the mount lock, and crud to deal with unmount as another weirdo lock.

Hopefully this will once and for all fix the deadlocks with this. With this
commit there are two locks on each mount:

- krwlock_t mnt_unmounting. This is used to prevent unmount across critical
  sections like getnewvnode(). It's only ever read locked with rw_tryenter(),
  and is only ever write locked in dounmount(). A write hold can't be taken
  on this lock if the current LWP could hold a vnode lock.

- kmutex_t mnt_updating. This is taken by threads updating the mount, for
  example when going r/o -> r/w, and is only present to serialize updates.
  In order to take this lock, a read hold must first be taken on
  mnt_unmounting, and the two need to be held across the operation.

One effect of this change: previously if an unmount failed, we would make a
half-hearted attempt to back out of it gracefully, but that was unlikely to
work in a lot of cases. Now while an unmount that will be aborted is in
progress, new file operations within the mount will fail instead of being
delayed. That is unlikely to be a problem though, because if the admin
requests unmount of a file system then (s)he has made a decision to deny
access to the resource.
2008-05-06 18:43:44 +00:00

302 lines
8.2 KiB
C

/* $NetBSD: vfs_syscalls_20.c,v 1.30 2008/05/06 18:43:44 ad Exp $ */
/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls_20.c,v 1.30 2008/05/06 18:43:44 ad Exp $");
#include "opt_compat_netbsd.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/dirent.h>
#include <sys/sysctl.h>
#include <sys/syscallargs.h>
#include <sys/kauth.h>
#include <compat/sys/mount.h>
#ifdef COMPAT_09
/*
 * Numeric file system type codes, only compiled in under COMPAT_09.
 * vfs2fs() stores one of these in the f_type member of struct statfs12
 * after looking the file system type name up in nv[] below.
 */
#define MOUNTNO_NONE 0
#define MOUNTNO_UFS 1 /* UNIX "Fast" Filesystem */
#define MOUNTNO_NFS 2 /* Network Filesystem */
#define MOUNTNO_MFS 3 /* Memory Filesystem */
#define MOUNTNO_MSDOS 4 /* MSDOS Filesystem */
#define MOUNTNO_CD9660 5 /* iso9660 cdrom */
#define MOUNTNO_FDESC 6 /* /dev/fd filesystem */
#define MOUNTNO_KERNFS 7 /* kernel variable filesystem */
#define MOUNTNO_DEVFS 8 /* device node filesystem */
#define MOUNTNO_AFS 9 /* AFS 3.x */
/*
 * Translation table from file system type name to numeric type code.
 * It is searched linearly with strcmp(); a name with no entry leaves
 * f_type at 0 (the value of MOUNTNO_NONE).  Note that MOUNTNO_DEVFS has
 * no entry in this table.
 */
static const struct {
const char *name;
const int value;
} nv[] = {
{ MOUNT_UFS, MOUNTNO_UFS },
{ MOUNT_NFS, MOUNTNO_NFS },
{ MOUNT_MFS, MOUNTNO_MFS },
{ MOUNT_MSDOS, MOUNTNO_MSDOS },
{ MOUNT_CD9660, MOUNTNO_CD9660 },
{ MOUNT_FDESC, MOUNTNO_FDESC },
{ MOUNT_KERNFS, MOUNTNO_KERNFS },
{ MOUNT_AFS, MOUNTNO_AFS },
};
#endif
/*
 * vfs2fs: convert a struct statvfs into the older struct statfs12 layout
 * and copy the result out to the user-supplied buffer `bfs'.
 *
 * Values that do not fit in a long are clamped to LONG_MAX.  The number
 * of available blocks is f_bfree - f_bresvd and may come out negative.
 *
 * Returns 0 on success, or the error returned by copyout().
 */
static int
vfs2fs(struct statfs12 *bfs, const struct statvfs *fs)
{
	struct statfs12 ofs;
#ifdef COMPAT_09
	size_t i;
#endif

	/*
	 * The whole structure is handed to copyout() below.  Clear it first
	 * so that padding and any member not assigned here (f_oflags, for
	 * instance, is only assigned under COMPAT_09) cannot carry stale
	 * kernel stack contents out to userland.  This also leaves f_type
	 * at 0 unless a match is found below.
	 */
	memset(&ofs, 0, sizeof(ofs));

#ifdef COMPAT_09
	ofs.f_oflags = (short)fs->f_flag;
	/* Translate the type name into the old numeric type code. */
	for (i = 0; i < sizeof(nv) / sizeof(nv[0]); i++) {
		if (strcmp(nv[i].name, fs->f_fstypename) == 0) {
			ofs.f_type = nv[i].value;
			break;
		}
	}
#endif

	/* Saturate at LONG_MAX if any bit above LONG_MAX is set. */
#define CLAMP(a) (long)(((a) & ~LONG_MAX) ? LONG_MAX : (a))
	ofs.f_bsize = CLAMP(fs->f_frsize);
	ofs.f_iosize = CLAMP(fs->f_iosize);
	ofs.f_blocks = CLAMP(fs->f_blocks);
	ofs.f_bfree = CLAMP(fs->f_bfree);
	/*
	 * Subtract the smaller value from the larger one and negate
	 * afterwards, so the clamped difference is never taken from a
	 * wrapped-around subtraction.
	 */
	if (fs->f_bfree > fs->f_bresvd)
		ofs.f_bavail = CLAMP(fs->f_bfree - fs->f_bresvd);
	else
		ofs.f_bavail = -CLAMP(fs->f_bresvd - fs->f_bfree);
	ofs.f_files = CLAMP(fs->f_files);
	ofs.f_ffree = CLAMP(fs->f_ffree);
	ofs.f_fsid = fs->f_fsidx;
	ofs.f_owner = fs->f_owner;
	ofs.f_flags = (long)fs->f_flag;
	ofs.f_syncwrites = CLAMP(fs->f_syncwrites);
	ofs.f_asyncwrites = CLAMP(fs->f_asyncwrites);

	/*
	 * Bounded copies into the fixed-size name arrays.  strncpy() does
	 * not NUL-terminate when the source fills the destination; that
	 * behaviour is kept unchanged here.
	 */
	(void)strncpy(ofs.f_fstypename, fs->f_fstypename,
	    sizeof(ofs.f_fstypename));
	(void)strncpy(ofs.f_mntonname, fs->f_mntonname,
	    sizeof(ofs.f_mntonname));
	(void)strncpy(ofs.f_mntfromname, fs->f_mntfromname,
	    sizeof(ofs.f_mntfromname));

	return copyout(&ofs, bfs, sizeof(ofs));
}
/*
 * compat_20_sys_statfs: look up `path', gather the statistics of the
 * file system the resulting vnode lives on, and copy them out to `buf'
 * in struct statfs12 format.
 *
 * Returns 0 on success, otherwise the error from namei(), dostatvfs()
 * or vfs2fs().
 */
/* ARGSUSED */
int
compat_20_sys_statfs(struct lwp *l, const struct compat_20_sys_statfs_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) path;
		syscallarg(struct statfs12 *) buf;
	} */
	struct nameidata nd;
	struct statvfs *sv;
	struct mount *mnt;
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
	    SCARG(uap, path));
	error = namei(&nd);
	if (error != 0)
		return error;

	/* Pick up the mount before allocating the scratch buffer. */
	mnt = nd.ni_vp->v_mount;
	sv = malloc(sizeof(*sv), M_TEMP, M_WAITOK);

	error = dostatvfs(mnt, sv, l, 0, 1);
	if (error == 0)
		error = vfs2fs(SCARG(uap, buf), sv);

	/* Common exit: release the vnode found by namei() and the buffer. */
	vrele(nd.ni_vp);
	free(sv, M_TEMP);
	return error;
}
/*
 * compat_20_sys_fstatfs: gather the statistics of the file system that
 * holds the vnode behind descriptor `fd' and copy them out to `buf' in
 * struct statfs12 format.
 *
 * Returns 0 on success, otherwise the error from getvnode(),
 * dostatvfs() or vfs2fs().
 */
/* ARGSUSED */
int
compat_20_sys_fstatfs(struct lwp *l, const struct compat_20_sys_fstatfs_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
		syscallarg(struct statfs12 *) buf;
	} */
	struct file *fp;
	struct vnode *vp;
	struct mount *mnt;
	struct statvfs *sv;
	int error;

	/* getvnode() will use the descriptor for us */
	error = getvnode(SCARG(uap, fd), &fp);
	if (error != 0)
		return (error);

	vp = (struct vnode *)fp->f_data;
	mnt = vp->v_mount;
	sv = malloc(sizeof(*sv), M_TEMP, M_WAITOK);

	error = dostatvfs(mnt, sv, l, 0, 1);
	if (error == 0)
		error = vfs2fs(SCARG(uap, buf), sv);

	/* Common exit: drop the descriptor use taken above and the buffer. */
	fd_putfile(SCARG(uap, fd));
	free(sv, M_TEMP);
	return error;
}
/*
 * Get statistics on all filesystems.
 *
 * Walks the mount list and, when `buf' is non-NULL, copies out one
 * struct statfs12 per mount until `bufsize' bytes worth of entries have
 * been written.  Mounts that cannot be busied, or whose statistics
 * cannot be obtained, are skipped.  If none of the copied entries is
 * mounted on "/" and the process has a root directory set in its cwd
 * info, an entry for that directory's file system is added.
 *
 * *retval is set to the number of file systems counted, limited to the
 * number of entries that fit in the buffer when a buffer was supplied.
 *
 * Returns 0 on success or an error from dostatvfs()/vfs2fs().
 */
int
compat_20_sys_getfsstat(struct lwp *l, const struct compat_20_sys_getfsstat_args *uap, register_t *retval)
{
	/* {
		syscallarg(struct statfs12 *) buf;
		syscallarg(long) bufsize;
		syscallarg(int) flags;
	} */
	int root = 0;
	struct mount *mp, *nmp;
	struct statvfs *sbuf;
	struct statfs12 *sfsp;
	size_t count, maxcount;
	int error = 0;

	sbuf = malloc(sizeof(*sbuf), M_TEMP, M_WAITOK);
	maxcount = (size_t)SCARG(uap, bufsize) / sizeof(struct statfs12);
	sfsp = SCARG(uap, buf);
	mutex_enter(&mountlist_lock);
	count = 0;
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	    mp = nmp) {
		/* On failure vfs_busy() is expected to have set nmp. */
		if (vfs_busy(mp, &nmp)) {
			continue;
		}
		if (sfsp && count < maxcount) {
			error = dostatvfs(mp, sbuf, l, SCARG(uap, flags), 0);
			if (error) {
				/*
				 * This mount is skipped on purpose, so its
				 * error must not survive the loop and become
				 * the return value of the system call.
				 */
				error = 0;
				vfs_unbusy(mp, false, &nmp);
				continue;
			}
			error = vfs2fs(sfsp, sbuf);
			if (error) {
				/*
				 * NOTE(review): this path passes NULL rather
				 * than &nmp and skips the mutex_exit() below;
				 * presumably vfs_unbusy() only retakes
				 * mountlist_lock when given a next pointer --
				 * confirm against vfs_busy()/vfs_unbusy().
				 */
				vfs_unbusy(mp, false, NULL);
				goto out;
			}
			sfsp++;
			root |= strcmp(sbuf->f_mntonname, "/") == 0;
		}
		count++;
		vfs_unbusy(mp, false, &nmp);
	}
	mutex_exit(&mountlist_lock);
	if (root == 0 && l->l_proc->p_cwdi->cwdi_rdir) {
		/*
		 * fake a root entry
		 */
		if ((error = dostatvfs(l->l_proc->p_cwdi->cwdi_rdir->v_mount,
		    sbuf, l, SCARG(uap, flags), 1)) != 0)
			goto out;
		/*
		 * Only copy the entry out while there is still room: once
		 * count has reached maxcount, sfsp points just past the end
		 * of the caller's buffer.
		 */
		if (sfsp && count < maxcount) {
			error = vfs2fs(sfsp, sbuf);
			if (error != 0)
				goto out;
		}
		count++;
	}
	if (sfsp && count > maxcount)
		*retval = maxcount;
	else
		*retval = count;
out:
	free(sbuf, M_TEMP);
	return error;
}
/*
 * compat_20_sys_fhstatfs: gather the statistics of the file system that
 * holds the file named by the file handle `fhp' and copy them out to
 * `buf' in struct statfs12 format.
 *
 * The caller must pass the KAUTH_SYSTEM_FILEHANDLE authorization check.
 * Returns ESTALE when no mount matches the handle's fsid, otherwise 0 or
 * the error from the failing step.
 */
int
compat_20_sys_fhstatfs(struct lwp *l, const struct compat_20_sys_fhstatfs_args *uap, register_t *retval)
{
	/* {
		syscallarg(const struct compat_30_fhandle *) fhp;
		syscallarg(struct statfs12 *) buf;
	} */
	struct compat_30_fhandle fh;
	struct statvfs *sv;
	struct mount *mnt;
	struct vnode *vp;
	int error;

	/*
	 * Must be super user
	 */
	error = kauth_authorize_system(l->l_cred,
	    KAUTH_SYSTEM_FILEHANDLE, 0, NULL, NULL, NULL);
	if (error != 0)
		return (error);

	/* Fetch the handle from userland and map it to a mount. */
	error = copyin(SCARG(uap, fhp), &fh, sizeof(fh));
	if (error != 0)
		return (error);
	mnt = vfs_getvfs(&fh.fh_fsid);
	if (mnt == NULL)
		return (ESTALE);

	/* Turn the file id into a vnode and use that vnode's mount. */
	error = VFS_FHTOVP(mnt, (struct fid*)&fh.fh_fid, &vp);
	if (error != 0)
		return (error);
	mnt = vp->v_mount;
	VOP_UNLOCK(vp, 0);

	sv = malloc(sizeof(*sv), M_TEMP, M_WAITOK);
	error = VFS_STATVFS(mnt, sv);
	if (error == 0)
		error = vfs2fs(SCARG(uap, buf), sv);

	/* Common exit: release the vnode and the scratch buffer. */
	vrele(vp);
	free(sv, M_TEMP);
	return error;
}