NetBSD/sys/compat/common/vfs_syscalls_43.c
thorpej 80cc38a1af Fix a partial construction problem that can cause race conditions
between creation of a file descriptor and close(2) when using kernel
assisted threads.  What we do is stick descriptors in the table, but
mark them as "larval".  This causes essentially everything to treat
it as a non-existent descriptor, except for fdalloc(), which sees a
filled slot so that it won't (incorrectly) allocate it again.  When
a descriptor is fully constructed, the code that has constructed it
marks it as "mature" (which actually clears the "larval" flag), and
things continue to work as normal.

While here, gather all the code that gets a descriptor from the table
into a fd_getfile() function, and call it, rather than having the
same (sometimes incorrect) code copied all over the place.
2001-06-14 20:32:41 +00:00

529 lines
12 KiB
C

/* $NetBSD: vfs_syscalls_43.c,v 1.19 2001/06/14 20:32:41 thorpej Exp $ */
/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vfs_syscalls.c 8.28 (Berkeley) 12/10/94
*/
#if defined(_KERNEL_OPT)
#include "fs_union.h"
#endif
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/dirent.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/unistd.h>
#include <sys/resourcevar.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
static void cvtstat __P((struct stat *, struct stat43 *));
/*
* Convert from an old to a new stat structure.
*/
static void
cvtstat(st, ost)
struct stat *st;
struct stat43 *ost;
{
ost->st_dev = st->st_dev;
ost->st_ino = st->st_ino;
ost->st_mode = st->st_mode & 0xffff;
ost->st_nlink = st->st_nlink;
ost->st_uid = st->st_uid;
ost->st_gid = st->st_gid;
ost->st_rdev = st->st_rdev;
if (st->st_size < (quad_t)1 << 32)
ost->st_size = st->st_size;
else
ost->st_size = -2;
ost->st_atime = st->st_atime;
ost->st_mtime = st->st_mtime;
ost->st_ctime = st->st_ctime;
ost->st_blksize = st->st_blksize;
ost->st_blocks = st->st_blocks;
ost->st_flags = st->st_flags;
ost->st_gen = st->st_gen;
}
/*
* Get file status; this version follows links.
*/
/* ARGSUSED */
int
compat_43_sys_stat(p, v, retval)
struct proc *p;
void *v;
register_t *retval;
{
struct compat_43_sys_stat_args /* {
syscallarg(char *) path;
syscallarg(struct stat43 *) ub;
} */ *uap = v;
struct stat sb;
struct stat43 osb;
int error;
struct nameidata nd;
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
SCARG(uap, path), p);
if ((error = namei(&nd)) != 0)
return (error);
error = vn_stat(nd.ni_vp, &sb, p);
vput(nd.ni_vp);
if (error)
return (error);
cvtstat(&sb, &osb);
error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
return (error);
}
/*
* Get file status; this version does not follow links.
*/
/* ARGSUSED */
int
compat_43_sys_lstat(p, v, retval)
struct proc *p;
void *v;
register_t *retval;
{
struct compat_43_sys_lstat_args /* {
syscallarg(char *) path;
syscallarg(struct ostat *) ub;
} */ *uap = v;
struct vnode *vp, *dvp;
struct stat sb, sb1;
struct stat43 osb;
int error;
struct nameidata nd;
int ndflags;
ndflags = NOFOLLOW | LOCKLEAF | LOCKPARENT;
again:
NDINIT(&nd, LOOKUP, ndflags, UIO_USERSPACE, SCARG(uap, path), p);
if ((error = namei(&nd))) {
if (error == EISDIR && (ndflags & LOCKPARENT) != 0) {
/*
* Should only happen on '/'. Retry without LOCKPARENT;
* this is safe since the vnode won't be a VLNK.
*/
ndflags &= ~LOCKPARENT;
goto again;
}
return (error);
}
/*
* For symbolic links, always return the attributes of its
* containing directory, except for mode, size, and links.
*/
vp = nd.ni_vp;
dvp = nd.ni_dvp;
if (vp->v_type != VLNK) {
if ((ndflags & LOCKPARENT) != 0) {
if (dvp == vp)
vrele(dvp);
else
vput(dvp);
}
error = vn_stat(vp, &sb, p);
vput(vp);
if (error)
return (error);
} else {
error = vn_stat(dvp, &sb, p);
vput(dvp);
if (error) {
vput(vp);
return (error);
}
error = vn_stat(vp, &sb1, p);
vput(vp);
if (error)
return (error);
sb.st_mode &= ~S_IFDIR;
sb.st_mode |= S_IFLNK;
sb.st_nlink = sb1.st_nlink;
sb.st_size = sb1.st_size;
sb.st_blocks = sb1.st_blocks;
}
cvtstat(&sb, &osb);
error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
return (error);
}
/*
* Return status information about a file descriptor.
*/
/* ARGSUSED */
int
compat_43_sys_fstat(p, v, retval)
struct proc *p;
void *v;
register_t *retval;
{
struct compat_43_sys_fstat_args /* {
syscallarg(int) fd;
syscallarg(struct stat43 *) sb;
} */ *uap = v;
int fd = SCARG(uap, fd);
struct filedesc *fdp = p->p_fd;
struct file *fp;
struct stat ub;
struct stat43 oub;
int error;
if ((fp = fd_getfile(fdp, fd)) == NULL)
return (EBADF);
FILE_USE(fp);
error = (*fp->f_ops->fo_stat)(fp, &ub, p);
FILE_UNUSE(fp, p);
if (error == 0) {
cvtstat(&ub, &oub);
error = copyout((caddr_t)&oub, (caddr_t)SCARG(uap, sb),
sizeof (oub));
}
return (error);
}
/*
* Truncate a file given a file descriptor.
*/
/* ARGSUSED */
int
compat_43_sys_ftruncate(p, v, retval)
struct proc *p;
void *v;
register_t *retval;
{
struct compat_43_sys_ftruncate_args /* {
syscallarg(int) fd;
syscallarg(long) length;
} */ *uap = v;
struct sys_ftruncate_args /* {
syscallarg(int) fd;
syscallarg(int) pad;
syscallarg(off_t) length;
} */ nuap;
SCARG(&nuap, fd) = SCARG(uap, fd);
SCARG(&nuap, length) = SCARG(uap, length);
return (sys_ftruncate(p, &nuap, retval));
}
/*
* Truncate a file given its path name.
*/
/* ARGSUSED */
int
compat_43_sys_truncate(p, v, retval)
struct proc *p;
void *v;
register_t *retval;
{
struct compat_43_sys_truncate_args /* {
syscallarg(char *) path;
syscallarg(long) length;
} */ *uap = v;
struct sys_truncate_args /* {
syscallarg(char *) path;
syscallarg(int) pad;
syscallarg(off_t) length;
} */ nuap;
SCARG(&nuap, path) = SCARG(uap, path);
SCARG(&nuap, length) = SCARG(uap, length);
return (sys_truncate(p, &nuap, retval));
}
/*
* Reposition read/write file offset.
*/
int
compat_43_sys_lseek(p, v, retval)
struct proc *p;
void *v;
register_t *retval;
{
struct compat_43_sys_lseek_args /* {
syscallarg(int) fd;
syscallarg(long) offset;
syscallarg(int) whence;
} */ *uap = v;
struct sys_lseek_args /* {
syscallarg(int) fd;
syscallarg(int) pad;
syscallarg(off_t) offset;
syscallarg(int) whence;
} */ nuap;
off_t qret;
int error;
SCARG(&nuap, fd) = SCARG(uap, fd);
SCARG(&nuap, offset) = SCARG(uap, offset);
SCARG(&nuap, whence) = SCARG(uap, whence);
error = sys_lseek(p, &nuap, (register_t *)&qret);
*(long *)retval = qret;
return (error);
}
/*
* Create a file.
*/
int
compat_43_sys_creat(p, v, retval)
struct proc *p;
void *v;
register_t *retval;
{
struct compat_43_sys_creat_args /* {
syscallarg(char *) path;
syscallarg(int) mode;
} */ *uap = v;
struct sys_open_args /* {
syscallarg(char *) path;
syscallarg(int) flags;
syscallarg(int) mode;
} */ nuap;
SCARG(&nuap, path) = SCARG(uap, path);
SCARG(&nuap, mode) = SCARG(uap, mode);
SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC;
return (sys_open(p, &nuap, retval));
}
/*ARGSUSED*/
int
compat_43_sys_quota(p, v, retval)
struct proc *p;
void *v;
register_t *retval;
{
return (ENOSYS);
}
/*
* Read a block of directory entries in a file system independent format.
*/
int
compat_43_sys_getdirentries(p, v, retval)
struct proc *p;
void *v;
register_t *retval;
{
struct compat_43_sys_getdirentries_args /* {
syscallarg(int) fd;
syscallarg(char *) buf;
syscallarg(u_int) count;
syscallarg(long *) basep;
} */ *uap = v;
struct vnode *vp;
struct file *fp;
struct uio auio, kuio;
struct iovec aiov, kiov;
struct dirent *dp, *edp;
caddr_t dirbuf;
int error, eofflag, readcnt;
long loff;
/* getvnode() will use the descriptor for us */
if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
return (error);
if ((fp->f_flag & FREAD) == 0) {
error = EBADF;
goto out;
}
vp = (struct vnode *)fp->f_data;
unionread:
if (vp->v_type != VDIR) {
error = EINVAL;
goto out;
}
aiov.iov_base = SCARG(uap, buf);
aiov.iov_len = SCARG(uap, count);
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_rw = UIO_READ;
auio.uio_segflg = UIO_USERSPACE;
auio.uio_procp = p;
auio.uio_resid = SCARG(uap, count);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
loff = auio.uio_offset = fp->f_offset;
# if (BYTE_ORDER != LITTLE_ENDIAN)
if (vp->v_mount->mnt_maxsymlinklen <= 0) {
error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
(off_t **)0, (int *)0);
fp->f_offset = auio.uio_offset;
} else
# endif
{
kuio = auio;
kuio.uio_iov = &kiov;
kuio.uio_segflg = UIO_SYSSPACE;
kiov.iov_len = SCARG(uap, count);
MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
kiov.iov_base = dirbuf;
error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
(off_t **)0, (int *)0);
fp->f_offset = kuio.uio_offset;
if (error == 0) {
readcnt = SCARG(uap, count) - kuio.uio_resid;
edp = (struct dirent *)&dirbuf[readcnt];
for (dp = (struct dirent *)dirbuf; dp < edp; ) {
# if (BYTE_ORDER == LITTLE_ENDIAN)
/*
* The expected low byte of
* dp->d_namlen is our dp->d_type.
* The high MBZ byte of dp->d_namlen
* is our dp->d_namlen.
*/
dp->d_type = dp->d_namlen;
dp->d_namlen = 0;
# else
/*
* The dp->d_type is the high byte
* of the expected dp->d_namlen,
* so must be zero'ed.
*/
dp->d_type = 0;
# endif
if (dp->d_reclen > 0) {
dp = (struct dirent *)
((char *)dp + dp->d_reclen);
} else {
error = EIO;
break;
}
}
if (dp >= edp)
error = uiomove(dirbuf, readcnt, &auio);
}
FREE(dirbuf, M_TEMP);
}
VOP_UNLOCK(vp, 0);
if (error)
goto out;
#ifdef UNION
{
extern int (**union_vnodeop_p) __P((void *));
extern struct vnode *union_dircache __P((struct vnode *));
if ((SCARG(uap, count) == auio.uio_resid) &&
(vp->v_op == union_vnodeop_p)) {
struct vnode *lvp;
lvp = union_dircache(vp);
if (lvp != NULLVP) {
struct vattr va;
/*
* If the directory is opaque,
* then don't show lower entries
*/
error = VOP_GETATTR(vp, &va, fp->f_cred, p);
if (va.va_flags & OPAQUE) {
vput(lvp);
lvp = NULL;
}
}
if (lvp != NULLVP) {
error = VOP_OPEN(lvp, FREAD, fp->f_cred, p);
VOP_UNLOCK(lvp, 0);
if (error) {
vrele(lvp);
goto out;
}
fp->f_data = (caddr_t) lvp;
fp->f_offset = 0;
error = vn_close(vp, FREAD, fp->f_cred, p);
if (error)
goto out;
vp = lvp;
goto unionread;
}
}
}
#endif /* UNION */
if ((SCARG(uap, count) == auio.uio_resid) &&
(vp->v_flag & VROOT) &&
(vp->v_mount->mnt_flag & MNT_UNION)) {
struct vnode *tvp = vp;
vp = vp->v_mount->mnt_vnodecovered;
VREF(vp);
fp->f_data = (caddr_t) vp;
fp->f_offset = 0;
vrele(tvp);
goto unionread;
}
error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
sizeof(long));
*retval = SCARG(uap, count) - auio.uio_resid;
out:
FILE_UNUSE(fp, p);
return (error);
}