sys/kern: Allow custom fileops to specify fo_seek method.

Previously only vnodes allowed lseek/pread[v]/pwrite[v], which meant
that converting a regular device to a cloning device didn't always work.

The semantics are:

(*fp->f_ops->fo_seek)(fp, delta, whence, newoffp, flags)

1. Compute a new offset according to whence + delta -- that is, if
   whence is SEEK_CUR, add delta to fp->f_offset; if whence is
   SEEK_END, add delta to end of file; if whence is SEEK_SET, use delta
   as is.

2. If newoffp is nonnull, return the new offset in *newoffp.

3. If flags & FOF_UPDATE_OFFSET, set fp->f_offset to the new offset.

Access to fp->f_offset, and *newoffp if newoffp = &fp->f_offset, must
happen under the object lock (e.g., vnode lock), in order to
synchronize fp->f_offset reads and writes.

This change has the side effect that every call to VOP_SEEK happens
under the vnode lock now, when previously it didn't.  However, from a
review of all the VOP_SEEK implementations, it does not appear that
any file system even examines the vnode, let alone locks it.  So I
think this is safe -- and essentially the only reasonable way to do
things, given that it is used to validate a change from oldoff to
newoff, and oldoff becomes stale the moment we unlock the vnode.

No kernel bump because this reuses a spare entry in struct fileops,
and it is safe for the entry to be null, so all existing fileops will
continue to work as before (rejecting seek).
This commit is contained in:
riastradh 2021-09-11 10:08:55 +00:00
parent 3c9047a5a9
commit 1f941f7217
5 changed files with 103 additions and 94 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: netbsd32_fs.c,v 1.93 2021/02/16 14:47:20 simonb Exp $ */
/* $NetBSD: netbsd32_fs.c,v 1.94 2021/09/11 10:08:55 riastradh Exp $ */
/*
* Copyright (c) 1998, 2001 Matthew R. Green
@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: netbsd32_fs.c,v 1.93 2021/02/16 14:47:20 simonb Exp $");
__KERNEL_RCSID(0, "$NetBSD: netbsd32_fs.c,v 1.94 2021/09/11 10:08:55 riastradh Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -648,7 +648,6 @@ netbsd32_preadv(struct lwp *l, const struct netbsd32_preadv_args *uap, register_
syscallarg(netbsd32_off_t) offset;
} */
file_t *fp;
struct vnode *vp;
off_t offset;
int error, fd = SCARG(uap, fd);
@ -660,19 +659,14 @@ netbsd32_preadv(struct lwp *l, const struct netbsd32_preadv_args *uap, register_
return EBADF;
}
vp = fp->f_vnode;
if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
if (fp->f_ops->fo_seek == NULL) {
error = ESPIPE;
goto out;
}
offset = SCARG(uap, offset);
/*
* XXX This works because no file systems actually
* XXX take any action on the seek operation.
*/
if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0);
if (error)
goto out;
return dofilereadv32(fd, fp, SCARG_P32(uap, iovp),
@ -694,7 +688,6 @@ netbsd32_pwritev(struct lwp *l, const struct netbsd32_pwritev_args *uap, registe
syscallarg(netbsd32_off_t) offset;
} */
file_t *fp;
struct vnode *vp;
off_t offset;
int error, fd = SCARG(uap, fd);
@ -706,19 +699,14 @@ netbsd32_pwritev(struct lwp *l, const struct netbsd32_pwritev_args *uap, registe
return EBADF;
}
vp = fp->f_vnode;
if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
if (fp->f_ops->fo_seek == NULL) {
error = ESPIPE;
goto out;
}
offset = SCARG(uap, offset);
/*
* XXX This works because no file systems actually
* XXX take any action on the seek operation.
*/
if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0);
if (error)
goto out;
return dofilewritev32(fd, fp, SCARG_P32(uap, iovp),

View File

@ -1,4 +1,4 @@
/* $NetBSD: sys_generic.c,v 1.132 2020/05/23 23:42:43 ad Exp $ */
/* $NetBSD: sys_generic.c,v 1.133 2021/09/11 10:08:55 riastradh Exp $ */
/*-
* Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
@ -70,7 +70,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.132 2020/05/23 23:42:43 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.133 2021/09/11 10:08:55 riastradh Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -208,17 +208,18 @@ do_filereadv(int fd, const struct iovec *iovp, int iovcnt,
if (offset == NULL)
offset = &fp->f_offset;
else {
struct vnode *vp = fp->f_vnode;
if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
/*
* Caller must not specify &fp->f_offset -- we can't
* safely dereference it for the call to fo_seek
* without holding some underlying object lock.
*/
KASSERT(offset != &fp->f_offset);
if (fp->f_ops->fo_seek == NULL) {
error = ESPIPE;
goto out;
}
/*
* Test that the device is seekable ?
* XXX This works because no file systems actually
* XXX take any action on the seek operation.
*/
error = VOP_SEEK(vp, fp->f_offset, *offset, fp->f_cred);
error = (*fp->f_ops->fo_seek)(fp, *offset, SEEK_SET, NULL,
0);
if (error != 0)
goto out;
}
@ -408,17 +409,18 @@ do_filewritev(int fd, const struct iovec *iovp, int iovcnt,
if (offset == NULL)
offset = &fp->f_offset;
else {
struct vnode *vp = fp->f_vnode;
if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
/*
* Caller must not specify &fp->f_offset -- we can't
* safely dereference it for the call to fo_seek
* without holding some underlying object lock.
*/
KASSERT(offset != &fp->f_offset);
if (fp->f_ops->fo_seek == NULL) {
error = ESPIPE;
goto out;
}
/*
* Test that the device is seekable ?
* XXX This works because no file systems actually
* XXX take any action on the seek operation.
*/
error = VOP_SEEK(vp, fp->f_offset, *offset, fp->f_cred);
error = (*fp->f_ops->fo_seek)(fp, *offset, SEEK_SET, NULL,
0);
if (error != 0)
goto out;
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: vfs_syscalls.c,v 1.551 2021/07/03 09:39:26 mlelstv Exp $ */
/* $NetBSD: vfs_syscalls.c,v 1.552 2021/09/11 10:08:55 riastradh Exp $ */
/*-
* Copyright (c) 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc.
@ -70,7 +70,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.551 2021/07/03 09:39:26 mlelstv Exp $");
__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.552 2021/09/11 10:08:55 riastradh Exp $");
#ifdef _KERNEL_OPT
#include "opt_fileassoc.h"
@ -2856,50 +2856,30 @@ sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
syscallarg(off_t) offset;
syscallarg(int) whence;
} */
kauth_cred_t cred = l->l_cred;
file_t *fp;
struct vnode *vp;
struct vattr vattr;
off_t newoff;
int error, fd;
switch (SCARG(uap, whence)) {
case SEEK_CUR:
case SEEK_END:
case SEEK_SET:
break;
default:
return EINVAL;
}
fd = SCARG(uap, fd);
if ((fp = fd_getfile(fd)) == NULL)
return (EBADF);
vp = fp->f_vnode;
if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
if (fp->f_ops->fo_seek == NULL) {
error = ESPIPE;
goto out;
}
vn_lock(vp, LK_SHARED | LK_RETRY);
switch (SCARG(uap, whence)) {
case SEEK_CUR:
newoff = fp->f_offset + SCARG(uap, offset);
break;
case SEEK_END:
error = VOP_GETATTR(vp, &vattr, cred);
if (error) {
VOP_UNLOCK(vp);
goto out;
}
newoff = SCARG(uap, offset) + vattr.va_size;
break;
case SEEK_SET:
newoff = SCARG(uap, offset);
break;
default:
error = EINVAL;
VOP_UNLOCK(vp);
goto out;
}
VOP_UNLOCK(vp);
if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
*(off_t *)retval = fp->f_offset = newoff;
}
error = (*fp->f_ops->fo_seek)(fp, SCARG(uap, offset),
SCARG(uap, whence), (off_t *)retval, FOF_UPDATE_OFFSET);
out:
fd_putfile(fd);
return (error);
@ -2918,7 +2898,6 @@ sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
syscallarg(off_t) offset;
} */
file_t *fp;
struct vnode *vp;
off_t offset;
int error, fd = SCARG(uap, fd);
@ -2930,19 +2909,14 @@ sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
return (EBADF);
}
vp = fp->f_vnode;
if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
if (fp->f_ops->fo_seek == NULL) {
error = ESPIPE;
goto out;
}
offset = SCARG(uap, offset);
/*
* XXX This works because no file systems actually
* XXX take any action on the seek operation.
*/
if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0);
if (error)
goto out;
/* dofileread() will unuse the descriptor for us */
@ -2985,7 +2959,6 @@ sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
syscallarg(off_t) offset;
} */
file_t *fp;
struct vnode *vp;
off_t offset;
int error, fd = SCARG(uap, fd);
@ -2997,19 +2970,14 @@ sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
return (EBADF);
}
vp = fp->f_vnode;
if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
if (fp->f_ops->fo_seek == NULL) {
error = ESPIPE;
goto out;
}
offset = SCARG(uap, offset);
/*
* XXX This works because no file systems actually
* XXX take any action on the seek operation.
*/
if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0);
if (error)
goto out;
/* dofilewrite() will unuse the descriptor for us */

View File

@ -1,4 +1,4 @@
/* $NetBSD: vfs_vnops.c,v 1.221 2021/07/18 09:30:36 dholland Exp $ */
/* $NetBSD: vfs_vnops.c,v 1.222 2021/09/11 10:08:55 riastradh Exp $ */
/*-
* Copyright (c) 2009 The NetBSD Foundation, Inc.
@ -66,7 +66,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.221 2021/07/18 09:30:36 dholland Exp $");
__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.222 2021/09/11 10:08:55 riastradh Exp $");
#include "veriexec.h"
@ -121,6 +121,7 @@ static int vn_statfile(file_t *fp, struct stat *sb);
static int vn_ioctl(file_t *fp, u_long com, void *data);
static int vn_mmap(struct file *, off_t *, size_t, int, int *, int *,
struct uvm_object **, int *);
static int vn_seek(struct file *, off_t, int, off_t *, int);
const struct fileops vnops = {
.fo_name = "vn",
@ -134,6 +135,7 @@ const struct fileops vnops = {
.fo_kqfilter = vn_kqfilter,
.fo_restart = fnullop_restart,
.fo_mmap = vn_mmap,
.fo_seek = vn_seek,
};
/*
@ -1110,7 +1112,56 @@ vn_mmap(struct file *fp, off_t *offp, size_t size, int prot, int *flagsp,
return 0;
}
/*
 * vn_seek(fp, delta, whence, newoffp, flags)
 *
 *	fo_seek method for vnode-backed files.  Computes a new file
 *	offset from whence and delta:
 *
 *	SEEK_SET	newoff = delta
 *	SEEK_CUR	newoff = fp->f_offset + delta
 *	SEEK_END	newoff = (size reported by VOP_GETATTR) + delta
 *
 *	The proposed change is handed to VOP_SEEK for the file system
 *	to accept or reject.  On success, the new offset is stored in
 *	*newoffp if newoffp is nonnull, and in fp->f_offset if flags
 *	has FOF_UPDATE_OFFSET set.  On failure, neither is modified.
 *
 *	The vnode lock is held (shared) across the read of
 *	fp->f_offset, the VOP_SEEK call, and the stores, so that the
 *	old offset passed to VOP_SEEK cannot go stale in between.
 *
 *	Returns 0 on success, ESPIPE if the vnode is a FIFO, EINVAL if
 *	whence is not one of the three values above, EOVERFLOW if the
 *	new offset is not representable as an off_t, or whatever error
 *	VOP_GETATTR or VOP_SEEK reports.
 */
static int
vn_seek(struct file *fp, off_t delta, int whence, off_t *newoffp,
    int flags)
{
	kauth_cred_t cred = fp->f_cred;
	off_t oldoff, newoff;
	struct vnode *vp = fp->f_vnode;
	struct vattr vattr;
	int error;

	/* FIFOs are never seekable; reject before taking the lock. */
	if (vp->v_type == VFIFO)
		return ESPIPE;

	vn_lock(vp, LK_SHARED | LK_RETRY);

	/* Compute the old and new offsets. */
	oldoff = fp->f_offset;
	switch (whence) {
	case SEEK_CUR:
		/*
		 * Checked addition: a plain oldoff + delta would be
		 * signed overflow, which is undefined behaviour.
		 */
		if (__builtin_add_overflow(oldoff, delta, &newoff)) {
			error = EOVERFLOW;
			goto out;
		}
		break;
	case SEEK_END:
		error = VOP_GETATTR(vp, &vattr, cred);
		if (error)
			goto out;
		/*
		 * The builtin evaluates the exact mathematical sum and
		 * tests whether it fits in newoff, so this is correct
		 * whatever the width or signedness of va_size.
		 */
		if (__builtin_add_overflow(delta, vattr.va_size, &newoff)) {
			error = EOVERFLOW;
			goto out;
		}
		break;
	case SEEK_SET:
		newoff = delta;
		break;
	default:
		error = EINVAL;
		goto out;
	}

	/* Pass the proposed change to the file system to audit. */
	error = VOP_SEEK(vp, oldoff, newoff, cred);
	if (error)
		goto out;

	/* Success!  error is already 0 here. */
	if (newoffp)
		*newoffp = newoff;
	if (flags & FOF_UPDATE_OFFSET)
		fp->f_offset = newoff;

out:	VOP_UNLOCK(vp);
	return error;
}
/*
* Check that the vnode is still valid, and if so

View File

@ -1,4 +1,4 @@
/* $NetBSD: file.h,v 1.86 2020/05/02 18:43:02 christos Exp $ */
/* $NetBSD: file.h,v 1.87 2021/09/11 10:08:55 riastradh Exp $ */
/*-
* Copyright (c) 2009 The NetBSD Foundation, Inc.
@ -94,7 +94,7 @@ struct fileops {
void (*fo_restart) (struct file *);
int (*fo_mmap) (struct file *, off_t *, size_t, int, int *,
int *, struct uvm_object **, int *);
void (*fo_spare2) (void);
int (*fo_seek) (struct file *, off_t, int, off_t *, int);
};
union file_data {