NetBSD/sys/nfs/nfs_clntsubs.c
pooka 77486bda39 Get rid of dependency on fs_nfs.h, i.e. source modules with
conditional content depending on if the NFS client is wanted or
not.  The server can now be made an independent module not depending
on the nfs client.

Tested with rump_nfs (standalone client), rump_nfsd (standalone
nfsd) and a qemu installation with both the client and the server.
2010-03-02 23:19:09 +00:00

547 lines
16 KiB
C

/* $NetBSD: nfs_clntsubs.c,v 1.1 2010/03/02 23:19:09 pooka Exp $ */
/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Rick Macklem at The University of Guelph.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95
*/
/*
* Copyright 2000 Wasabi Systems, Inc.
* All rights reserved.
*
* Written by Frank van der Linden for Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed for the NetBSD Project by
* Wasabi Systems, Inc.
* 4. The name of Wasabi Systems, Inc. may not be used to endorse
* or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nfs_clntsubs.c,v 1.1 2010/03/02 23:19:09 pooka Exp $");
#ifdef _KERNEL_OPT
#include "opt_nfs.h"
#endif
/*
* These functions support the macros and help fiddle mbuf chains for
* the nfs op functions. They do things like create the rpc header and
* copy data between mbuf chains and uio lists.
*/
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/filedesc.h>
#include <sys/time.h>
#include <sys/dirent.h>
#include <sys/once.h>
#include <sys/kauth.h>
#include <sys/atomic.h>
#include <uvm/uvm_extern.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsrtt.h>
#include <nfs/nfs_var.h>
#include <miscfs/specfs/specdev.h>
#include <netinet/in.h>
/*
* Attribute cache routines.
* nfs_loadattrcache() - loads or updates the cache contents from attributes
* that are on the mbuf list
* nfs_getattrcache() - returns valid attributes if found in cache, returns
* error otherwise
*/
/*
* Load the attribute cache (that lives in the nfsnode entry) with
* the values on the mbuf list and
* Iff vap not NULL
* copy the attributes to *vaper
*/
int
nfsm_loadattrcache(struct vnode **vpp, struct mbuf **mdp, char **dposp, struct vattr *vaper, int flags)
{
int32_t t1;
char *cp2;
int error = 0;
struct mbuf *md;
int v3 = NFS_ISV3(*vpp);
md = *mdp;
t1 = (mtod(md, char *) + md->m_len) - *dposp;
error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2);
if (error)
return (error);
return nfs_loadattrcache(vpp, (struct nfs_fattr *)cp2, vaper, flags);
}
int
nfs_loadattrcache(struct vnode **vpp, struct nfs_fattr *fp, struct vattr *vaper, int flags)
{
struct vnode *vp = *vpp;
struct vattr *vap;
int v3 = NFS_ISV3(vp);
enum vtype vtyp;
u_short vmode;
struct timespec mtime;
struct timespec ctime;
int32_t rdev;
struct nfsnode *np;
extern int (**spec_nfsv2nodeop_p)(void *);
uid_t uid;
gid_t gid;
if (v3) {
vtyp = nfsv3tov_type(fp->fa_type);
vmode = fxdr_unsigned(u_short, fp->fa_mode);
rdev = makedev(fxdr_unsigned(u_int32_t, fp->fa3_rdev.specdata1),
fxdr_unsigned(u_int32_t, fp->fa3_rdev.specdata2));
fxdr_nfsv3time(&fp->fa3_mtime, &mtime);
fxdr_nfsv3time(&fp->fa3_ctime, &ctime);
} else {
vtyp = nfsv2tov_type(fp->fa_type);
vmode = fxdr_unsigned(u_short, fp->fa_mode);
if (vtyp == VNON || vtyp == VREG)
vtyp = IFTOVT(vmode);
rdev = fxdr_unsigned(int32_t, fp->fa2_rdev);
fxdr_nfsv2time(&fp->fa2_mtime, &mtime);
ctime.tv_sec = fxdr_unsigned(u_int32_t,
fp->fa2_ctime.nfsv2_sec);
ctime.tv_nsec = 0;
/*
* Really ugly NFSv2 kludge.
*/
if (vtyp == VCHR && rdev == 0xffffffff)
vtyp = VFIFO;
}
vmode &= ALLPERMS;
/*
* If v_type == VNON it is a new node, so fill in the v_type,
* n_mtime fields. Check to see if it represents a special
* device, and if so, check for a possible alias. Once the
* correct vnode has been obtained, fill in the rest of the
* information.
*/
np = VTONFS(vp);
if (vp->v_type == VNON) {
vp->v_type = vtyp;
if (vp->v_type == VFIFO) {
extern int (**fifo_nfsv2nodeop_p)(void *);
vp->v_op = fifo_nfsv2nodeop_p;
} else if (vp->v_type == VREG) {
mutex_init(&np->n_commitlock, MUTEX_DEFAULT, IPL_NONE);
} else if (vp->v_type == VCHR || vp->v_type == VBLK) {
vp->v_op = spec_nfsv2nodeop_p;
spec_node_init(vp, (dev_t)rdev);
}
np->n_mtime = mtime;
}
uid = fxdr_unsigned(uid_t, fp->fa_uid);
gid = fxdr_unsigned(gid_t, fp->fa_gid);
vap = np->n_vattr;
/*
* Invalidate access cache if uid, gid, mode or ctime changed.
*/
if (np->n_accstamp != -1 &&
(gid != vap->va_gid || uid != vap->va_uid || vmode != vap->va_mode
|| timespeccmp(&ctime, &vap->va_ctime, !=)))
np->n_accstamp = -1;
vap->va_type = vtyp;
vap->va_mode = vmode;
vap->va_rdev = (dev_t)rdev;
vap->va_mtime = mtime;
vap->va_ctime = ctime;
vap->va_birthtime.tv_sec = VNOVAL;
vap->va_birthtime.tv_nsec = VNOVAL;
vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
switch (vtyp) {
case VDIR:
vap->va_blocksize = NFS_DIRFRAGSIZ;
break;
case VBLK:
vap->va_blocksize = BLKDEV_IOSIZE;
break;
case VCHR:
vap->va_blocksize = MAXBSIZE;
break;
default:
vap->va_blocksize = v3 ? vp->v_mount->mnt_stat.f_iosize :
fxdr_unsigned(int32_t, fp->fa2_blocksize);
break;
}
if (v3) {
vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
vap->va_uid = uid;
vap->va_gid = gid;
vap->va_size = fxdr_hyper(&fp->fa3_size);
vap->va_bytes = fxdr_hyper(&fp->fa3_used);
vap->va_fileid = fxdr_hyper(&fp->fa3_fileid);
fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime);
vap->va_flags = 0;
vap->va_filerev = 0;
} else {
vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
vap->va_uid = uid;
vap->va_gid = gid;
vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size);
vap->va_bytes = fxdr_unsigned(int32_t, fp->fa2_blocks)
* NFS_FABLKSIZE;
vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid);
fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime);
vap->va_flags = 0;
vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec);
vap->va_filerev = 0;
}
if (vap->va_size > VFSTONFS(vp->v_mount)->nm_maxfilesize) {
return EFBIG;
}
if (vap->va_size != np->n_size) {
if ((np->n_flag & NMODIFIED) && vap->va_size < np->n_size) {
vap->va_size = np->n_size;
} else {
np->n_size = vap->va_size;
if (vap->va_type == VREG) {
/*
* we can't free pages if NAC_NOTRUNC because
* the pages can be owned by ourselves.
*/
if (flags & NAC_NOTRUNC) {
np->n_flag |= NTRUNCDELAYED;
} else {
genfs_node_wrlock(vp);
mutex_enter(&vp->v_interlock);
(void)VOP_PUTPAGES(vp, 0,
0, PGO_SYNCIO | PGO_CLEANIT |
PGO_FREE | PGO_ALLPAGES);
uvm_vnp_setsize(vp, np->n_size);
genfs_node_unlock(vp);
}
}
}
}
np->n_attrstamp = time_second;
if (vaper != NULL) {
memcpy((void *)vaper, (void *)vap, sizeof(*vap));
if (np->n_flag & NCHG) {
if (np->n_flag & NACC)
vaper->va_atime = np->n_atim;
if (np->n_flag & NUPD)
vaper->va_mtime = np->n_mtim;
}
}
return (0);
}
/*
* Check the time stamp
* If the cache is valid, copy contents to *vap and return 0
* otherwise return an error
*/
int
nfs_getattrcache(struct vnode *vp, struct vattr *vaper)
{
struct nfsnode *np = VTONFS(vp);
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
struct vattr *vap;
if (np->n_attrstamp == 0 ||
(time_second - np->n_attrstamp) >= nfs_attrtimeo(nmp, np)) {
nfsstats.attrcache_misses++;
return (ENOENT);
}
nfsstats.attrcache_hits++;
vap = np->n_vattr;
if (vap->va_size != np->n_size) {
if (vap->va_type == VREG) {
if ((np->n_flag & NMODIFIED) != 0 &&
vap->va_size < np->n_size) {
vap->va_size = np->n_size;
} else {
np->n_size = vap->va_size;
}
genfs_node_wrlock(vp);
uvm_vnp_setsize(vp, np->n_size);
genfs_node_unlock(vp);
} else
np->n_size = vap->va_size;
}
memcpy((void *)vaper, (void *)vap, sizeof(struct vattr));
if (np->n_flag & NCHG) {
if (np->n_flag & NACC)
vaper->va_atime = np->n_atim;
if (np->n_flag & NUPD)
vaper->va_mtime = np->n_mtim;
}
return (0);
}
void
nfs_delayedtruncate(struct vnode *vp)
{
struct nfsnode *np = VTONFS(vp);
if (np->n_flag & NTRUNCDELAYED) {
np->n_flag &= ~NTRUNCDELAYED;
genfs_node_wrlock(vp);
mutex_enter(&vp->v_interlock);
(void)VOP_PUTPAGES(vp, 0,
0, PGO_SYNCIO | PGO_CLEANIT | PGO_FREE | PGO_ALLPAGES);
uvm_vnp_setsize(vp, np->n_size);
genfs_node_unlock(vp);
}
}
#define NFS_WCCKLUDGE_TIMEOUT (24 * 60 * 60) /* 1 day */
#define NFS_WCCKLUDGE(nmp, now) \
(((nmp)->nm_iflag & NFSMNT_WCCKLUDGE) && \
((now) - (nmp)->nm_wcckludgetime - NFS_WCCKLUDGE_TIMEOUT) < 0)
/*
* nfs_check_wccdata: check inaccurate wcc_data
*
* => return non-zero if we shouldn't trust the wcc_data.
* => NFS_WCCKLUDGE_TIMEOUT is for the case that the server is "fixed".
*/
int
nfs_check_wccdata(struct nfsnode *np, const struct timespec *ctime,
struct timespec *mtime, bool docheck)
{
int error = 0;
#if !defined(NFS_V2_ONLY)
if (docheck) {
struct vnode *vp = NFSTOV(np);
struct nfsmount *nmp;
long now = time_second;
const struct timespec *omtime = &np->n_vattr->va_mtime;
const struct timespec *octime = &np->n_vattr->va_ctime;
const char *reason = NULL; /* XXX: gcc */
if (timespeccmp(omtime, mtime, <=)) {
reason = "mtime";
error = EINVAL;
}
if (vp->v_type == VDIR && timespeccmp(octime, ctime, <=)) {
reason = "ctime";
error = EINVAL;
}
nmp = VFSTONFS(vp->v_mount);
if (error) {
/*
* despite of the fact that we've updated the file,
* timestamps of the file were not updated as we
* expected.
* it means that the server has incompatible
* semantics of timestamps or (more likely)
* the server time is not precise enough to
* track each modifications.
* in that case, we disable wcc processing.
*
* yes, strictly speaking, we should disable all
* caching. it's a compromise.
*/
mutex_enter(&nmp->nm_lock);
if (!NFS_WCCKLUDGE(nmp, now)) {
printf("%s: inaccurate wcc data (%s) detected,"
" disabling wcc"
" (ctime %u.%09u %u.%09u,"
" mtime %u.%09u %u.%09u)\n",
vp->v_mount->mnt_stat.f_mntfromname,
reason,
(unsigned int)octime->tv_sec,
(unsigned int)octime->tv_nsec,
(unsigned int)ctime->tv_sec,
(unsigned int)ctime->tv_nsec,
(unsigned int)omtime->tv_sec,
(unsigned int)omtime->tv_nsec,
(unsigned int)mtime->tv_sec,
(unsigned int)mtime->tv_nsec);
}
nmp->nm_iflag |= NFSMNT_WCCKLUDGE;
nmp->nm_wcckludgetime = now;
mutex_exit(&nmp->nm_lock);
} else if (NFS_WCCKLUDGE(nmp, now)) {
error = EPERM; /* XXX */
} else if (nmp->nm_iflag & NFSMNT_WCCKLUDGE) {
mutex_enter(&nmp->nm_lock);
if (nmp->nm_iflag & NFSMNT_WCCKLUDGE) {
printf("%s: re-enabling wcc\n",
vp->v_mount->mnt_stat.f_mntfromname);
nmp->nm_iflag &= ~NFSMNT_WCCKLUDGE;
}
mutex_exit(&nmp->nm_lock);
}
}
#endif /* !defined(NFS_V2_ONLY) */
return error;
}
/*
* Heuristic to see if the server XDR encodes directory cookies or not.
* it is not supposed to, but a lot of servers may do this. Also, since
* most/all servers will implement V2 as well, it is expected that they
* may return just 32 bits worth of cookie information, so we need to
* find out in which 32 bits this information is available. We do this
* to avoid trouble with emulated binaries that can't handle 64 bit
* directory offsets.
*/
void
nfs_cookieheuristic(struct vnode *vp, int *flagp, struct lwp *l, kauth_cred_t cred)
{
struct uio auio;
struct iovec aiov;
char *tbuf, *cp;
struct dirent *dp;
off_t *cookies = NULL, *cop;
int error, eof, nc, len;
tbuf = malloc(NFS_DIRFRAGSIZ, M_TEMP, M_WAITOK);
aiov.iov_base = tbuf;
aiov.iov_len = NFS_DIRFRAGSIZ;
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_rw = UIO_READ;
auio.uio_resid = NFS_DIRFRAGSIZ;
auio.uio_offset = 0;
UIO_SETUP_SYSSPACE(&auio);
error = VOP_READDIR(vp, &auio, cred, &eof, &cookies, &nc);
len = NFS_DIRFRAGSIZ - auio.uio_resid;
if (error || len == 0) {
free(tbuf, M_TEMP);
if (cookies)
free(cookies, M_TEMP);
return;
}
/*
* Find the first valid entry and look at its offset cookie.
*/
cp = tbuf;
for (cop = cookies; len > 0; len -= dp->d_reclen) {
dp = (struct dirent *)cp;
if (dp->d_fileno != 0 && len >= dp->d_reclen) {
if ((*cop >> 32) != 0 && (*cop & 0xffffffffLL) == 0) {
*flagp |= NFSMNT_SWAPCOOKIE;
nfs_invaldircache(vp, 0);
nfs_vinvalbuf(vp, 0, cred, l, 1);
}
break;
}
cop++;
cp += dp->d_reclen;
}
free(tbuf, M_TEMP);
free(cookies, M_TEMP);
}
/*
* Set the attribute timeout based on how recently the file has been modified.
*/
time_t
nfs_attrtimeo(struct nfsmount *nmp, struct nfsnode *np)
{
time_t timeo;
if ((nmp->nm_flag & NFSMNT_NOAC) != 0)
return 0;
if (((np)->n_flag & NMODIFIED) != 0)
return NFS_MINATTRTIMO;
timeo = (time_second - np->n_mtime.tv_sec) / 10;
timeo = max(timeo, NFS_MINATTRTIMO);
timeo = min(timeo, NFS_MAXATTRTIMO);
return timeo;
}