NetBSD/sys/miscfs/umapfs/umap_vnops.c
chs c398ae9734 a smorgasbord of improvements to vnode locking and path lookup:
- LOCKPARENT is no longer relevant for lookup(), relookup() or VOP_LOOKUP().
   these now always return the parent vnode locked.  namei() works as before.
   lookup() and various other paths no longer acquire vnode locks in the
   wrong order via vrele().  fixes PR 32535.
   as a nice side effect, path lookup is also up to 25% faster.
 - the above allows us to get rid of PDIRUNLOCK.
 - also get rid of WANTPARENT (just use LOCKPARENT and unlock it).
 - remove an assumption in layer_node_find() that all file systems implement
   a recursive VOP_LOCK() (unionfs doesn't).
 - require that all file systems supply vfs_vptofh and vfs_fhtovp routines.
   fill in eopnotsupp() for file systems that don't support being exported
   and remove the checks for NULL.  (layerfs calls these without checking.)
 - in union_lookup1(), don't change refcounts in the ISDOTDOT case, just
   adjust which vnode is locked.  fixes PR 33374.
 - apply fixes for ufs_rename() from ufs_vnops.c rev. 1.61 to ext2fs_rename().
2006-12-09 16:11:50 +00:00
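For callers, the upshot of the first two items is that the parent directory vnode now always comes back locked from lookup()/relookup()/VOP_LOOKUP(), and code that used WANTPARENT now passes LOCKPARENT and drops the lock itself. A rough caller-side sketch (illustrative only; it assumes the namei()/VOP_UNLOCK() interfaces of this era and is not code from the change below):

	error = namei(&nd);			/* nd set up with LOCKPARENT */
	if (error == 0) {
		/* nd.ni_dvp is returned locked in every case now */
		VOP_UNLOCK(nd.ni_dvp, 0);	/* former WANTPARENT users unlock by hand */
		/* ... use the referenced nd.ni_dvp and nd.ni_vp ... */
	}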


/*	$NetBSD: umap_vnops.c,v 1.43 2006/12/09 16:11:52 chs Exp $	*/

/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software donated to Berkeley by
 * the UCLA Ficus project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)umap_vnops.c	8.6 (Berkeley) 5/22/95
 */

/*
 * Umap Layer
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: umap_vnops.c,v 1.43 2006/12/09 16:11:52 chs Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <sys/kauth.h>

#include <miscfs/umapfs/umap.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/genfs/layer_extern.h>

int	umap_lookup(void *);
int	umap_getattr(void *);
int	umap_print(void *);
int	umap_rename(void *);

/*
 * Global vfs data structures
 */
/*
 * XXX - strategy, bwrite are hand coded currently. They should
 * go away with a merged buffer/block cache.
 *
 */
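/*
 * Any operation without an explicit entry below reaches umap_bypass()
 * via vop_default_desc; the lock/unlock/inactive/reclaim and similar
 * entries are shared with the generic layerfs routines.
 */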
int (**umap_vnodeop_p)(void *);
const struct vnodeopv_entry_desc umap_vnodeop_entries[] = {
	{ &vop_default_desc,	umap_bypass },
	{ &vop_lookup_desc,	umap_lookup },
	{ &vop_getattr_desc,	umap_getattr },
	{ &vop_print_desc,	umap_print },
	{ &vop_rename_desc,	umap_rename },
	{ &vop_lock_desc,	layer_lock },
	{ &vop_unlock_desc,	layer_unlock },
	{ &vop_islocked_desc,	layer_islocked },
	{ &vop_fsync_desc,	layer_fsync },
	{ &vop_inactive_desc,	layer_inactive },
	{ &vop_reclaim_desc,	layer_reclaim },
	{ &vop_open_desc,	layer_open },
	{ &vop_setattr_desc,	layer_setattr },
	{ &vop_access_desc,	layer_access },
	{ &vop_remove_desc,	layer_remove },
	{ &vop_rmdir_desc,	layer_rmdir },
	{ &vop_bwrite_desc,	layer_bwrite },
	{ &vop_bmap_desc,	layer_bmap },
	{ &vop_getpages_desc,	layer_getpages },
	{ &vop_putpages_desc,	layer_putpages },
	{ NULL, NULL }
};
const struct vnodeopv_desc umapfs_vnodeop_opv_desc =
	{ &umap_vnodeop_p, umap_vnodeop_entries };

/*
 * This is the 08-June-1999 bypass routine.
 * See layer_vnops.c:layer_bypass for more details.
 */
int
umap_bypass(v)
	void *v;
{
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
		<other random data follows, presumably>
	} */ *ap = v;
	int (**our_vnodeop_p)(void *);
	kauth_cred_t *credpp = NULL, credp = 0;
	kauth_cred_t savecredp = 0, savecompcredp = 0;
	kauth_cred_t compcredp = 0;
	struct vnode **this_vp_p;
	int error, error1;
	struct vnode *old_vps[VDESC_MAX_VPS], *vp0;
	struct vnode **vps_p[VDESC_MAX_VPS];
	struct vnode ***vppp;
	struct vnodeop_desc *descp = ap->a_desc;
	int reles, i, flags;
	struct componentname **compnamepp = 0;

#ifdef SAFETY
	/*
	 * We require at least one vp.
	 */
	if (descp->vdesc_vp_offsets == NULL ||
	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
		panic("%s: no vp's in map.\n", __func__);
#endif
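
	/*
	 * The vnodeop_desc records, as byte offsets, where each vnode
	 * pointer lives inside the argument structure; VOPARG_OFFSETTO()
	 * turns such an offset into a pointer to that argument.
	 */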
	vps_p[0] =
	    VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[0], ap);
	vp0 = *vps_p[0];
	flags = MOUNTTOUMAPMOUNT(vp0->v_mount)->umapm_flags;
	our_vnodeop_p = vp0->v_op;

	if (flags & LAYERFS_MBYPASSDEBUG)
		printf("%s: %s\n", __func__, descp->vdesc_name);

	/*
	 * Map the vnodes going in.
	 * Later, we'll invoke the operation based on
	 * the first mapped vnode's operation vector.
	 */
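	/*
	 * Note that reles is shifted right once per iteration, so the
	 * VDESC_VP0_WILLRELE test below actually checks the WILLRELE
	 * bit that corresponds to vnode i.
	 */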
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;	/* bail out at end of list */
		vps_p[i] = this_vp_p =
		    VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[i],
		    ap);
		/*
		 * We're not guaranteed that any but the first vnode
		 * are of our type. Check for and don't map any
		 * that aren't. (We must always map first vp or vclean fails.)
		 */
		if (i && (*this_vp_p == NULL ||
		    (*this_vp_p)->v_op != our_vnodeop_p)) {
			old_vps[i] = NULL;
		} else {
			old_vps[i] = *this_vp_p;
			*(vps_p[i]) = UMAPVPTOLOWERVP(*this_vp_p);
			/*
			 * XXX - Several operations have the side effect
			 * of vrele'ing their vp's. We must account for
			 * that. (This should go away in the future.)
			 */
			if (reles & VDESC_VP0_WILLRELE)
				VREF(*this_vp_p);
		}
	}
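
	/*
	 * The credential is duplicated before it is touched, so the
	 * caller's copy is never modified; umap_mapids() then rewrites
	 * the uid/gid values held in the duplicate according to the
	 * translation tables given to mount_umap(8).
	 */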
	/*
	 * Fix the credentials. (That's the purpose of this layer.)
	 */
	if (descp->vdesc_cred_offset != VDESC_NO_OFFSET) {
		credpp = VOPARG_OFFSETTO(kauth_cred_t*,
		    descp->vdesc_cred_offset, ap);

		/* Save old values */
		savecredp = *credpp;
		if (savecredp != NOCRED)
			*credpp = kauth_cred_dup(savecredp);
		credp = *credpp;

		if ((flags & LAYERFS_MBYPASSDEBUG) &&
		    kauth_cred_geteuid(credp) != 0)
			printf("umap_bypass: user was %d, group %d\n",
			    kauth_cred_geteuid(credp), kauth_cred_getegid(credp));

		/* Map all ids in the credential structure. */
		umap_mapids(vp0->v_mount, credp);

		if ((flags & LAYERFS_MBYPASSDEBUG) &&
		    kauth_cred_geteuid(credp) != 0)
			printf("umap_bypass: user now %d, group %d\n",
			    kauth_cred_geteuid(credp), kauth_cred_getegid(credp));
	}

	/* BSD often keeps a credential in the componentname structure
	 * for speed. If there is one, it better get mapped, too.
	 */
	if (descp->vdesc_componentname_offset != VDESC_NO_OFFSET) {
		compnamepp = VOPARG_OFFSETTO(struct componentname**,
		    descp->vdesc_componentname_offset, ap);

		savecompcredp = (*compnamepp)->cn_cred;
		if (savecompcredp != NOCRED)
			(*compnamepp)->cn_cred = kauth_cred_dup(savecompcredp);
		compcredp = (*compnamepp)->cn_cred;

		if ((flags & LAYERFS_MBYPASSDEBUG) &&
		    kauth_cred_geteuid(compcredp) != 0)
			printf("umap_bypass: component credit user was %d, group %d\n",
			    kauth_cred_geteuid(compcredp), kauth_cred_getegid(compcredp));

		/* Map all ids in the credential structure. */
		umap_mapids(vp0->v_mount, compcredp);

		if ((flags & LAYERFS_MBYPASSDEBUG) &&
		    kauth_cred_geteuid(compcredp) != 0)
			printf("umap_bypass: component credit user now %d, group %d\n",
			    kauth_cred_geteuid(compcredp), kauth_cred_getegid(compcredp));
	}

	/*
	 * Call the operation on the lower layer
	 * with the modified argument structure.
	 */
	error = VCALL(*vps_p[0], descp->vdesc_offset, ap);

	/*
	 * Maintain the illusion of call-by-value
	 * by restoring vnodes in the argument structure
	 * to their original value.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;	/* bail out at end of list */
		if (old_vps[i]) {
			*(vps_p[i]) = old_vps[i];
			if (reles & VDESC_VP0_WILLUNLOCK)
				LAYERFS_UPPERUNLOCK(*(vps_p[i]), 0, error1);
			if (reles & VDESC_VP0_WILLRELE)
				vrele(*(vps_p[i]));
		}
	}

	/*
	 * Map the possible out-going vpp
	 * (Assumes that the lower layer always returns
	 * a VREF'ed vpp unless it gets an error.)
	 */
	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
	    !error) {
		/*
		 * XXX - even though some ops have vpp returned vp's,
		 * several ops actually vrele this before returning.
		 * We must avoid these ops.
		 * (This should go away when these ops are regularized.)
		 */
		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
			goto out;
		vppp = VOPARG_OFFSETTO(struct vnode***,
		    descp->vdesc_vpp_offset, ap);
		/*
		 * Only vop_lookup, vop_create, vop_makedir, vop_bmap,
		 * vop_mknod, and vop_symlink return vpp's. vop_bmap
		 * doesn't call bypass as the lower vpp is fine (we're just
		 * going to do i/o on it). vop_lookup doesn't call bypass
		 * as a lookup on "." would generate a locking error.
		 * So all the calls which get us here have a locked vpp. :-)
		 */
		error = layer_node_create(old_vps[0]->v_mount, **vppp, *vppp);
		if (error) {
			vput(**vppp);
			**vppp = NULL;
		}
	}

 out:
	/*
	 * Free duplicate cred structure and restore old one.
	 */
	if (descp->vdesc_cred_offset != VDESC_NO_OFFSET) {
		if ((flags & LAYERFS_MBYPASSDEBUG) && credp &&
		    kauth_cred_geteuid(credp) != 0)
			printf("umap_bypass: returning-user was %d\n",
			    kauth_cred_geteuid(credp));

		if (savecredp != NOCRED && credpp) {
			kauth_cred_free(credp);
			*credpp = savecredp;
			if ((flags & LAYERFS_MBYPASSDEBUG) && credpp &&
			    kauth_cred_geteuid(*credpp) != 0)
				printf("umap_bypass: returning-user now %d\n\n",
				    kauth_cred_geteuid(savecredp));
		}
	}

	if (descp->vdesc_componentname_offset != VDESC_NO_OFFSET) {
		if ((flags & LAYERFS_MBYPASSDEBUG) && compcredp &&
		    kauth_cred_geteuid(compcredp) != 0)
			printf("umap_bypass: returning-component-user was %d\n",
			    kauth_cred_geteuid(compcredp));

		if (savecompcredp != NOCRED) {
			kauth_cred_free(compcredp);
			(*compnamepp)->cn_cred = savecompcredp;
			if ((flags & LAYERFS_MBYPASSDEBUG) && savecompcredp &&
			    kauth_cred_geteuid(savecompcredp) != 0)
				printf("umap_bypass: returning-component-user now %d\n",
				    kauth_cred_geteuid(savecompcredp));
		}
	}

	return (error);
}

/*
 * This is based on the 08-June-1999 bypass routine.
 * See layer_vnops.c:layer_bypass for more details.
 */
int
umap_lookup(v)
	void *v;
{
	struct vop_lookup_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode * a_dvp;
		struct vnode ** a_vpp;
		struct componentname * a_cnp;
	} */ *ap = v;
	struct componentname *cnp = ap->a_cnp;
	kauth_cred_t savecompcredp = NULL;
	kauth_cred_t compcredp = NULL;
	struct vnode *dvp, *vp, *ldvp;
	struct mount *mp;
	int error;
	int flags, cnf = cnp->cn_flags;

	dvp = ap->a_dvp;
	mp = dvp->v_mount;

	if ((cnf & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

	flags = MOUNTTOUMAPMOUNT(mp)->umapm_flags;
	ldvp = UMAPVPTOLOWERVP(dvp);

	if (flags & LAYERFS_MBYPASSDEBUG)
		printf("umap_lookup\n");

	/*
	 * Fix the credentials. (That's the purpose of this layer.)
	 *
	 * BSD often keeps a credential in the componentname structure
	 * for speed. If there is one, it better get mapped, too.
	 */
	if ((savecompcredp = cnp->cn_cred)) {
		compcredp = kauth_cred_dup(savecompcredp);
		cnp->cn_cred = compcredp;
		if ((flags & LAYERFS_MBYPASSDEBUG) &&
		    kauth_cred_geteuid(compcredp) != 0)
			printf("umap_lookup: component credit user was %d, group %d\n",
			    kauth_cred_geteuid(compcredp), kauth_cred_getegid(compcredp));

		/* Map all ids in the credential structure. */
		umap_mapids(mp, compcredp);
	}

	if ((flags & LAYERFS_MBYPASSDEBUG) && compcredp &&
	    kauth_cred_geteuid(compcredp) != 0)
		printf("umap_lookup: component credit user now %d, group %d\n",
		    kauth_cred_geteuid(compcredp), kauth_cred_getegid(compcredp));

	ap->a_dvp = ldvp;
	error = VCALL(ldvp, ap->a_desc->vdesc_offset, ap);
	vp = *ap->a_vpp;
	*ap->a_vpp = NULL;

	if (error == EJUSTRETURN && (cnf & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
		error = EROFS;
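
	/*
	 * If the lower lookup handed back the lower directory itself
	 * (a lookup of "."), return the upper directory with a fresh
	 * reference instead of stacking a second umap node over the
	 * same lower vnode.
	 */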
	/* Do locking fixup as appropriate. See layer_lookup() for info */
	if (ldvp == vp) {
		*ap->a_vpp = dvp;
		VREF(dvp);
		vrele(vp);
	} else if (vp != NULL) {
		error = layer_node_create(mp, vp, ap->a_vpp);
		if (error) {
			vput(vp);
		}
	}

	/*
	 * Free duplicate cred structure and restore old one.
	 */
	if ((flags & LAYERFS_MBYPASSDEBUG) && compcredp &&
	    kauth_cred_geteuid(compcredp) != 0)
		printf("umap_lookup: returning-component-user was %d\n",
		    kauth_cred_geteuid(compcredp));

	if (savecompcredp != NOCRED) {
		if (compcredp)
			kauth_cred_free(compcredp);
		cnp->cn_cred = savecompcredp;
		if ((flags & LAYERFS_MBYPASSDEBUG) && savecompcredp &&
		    kauth_cred_geteuid(savecompcredp) != 0)
			printf("umap_lookup: returning-component-user now %d\n",
			    kauth_cred_geteuid(savecompcredp));
	}

	return (error);
}

/*
 * We handle getattr to change the fsid.
 */
int
umap_getattr(v)
	void *v;
{
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
		struct lwp *a_l;
	} */ *ap = v;
	uid_t uid;
	gid_t gid;
	int error, tmpid, nentries, gnentries, flags;
	u_long (*mapdata)[2];
	u_long (*gmapdata)[2];
	struct vnode **vp1p;
	const struct vnodeop_desc *descp = ap->a_desc;

	if ((error = umap_bypass(ap)) != 0)
		return (error);
	/* Requires that arguments be restored. */
	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];

	flags = MOUNTTOUMAPMOUNT(ap->a_vp->v_mount)->umapm_flags;

	/*
	 * Umap needs to map the uid and gid returned by a stat
	 * into the proper values for this site. This involves
	 * finding the returned uid in the mapping information,
	 * translating it into the uid on the other end,
	 * and filling in the proper field in the vattr
	 * structure pointed to by ap->a_vap. The group
	 * is easier, since currently all groups will be
	 * translated to the NULLGROUP.
	 */

	/* Find entry in map */
	uid = ap->a_vap->va_uid;
	gid = ap->a_vap->va_gid;
	if ((flags & LAYERFS_MBYPASSDEBUG))
		printf("umap_getattr: mapped uid = %d, mapped gid = %d\n", uid,
		    gid);

	vp1p = VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[0], ap);
	nentries = MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_nentries;
	mapdata = (MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_mapdata);
	gnentries = MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_gnentries;
	gmapdata = (MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_gmapdata);
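
	/*
	 * info_mapdata/info_nentries hold the uid translation table and
	 * its size; info_gmapdata/info_gnentries do the same for gids.
	 * Both come from the mapping files handed to mount_umap(8).
	 */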

	/* Reverse map the uid for the vnode. Since it's a reverse
	 * map, we can't use umap_mapids() to do it. */
	tmpid = umap_reverse_findid(uid, mapdata, nentries);

	if (tmpid != -1) {
		ap->a_vap->va_uid = (uid_t) tmpid;
		if ((flags & LAYERFS_MBYPASSDEBUG))
			printf("umap_getattr: original uid = %d\n", uid);
	} else
		ap->a_vap->va_uid = (uid_t) NOBODY;

	/* Reverse map the gid for the vnode. */
	tmpid = umap_reverse_findid(gid, gmapdata, gnentries);

	if (tmpid != -1) {
		ap->a_vap->va_gid = (gid_t) tmpid;
		if ((flags & LAYERFS_MBYPASSDEBUG))
			printf("umap_getattr: original gid = %d\n", gid);
	} else
		ap->a_vap->va_gid = (gid_t) NULLGROUP;

	return (0);
}

int
umap_print(v)
	void *v;
{
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	printf("\ttag VT_UMAPFS, vp=%p, lowervp=%p\n", vp,
	    UMAPVPTOLOWERVP(vp));
	return (0);
}

int
umap_rename(v)
	void *v;
{
	struct vop_rename_args /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap = v;
	int error, flags;
	struct componentname *compnamep;
	kauth_cred_t compcredp, savecompcredp;
	struct vnode *vp;
	struct vnode *tvp;

	/*
	 * Rename is irregular, having two componentname structures.
	 * We need to map the cred in the second structure,
	 * and then bypass takes care of the rest.
	 */
	vp = ap->a_fdvp;
	flags = MOUNTTOUMAPMOUNT(vp->v_mount)->umapm_flags;
	compnamep = ap->a_tcnp;
	compcredp = compnamep->cn_cred;

	savecompcredp = compcredp;
	compcredp = compnamep->cn_cred = kauth_cred_dup(savecompcredp);

	if ((flags & LAYERFS_MBYPASSDEBUG) &&
	    kauth_cred_geteuid(compcredp) != 0)
		printf("umap_rename: rename component credit user was %d, group %d\n",
		    kauth_cred_geteuid(compcredp), kauth_cred_getegid(compcredp));

	/* Map all ids in the credential structure. */
	umap_mapids(vp->v_mount, compcredp);

	if ((flags & LAYERFS_MBYPASSDEBUG) &&
	    kauth_cred_geteuid(compcredp) != 0)
		printf("umap_rename: rename component credit user now %d, group %d\n",
		    kauth_cred_geteuid(compcredp), kauth_cred_getegid(compcredp));
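
	/*
	 * If a target vnode exists on the same umap mount, keep an extra
	 * reference on it across the bypass so that, once the lower-level
	 * rename has succeeded, its layer node can be flagged
	 * LAYERFS_REMOVED before the reference is dropped.
	 */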
	tvp = ap->a_tvp;
	if (tvp) {
		if (tvp->v_mount != vp->v_mount)
			tvp = NULL;
		else
			vref(tvp);
	}
	error = umap_bypass(ap);
	if (tvp) {
		if (error == 0)
			VTOLAYER(tvp)->layer_flags |= LAYERFS_REMOVED;
		vrele(tvp);
	}

	/* Restore the additional mapped componentname cred structure. */
	kauth_cred_free(compcredp);
	compnamep->cn_cred = savecompcredp;

	return error;
}