Make LFS dirops get their vnode first, before incrementing the dirop count,

to prevent a deadlock trying to call VOP_PUTPAGES() on a VDIROP vnode.
This can happen when a stacked filesystem is mounted on top of an LFS: an
LFS dirop needs to get a vnode, which is available from the upper layer.
The corresponding lower layer vnode, however, is VDIROP, so the upper layer
can't be cleaned out since its VOP_PUTPAGES() is passed through to the lower
layer, which waits for dirops to drain before it can proceed.  Deadlock.

Tweak ufs_makeinode() and ufs_mkdir() to pass the a_vpp argument through
to VOP_VALLOC().

Partially addresses PR # 26043, though it probably does not completely fix
the problem described there.
This commit is contained in:
perseant 2005-03-23 00:12:51 +00:00
parent a2a01de947
commit c716c3d307
3 changed files with 130 additions and 125 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: lfs_alloc.c,v 1.76 2005/03/08 00:18:19 perseant Exp $ */
/* $NetBSD: lfs_alloc.c,v 1.77 2005/03/23 00:12:51 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@ -67,7 +67,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.76 2005/03/08 00:18:19 perseant Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.77 2005/03/23 00:12:51 perseant Exp $");
#if defined(_KERNEL_OPT)
#include "opt_quota.h"
@ -294,7 +294,6 @@ lfs_valloc(void *v)
fs = VTOI(ap->a_pvp)->i_lfs;
if (fs->lfs_ronly)
return EROFS;
*ap->a_vpp = NULL;
lfs_seglock(fs, SEGM_PROT);
@ -352,17 +351,8 @@ lfs_ialloc(struct lfs *fs, struct vnode *pvp, ino_t new_ino, int new_gen,
{
struct inode *ip;
struct vnode *vp;
IFILE *ifp;
struct buf *bp, *cbp;
int error;
CLEANERINFO *cip;
error = getnewvnode(VT_LFS, pvp->v_mount, lfs_vnodeop_p, &vp);
DLOG((DLOG_ALLOC, "lfs_ialloc: ino %d vp %p error %d\n", new_ino,
vp, error));
if (error)
goto errout;
vp = *vpp;
lockmgr(&ufs_hashlock, LK_EXCLUSIVE, 0);
/* Create an inode to associate with the vnode. */
lfs_vcreate(pvp->v_mount, new_ino, vp);
@ -382,13 +372,13 @@ lfs_ialloc(struct lfs *fs, struct vnode *pvp, ino_t new_ino, int new_gen,
ufs_ihashins(ip);
lockmgr(&ufs_hashlock, LK_RELEASE, 0);
ufs_vinit(vp->v_mount, lfs_specop_p, lfs_fifoop_p, &vp);
ufs_vinit(vp->v_mount, lfs_specop_p, lfs_fifoop_p, vpp);
vp = *vpp;
ip = VTOI(vp);
memset(ip->i_lfs_fragsize, 0, NDADDR * sizeof(*ip->i_lfs_fragsize));
uvm_vnp_setsize(vp, 0);
*vpp = vp;
lfs_mark_vnode(vp);
genfs_node_init(vp, &lfs_genfsops);
VREF(ip->i_devvp);
@ -396,21 +386,6 @@ lfs_ialloc(struct lfs *fs, struct vnode *pvp, ino_t new_ino, int new_gen,
fs->lfs_fmod = 1;
++fs->lfs_nfiles;
return (0);
errout:
/*
* Put the new inum back on the free list.
*/
lfs_seglock(fs, SEGM_PROT);
LFS_IENTRY(ifp, fs, new_ino, bp);
ifp->if_daddr = LFS_UNUSED_DADDR;
LFS_GET_HEADFREE(fs, cip, cbp, &(ifp->if_nextfree));
LFS_PUT_HEADFREE(fs, cip, cbp, new_ino);
(void) LFS_BWRITE_LOG(bp); /* Ifile */
lfs_segunlock(fs);
*vpp = NULLVP;
return (error);
}
/* Create a new vnode/inode pair and initialize what fields we can. */

View File

@ -1,4 +1,4 @@
/* $NetBSD: lfs_vnops.c,v 1.137 2005/03/08 04:49:35 simonb Exp $ */
/* $NetBSD: lfs_vnops.c,v 1.138 2005/03/23 00:12:51 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@ -67,7 +67,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.137 2005/03/08 04:49:35 simonb Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.138 2005/03/23 00:12:51 perseant Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -362,37 +362,39 @@ lfs_inactive(void *v)
* These macros are used to bracket UFS directory ops, so that we can
* identify all the pages touched during directory ops which need to
* be ordered and flushed atomically, so that they may be recovered.
*/
/*
* XXX KS - Because we have to mark nodes VDIROP in order to prevent
*
* Because we have to mark nodes VDIROP in order to prevent
* the cache from reclaiming them while a dirop is in progress, we must
* also manage the number of nodes so marked (otherwise we can run out).
* We do this by setting lfs_dirvcount to the number of marked vnodes; it
* is decremented during segment write, when VDIROP is taken off.
*/
#define SET_DIROP(vp) SET_DIROP2((vp), NULL)
#define SET_DIROP2(vp, vp2) lfs_set_dirop((vp), (vp2))
#define MARK_VNODE(vp) lfs_mark_vnode(vp)
#define UNMARK_VNODE(vp) lfs_unmark_vnode(vp)
#define SET_DIROP_CREATE(dvp, vpp) lfs_set_dirop_create((dvp), (vpp))
#define SET_DIROP_REMOVE(dvp, vp) lfs_set_dirop((dvp), (vp))
static int lfs_set_dirop_create(struct vnode *, struct vnode **);
static int lfs_set_dirop(struct vnode *, struct vnode *);
static int
lfs_set_dirop(struct vnode *vp, struct vnode *vp2)
lfs_set_dirop(struct vnode *dvp, struct vnode *vp)
{
struct lfs *fs;
int error;
KASSERT(VOP_ISLOCKED(vp));
KASSERT(vp2 == NULL || VOP_ISLOCKED(vp2));
KASSERT(VOP_ISLOCKED(dvp));
KASSERT(vp == NULL || VOP_ISLOCKED(vp));
fs = VTOI(vp)->i_lfs;
fs = VTOI(dvp)->i_lfs;
/*
* LFS_NRESERVE calculates direct and indirect blocks as well
* as an inode block; an overestimate in most cases.
*/
if ((error = lfs_reserve(fs, vp, vp2, LFS_NRESERVE(fs))) != 0)
if ((error = lfs_reserve(fs, dvp, vp, LFS_NRESERVE(fs))) != 0)
return (error);
if (fs->lfs_dirops == 0)
lfs_check(vp, LFS_UNUSED_LBN, 0);
lfs_check(dvp, LFS_UNUSED_LBN, 0);
restart:
simple_lock(&fs->lfs_interlock);
if (fs->lfs_writer) {
@ -427,36 +429,94 @@ restart:
simple_unlock(&fs->lfs_interlock);
/* Hold a reference so SET_ENDOP will be happy */
vref(dvp);
if (vp) {
vref(vp);
if (vp2)
vref(vp2);
MARK_VNODE(vp);
}
MARK_VNODE(dvp);
return 0;
unreserve:
lfs_reserve(fs, vp, vp2, -LFS_NRESERVE(fs));
lfs_reserve(fs, dvp, vp, -LFS_NRESERVE(fs));
return error;
}
#define SET_ENDOP(fs, vp, str) SET_ENDOP2((fs), (vp), NULL, (str))
#define SET_ENDOP2(fs, vp, vp2, str) { \
/*
* Get a new vnode *before* adjusting the dirop count, to avoid a deadlock
* in getnewvnode(), if we have a stacked filesystem mounted on top
* of us.
*
* NB: this means we have to clear the new vnodes on error. Fortunately
* SET_ENDOP is there to do that for us.
*/
static int
lfs_set_dirop_create(struct vnode *dvp, struct vnode **vpp)
{
int error;
struct lfs *fs;
fs = VFSTOUFS(dvp->v_mount)->um_lfs;
if (fs->lfs_ronly)
return EROFS;
if (vpp && (error = getnewvnode(VT_LFS, dvp->v_mount, lfs_vnodeop_p, vpp))) {
DLOG((DLOG_ALLOC, "lfs_set_dirop_create: dvp %p error %d\n",
dvp, error));
return error;
}
if ((error = lfs_set_dirop(dvp, NULL)) != 0) {
if (vpp) {
ungetnewvnode(*vpp);
*vpp = NULL;
}
return error;
}
return 0;
}
#define SET_ENDOP_BASE(fs, dvp, str) \
do { \
simple_lock(&(fs)->lfs_interlock); \
--(fs)->lfs_dirops; \
if (!(fs)->lfs_dirops) { \
if ((fs)->lfs_nadirop) { \
panic("SET_ENDOP: %s: no dirops but nadirop=%d", \
(str), (fs)->lfs_nadirop); \
panic("SET_ENDOP: %s: no dirops but " \
" nadirop=%d", (str), \
(fs)->lfs_nadirop); \
} \
wakeup(&(fs)->lfs_writer); \
lfs_check((vp),LFS_UNUSED_LBN,0); \
} \
lfs_reserve((fs), vp, vp2, -LFS_NRESERVE(fs)); /* XXX */ \
vrele(vp); \
if (vp2) \
vrele(vp2); \
}
#define MARK_VNODE(vp) lfs_mark_vnode(vp)
#define UNMARK_VNODE(vp) lfs_unmark_vnode(vp)
simple_unlock(&(fs)->lfs_interlock); \
lfs_check((dvp), LFS_UNUSED_LBN, 0); \
} else \
simple_unlock(&(fs)->lfs_interlock); \
} while(0)
#define SET_ENDOP_CREATE(fs, dvp, nvpp, str) \
do { \
UNMARK_VNODE(dvp); \
if (nvpp && *nvpp) \
UNMARK_VNODE(*nvpp); \
/* Check for error return to stem vnode leakage */ \
if (nvpp && *nvpp && !((*nvpp)->v_flag & VDIROP)) \
ungetnewvnode(*(nvpp)); \
SET_ENDOP_BASE((fs), (dvp), (str)); \
lfs_reserve((fs), (dvp), NULL, -LFS_NRESERVE(fs)); \
vrele(dvp); \
} while(0)
#define SET_ENDOP_CREATE_AP(ap, str) \
SET_ENDOP_CREATE(VTOI((ap)->a_dvp)->i_lfs, (ap)->a_dvp, \
(ap)->a_vpp, (str))
#define SET_ENDOP_REMOVE(fs, dvp, ovp, str) \
do { \
UNMARK_VNODE(dvp); \
if (ovp) \
UNMARK_VNODE(ovp); \
SET_ENDOP_BASE((fs), (dvp), (str)); \
lfs_reserve((fs), (dvp), (ovp), -LFS_NRESERVE(fs)); \
vrele(dvp); \
if (ovp) \
vrele(ovp); \
} while(0)
void
lfs_mark_vnode(struct vnode *vp)
@ -501,16 +561,12 @@ lfs_symlink(void *v)
} */ *ap = v;
int error;
if ((error = SET_DIROP(ap->a_dvp)) != 0) {
if ((error = SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) {
vput(ap->a_dvp);
return error;
}
MARK_VNODE(ap->a_dvp);
error = ufs_symlink(ap);
UNMARK_VNODE(ap->a_dvp);
if (*(ap->a_vpp))
UNMARK_VNODE(*(ap->a_vpp));
SET_ENDOP(VTOI(ap->a_dvp)->i_lfs,ap->a_dvp,"symlink");
SET_ENDOP_CREATE_AP(ap, "symlink");
return (error);
}
@ -530,19 +586,15 @@ lfs_mknod(void *v)
struct mount *mp;
ino_t ino;
if ((error = SET_DIROP(ap->a_dvp)) != 0) {
if ((error = SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) {
vput(ap->a_dvp);
return error;
}
MARK_VNODE(ap->a_dvp);
error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
ap->a_dvp, vpp, ap->a_cnp);
UNMARK_VNODE(ap->a_dvp);
if (*(ap->a_vpp))
UNMARK_VNODE(*(ap->a_vpp));
/* Either way we're done with the dirop at this point */
SET_ENDOP(VTOI(ap->a_dvp)->i_lfs,ap->a_dvp,"mknod");
SET_ENDOP_CREATE_AP(ap, "mknod");
if (error)
return (error);
@ -608,16 +660,12 @@ lfs_create(void *v)
} */ *ap = v;
int error;
if ((error = SET_DIROP(ap->a_dvp)) != 0) {
if ((error = SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) {
vput(ap->a_dvp);
return error;
}
MARK_VNODE(ap->a_dvp);
error = ufs_create(ap);
UNMARK_VNODE(ap->a_dvp);
if (*(ap->a_vpp))
UNMARK_VNODE(*(ap->a_vpp));
SET_ENDOP(VTOI(ap->a_dvp)->i_lfs,ap->a_dvp,"create");
SET_ENDOP_CREATE_AP(ap, "create");
return (error);
}
@ -632,16 +680,12 @@ lfs_mkdir(void *v)
} */ *ap = v;
int error;
if ((error = SET_DIROP(ap->a_dvp)) != 0) {
if ((error = SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) {
vput(ap->a_dvp);
return error;
}
MARK_VNODE(ap->a_dvp);
error = ufs_mkdir(ap);
UNMARK_VNODE(ap->a_dvp);
if (*(ap->a_vpp))
UNMARK_VNODE(*(ap->a_vpp));
SET_ENDOP(VTOI(ap->a_dvp)->i_lfs,ap->a_dvp,"mkdir");
SET_ENDOP_CREATE_AP(ap, "mkdir");
return (error);
}
@ -658,7 +702,7 @@ lfs_remove(void *v)
dvp = ap->a_dvp;
vp = ap->a_vp;
if ((error = SET_DIROP2(dvp, vp)) != 0) {
if ((error = SET_DIROP_REMOVE(dvp, vp)) != 0) {
if (dvp == vp)
vrele(vp);
else
@ -666,13 +710,8 @@ lfs_remove(void *v)
vput(dvp);
return error;
}
MARK_VNODE(dvp);
MARK_VNODE(vp);
error = ufs_remove(ap);
UNMARK_VNODE(dvp);
UNMARK_VNODE(vp);
SET_ENDOP2(VTOI(dvp)->i_lfs, dvp, vp, "remove");
SET_ENDOP_REMOVE(VTOI(dvp)->i_lfs, dvp, vp, "remove");
return (error);
}
@ -689,20 +728,15 @@ lfs_rmdir(void *v)
int error;
vp = ap->a_vp;
if ((error = SET_DIROP2(ap->a_dvp, ap->a_vp)) != 0) {
if ((error = SET_DIROP_REMOVE(ap->a_dvp, ap->a_vp)) != 0) {
vrele(ap->a_dvp);
if (ap->a_vp != ap->a_dvp)
VOP_UNLOCK(ap->a_dvp, 0);
vput(vp);
return error;
}
MARK_VNODE(ap->a_dvp);
MARK_VNODE(vp);
error = ufs_rmdir(ap);
UNMARK_VNODE(ap->a_dvp);
UNMARK_VNODE(vp);
SET_ENDOP2(VTOI(ap->a_dvp)->i_lfs, ap->a_dvp, vp, "rmdir");
SET_ENDOP_REMOVE(VTOI(ap->a_dvp)->i_lfs, ap->a_dvp, vp, "rmdir");
return (error);
}
@ -715,15 +749,14 @@ lfs_link(void *v)
struct componentname *a_cnp;
} */ *ap = v;
int error;
struct vnode **vpp = NULL;
if ((error = SET_DIROP(ap->a_dvp)) != 0) {
if ((error = SET_DIROP_CREATE(ap->a_dvp, vpp)) != 0) {
vput(ap->a_dvp);
return error;
}
MARK_VNODE(ap->a_dvp);
error = ufs_link(ap);
UNMARK_VNODE(ap->a_dvp);
SET_ENDOP(VTOI(ap->a_dvp)->i_lfs,ap->a_dvp,"link");
SET_ENDOP_CREATE(VTOI(ap->a_dvp)->i_lfs, ap->a_dvp, vpp, "link");
return (error);
}
@ -772,7 +805,7 @@ lfs_rename(void *v)
* would leave us with an unaccounted-for number of live dirops.
*
* Inline the relevant section of ufs_rename here, *before*
* calling SET_DIROP2.
* calling SET_DIROP_REMOVE.
*/
if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) ||
(VTOI(tdvp)->i_flags & APPEND))) {
@ -802,23 +835,15 @@ lfs_rename(void *v)
return (VOP_REMOVE(fdvp, fvp, fcnp));
}
if ((error = SET_DIROP2(tdvp, tvp)) != 0)
if ((error = SET_DIROP_REMOVE(tdvp, tvp)) != 0)
goto errout;
MARK_VNODE(fdvp);
MARK_VNODE(tdvp);
MARK_VNODE(fvp);
if (tvp) {
MARK_VNODE(tvp);
}
error = ufs_rename(ap);
UNMARK_VNODE(fdvp);
UNMARK_VNODE(tdvp);
UNMARK_VNODE(fvp);
if (tvp) {
UNMARK_VNODE(tvp);
}
SET_ENDOP2(fs, tdvp, tvp, "rename");
SET_ENDOP_REMOVE(fs, tdvp, tvp, "rename");
return (error);
errout:
@ -1402,10 +1427,15 @@ check_dirty(struct lfs *fs, struct vnode *vp,
while (by_list || soff < MIN(blkeof, endoffset)) {
if (by_list) {
/*
* find the first page in a block.
* Find the first page in a block. Skip
* blocks outside our area of interest or beyond
* the end of file.
*/
if (pages_per_block > 1) {
while (curpg && (curpg->offset & fs->lfs_bmask))
while (curpg &&
((curpg->offset & fs->lfs_bmask) ||
curpg->offset >= vp->v_size ||
curpg->offset >= endoffset))
curpg = TAILQ_NEXT(curpg, listq);
}
if (curpg == NULL)

View File

@ -1,4 +1,4 @@
/* $NetBSD: ufs_vnops.c,v 1.126 2005/02/26 22:32:20 perry Exp $ */
/* $NetBSD: ufs_vnops.c,v 1.127 2005/03/23 00:12:51 perseant Exp $ */
/*
* Copyright (c) 1982, 1986, 1989, 1993, 1995
@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.126 2005/02/26 22:32:20 perry Exp $");
__KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.127 2005/03/23 00:12:51 perseant Exp $");
#if defined(_KERNEL_OPT)
#include "opt_quota.h"
@ -1259,8 +1259,9 @@ ufs_mkdir(void *v)
* but not have it entered in the parent directory. The entry is
* made later after writing "." and ".." entries.
*/
if ((error = VOP_VALLOC(dvp, dmode, cnp->cn_cred, &tvp)) != 0)
if ((error = VOP_VALLOC(dvp, dmode, cnp->cn_cred, ap->a_vpp)) != 0)
goto out;
tvp = *ap->a_vpp;
ip = VTOI(tvp);
ip->i_uid = cnp->cn_cred->cr_uid;
DIP_ASSIGN(ip, uid, ip->i_uid);
@ -1375,7 +1376,6 @@ ufs_mkdir(void *v)
bad:
if (error == 0) {
VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
*ap->a_vpp = tvp;
} else {
dp->i_ffs_effnlink--;
dp->i_nlink--;
@ -2065,15 +2065,15 @@ ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
if ((cnp->cn_flags & HASBUF) == 0)
panic("ufs_makeinode: no name");
#endif
*vpp = NULL;
if ((mode & IFMT) == 0)
mode |= IFREG;
if ((error = VOP_VALLOC(dvp, mode, cnp->cn_cred, &tvp)) != 0) {
if ((error = VOP_VALLOC(dvp, mode, cnp->cn_cred, vpp)) != 0) {
PNBUF_PUT(cnp->cn_pnbuf);
vput(dvp);
return (error);
}
tvp = *vpp;
ip = VTOI(tvp);
ip->i_gid = pdir->i_gid;
DIP_ASSIGN(ip, gid, ip->i_gid);