Postpone the segment accounting changes coming from truncation until the

inode that makes those changes valid is either written to disk by
lfs_writeinode() or discarded by lfs_vfree().

A couple of locking fixes are also included.
This commit is contained in:
perseant 2006-04-30 21:19:42 +00:00
parent 5cd433e986
commit 481da54fc1
7 changed files with 151 additions and 38 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: lfs.h,v 1.103 2006/04/17 20:02:34 perseant Exp $ */
/* $NetBSD: lfs.h,v 1.104 2006/04/30 21:19:42 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@ -700,6 +700,16 @@ struct dlfs {
/* Type used for the inode bitmap */
typedef u_int32_t lfs_bm_t;
/*
 * Linked list of segments whose byte count needs updating following a
 * file truncation.  Each entry records one pending decrement for one
 * segment; when the list it hangs on is finalized, the decrement is
 * applied to that segment's su_nbytes and the entry is freed.
 */
struct segdelta {
long segnum; /* segment number this pending delta applies to */
size_t num; /* bytes to subtract from that segment's su_nbytes */
LIST_ENTRY(segdelta) list; /* linkage on a per-inode or per-fs pending list */
};
/*
* In-memory super block.
*/
@ -818,6 +828,7 @@ struct lfs {
int lfs_pages; /* dirty pages blaming this fs */
lfs_bm_t *lfs_ino_bitmap; /* Inuse inodes bitmap */
int lfs_nowrap; /* Suspend log wrap */
LIST_HEAD(, segdelta) lfs_segdhd; /* List of pending trunc accounting events */
};
/* NINDIR is the number of indirects in a file system block. */
@ -972,6 +983,7 @@ struct lfs_inode_ext {
#ifdef _KERNEL
SPLAY_HEAD(lfs_splay, lbnentry) lfs_lbtree; /* Tree of balloc'd lbns */
int lfs_nbtree; /* Size of tree */
LIST_HEAD(, segdelta) lfs_segdhd;
#endif
};
#define i_lfs_osize inode_ext.lfs->lfs_osize
@ -983,6 +995,7 @@ struct lfs_inode_ext {
#define i_lfs_hiblk inode_ext.lfs->lfs_hiblk
#define i_lfs_lbtree inode_ext.lfs->lfs_lbtree
#define i_lfs_nbtree inode_ext.lfs->lfs_nbtree
#define i_lfs_segdhd inode_ext.lfs->lfs_segdhd
/*
* Macros for determining free space on the disk, with the variable metadata

View File

@ -1,4 +1,4 @@
/* $NetBSD: lfs_alloc.c,v 1.89 2006/04/22 00:12:45 perseant Exp $ */
/* $NetBSD: lfs_alloc.c,v 1.90 2006/04/30 21:19:42 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@ -67,7 +67,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.89 2006/04/22 00:12:45 perseant Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.90 2006/04/30 21:19:42 perseant Exp $");
#if defined(_KERNEL_OPT)
#include "opt_quota.h"
@ -588,6 +588,26 @@ lfs_vfree(struct vnode *vp, ino_t ino, int mode)
simple_unlock(&fs->lfs_interlock);
wakeup(&lfs_dirvcount);
lfs_vunref(vp);
/*
* If this inode is not going to be written any more, any
* segment accounting left over from its truncation needs
* to occur at the end of the next dirops flush. Attach
* them to the fs-wide list for that purpose.
*/
if (LIST_FIRST(&ip->i_lfs_segdhd) != NULL) {
struct segdelta *sd;
while((sd = LIST_FIRST(&ip->i_lfs_segdhd)) != NULL) {
LIST_REMOVE(sd, list);
LIST_INSERT_HEAD(&fs->lfs_segdhd, sd, list);
}
}
} else {
/*
* If it's not a dirop, we can finalize right away.
*/
lfs_finalize_ino_seguse(fs, ip);
}
LFS_CLR_UINO(ip, IN_ACCESSED|IN_CLEANING|IN_MODIFIED);

View File

@ -1,4 +1,4 @@
/* $NetBSD: lfs_extern.h,v 1.79 2006/04/23 14:15:12 yamt Exp $ */
/* $NetBSD: lfs_extern.h,v 1.80 2006/04/30 21:19:42 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@ -170,6 +170,8 @@ int lfs_update(struct vnode *, const struct timespec *, const struct timespec *,
int);
int lfs_truncate(struct vnode *, off_t, int, struct ucred *, struct lwp *);
struct ufs1_dinode *lfs_ifind(struct lfs *, ino_t, struct buf *);
void lfs_finalize_ino_seguse(struct lfs *, struct inode *);
void lfs_finalize_fs_seguse(struct lfs *);
/* lfs_segment.c */
void lfs_imtime(struct lfs *);

View File

@ -1,4 +1,4 @@
/* $NetBSD: lfs_inode.c,v 1.102 2006/04/19 00:22:15 perseant Exp $ */
/* $NetBSD: lfs_inode.c,v 1.103 2006/04/30 21:19:42 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@ -67,7 +67,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.102 2006/04/19 00:22:15 perseant Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.103 2006/04/30 21:19:42 perseant Exp $");
#if defined(_KERNEL_OPT)
#include "opt_quota.h"
@ -76,6 +76,7 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.102 2006/04/19 00:22:15 perseant Exp
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h>
@ -92,11 +93,11 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.102 2006/04/19 00:22:15 perseant Exp
#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>
static int lfs_update_seguse(struct lfs *, long, size_t);
static int lfs_update_seguse(struct lfs *, struct inode *ip, long, size_t);
static int lfs_indirtrunc (struct inode *, daddr_t, daddr_t,
daddr_t, int, long *, long *, long *, size_t *,
struct lwp *);
static int lfs_blkfree (struct lfs *, daddr_t, size_t, long *, size_t *);
static int lfs_blkfree (struct lfs *, struct inode *, daddr_t, size_t, long *, size_t *);
static int lfs_vtruncbuf(struct vnode *, daddr_t, int, int);
/* Search a block for a specific dinode. */
@ -475,7 +476,8 @@ lfs_truncate(struct vnode *ovp, off_t length, int ioflag,
real_released += nblocks;
blocksreleased += nblocks;
oip->i_ffs1_ib[level] = 0;
lfs_blkfree(fs, bn, fs->lfs_bsize, &lastseg, &bc);
lfs_blkfree(fs, oip, bn, fs->lfs_bsize,
&lastseg, &bc);
lfs_deregister_block(ovp, bn);
}
}
@ -502,7 +504,7 @@ lfs_truncate(struct vnode *ovp, off_t length, int ioflag,
obsize = 0;
blocksreleased += btofsb(fs, bsize);
oip->i_ffs1_db[i] = 0;
lfs_blkfree(fs, bn, obsize, &lastseg, &bc);
lfs_blkfree(fs, oip, bn, obsize, &lastseg, &bc);
lfs_deregister_block(ovp, bn);
}
if (lastblock < 0)
@ -545,7 +547,7 @@ lfs_truncate(struct vnode *ovp, off_t length, int ioflag,
done:
/* Finish segment accounting corrections */
lfs_update_seguse(fs, lastseg, bc);
lfs_update_seguse(fs, oip, lastseg, bc);
#ifdef DIAGNOSTIC
for (level = SINGLE; level <= TRIPLE; level++)
if ((newblks[NDADDR + level] == 0) !=
@ -604,8 +606,8 @@ done:
/* Update segment and avail usage information when removing a block. */
static int
lfs_blkfree(struct lfs *fs, daddr_t daddr, size_t bsize, long *lastseg,
size_t *num)
lfs_blkfree(struct lfs *fs, struct inode *ip, daddr_t daddr,
size_t bsize, long *lastseg, size_t *num)
{
long seg;
int error = 0;
@ -614,7 +616,7 @@ lfs_blkfree(struct lfs *fs, daddr_t daddr, size_t bsize, long *lastseg,
bsize = fragroundup(fs, bsize);
if (daddr > 0) {
if (*lastseg != (seg = dtosn(fs, daddr))) {
error = lfs_update_seguse(fs, *lastseg, *num);
error = lfs_update_seguse(fs, ip, *lastseg, *num);
*num = bsize;
*lastseg = seg;
} else
@ -626,28 +628,70 @@ lfs_blkfree(struct lfs *fs, daddr_t daddr, size_t bsize, long *lastseg,
/* Finish the accounting updates for a segment. */
static int
lfs_update_seguse(struct lfs *fs, long lastseg, size_t num)
lfs_update_seguse(struct lfs *fs, struct inode *ip, long lastseg, size_t num)
{
SEGUSE *sup;
struct buf *bp;
struct segdelta *sd;
struct vnode *vp;
ASSERT_SEGLOCK(fs);
if (lastseg < 0 || num == 0)
return 0;
LFS_SEGENTRY(sup, fs, lastseg, bp);
if (num > sup->su_nbytes) {
printf("lfs_truncate: segment %ld short by %ld\n",
lastseg, (long)num - sup->su_nbytes);
panic("lfs_truncate: negative bytes");
sup->su_nbytes = num;
vp = ITOV(ip);
LIST_FOREACH(sd, &ip->i_lfs_segdhd, list)
if (sd->segnum == lastseg)
break;
if (sd == NULL) {
sd = malloc(sizeof(*sd), M_SEGMENT, M_WAITOK);
sd->segnum = lastseg;
sd->num = 0;
LIST_INSERT_HEAD(&ip->i_lfs_segdhd, sd, list);
}
sup->su_nbytes -= num;
LFS_WRITESEGENTRY(sup, fs, lastseg, bp);
sd->num += num;
return 0;
}
static void
lfs_finalize_seguse(struct lfs *fs, void *v)
{
SEGUSE *sup;
struct buf *bp;
struct segdelta *sd;
LIST_HEAD(, segdelta) *hd = v;
ASSERT_SEGLOCK(fs);
while((sd = LIST_FIRST(hd)) != NULL) {
LIST_REMOVE(sd, list);
LFS_SEGENTRY(sup, fs, sd->segnum, bp);
if (sd->num > sup->su_nbytes) {
printf("lfs_finalize_seguse: segment %ld short by %ld\n",
sd->segnum, (long)(sd->num - sup->su_nbytes));
panic("lfs_finalize_seguse: negative bytes");
sup->su_nbytes = sd->num;
}
sup->su_nbytes -= sd->num;
LFS_WRITESEGENTRY(sup, fs, sd->segnum, bp);
free(sd, M_SEGMENT);
}
}
/*
 * Apply and discard the truncation deltas queued on a single inode
 * (ip->i_lfs_segdhd).  Asserts that the segment lock is held.
 */
void
lfs_finalize_ino_seguse(struct lfs *fs, struct inode *ip)
{
	ASSERT_SEGLOCK(fs);

	lfs_finalize_seguse(fs, &ip->i_lfs_segdhd);
}
/*
 * Apply and discard the truncation deltas queued on the file system's
 * own list (fs->lfs_segdhd).  Asserts that the segment lock is held.
 */
void
lfs_finalize_fs_seguse(struct lfs *fs)
{
	ASSERT_SEGLOCK(fs);

	lfs_finalize_seguse(fs, &fs->lfs_segdhd);
}
/*
* Release blocks associated with the inode ip and stored in the indirect
* block bn. Blocks are free'd in LIFO order up to (but not including)
@ -746,7 +790,7 @@ lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn,
blocksreleased += blkcount;
real_released += rblkcount;
}
lfs_blkfree(fs, nb, fs->lfs_bsize, lastsegp, bcp);
lfs_blkfree(fs, ip, nb, fs->lfs_bsize, lastsegp, bcp);
if (bap[i] > 0)
real_released += nblocks;
blocksreleased += nblocks;

View File

@ -1,4 +1,4 @@
/* $NetBSD: lfs_segment.c,v 1.175 2006/04/22 00:10:54 perseant Exp $ */
/* $NetBSD: lfs_segment.c,v 1.176 2006/04/30 21:19:42 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@ -67,7 +67,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.175 2006/04/22 00:10:54 perseant Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.176 2006/04/30 21:19:42 perseant Exp $");
#ifdef DEBUG
# define vndebug(vp, str) do { \
@ -370,6 +370,8 @@ lfs_vflush(struct vnode *vp)
* cleaner to run; but we're
* still not done with this vnode.
*/
lfs_writeinode(fs, sp, ip);
LFS_SET_UINO(ip, IN_MODIFIED);
lfs_writeseg(fs, sp);
lfs_segunlock(fs);
lfs_segunlock_relock(fs);
@ -534,6 +536,7 @@ lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op)
* over after the cleaner has
* had a chance to run.
*/
lfs_writeinode(fs, sp, ip);
lfs_writeseg(fs, sp);
if (!VPISEMPTY(vp) &&
!WRITEINPROG(vp) &&
@ -625,6 +628,7 @@ lfs_segwrite(struct mount *mp, int flags)
if (um_error == 0)
um_error = error;
((SEGSUM *)(sp->segsum))->ss_flags &= ~(SS_CONT);
lfs_finalize_fs_seguse(fs);
}
if (do_ckp && um_error) {
lfs_segunlock_relock(fs);
@ -957,6 +961,9 @@ lfs_writeinode(struct lfs *fs, struct segment *sp, struct inode *ip)
cdp = ((struct ufs1_dinode *)bp->b_data) + (sp->ninodes % INOPB(fs));
*cdp = *ip->i_din.ffs1_din;
/* We can finish the segment accounting for truncations now */
lfs_finalize_ino_seguse(fs, ip);
/*
* If we are cleaning, ensure that we don't write UNWRITTEN disk
* addresses to disk; possibly change the on-disk record of

View File

@ -1,4 +1,4 @@
/* $NetBSD: lfs_vfsops.c,v 1.205 2006/04/18 23:40:47 perseant Exp $ */
/* $NetBSD: lfs_vfsops.c,v 1.206 2006/04/30 21:19:42 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@ -67,7 +67,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.205 2006/04/18 23:40:47 perseant Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.206 2006/04/30 21:19:42 perseant Exp $");
#if defined(_KERNEL_OPT)
#include "opt_quota.h"
@ -1149,6 +1149,8 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
TAILQ_INIT(&fs->lfs_dchainhd);
/* and paging tailq */
TAILQ_INIT(&fs->lfs_pchainhd);
/* and delayed segment accounting for truncation list */
LIST_INIT(&fs->lfs_segdhd);
/*
* We use the ifile vnode for almost every operation. Instead of
@ -1513,9 +1515,25 @@ lfs_sync(struct mount *mp, int waitfor, struct ucred *cred, struct lwp *l)
fs = VFSTOUFS(mp)->um_lfs;
if (fs->lfs_ronly)
return 0;
/* Snapshots should not hose the syncer */
/*
* XXX Sync can block here anyway, since we don't have a very
* XXX good idea of how much data is pending. If it's more
* XXX than a segment and lfs_nextseg is close to the end of
* XXX the log, we'll likely block.
*/
simple_lock(&fs->lfs_interlock);
if (fs->lfs_nowrap && fs->lfs_nextseg < fs->lfs_curseg) {
simple_unlock(&fs->lfs_interlock);
return 0;
}
simple_unlock(&fs->lfs_interlock);
lfs_writer_enter(fs, "lfs_dirops");
/* All syncs must be checkpoints until roll-forward is implemented. */
DLOG((DLOG_FLUSH, "lfs_sync at 0x%x\n", fs->lfs_offset));
error = lfs_segwrite(mp, SEGM_CKP | (waitfor ? SEGM_SYNC : 0));
lfs_writer_leave(fs);
#ifdef QUOTA

View File

@ -1,4 +1,4 @@
/* $NetBSD: lfs_vnops.c,v 1.169 2006/04/18 21:41:20 perseant Exp $ */
/* $NetBSD: lfs_vnops.c,v 1.170 2006/04/30 21:19:42 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@ -67,7 +67,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.169 2006/04/18 21:41:20 perseant Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.170 2006/04/30 21:19:42 perseant Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -700,7 +700,7 @@ lfs_remove(void *v)
return error;
}
error = ufs_remove(ap);
SET_ENDOP_REMOVE(VTOI(dvp)->i_lfs, dvp, vp, "remove");
SET_ENDOP_REMOVE(VTOI(dvp)->i_lfs, dvp, ap->a_vp, "remove");
return (error);
}
@ -725,7 +725,7 @@ lfs_rmdir(void *v)
return error;
}
error = ufs_rmdir(ap);
SET_ENDOP_REMOVE(VTOI(ap->a_dvp)->i_lfs, ap->a_dvp, vp, "rmdir");
SET_ENDOP_REMOVE(VTOI(ap->a_dvp)->i_lfs, ap->a_dvp, ap->a_vp, "rmdir");
return (error);
}
@ -1105,10 +1105,18 @@ lfs_strategy(void *v)
"lfs_strategy: sleeping on ino %d lbn %"
PRId64 "\n", ip->i_number, bp->b_lblkno));
simple_lock(&fs->lfs_interlock);
if (fs->lfs_seglock) {
if (LFS_SEGLOCK_HELD(fs) && fs->lfs_iocount) {
/* Cleaner can't wait for itself */
ltsleep(&fs->lfs_iocount,
(PRIBIO + 1) | PNORELOCK,
"clean2", 0,
&fs->lfs_interlock);
slept = 1;
break;
} else if (fs->lfs_seglock) {
ltsleep(&fs->lfs_seglock,
(PRIBIO + 1) | PNORELOCK,
"lfs_strategy", 0,
"clean1", 0,
&fs->lfs_interlock);
slept = 1;
break;
@ -1216,6 +1224,7 @@ lfs_flush_dirops(struct lfs *fs)
simple_unlock(&fs->lfs_interlock);
/* We've written all the dirops there are */
((SEGSUM *)(sp->segsum))->ss_flags &= ~(SS_CONT);
lfs_finalize_fs_seguse(fs);
(void) lfs_writeseg(fs, sp);
lfs_segunlock(fs);
}
@ -1300,12 +1309,12 @@ lfs_flush_pchain(struct lfs *fs)
VOP_UNLOCK(vp, 0);
lfs_vunref(vp);
simple_lock(&fs->lfs_interlock);
if (error == EAGAIN) {
lfs_writeseg(fs, sp);
simple_lock(&fs->lfs_interlock);
break;
}
simple_lock(&fs->lfs_interlock);
}
simple_unlock(&fs->lfs_interlock);
(void) lfs_writeseg(fs, sp);