If LFS_DO_ROLLFORWARD is defined, roll forward on mount from the older
checkpoint, through the newer checkpoint, and on through any newer
partial segments that may have been written but not checkpointed because
of an intervening crash.

LFS_DO_ROLLFORWARD is not defined by default.
This commit is contained in:
perseant 2000-11-27 03:33:57 +00:00
parent 25491e6a22
commit 0055236dda
6 changed files with 775 additions and 88 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: lfs_alloc.c,v 1.43 2000/09/09 04:49:54 perseant Exp $ */
/* $NetBSD: lfs_alloc.c,v 1.44 2000/11/27 03:33:57 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
@ -95,6 +95,166 @@
extern int lfs_dirvcount;
extern struct lock ufs_hashlock;
static int extend_ifile(struct lfs *, struct ucred *);
static int lfs_ialloc(struct lfs *, struct vnode *, ino_t, int, struct vnode **);
/*
 * Allocate a particular inode with a particular version number, freeing
 * any previous versions of this inode that may have gone before.
 * Used by the roll-forward code.
 *
 * fs      - the filesystem being rolled forward
 * ino     - inode number to allocate
 * version - generation number the inode must end up with
 * p       - process on whose behalf any truncation is done
 * vpp     - receives the vnode on success, NULLVP on failure
 *
 * Returns 0 on success.  Returns EEXIST if the inode already exists with
 * a newer version than the one requested, ENOENT if the inode is neither
 * in use nor on the free list, or the error reported by extend_ifile()
 * or lfs_ialloc().
 *
 * XXX this function does not have appropriate locking to be used on a live fs;
 * XXX but something similar could probably be used for an "undelete" call.
 */
int
lfs_rf_valloc(struct lfs *fs, ino_t ino, int version, struct proc *p,
	      struct vnode **vpp)
{
	IFILE *ifp;
	struct buf *bp;
	struct vnode *vp;
	struct inode *ip;
	ino_t tino, oldnext;
	int error;

	/*
	 * First, just try a vget.  If the version number is the one we want,
	 * we don't have to do anything else.  If the version number is wrong,
	 * take appropriate action.
	 */
	error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, &vp);
	if (error == 0) {
		*vpp = vp;
		ip = VTOI(vp);
		if (ip->i_ffs_gen == version)
			return 0;
		else if (ip->i_ffs_gen < version) {
			/* Older incarnation: discard its data, bump the gen. */
			VOP_TRUNCATE(vp, (off_t)0, 0, NOCRED, p);
			ip->i_ffs_gen = version;
			LFS_SET_UINO(ip, IN_CHANGE | IN_MODIFIED | IN_UPDATE);
			return 0;
		} else {
			/* The existing inode is newer than what was asked for. */
			vput(vp);
			*vpp = NULLVP;
			return EEXIST;
		}
	}

	/*
	 * The inode is not in use.  Find it on the free list.
	 */

	/*
	 * If the Ifile is too short to contain this inum, extend it.
	 * A failed extension must abort: the size would never grow and
	 * this loop would otherwise spin forever.
	 */
	while (VTOI(fs->lfs_ivnode)->i_ffs_size <=
	       dbtob(fsbtodb(fs, ino / fs->lfs_ifpb + fs->lfs_cleansz +
			     fs->lfs_segtabsz))) {
		error = extend_ifile(fs, NOCRED);
		if (error != 0) {
			*vpp = NULLVP;
			return error;
		}
	}

	/* Remember this entry's successor and stamp the wanted version. */
	LFS_IENTRY(ifp, fs, ino, bp);
	oldnext = ifp->if_nextfree;
	ifp->if_version = version;
	brelse(bp);

	if (ino == fs->lfs_free) {
		/* It was the head of the free list; just advance the head. */
		fs->lfs_free = oldnext;
	} else {
		/* Walk the free list looking for the entry that points at ino. */
		tino = fs->lfs_free;
		while (1) {
			LFS_IENTRY(ifp, fs, tino, bp);
			if (ifp->if_nextfree == ino ||
			    ifp->if_nextfree == LFS_UNUSED_INUM)
				break;
			tino = ifp->if_nextfree;
			brelse(bp);
		}
		if (ifp->if_nextfree == LFS_UNUSED_INUM) {
			/* Reached the end of the list without finding ino. */
			brelse(bp);
			*vpp = NULLVP;
			return ENOENT;
		}
		/* Unlink ino from the list. */
		ifp->if_nextfree = oldnext;
		VOP_BWRITE(bp);
	}

	error = lfs_ialloc(fs, fs->lfs_ivnode, ino, version, &vp);
	if (error == 0) {
		/*
		 * Make it VREG so we can put blocks on it.  We will change
		 * this later if it turns out to be some other kind of file.
		 */
		ip = VTOI(vp);
		ip->i_ffs_mode = IFREG;
		ip->i_ffs_nlink = 1;
		ip->i_ffs_effnlink = 1;
		ufs_vinit(vp->v_mount, lfs_specop_p, lfs_fifoop_p, &vp);
		/* ufs_vinit takes &vp, so re-fetch the inode from vp. */
		ip = VTOI(vp);

		/* The dirop-nature of this vnode is past */
		(void)lfs_vunref(vp);
		--lfs_dirvcount;
		vp->v_flag &= ~VDIROP;
		--fs->lfs_nadirop;
		ip->i_flag &= ~IN_ADIROP;
	}
	*vpp = vp;
	return error;
}
static int
extend_ifile(struct lfs *fs, struct ucred *cred)
{
struct vnode *vp;
struct inode *ip;
IFILE *ifp;
struct buf *bp;
int error;
ufs_daddr_t i, blkno, max;
ino_t oldlast;
vp = fs->lfs_ivnode;
(void)lfs_vref(vp);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
ip = VTOI(vp);
blkno = lblkno(fs, ip->i_ffs_size);
if ((error = VOP_BALLOC(vp, ip->i_ffs_size, fs->lfs_bsize, cred, 0,
&bp)) != 0) {
VOP_UNLOCK(vp, 0);
lfs_vunref(vp);
return (error);
}
ip->i_ffs_size += fs->lfs_bsize;
uvm_vnp_setsize(vp, ip->i_ffs_size);
(void)uvm_vnp_uncache(vp);
VOP_UNLOCK(vp, 0);
i = (blkno - fs->lfs_segtabsz - fs->lfs_cleansz) *
fs->lfs_ifpb;
oldlast = fs->lfs_free;
fs->lfs_free = i;
#ifdef DIAGNOSTIC
if(fs->lfs_free == LFS_UNUSED_INUM)
panic("inode 0 allocated [2]");
#endif /* DIAGNOSTIC */
max = i + fs->lfs_ifpb;
/* printf("extend ifile for ino %d--%d\n", i, max); */
for (ifp = (struct ifile *)bp->b_data; i < max; ++ifp) {
ifp->if_version = 1;
ifp->if_daddr = LFS_UNUSED_DADDR;
ifp->if_nextfree = ++i;
}
ifp--;
ifp->if_nextfree = oldlast;
(void) VOP_BWRITE(bp); /* Ifile */
lfs_vunref(vp);
return 0;
}
/* Allocate a new inode. */
/* ARGSUSED */
/* VOP_BWRITE 2i times */
@ -111,14 +271,9 @@ lfs_valloc(v)
struct lfs *fs;
struct buf *bp;
struct ifile *ifp;
struct inode *ip;
struct vnode *vp;
ufs_daddr_t blkno;
ino_t new_ino;
u_long i, max;
int error;
int new_gen;
extern int lfs_dirvcount;
fs = VTOI(ap->a_pvp)->i_lfs;
if (fs->lfs_ronly)
@ -132,7 +287,7 @@ lfs_valloc(v)
* written to disk.
*
* XXX this sucks. We should instead encode the head of the free
* list into the CLEANERINFO block of the Ifile.
* list into the CLEANERINFO block of the Ifile. [XXX v2]
*/
lfs_seglock(fs, SEGM_PROT);
@ -147,7 +302,7 @@ lfs_valloc(v)
}
#endif /* DIAGNOSTIC */
#ifdef ALLOCPRINT
printf("lfs_ialloc: allocate inode %d\n", new_ino);
printf("lfs_valloc: allocate inode %d\n", new_ino);
#endif
/*
@ -156,52 +311,18 @@ lfs_valloc(v)
*/
LFS_IENTRY(ifp, fs, new_ino, bp);
if (ifp->if_daddr != LFS_UNUSED_DADDR)
panic("lfs_ialloc: inuse inode %d on the free list", new_ino);
panic("lfs_valloc: inuse inode %d on the free list", new_ino);
fs->lfs_free = ifp->if_nextfree;
new_gen = ifp->if_version; /* version was updated by vfree */
#ifdef LFS_DEBUG_NEXTFREE
ifp->if_nextfree = 0;
(void) VOP_BWRITE(bp); /* Ifile */
#else
brelse(bp);
#endif
/* Extend IFILE so that the next lfs_valloc will succeed. */
if (fs->lfs_free == LFS_UNUSED_INUM) {
vp = fs->lfs_ivnode;
(void)lfs_vref(vp);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
ip = VTOI(vp);
blkno = lblkno(fs, ip->i_ffs_size);
if ((error = VOP_BALLOC(vp, ip->i_ffs_size, fs->lfs_bsize,
ap->a_cred, 0, &bp)) != 0) {
VOP_UNLOCK(vp, 0);
lfs_segunlock(fs);
if ((error = extend_ifile(fs, ap->a_cred)) != 0) {
fs->lfs_free = new_ino;
return (error);
lfs_segunlock(fs);
return error;
}
ip->i_ffs_size += fs->lfs_bsize;
uvm_vnp_setsize(vp, ip->i_ffs_size);
(void)uvm_vnp_uncache(vp);
VOP_UNLOCK(vp, 0);
i = (blkno - fs->lfs_segtabsz - fs->lfs_cleansz) *
fs->lfs_ifpb;
fs->lfs_free = i;
#ifdef DIAGNOSTIC
if(fs->lfs_free == LFS_UNUSED_INUM)
panic("inode 0 allocated [2]");
#endif /* DIAGNOSTIC */
max = i + fs->lfs_ifpb;
for (ifp = (struct ifile *)bp->b_data; i < max; ++ifp) {
ifp->if_version = 1;
ifp->if_daddr = LFS_UNUSED_DADDR;
ifp->if_nextfree = ++i;
}
ifp--;
ifp->if_nextfree = LFS_UNUSED_INUM;
(void) VOP_BWRITE(bp); /* Ifile */
lfs_vunref(vp);
}
#ifdef DIAGNOSTIC
if(fs->lfs_free == LFS_UNUSED_INUM)
@ -210,13 +331,27 @@ lfs_valloc(v)
lfs_segunlock(fs);
if ((error = getnewvnode(VT_LFS, ap->a_pvp->v_mount,
lfs_vnodeop_p, &vp)) != 0)
return lfs_ialloc(fs, ap->a_pvp, new_ino, new_gen, ap->a_vpp);
}
static int
lfs_ialloc(struct lfs *fs, struct vnode *pvp, ino_t new_ino, int new_gen,
struct vnode **vpp)
{
struct inode *ip;
struct vnode *vp;
IFILE *ifp;
struct buf *bp;
int error;
error = getnewvnode(VT_LFS, pvp->v_mount, lfs_vnodeop_p, &vp);
/* printf("lfs_ialloc: ino %d vp %p error %d\n", new_ino, vp, error);*/
if (error)
goto errout;
lockmgr(&ufs_hashlock, LK_EXCLUSIVE, 0);
/* Create an inode to associate with the vnode. */
lfs_vcreate(ap->a_pvp->v_mount, new_ino, vp);
lfs_vcreate(pvp->v_mount, new_ino, vp);
ip = VTOI(vp);
/* Zero out the direct and indirect block addresses. */
@ -224,19 +359,22 @@ lfs_valloc(v)
ip->i_din.ffs_din.di_inumber = new_ino;
/* Set a new generation number for this inode. */
ip->i_ffs_gen = new_gen;
if (new_gen)
ip->i_ffs_gen = new_gen;
/* Insert into the inode hash table. */
ufs_ihashins(ip);
lockmgr(&ufs_hashlock, LK_RELEASE, 0);
error = ufs_vinit(vp->v_mount, lfs_specop_p, lfs_fifoop_p, &vp);
ip = VTOI(vp);
if (error) {
vput(vp);
goto errout;
}
/* printf("lfs_ialloc[2]: ino %d vp %p\n", new_ino, vp);*/
*ap->a_vpp = vp;
*vpp = vp;
#if 1
if(!(vp->v_flag & VDIROP)) {
(void)lfs_vref(vp);
@ -264,6 +402,7 @@ lfs_valloc(v)
fs->lfs_free = new_ino;
(void) VOP_BWRITE(bp); /* Ifile */
*vpp = NULLVP;
return (error);
}
@ -302,7 +441,7 @@ lfs_vcreate(mp, ino, vp)
ip->i_ffs_blocks = 0;
ip->i_lfs_effnblks = 0;
ip->i_flag = 0;
LFS_SET_UINO(ip, IN_MODIFIED);
LFS_SET_UINO(ip, IN_CHANGE | IN_MODIFIED);
}
/* Free an inode. */

View File

@ -1,4 +1,4 @@
/* $NetBSD: lfs_bio.c,v 1.32 2000/11/17 19:14:41 perseant Exp $ */
/* $NetBSD: lfs_bio.c,v 1.33 2000/11/27 03:33:57 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
@ -448,27 +448,30 @@ lfs_check(vp, blkno, flags)
wakeup(&fs->lfs_dirops);
}
while (locked_queue_count > LFS_WAIT_BUFS
|| locked_queue_bytes > LFS_WAIT_BYTES)
while (locked_queue_count > LFS_WAIT_BUFS
|| locked_queue_bytes > LFS_WAIT_BYTES)
{
if(lfs_dostats)
++lfs_stats.wait_exceeded;
#ifdef DEBUG_LFS
#ifdef DEBUG
printf("lfs_check: waiting: count=%d, bytes=%ld\n",
locked_queue_count, locked_queue_bytes);
#endif
error = tsleep(&locked_queue_count, PCATCH | PUSER,
"buffers", hz * LFS_BUFWAIT);
if (error != EWOULDBLOCK)
break;
/*
* lfs_flush might not flush all the buffers, if some of the
* inodes were locked. Try flushing again to keep us from
* blocking indefinitely.
* inodes were locked or if most of them were Ifile blocks
* and we weren't asked to checkpoint. Try flushing again
* to keep us from blocking indefinitely.
*/
if (locked_queue_count > LFS_MAX_BUFS ||
locked_queue_bytes > LFS_MAX_BYTES)
{
++fs->lfs_writer;
lfs_flush(fs, flags);
lfs_flush(fs, flags | SEGM_CKP);
if(--fs->lfs_writer==0)
wakeup(&fs->lfs_dirops);
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: lfs_inode.c,v 1.47 2000/11/21 00:00:31 perseant Exp $ */
/* $NetBSD: lfs_inode.c,v 1.48 2000/11/27 03:33:57 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
@ -99,7 +99,8 @@ extern long locked_queue_bytes;
static int lfs_update_seguse(struct lfs *, long, size_t);
static int lfs_indirtrunc (struct inode *, ufs_daddr_t, ufs_daddr_t,
ufs_daddr_t, int, long *, long *, long *, size_t *);
ufs_daddr_t, int, long *, long *, long *, size_t *,
struct proc *);
static int lfs_blkfree (struct lfs *, daddr_t, size_t, long *, size_t *);
static int lfs_vtruncbuf(struct vnode *, daddr_t, int, int);
@ -392,7 +393,7 @@ lfs_truncate(v)
error = lfs_indirtrunc(oip, indir_lbn[level],
bn, lastiblock[level],
level, &count, &rcount,
&lastseg, &bc);
&lastseg, &bc, ap->a_p);
if (error)
allerror = error;
real_released += rcount;
@ -543,7 +544,7 @@ lfs_update_seguse(struct lfs *fs, long lastseg, size_t num)
static int
lfs_indirtrunc(struct inode *ip, ufs_daddr_t lbn, daddr_t dbn,
ufs_daddr_t lastbn, int level, long *countp,
long *rcountp, long *lastsegp, size_t *bcp)
long *rcountp, long *lastsegp, size_t *bcp, struct proc *p)
{
int i;
struct buf *bp;
@ -582,7 +583,7 @@ lfs_indirtrunc(struct inode *ip, ufs_daddr_t lbn, daddr_t dbn,
trace(TR_BREADHIT, pack(vp, fs->lfs_bsize), lbn);
} else {
trace(TR_BREADMISS, pack(vp, fs->lfs_bsize), lbn);
curproc->p_stats->p_ru.ru_inblock++; /* pay for read */
p->p_stats->p_ru.ru_inblock++; /* pay for read */
bp->b_flags |= B_READ;
if (bp->b_bcount > bp->b_bufsize)
panic("lfs_indirtrunc: bad buffer size");
@ -620,7 +621,7 @@ lfs_indirtrunc(struct inode *ip, ufs_daddr_t lbn, daddr_t dbn,
error = lfs_indirtrunc(ip, nlbn, nb,
(ufs_daddr_t)-1, level - 1,
&blkcount, &rblkcount,
lastsegp, bcp);
lastsegp, bcp, p);
if (error)
allerror = error;
blocksreleased += blkcount;
@ -641,7 +642,7 @@ lfs_indirtrunc(struct inode *ip, ufs_daddr_t lbn, daddr_t dbn,
if (nb != 0) {
error = lfs_indirtrunc(ip, nlbn, nb,
last, level - 1, &blkcount,
&rblkcount, lastsegp, bcp);
&rblkcount, lastsegp, bcp, p);
if (error)
allerror = error;
real_released += rblkcount;

View File

@ -1,4 +1,4 @@
/* $NetBSD: lfs_segment.c,v 1.62 2000/11/17 19:14:41 perseant Exp $ */
/* $NetBSD: lfs_segment.c,v 1.63 2000/11/27 03:33:57 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
@ -587,22 +587,21 @@ lfs_segwrite(mp, flags)
did_ckp = 0;
if (do_ckp || fs->lfs_doifile) {
redo:
vp = fs->lfs_ivnode;
do {
vp = fs->lfs_ivnode;
vget(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
vget(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
ip = VTOI(vp);
if (vp->v_dirtyblkhd.lh_first != NULL)
lfs_writefile(fs, sp, vp);
if (ip->i_flag & IN_ALLMOD)
++did_ckp;
(void) lfs_writeinode(fs, sp, ip);
ip = VTOI(vp);
if (vp->v_dirtyblkhd.lh_first != NULL)
lfs_writefile(fs, sp, vp);
if (ip->i_flag & IN_ALLMOD)
++did_ckp;
(void) lfs_writeinode(fs, sp, ip);
vput(vp);
} while (lfs_writeseg(fs, sp) && do_ckp);
vput(vp);
if (lfs_writeseg(fs, sp) && do_ckp)
goto redo;
/* The ifile should now be all clear */
LFS_CLR_UINO(ip, IN_ALLMOD);
} else {
@ -814,6 +813,12 @@ lfs_writeinode(fs, sp, ip)
IN_UPDATE);
if (ip->i_lfs_effnblks == ip->i_ffs_blocks)
LFS_CLR_UINO(ip, IN_MODIFIED);
#ifdef DEBUG_LFS
else
printf("lfs_writeinode: ino %d: real blks=%d, "
"eff=%d\n", ip->i_number, ip->i_ffs_blocks,
ip->i_lfs_effnblks);
#endif
}
if(ip->i_number == LFS_IFILE_INUM) /* We know sp->idp == NULL */

View File

@ -1,4 +1,4 @@
/* $NetBSD: lfs_syscalls.c,v 1.53 2000/11/22 22:11:34 perseant Exp $ */
/* $NetBSD: lfs_syscalls.c,v 1.54 2000/11/27 03:33:57 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
@ -988,6 +988,19 @@ lfs_fastvget(mp, ino, daddr, vpp, dinp, need_unlock)
dev = ump->um_dev;
*need_unlock = 0;
/*
* Wait until the filesystem is fully mounted before allowing vget
* to complete. This prevents possible problems with roll-forward.
*/
while(ump->um_lfs->lfs_flags & LFS_NOTYET) {
tsleep(&ump->um_lfs->lfs_flags, PRIBIO+1, "lfs_fnotyet", 0);
}
/*
* This is playing fast and loose. Someone may have the inode
* locked, in which case they are going to be distinctly unhappy
* if we trash something.
*/
error = lfs_fasthashget(dev, ino, need_unlock, vpp);
if (error != 0 || *vpp != NULL)
return (error);

View File

@ -1,4 +1,4 @@
/* $NetBSD: lfs_vfsops.c,v 1.59 2000/11/14 00:42:55 perseant Exp $ */
/* $NetBSD: lfs_vfsops.c,v 1.60 2000/11/27 03:33:57 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
@ -139,6 +139,9 @@ struct vfsops lfs_vfsops = {
struct pool lfs_inode_pool;
extern int locked_queue_count;
extern long locked_queue_bytes;
/*
* Initialize the filesystem, most work done by ufs_init.
*/
@ -319,6 +322,400 @@ lfs_mount(mp, path, data, ndp, p)
return (0);
}
#ifdef LFS_DO_ROLLFORWARD
/*
* Roll-forward code.
*/
/*
 * Load the appropriate indirect block, and change the appropriate pointer.
 * Mark the block dirty.  Do segment and avail accounting.
 *
 * fs      - filesystem being rolled forward
 * ino     - inode owning the block
 * version - inode generation recorded with the block
 * lbn     - logical block number within the file
 * ndaddr  - disk address where the block now lives
 * size    - size of the block in bytes
 * p       - process passed through to lfs_rf_valloc
 *
 * Returns 0 on success, or the error from lfs_rf_valloc, VOP_BALLOC or
 * ufs_bmaparray.
 */
static int
update_meta(struct lfs *fs, ino_t ino, int version, ufs_daddr_t lbn,
	    daddr_t ndaddr, size_t size, struct proc *p)
{
	int error;
	struct vnode *vp;
	struct inode *ip;
	daddr_t odaddr, ooff;
	struct indir a[NIADDR], *ap;
	struct buf *bp;
	SEGUSE *sup;
	int num;
	off_t boff;

	if ((error = lfs_rf_valloc(fs, ino, version, p, &vp)) != 0) {
		printf("update_meta: ino %d: lfs_rf_valloc returned %d\n", ino,
		       error);
		return error;
	}

	/*
	 * Byte offset of the block.  Widen before shifting so the
	 * computation is not done in 32-bit arithmetic, which would
	 * overflow for blocks far into a large file.
	 */
	boff = (off_t)lbn << fs->lfs_bshift;

	if ((error = VOP_BALLOC(vp, boff, size, NOCRED, 0, &bp)) != 0) {
		vput(vp);
		return (error);
	}
	/* No need to write, the block is already on disk */
	if (bp->b_flags & B_DELWRI) {
		LFS_UNLOCK_BUF(bp);
		fs->lfs_avail += btodb(bp->b_bcount);
	}
	bp->b_flags |= B_INVAL;
	brelse(bp);

	/*
	 * Extend the file, if it is not large enough already.
	 * XXX this is not exactly right, we don't know how much of the
	 * XXX last block is actually used.  We hope that an inode will
	 * XXX appear later to give the correct size.
	 */
	ip = VTOI(vp);
	if (ip->i_ffs_size <= boff) {
		if (lbn < NDADDR)
			ip->i_ffs_size = boff + (size - fs->lfs_fsize) + 1;
		else
			ip->i_ffs_size = boff + 1;
	}

	error = ufs_bmaparray(vp, lbn, &odaddr, &a[0], &num, NULL);
	if (error) {
		printf("update_meta: ufs_bmaparray returned %d\n", error);
		vput(vp);
		return error;
	}

	/*
	 * num is the number of indirection levels: store the new address in
	 * the inode's direct array, its indirect-root array, or the last
	 * indirect block on the path.
	 */
	switch (num) {
	case 0:
		ooff = ip->i_ffs_db[lbn];
		if (ooff == UNWRITTEN)
			ip->i_ffs_blocks += btodb(size);
		ip->i_ffs_db[lbn] = ndaddr;
		break;
	case 1:
		ooff = ip->i_ffs_ib[a[0].in_off];
		if (ooff == UNWRITTEN)
			ip->i_ffs_blocks += btodb(size);
		ip->i_ffs_ib[a[0].in_off] = ndaddr;
		break;
	default:
		ap = &a[num - 1];
		if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
			panic("update_meta: bread bno %d", ap->in_lbn);

		ooff = ((ufs_daddr_t *)bp->b_data)[ap->in_off];
		if (ooff == UNWRITTEN)
			ip->i_ffs_blocks += btodb(size);
		((ufs_daddr_t *)bp->b_data)[ap->in_off] = ndaddr;
		(void) VOP_BWRITE(bp);
	}
	LFS_SET_UINO(ip, IN_CHANGE | IN_MODIFIED | IN_UPDATE);

	/* Update segment usage information. */
	if (odaddr > 0) {
		/* The old copy of the block is no longer live. */
		LFS_SEGENTRY(sup, fs, datosn(fs, odaddr), bp);
#ifdef DIAGNOSTIC
		if (sup->su_nbytes < size) {
			panic("update_meta: negative bytes "
			      "(segment %d short by %ld)\n",
			      datosn(fs, odaddr), (long)size - sup->su_nbytes);
			sup->su_nbytes = size;
		}
#endif
		sup->su_nbytes -= size;
		VOP_BWRITE(bp);
	}
	/* Credit the segment holding the new copy. */
	LFS_SEGENTRY(sup, fs, datosn(fs, ndaddr), bp);
	sup->su_nbytes += size;
	VOP_BWRITE(bp);

	/* Fix this so it can be released */
	/* ip->i_lfs_effnblks = ip->i_ffs_blocks; */

	/* Now look again to make sure it worked */
	ufs_bmaparray(vp, lbn, &odaddr, &a[0], &num, NULL );
	if (odaddr != ndaddr)
		printf("update_meta: failed setting ino %d lbn %d to %x\n",
		       ino, lbn, ndaddr);

	vput(vp);
	return 0;
}
/*
 * Roll forward one on-disk inode block.
 *
 * Reads the inode block at disk address `offset' and, for every inode in
 * it numbered above LFS_IFILE_INUM, obtains the in-core inode via
 * lfs_rf_valloc, copies mode/link count/size/times plus flags, gen, uid
 * and gid from the disk copy, records the new inode location in the
 * Ifile and adjusts the per-segment byte counts.  Block pointers are
 * NOT copied; they are handled separately.
 *
 * Inodes for which lfs_rf_valloc fails are reported and skipped.
 * Returns 0, or the error from bread if the inode block is unreadable.
 */
static int
update_inoblk(struct lfs *fs, daddr_t offset, struct ucred *cred,
	      struct proc *p)
{
	struct vnode *devvp, *vp;
	struct inode *ip;
	struct dinode *dip;
	struct buf *dbp, *ibp;
	int error;
	daddr_t daddr;
	IFILE *ifp;
	SEGUSE *sup;

	devvp = VTOI(fs->lfs_ivnode)->i_devvp;

	/*
	 * Get the inode, update times and perms.
	 * DO NOT update disk blocks, we do that separately.
	 */
	error = bread(devvp, offset, fs->lfs_bsize, cred, &dbp);
	if (error) {
		printf("update_inoblk: bread returned %d\n", error);
		/* bread hands back the buffer even on failure; release it. */
		brelse(dbp);
		return error;
	}

	/* Walk the block from its last inode down to its first. */
	dip = ((struct dinode *)(dbp->b_data)) + INOPB(fs);
	while (--dip >= (struct dinode *)dbp->b_data) {
		if (dip->di_inumber > LFS_IFILE_INUM) {
			error = lfs_rf_valloc(fs, dip->di_inumber, dip->di_gen,
					      p, &vp);
			if (error) {
				printf("update_inoblk: lfs_rf_valloc returned %d\n", error);
				continue;
			}
			ip = VTOI(vp);
			if (dip->di_size != ip->i_ffs_size)
				VOP_TRUNCATE(vp, dip->di_size, 0, NOCRED, p);

			/* Get mode, link count, size, and times */
			memcpy(&ip->i_din.ffs_din, dip,
			       offsetof(struct dinode, di_db[0]));

			/* Then the rest, except di_blocks */
			ip->i_ffs_flags = dip->di_flags;
			ip->i_ffs_gen = dip->di_gen;
			ip->i_ffs_uid = dip->di_uid;
			ip->i_ffs_gid = dip->di_gid;

			ip->i_ffs_effnlink = dip->di_nlink;

			LFS_SET_UINO(ip, IN_CHANGE | IN_MODIFIED | IN_UPDATE);

			/* Re-initialize to get type right */
			ufs_vinit(vp->v_mount, lfs_specop_p, lfs_fifoop_p,
				  &vp);
			vput(vp);

			/* Record change in location */
			LFS_IENTRY(ifp, fs, dip->di_inumber, ibp);
			daddr = ifp->if_daddr;
			ifp->if_daddr = dbp->b_blkno;
			error = VOP_BWRITE(ibp); /* Ifile */

			/* And do segment accounting */
			if (datosn(fs, daddr) != datosn(fs, dbp->b_blkno)) {
				if (daddr > 0) {
					/* Old copy of the inode is now dead. */
					LFS_SEGENTRY(sup, fs, datosn(fs, daddr),
						     ibp);
					sup->su_nbytes -= DINODE_SIZE;
					VOP_BWRITE(ibp);
				}
				LFS_SEGENTRY(sup, fs, datosn(fs, dbp->b_blkno),
					     ibp);
				sup->su_nbytes += DINODE_SIZE;
				VOP_BWRITE(ibp);
			}
		}
	}
	dbp->b_flags |= B_AGE;
	brelse(dbp);

	return 0;
}
#define CHECK_CKSUM 0x0001 /* Check the checksum to make sure it's valid */
#define CHECK_UPDATE 0x0002 /* Update Ifile for new data blocks / inodes */
/*
 * Examine the partial segment whose summary lives at disk address
 * `offset' (skipping a leading superblock if the segment has one).
 *
 * flags:
 *   CHECK_CKSUM  - validate the summary checksum, reject empty or old
 *                  partial segments, and validate the data checksum.
 *   CHECK_UPDATE - apply the partial segment: update inodes via
 *                  update_inoblk and block pointers via update_meta,
 *                  and charge the space against lfs_avail.
 *
 * If pseg_flags is non-NULL it receives the summary's ss_flags.
 *
 * Returns the disk address at which the next partial segment is
 * expected, or -1 if the summary could not be read, failed validation,
 * or an update step failed.
 */
static daddr_t
check_segsum(struct lfs *fs, daddr_t offset,
	     struct ucred *cred, int flags, int *pseg_flags, struct proc *p)
{
	struct vnode *devvp;
	struct buf *bp, *dbp;
	int error, nblocks, ninos, i, j;
	SEGSUM *ssp;
	u_long *dp, *datap; /* XXX u_int32_t */
	daddr_t *iaddr, oldoffset;
	FINFO *fip;
	SEGUSE *sup;
	size_t size;

	devvp = VTOI(fs->lfs_ivnode)->i_devvp;

	/* Only meaningful under CHECK_CKSUM; give them defined values. */
	datap = dp = NULL;
	nblocks = 0;

	/*
	 * If the segment has a superblock and we're at the top
	 * of the segment, skip the superblock.
	 */
	if (sntoda(fs, datosn(fs, offset)) == offset) {
		LFS_SEGENTRY(sup, fs, datosn(fs, offset), bp);
		if (sup->su_flags & SEGUSE_SUPERBLOCK)
			offset += btodb(LFS_SBPAD);
		brelse(bp);
	}

	/* Read in the segment summary */
	error = bread(devvp, offset, LFS_SUMMARY_SIZE, cred, &bp);
	if (error) {
		/* bread hands back the buffer even on failure; release it. */
		brelse(bp);
		return -1;
	}

	/* Check summary checksum */
	ssp = (SEGSUM *)bp->b_data;
	if (flags & CHECK_CKSUM) {
		if (ssp->ss_sumsum != cksum(&ssp->ss_datasum,
					   LFS_SUMMARY_SIZE -
					   sizeof(ssp->ss_sumsum))) {
#ifdef DEBUG_LFS_RFW
			printf("Sumsum error at 0x%x\n", offset);
#endif
			offset = -1;
			goto err1;
		}
		if (ssp->ss_nfinfo == 0 && ssp->ss_ninos == 0) {
#ifdef DEBUG_LFS_RFW
			printf("Empty pseg at 0x%x\n", offset);
#endif
			offset = -1;
			goto err1;
		}
		if (ssp->ss_create < fs->lfs_tstamp) {
#ifdef DEBUG_LFS_RFW
			printf("Old data at 0x%x\n", offset);
#endif
			offset = -1;
			goto err1;
		}
	}
	if (pseg_flags)
		*pseg_flags = ssp->ss_flags;
	oldoffset = offset;
	offset += btodb(LFS_SUMMARY_SIZE);

	/* Inode block addresses are stored from the end of the summary. */
	ninos = howmany(ssp->ss_ninos, INOPB(fs));
	iaddr = (daddr_t *)(bp->b_data + LFS_SUMMARY_SIZE - sizeof(daddr_t));
	if (flags & CHECK_CKSUM) {
		/* Count blocks */
		nblocks = 0;
		fip = (FINFO *)(bp->b_data + sizeof(SEGSUM));
		for (i = 0; i < ssp->ss_nfinfo; ++i) {
			nblocks += fip->fi_nblocks;
			if (fip->fi_nblocks <= 0)
				break;
			fip = (FINFO *)(((char *)fip) + sizeof(FINFO) +
					(fip->fi_nblocks - 1) *
					sizeof(ufs_daddr_t));
		}
		nblocks += ninos;
		/* Create the sum array */
		datap = dp = (u_long *)malloc(nblocks * sizeof(u_long),
					      M_SEGMENT, M_WAITOK);
	}

	/* Handle individual blocks */
	fip = (FINFO *)(bp->b_data + sizeof(SEGSUM));
	for (i = 0; i < ssp->ss_nfinfo || ninos; ++i) {
		/* Inode block? */
		if (ninos && *iaddr == offset) {
			if (flags & CHECK_CKSUM) {
				/* Read in the head and add to the buffer */
				error = bread(devvp, offset, fs->lfs_bsize,
					      cred, &dbp);
				if (error) {
					brelse(dbp);
					offset = -1;
					goto err2;
				}
				(*dp++) = ((u_long *)(dbp->b_data))[0];
				dbp->b_flags |= B_AGE;
				brelse(dbp);
			}
			if (flags & CHECK_UPDATE) {
				if ((error = update_inoblk(fs, offset, cred, p))
				    != 0) {
					offset = -1;
					goto err2;
				}
			}
			offset += fsbtodb(fs,1);
			--iaddr;
			--ninos;
			--i; /* compensate */
			continue;
		}

		/* Data blocks of one file; the last may be short. */
		size = fs->lfs_bsize;
		for (j = 0; j < fip->fi_nblocks; ++j) {
			if (j == fip->fi_nblocks - 1)
				size = fip->fi_lastlength;
			if (flags & CHECK_CKSUM) {
				error = bread(devvp, offset, size, cred, &dbp);
				if (error) {
					brelse(dbp);
					offset = -1;
					goto err2;
				}
				(*dp++) = ((u_long *)(dbp->b_data))[0];
				dbp->b_flags |= B_AGE;
				brelse(dbp);
			}
			/* Account for and update any direct blocks */
			if ((flags & CHECK_UPDATE) &&
			    fip->fi_ino > LFS_IFILE_INUM &&
			    fip->fi_blocks[j] >= 0) {
				update_meta(fs, fip->fi_ino, fip->fi_version,
					    fip->fi_blocks[j], offset, size, p);
			}
			offset += btodb(size);
		}
		fip = (FINFO *)(((char *)fip) + sizeof(FINFO)
				+ (fip->fi_nblocks - 1) * sizeof(ufs_daddr_t));
	}

	/* Checksum the array, compare */
	if ((flags & CHECK_CKSUM) &&
	    ssp->ss_datasum != cksum(datap, nblocks * sizeof(u_long)))
	{
		printf("Datasum error at 0x%x (wanted %x got %x)\n", offset,
		       ssp->ss_datasum, cksum(datap, nblocks *
					      sizeof(u_long)));
		offset = -1;
		goto err2;
	}

	/* If we're at the end of the segment, move to the next */
	if (datosn(fs, offset + btodb(LFS_SUMMARY_SIZE + fs->lfs_bsize)) !=
	    datosn(fs, offset)) {
		if (datosn(fs, offset) == datosn(fs, ssp->ss_next)) {
			offset = -1;
			goto err2;
		}
		offset = ssp->ss_next;
#ifdef DEBUG_LFS_RFW
		printf("LFS roll forward: moving on to offset 0x%x "
		       " -> segment %d\n", offset, datosn(fs,offset));
#endif
	}

	if (flags & CHECK_UPDATE) {
		fs->lfs_avail -= (offset - oldoffset);
		/* Don't clog the buffer queue */
		if (locked_queue_count > LFS_MAX_BUFS ||
		    locked_queue_bytes > LFS_MAX_BYTES) {
			++fs->lfs_writer;
			lfs_flush(fs, SEGM_CKP);
			if (--fs->lfs_writer == 0)
				wakeup(&fs->lfs_dirops);
		}
	}

err2:
	if (flags & CHECK_CKSUM)
		free(datap, M_SEGMENT);
err1:
	bp->b_flags |= B_AGE;
	brelse(bp);

	return offset;
}
#endif /* LFS_DO_ROLLFORWARD */
/*
* Common code for mount and mountroot
* LFS specific
@ -330,7 +727,7 @@ lfs_mountfs(devvp, mp, p)
struct proc *p;
{
extern struct vnode *rootvp;
struct dlfs *dfs, *adfs;
struct dlfs *tdfs, *dfs, *adfs;
struct lfs *fs;
struct ufsmount *ump;
struct vnode *vp;
@ -341,6 +738,11 @@ lfs_mountfs(devvp, mp, p)
struct ucred *cred;
CLEANERINFO *cip;
SEGUSE *sup;
#ifdef LFS_DO_ROLLFORWARD
int flags, dirty;
daddr_t offset, oldoffset, lastgoodpseg;
int sn, curseg;
#endif
cred = p ? p->p_ucred : NOCRED;
/*
@ -389,6 +791,7 @@ lfs_mountfs(devvp, mp, p)
* using the older of the two. This is necessary to ensure that
* the filesystem is valid if it was not unmounted cleanly.
*/
if (dfs->dlfs_sboffs[1] &&
dfs->dlfs_sboffs[1]-(LFS_LABELPAD/size) > LFS_SBPAD/size)
{
@ -398,7 +801,18 @@ lfs_mountfs(devvp, mp, p)
adfs = (struct dlfs *)abp->b_data;
if (adfs->dlfs_tstamp < dfs->dlfs_tstamp) /* XXX 1s? */
dfs = adfs;
tdfs = adfs;
else
tdfs = dfs;
/* Check the basics. */
if (tdfs->dlfs_magic != LFS_MAGIC ||
tdfs->dlfs_bsize > MAXBSIZE ||
tdfs->dlfs_version > LFS_VERSION ||
tdfs->dlfs_bsize < sizeof(struct dlfs)) {
error = EINVAL; /* XXX needs translation */
goto out;
}
} else {
printf("lfs_mountfs: invalid alt superblock daddr=0x%x\n",
dfs->dlfs_sboffs[1]);
@ -408,12 +822,23 @@ lfs_mountfs(devvp, mp, p)
/* Allocate the mount structure, copy the superblock into it. */
fs = malloc(sizeof(struct lfs), M_UFSMNT, M_WAITOK);
memcpy(&fs->lfs_dlfs, dfs, sizeof(struct dlfs));
memcpy(&fs->lfs_dlfs, tdfs, sizeof(struct dlfs));
#ifdef LFS_DO_ROLLFORWARD
/* Before rolling forward, lock so vget will sleep for other procs */
fs->lfs_flags = LFS_NOTYET;
fs->lfs_rfpid = p->p_pid;
#else
fs->lfs_flags = 0;
#endif
ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
memset((caddr_t)ump, 0, sizeof *ump);
ump->um_lfs = fs;
if (sizeof(struct lfs) < LFS_SBPAD) /* XXX why? */
if (sizeof(struct lfs) < LFS_SBPAD) { /* XXX why? */
bp->b_flags |= B_INVAL;
abp->b_flags |= B_INVAL;
}
brelse(bp);
bp = NULL;
brelse(abp);
@ -477,6 +902,97 @@ lfs_mountfs(devvp, mp, p)
VREF(vp);
vput(vp);
#ifdef LFS_DO_ROLLFORWARD
/*
* Roll forward.
*/
/*
* Phase I:
* Find the address of the last good partial segment that was written
* after the checkpoint. Mark the segments in question dirty, so
* they won't be reallocated.
*/
lastgoodpseg = oldoffset = offset = fs->lfs_offset;
flags = 0x0;
#ifdef DEBUG_LFS_RFW
printf("LFS roll forward phase 1: starting at offset 0x%x\n", offset);
#endif
LFS_SEGENTRY(sup, fs, datosn(fs, offset), bp);
if (!(sup->su_flags & SEGUSE_DIRTY))
--fs->lfs_nclean;
sup->su_flags |= SEGUSE_DIRTY;
(void) VOP_BWRITE(bp);
while ((offset = check_segsum(fs, offset, cred, CHECK_CKSUM, &flags,
p)) > 0) {
if(sntoda(fs, oldoffset) != sntoda(fs, offset)) {
LFS_SEGENTRY(sup, fs, datosn(fs, oldoffset), bp);
if (!(sup->su_flags & SEGUSE_DIRTY))
--fs->lfs_nclean;
sup->su_flags |= SEGUSE_DIRTY;
(void) VOP_BWRITE(bp);
}
#ifdef DEBUG_LFS_RFW
printf("LFS roll forward phase 1: offset=0x%x\n", offset);
if(flags & SS_DIROP) {
printf("lfs_mountfs: dirops at 0x%x\n", oldoffset);
if(!(flags & SS_CONT))
printf("lfs_mountfs: dirops end at 0x%x\n",
oldoffset);
}
#endif
if(!(flags & SS_CONT))
lastgoodpseg = offset;
oldoffset = offset;
}
#ifdef DEBUG_LFS_RFW
if (flags & SS_CONT) {
printf("LFS roll forward: warning: incomplete dirops discarded\n");
}
printf("LFS roll forward phase 1: completed: lastgoodpseg=0x%x\n",
lastgoodpseg);
#endif
/* Don't accidentally overwrite what we're trying to preserve */
offset = fs->lfs_offset;
fs->lfs_offset = lastgoodpseg;
fs->lfs_curseg = sntoda(fs, datosn(fs, fs->lfs_offset));
for (sn = curseg = datosn(fs, fs->lfs_curseg);;) {
sn = (sn + 1) % fs->lfs_nseg;
if (sn == curseg)
panic("lfs_mountfs: no clean segments");
LFS_SEGENTRY(sup, fs, sn, bp);
dirty = (sup->su_flags & SEGUSE_DIRTY);
brelse(bp);
if (!dirty)
break;
}
fs->lfs_nextseg = sntoda(fs, sn);
/*
* Phase II: Roll forward from the first superblock.
*/
while (offset != lastgoodpseg) {
#ifdef DEBUG_LFS_RFW
printf("LFS roll forward phase 2: 0x%x\n", offset);
#endif
oldoffset = offset;
offset = check_segsum(fs, offset, cred, CHECK_UPDATE, NULL, p);
}
/*
* Finish: flush our changes to disk.
*/
lfs_segwrite(fs->lfs_ivnode->v_mount, SEGM_CKP | SEGM_SYNC);
#ifdef DEBUG_LFS_RFW
printf("LFS roll forward complete\n");
#endif
/* Allow vget now that roll-forward is complete */
fs->lfs_flags &= ~(LFS_NOTYET);
wakeup(&fs->lfs_flags);
#endif /* LFS_DO_ROLLFORWARD */
/*
* Initialize the ifile cleaner info with information from
* the superblock.
@ -675,6 +1191,14 @@ lfs_vget(mp, ino, vpp)
ump = VFSTOUFS(mp);
dev = ump->um_dev;
fs = ump->um_lfs;
/*
* If the filesystem is not completely mounted yet, suspend
* any access requests (wait for roll-forward to complete).
*/
while((fs->lfs_flags & LFS_NOTYET) && curproc->p_pid != fs->lfs_rfpid)
tsleep(&fs->lfs_flags, PRIBIO+1, "lfs_notyet", 0);
if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL)
return (0);
@ -692,10 +1216,10 @@ lfs_vget(mp, ino, vpp)
} while (lockmgr(&ufs_hashlock, LK_EXCLUSIVE|LK_SLEEPFAIL, 0));
/* Translate the inode number to a disk address. */
fs = ump->um_lfs;
if (ino == LFS_IFILE_INUM)
daddr = fs->lfs_idaddr;
else {
/* XXX bounds-check this too */
LFS_IENTRY(ifp, fs, ino, bp);
daddr = ifp->if_daddr;
#ifdef LFS_ATIME_IFILE
@ -703,6 +1227,8 @@ lfs_vget(mp, ino, vpp)
#endif
brelse(bp);
if (daddr == LFS_UNUSED_DADDR) {
*vpp = NULLVP;
ungetnewvnode(vp);
lockmgr(&ufs_hashlock, LK_RELEASE, 0);
return (ENOENT);
}