Phase one of my three-phase plan to make LFS play nice with UBC, plus
bug fixes I found while making sure there weren't any new ones.

* Make the write clusters keep track of the buffers whose blocks they
  contain (see the sketch after this list).  This should make it possible
  to (1) write clusters using a page mapping instead of malloc, if desired,
  and (2) schedule blocks for rewriting (somewhere else) if a write error
  occurs.  Code is present to use pagemove() to construct the clusters,
  but it is untested and will go away anyway in favor of page mapping.
* DEBUG now keeps a log of Ifile writes, so that any lingering instances of
  the "dirty bufs" problem can be properly debugged.
* Keep track of whether the Ifile has been dirtied by the various routines
  that can be called by lfs_segwrite, and, when writing a checkpoint, loop
  until it is clean.  Checkpoints need to be squeaky clean.
* Warn the user (once) if the Ifile grows larger than is reasonable for
  their buffer cache.  Both lfs_mountfs and lfs_unmount check, since the
  Ifile can grow.
* If an inode is not found in a disk block, try rereading the block, under
  the assumption that the block was copied to a cluster and then freed.
* Protect WRITEINPROG() with splbio() to fix a hang in lfs_update.
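
A minimal userland model of the new cluster bookkeeping follows. It is a
sketch, not kernel code: the struct names echo the real struct lfs_cluster
added to lfs.h below, but the requeue-on-error policy shown in the callback
is an assumption; this commit only keeps the buffer list around so that such
a policy becomes possible.

#include <stdio.h>
#include <stdlib.h>

struct buf {
	long b_lblkno;		/* logical block number */
	int b_dirty;		/* needs (re)writing */
};

struct cluster {		/* cf. struct lfs_cluster */
	struct buf **bpp;	/* kept component buffers */
	int bufcount;		/* number of kept buffers */
};

/*
 * I/O-completion callback: because the component buffers were kept,
 * a write error can mark each one dirty again, so a later segment
 * write puts the blocks somewhere else.  (Hypothetical policy.)
 */
static void
cluster_callback(struct cluster *cl, int error)
{
	while (cl->bufcount--)
		cl->bpp[cl->bufcount]->b_dirty = error ? 1 : 0;
	free(cl->bpp);
	free(cl);
}

int
main(void)
{
	struct buf b0 = { 0, 1 }, b1 = { 1, 1 };
	struct cluster *cl = malloc(sizeof(*cl));

	cl->bpp = malloc(2 * sizeof(*cl->bpp));
	cl->bpp[0] = &b0;
	cl->bpp[1] = &b1;
	cl->bufcount = 2;
	cluster_callback(cl, 5);	/* pretend the write failed with EIO */
	printf("lbn %ld dirty=%d, lbn %ld dirty=%d\n",
	    b0.b_lblkno, b0.b_dirty, b1.b_lblkno, b1.b_dirty);
	return 0;
}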
perseant 2002-05-14 20:03:53 +00:00
parent 56deade0b7
commit 8886b0f4b2
12 changed files with 832 additions and 263 deletions

File: lfs.h

@ -1,4 +1,4 @@
/* $NetBSD: lfs.h,v 1.38 2001/11/23 21:44:25 chs Exp $ */
/* $NetBSD: lfs.h,v 1.39 2002/05/14 20:03:53 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
@ -73,12 +73,19 @@
/*
* Compile-time options for LFS.
*/
#define LFS_IFIND_RETRIES 16
#define LFS_EAGAIN_FAIL /* markv fail with EAGAIN if ino is locked */
#define LFS_TRACK_IOS /* attempt to avoid cleaning segments not yet fully written to disk */
#define LFS_DEBUG_RFW /* print roll-forward debugging info */
#define LFS_NO_PAGEMOVE /* Use malloc/copy to write clusters */
#define LFS_AGGRESSIVE_SEGLOCK
#define LFS_LOGLENGTH 1024
/* #define DEBUG_LFS */ /* Intensive debugging of LFS subsystem */
#ifdef LFS_NO_PAGEMOVE
# define LFS_MALLOC_SUMMARY
#endif
/*
* Parameters and generic definitions
*/
@ -120,6 +127,20 @@
(bp)->b_flags &= ~B_LOCKED; \
} while (0)
#ifdef DEBUG_LOCKED_LIST
# define LFS_DEBUG_COUNTLOCKED(m) do { \
int _s; \
extern int locked_queue_count; \
extern long locked_queue_bytes; \
_s = splbio(); \
lfs_countlocked(&locked_queue_count, &locked_queue_bytes, (m)); \
splx(_s); \
wakeup(&locked_queue_count); \
} while (0)
#else
# define LFS_DEBUG_COUNTLOCKED(m)
#endif
/* For convenience */
#define IN_ALLMOD (IN_MODIFIED|IN_ACCESS|IN_CHANGE|IN_UPDATE|IN_ACCESSED|IN_CLEANING)
@ -146,7 +167,42 @@
} \
} while (0)
#ifdef DEBUG
struct lfs_log_entry {
char *op;
char *file;
int line;
ufs_daddr_t block;
unsigned long flags;
};
extern int lfs_lognum;
extern struct lfs_log_entry lfs_log[LFS_LOGLENGTH];
# define LFS_BWRITE_LOG(bp) lfs_bwrite_log((bp), __FILE__, __LINE__)
# define LFS_ENTER_LOG(theop, thefile, theline, lbn, theflags) do { \
int _s; \
\
_s = splbio(); \
lfs_log[lfs_lognum].op = theop; \
lfs_log[lfs_lognum].file = thefile; \
lfs_log[lfs_lognum].line = (theline); \
lfs_log[lfs_lognum].block = (lbn); \
lfs_log[lfs_lognum].flags = (theflags); \
lfs_lognum = (lfs_lognum + 1) % LFS_LOGLENGTH; \
splx(_s); \
} while (0)
# define LFS_BCLEAN_LOG(fs, bp) do { \
if ((bp)->b_vp == (fs)->lfs_ivnode) \
LFS_ENTER_LOG("clear", __FILE__, __LINE__, bp->b_lblkno, bp->b_flags); \
} while (0)
#else
# define LFS_BCLEAN_LOG(fs, bp)
# define LFS_BWRITE_LOG(bp) VOP_BWRITE((bp))
#endif
#define LFS_ITIMES(ip, acc, mod, cre) do { \
struct lfs *_fs = (ip)->i_lfs; \
\
if ((ip)->i_flag & IN_ACCESS) { \
(ip)->i_ffs_atime = (acc)->tv_sec; \
(ip)->i_ffs_atimensec = (acc)->tv_nsec; \
@ -157,7 +213,8 @@
LFS_IENTRY(ifp, ip->i_lfs, ip->i_number, ibp); \
ifp->if_atime_sec = (acc)->tv_sec; \
ifp->if_atime_nsec = (acc)->tv_nsec; \
VOP_BWRITE(ibp); \
LFS_BWRITE_LOG(ibp); \
_fs->lfs_flags |= LFS_IFDIRTY; \
} else { \
LFS_SET_UINO(ip, IN_ACCESSED); \
} \
@ -310,7 +367,7 @@ struct dlfs {
};
/* Maximum number of io's we can have pending at once */
#define LFS_THROTTLE 16 /* XXX should be better parametrized - ? */
#define LFS_THROTTLE 32 /* XXX should be better parametrized - ? */
/* In-memory super block. */
struct lfs {
@ -388,7 +445,9 @@ struct lfs {
u_int32_t lfs_nactive; /* Number of segments since last ckp */
int8_t lfs_fmod; /* super block modified flag */
int8_t lfs_ronly; /* mounted read-only flag */
#define LFS_NOTYET 0x01
#define LFS_NOTYET 0x01
#define LFS_IFDIRTY 0x02
#define LFS_WARNED 0x04
int8_t lfs_flags; /* currently unused flag */
u_int16_t lfs_activesb; /* toggle between superblocks */
#ifdef LFS_TRACK_IOS
@ -570,13 +629,13 @@ struct segsum {
((ufs_daddr_t)(segtod((fs), (sn)) + (fs)->lfs_start))
/* Read in the block with the cleaner info from the ifile. */
#define LFS_CLEANERINFO(CP, F, BP) { \
#define LFS_CLEANERINFO(CP, F, BP) do { \
VTOI((F)->lfs_ivnode)->i_flag |= IN_ACCESS; \
if (bread((F)->lfs_ivnode, \
(ufs_daddr_t)0, (F)->lfs_bsize, NOCRED, &(BP))) \
panic("lfs: ifile read"); \
(CP) = (CLEANERINFO *)(BP)->b_data; \
}
} while(0)
/* Synchronize the Ifile cleaner info with current avail and bfree */
#define LFS_SYNC_CLEANERINFO(cip, fs, bp, w) do { \
@ -584,7 +643,9 @@ struct segsum {
(cip)->avail != (fs)->lfs_avail - (fs)->lfs_ravail) { \
(cip)->bfree = (fs)->lfs_bfree; \
(cip)->avail = (fs)->lfs_avail - (fs)->lfs_ravail; \
(void) VOP_BWRITE(bp); /* Ifile */ \
if (((bp)->b_flags & B_GATHERED) == 0) \
(fs)->lfs_flags |= LFS_IFDIRTY; \
(void) LFS_BWRITE_LOG(bp); /* Ifile */ \
} else \
brelse(bp); \
} while (0)
@ -603,7 +664,8 @@ struct segsum {
if ((FS)->lfs_version > 1) { \
LFS_CLEANERINFO((CIP), (FS), (BP)); \
(CIP)->free_head = (VAL); \
VOP_BWRITE(BP); \
LFS_BWRITE_LOG(BP); \
(FS)->lfs_flags |= LFS_IFDIRTY; \
} \
} while (0)
@ -616,7 +678,8 @@ struct segsum {
#define LFS_PUT_TAILFREE(FS, CIP, BP, VAL) do { \
LFS_CLEANERINFO((CIP), (FS), (BP)); \
(CIP)->free_tail = (VAL); \
VOP_BWRITE(BP); \
LFS_BWRITE_LOG(BP); \
(FS)->lfs_flags |= LFS_IFDIRTY; \
} while (0)
/*
@ -624,7 +687,7 @@ struct segsum {
* may not be mapped!
*/
/* Read in the block with a specific inode from the ifile. */
#define LFS_IENTRY(IP, F, IN, BP) { \
#define LFS_IENTRY(IP, F, IN, BP) do { \
int _e; \
VTOI((F)->lfs_ivnode)->i_flag |= IN_ACCESS; \
if ((_e = bread((F)->lfs_ivnode, \
@ -635,10 +698,10 @@ struct segsum {
(IP) = (IFILE *)((IFILE_V1 *)(BP)->b_data + (IN) % (F)->lfs_ifpb); \
else \
(IP) = (IFILE *)(BP)->b_data + (IN) % (F)->lfs_ifpb; \
}
} while(0)
/* Read in the block with a specific segment usage entry from the ifile. */
#define LFS_SEGENTRY(SP, F, IN, BP) { \
#define LFS_SEGENTRY(SP, F, IN, BP) do { \
int _e; \
VTOI((F)->lfs_ivnode)->i_flag |= IN_ACCESS; \
if ((_e = bread((F)->lfs_ivnode, \
@ -650,7 +713,7 @@ struct segsum {
((IN) & ((F)->lfs_sepb - 1))); \
else \
(SP) = (SEGUSE *)(BP)->b_data + ((IN) % (F)->lfs_sepb); \
}
} while(0)
/* Determine if a buffer belongs to the ifile */
#define IS_IFILE(bp) (VTOI(bp->b_vp)->i_number == LFS_IFILE_INUM)
@ -704,6 +767,18 @@ struct segment {
u_int16_t seg_flags; /* run-time flags for this segment */
};
struct lfs_cluster {
struct buf **bpp; /* Array of kept buffers */
int bufcount; /* Number of kept buffers */
size_t bufsize; /* Size of kept data */
#define LFS_CL_MALLOC 0x00000001
#define LFS_CL_SHIFT 0x00000002
u_int32_t flags; /* Flags */
struct lfs *fs; /* LFS that this belongs to */
void *saveaddr; /* Original contents of saveaddr */
char *olddata; /* Original b_data, if LFS_CL_MALLOC */
};
/*
* Macros for determining free space on the disk, with the variable metadata
* of segment summaries and inode blocks taken into account.

File: lfs_alloc.c

@ -1,4 +1,4 @@
/* $NetBSD: lfs_alloc.c,v 1.55 2002/02/04 03:32:16 perseant Exp $ */
/* $NetBSD: lfs_alloc.c,v 1.56 2002/05/14 20:03:53 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.55 2002/02/04 03:32:16 perseant Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.56 2002/05/14 20:03:53 perseant Exp $");
#if defined(_KERNEL_OPT)
#include "opt_quota.h"
@ -81,6 +81,7 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.55 2002/02/04 03:32:16 perseant Exp
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/lock.h>
#include <sys/vnode.h>
#include <sys/syslog.h>
#include <sys/mount.h>
@ -109,6 +110,8 @@ static int lfs_ialloc(struct lfs *, struct vnode *, ino_t, int, struct vnode **)
*
* XXX this function does not have appropriate locking to be used on a live fs;
* XXX but something similar could probably be used for an "undelete" call.
*
* Called with the Ifile inode locked.
*/
int
lfs_rf_valloc(struct lfs *fs, ino_t ino, int version, struct proc *p,
@ -182,7 +185,7 @@ lfs_rf_valloc(struct lfs *fs, ino_t ino, int version, struct proc *p,
return ENOENT;
}
ifp->if_nextfree = oldnext;
VOP_BWRITE(bp);
LFS_BWRITE_LOG(bp);
}
error = lfs_ialloc(fs, fs->lfs_ivnode, ino, version, &vp);
@ -211,6 +214,9 @@ lfs_rf_valloc(struct lfs *fs, ino_t ino, int version, struct proc *p,
return error;
}
/*
* Called with the Ifile inode locked.
*/
static int
extend_ifile(struct lfs *fs, struct ucred *cred)
{
@ -225,19 +231,14 @@ extend_ifile(struct lfs *fs, struct ucred *cred)
CLEANERINFO *cip;
vp = fs->lfs_ivnode;
(void)lfs_vref(vp);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
ip = VTOI(vp);
blkno = lblkno(fs, ip->i_ffs_size);
if ((error = VOP_BALLOC(vp, ip->i_ffs_size, fs->lfs_bsize, cred, 0,
&bp)) != 0) {
VOP_UNLOCK(vp, 0);
lfs_vunref(vp);
return (error);
}
ip->i_ffs_size += fs->lfs_bsize;
uvm_vnp_setsize(vp, ip->i_ffs_size);
VOP_UNLOCK(vp, 0);
i = (blkno - fs->lfs_segtabsz - fs->lfs_cleansz) *
fs->lfs_ifpb;
@ -269,8 +270,7 @@ extend_ifile(struct lfs *fs, struct ucred *cred)
}
LFS_PUT_TAILFREE(fs, cip, cbp, max - 1);
(void) VOP_BWRITE(bp); /* Ifile */
lfs_vunref(vp);
(void) LFS_BWRITE_LOG(bp); /* Ifile */
return 0;
}
@ -300,6 +300,9 @@ lfs_valloc(void *v)
return EROFS;
*ap->a_vpp = NULL;
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_seglock(fs, SEGM_PROT);
#else
if (fs->lfs_version == 1) {
/*
* Use lfs_seglock here, instead of fs->lfs_freelock, to
@ -311,6 +314,7 @@ lfs_valloc(void *v)
} else {
lockmgr(&fs->lfs_freelock, LK_EXCLUSIVE, 0);
}
#endif
/* Get the head of the freelist. */
LFS_GET_HEADFREE(fs, cip, cbp, &new_ino);
@ -344,10 +348,14 @@ lfs_valloc(void *v)
if (fs->lfs_free == LFS_UNUSED_INUM) {
if ((error = extend_ifile(fs, ap->a_cred)) != 0) {
LFS_PUT_HEADFREE(fs, cip, cbp, new_ino);
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_segunlock(fs);
#else
if (fs->lfs_version == 1)
lfs_segunlock(fs);
else
lockmgr(&fs->lfs_freelock, LK_RELEASE, 0);
#endif
return error;
}
}
@ -356,11 +364,14 @@ lfs_valloc(void *v)
panic("inode 0 allocated [3]");
#endif /* DIAGNOSTIC */
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_segunlock(fs);
#else
if (fs->lfs_version == 1)
lfs_segunlock(fs);
else
lockmgr(&fs->lfs_freelock, LK_RELEASE, 0);
#endif
return lfs_ialloc(fs, ap->a_pvp, new_ino, new_gen, ap->a_vpp);
}
@ -426,11 +437,17 @@ lfs_ialloc(struct lfs *fs, struct vnode *pvp, ino_t new_ino, int new_gen,
/*
* Put the new inum back on the free list.
*/
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_seglock(fs, SEGM_PROT);
#endif
LFS_IENTRY(ifp, fs, new_ino, bp);
ifp->if_daddr = LFS_UNUSED_DADDR;
LFS_GET_HEADFREE(fs, cip, cbp, &(ifp->if_nextfree));
LFS_PUT_HEADFREE(fs, cip, cbp, new_ino);
(void) VOP_BWRITE(bp); /* Ifile */
(void) LFS_BWRITE_LOG(bp); /* Ifile */
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_segunlock(fs);
#endif
*vpp = NULLVP;
return (error);
@ -470,6 +487,11 @@ lfs_vcreate(struct mount *mp, ino_t ino, struct vnode *vp)
ip->i_flag = 0;
/* Why was IN_MODIFIED ever set here? */
/* LFS_SET_UINO(ip, IN_CHANGE | IN_MODIFIED); */
#ifdef DEBUG_LFS_VNLOCK
if (ino == LFS_IFILE_INUM)
vp->v_vnlock->lk_wmesg = "inlock";
#endif
}
/* Free an inode. */
@ -493,6 +515,7 @@ lfs_vfree(void *v)
ufs_daddr_t old_iaddr;
ino_t ino, otail;
extern int lfs_dirvcount;
int s;
/* Get the inode number and file system. */
vp = ap->a_pvp;
@ -501,13 +524,19 @@ lfs_vfree(void *v)
ino = ip->i_number;
/* Drain of pending writes */
s = splbio();
if (fs->lfs_version > 1 && WRITEINPROG(vp))
tsleep(vp, (PRIBIO+1), "lfs_vfree", 0);
splx(s);
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_seglock(fs, SEGM_PROT); /* XXX */;
#else
if (fs->lfs_version == 1)
lfs_seglock(fs, SEGM_PROT);
else
lockmgr(&fs->lfs_freelock, LK_EXCLUSIVE, 0);
#endif
if (vp->v_flag & VDIROP) {
--lfs_dirvcount;
@ -534,7 +563,7 @@ lfs_vfree(void *v)
if (fs->lfs_version == 1) {
LFS_GET_HEADFREE(fs, cip, cbp, &(ifp->if_nextfree));
LFS_PUT_HEADFREE(fs, cip, cbp, ino);
(void) VOP_BWRITE(bp); /* Ifile */
(void) LFS_BWRITE_LOG(bp); /* Ifile */
} else {
ifp->if_nextfree = LFS_UNUSED_INUM;
/*
@ -543,11 +572,11 @@ lfs_vfree(void *v)
* XXX (the ifile could be written before the rest of this
* XXX completes).
*/
(void) VOP_BWRITE(bp); /* Ifile */
(void) LFS_BWRITE_LOG(bp); /* Ifile */
LFS_GET_TAILFREE(fs, cip, cbp, &otail);
LFS_IENTRY(ifp, fs, otail, bp);
ifp->if_nextfree = ino;
VOP_BWRITE(bp);
LFS_BWRITE_LOG(bp);
LFS_PUT_TAILFREE(fs, cip, cbp, ino);
/* printf("lfs_vfree: tailfree %d -> %d\n", otail, ino); */
}
@ -569,16 +598,20 @@ lfs_vfree(void *v)
}
#endif
sup->su_nbytes -= DINODE_SIZE;
(void) VOP_BWRITE(bp); /* Ifile */
(void) LFS_BWRITE_LOG(bp); /* Ifile */
}
/* Set superblock modified bit and decrement file count. */
fs->lfs_fmod = 1;
--fs->lfs_nfiles;
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_segunlock(fs);
#else
if (fs->lfs_version == 1)
lfs_segunlock(fs);
else
lockmgr(&fs->lfs_freelock, LK_RELEASE, 0);
#endif
return (0);
}

File: lfs_balloc.c

@ -1,4 +1,4 @@
/* $NetBSD: lfs_balloc.c,v 1.31 2001/11/23 21:44:26 chs Exp $ */
/* $NetBSD: lfs_balloc.c,v 1.32 2002/05/14 20:03:53 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.31 2001/11/23 21:44:26 chs Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.32 2002/05/14 20:03:53 perseant Exp $");
#if defined(_KERNEL_OPT)
#include "opt_quota.h"
@ -400,13 +400,15 @@ lfs_fragextend(struct vnode *vp, int osize, int nsize, ufs_daddr_t lbn, struct b
if ((*bpp)->b_blkno > 0) {
LFS_SEGENTRY(sup, fs, dtosn(fs, dbtofsb(fs, (*bpp)->b_blkno)), ibp);
sup->su_nbytes += (nsize - osize);
VOP_BWRITE(ibp);
LFS_BWRITE_LOG(ibp);
ip->i_ffs_blocks += bb;
}
fs->lfs_bfree -= bb;
ip->i_lfs_effnblks += bb;
ip->i_flag |= IN_CHANGE | IN_UPDATE;
LFS_DEBUG_COUNTLOCKED("frag1");
obufsize = (*bpp)->b_bufsize;
allocbuf(*bpp, nsize);
@ -414,6 +416,8 @@ lfs_fragextend(struct vnode *vp, int osize, int nsize, ufs_daddr_t lbn, struct b
if (((*bpp)->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED)
locked_queue_bytes += (*bpp)->b_bufsize - obufsize;
LFS_DEBUG_COUNTLOCKED("frag2");
bzero((char *)((*bpp)->b_data) + osize, (u_int)(nsize - osize));
out:

File: lfs_bio.c

@ -1,4 +1,4 @@
/* $NetBSD: lfs_bio.c,v 1.42 2002/05/12 23:06:29 matt Exp $ */
/* $NetBSD: lfs_bio.c,v 1.43 2002/05/14 20:03:53 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.42 2002/05/12 23:06:29 matt Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.43 2002/05/14 20:03:53 perseant Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -298,11 +298,10 @@ lfs_bwrite_ext(struct buf *bp, int flags)
bp->b_flags |= B_DELWRI;
LFS_LOCK_BUF(bp);
bp->b_flags &= ~(B_READ | B_ERROR);
bp->b_flags &= ~(B_READ | B_DONE | B_ERROR);
s = splbio();
reassignbuf(bp, bp->b_vp);
splx(s);
}
if (bp->b_flags & B_CALL)
@ -351,7 +350,6 @@ lfs_flush_fs(struct lfs *fs, int flags)
void
lfs_flush(struct lfs *fs, int flags)
{
int s;
struct mount *mp, *nmp;
if (lfs_dostats)
@ -378,12 +376,7 @@ lfs_flush(struct lfs *fs, int flags)
}
simple_unlock(&mountlist_slock);
#if 1 || defined(DEBUG)
s = splbio();
lfs_countlocked(&locked_queue_count, &locked_queue_bytes);
splx(s);
wakeup(&locked_queue_count);
#endif /* 1 || DEBUG */
LFS_DEBUG_COUNTLOCKED("flush");
lfs_writing = 0;
}
@ -488,9 +481,8 @@ lfs_newbuf(struct lfs *fs, struct vnode *vp, ufs_daddr_t daddr, size_t size)
bp = DOMALLOC(sizeof(struct buf), M_SEGMENT, M_WAITOK);
bzero(bp, sizeof(struct buf));
if (nbytes)
bp->b_data = DOMALLOC(nbytes, M_SEGMENT, M_WAITOK);
if (nbytes) {
bp->b_data = DOMALLOC(nbytes, M_SEGMENT, M_WAITOK);
bzero(bp->b_data, nbytes);
}
#ifdef DIAGNOSTIC
@ -503,6 +495,7 @@ lfs_newbuf(struct lfs *fs, struct vnode *vp, ufs_daddr_t daddr, size_t size)
bgetvp(vp, bp);
splx(s);
bp->b_saveaddr = (caddr_t)fs;
bp->b_bufsize = size;
bp->b_bcount = size;
bp->b_lblkno = daddr;
@ -555,7 +548,7 @@ extern TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];
* Don't count malloced buffers, since they don't detract from the total.
*/
void
lfs_countlocked(int *count, long *bytes)
lfs_countlocked(int *count, long *bytes, char *msg)
{
struct buf *bp;
int n = 0;
@ -573,14 +566,14 @@ lfs_countlocked(int *count, long *bytes)
" buffers locked than exist");
#endif
}
#ifdef DEBUG
#ifdef DEBUG_LOCKED_LIST
/* Theoretically this function never really does anything */
if (n != *count)
printf("lfs_countlocked: adjusted buf count from %d to %d\n",
*count, n);
printf("lfs_countlocked: %s: adjusted buf count from %d to %d\n",
msg, *count, n);
if (size != *bytes)
printf("lfs_countlocked: adjusted byte count from %ld to %ld\n",
*bytes, size);
printf("lfs_countlocked: %s: adjusted byte count from %ld to %ld\n",
msg, *bytes, size);
#endif
*count = n;
*bytes = size;

File: lfs_debug.c

@ -1,4 +1,4 @@
/* $NetBSD: lfs_debug.c,v 1.15 2001/11/23 21:44:27 chs Exp $ */
/* $NetBSD: lfs_debug.c,v 1.16 2002/05/14 20:03:53 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
@ -73,18 +73,47 @@
#ifdef DEBUG
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_debug.c,v 1.15 2001/11/23 21:44:27 chs Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_debug.c,v 1.16 2002/05/14 20:03:53 perseant Exp $");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/buf.h>
#include <ufs/ufs/inode.h>
#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>
int lfs_lognum;
struct lfs_log_entry lfs_log[LFS_LOGLENGTH];
int lfs_bwrite_log(struct buf *bp, char *file, int line)
{
struct vop_bwrite_args a;
a.a_desc = VDESC(vop_bwrite);
a.a_bp = bp;
if (!(bp->b_flags & (B_DELWRI | B_GATHERED)))
LFS_ENTER_LOG("write", file, line, bp->b_lblkno, bp->b_flags);
return (VCALL(bp->b_vp, VOFFSET(vop_bwrite), &a));
}
void lfs_dumplog(void)
{
int i;
for (i = lfs_lognum; i != (lfs_lognum - 1) % LFS_LOGLENGTH; i = (i + 1) % LFS_LOGLENGTH)
if (lfs_log[i].file) {
printf("lbn %d %s %lx %d %s\n",
lfs_log[i].block,
lfs_log[i].op,
lfs_log[i].flags,
lfs_log[i].line,
lfs_log[i].file + 56);
}
}
void
lfs_dump_super(struct lfs *lfsp)
{

File: lfs_extern.h

@ -1,4 +1,4 @@
/* $NetBSD: lfs_extern.h,v 1.29 2002/05/12 23:06:29 matt Exp $ */
/* $NetBSD: lfs_extern.h,v 1.30 2002/05/14 20:03:53 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
@ -131,7 +131,7 @@ struct buf *lfs_newbuf_malloclog(struct lfs *, struct vnode *,
void lfs_freebuf(struct buf *);
struct buf *lfs_newbuf(struct lfs *, struct vnode *, ufs_daddr_t, size_t);
#endif
void lfs_countlocked(int *, long *);
void lfs_countlocked(int *, long *, char *);
int lfs_reserve(struct lfs *, struct vnode *, int);
/* lfs_cksum.c */
@ -140,6 +140,8 @@ u_int32_t lfs_sb_cksum(struct dlfs *);
/* lfs_debug.c */
#ifdef DEBUG
int lfs_bwrite_log(struct buf *, char *, int);
void lfs_dumplog(void);
void lfs_dump_super(struct lfs *);
void lfs_dump_dinode(struct dinode *);
void lfs_check_bpp(struct lfs *, struct segment *, char *, int);
@ -180,7 +182,7 @@ void lfs_segunlock(struct lfs *);
/* lfs_syscalls.c */
int lfs_fastvget(struct mount *, ino_t, ufs_daddr_t, struct vnode **, struct dinode *, int *);
struct buf *lfs_fakebuf(struct vnode *, int, size_t, caddr_t);
struct buf *lfs_fakebuf(struct lfs *, struct vnode *, int, size_t, caddr_t);
/* lfs_vfsops.c */
void lfs_init(void);

File: lfs_inode.c

@ -1,4 +1,4 @@
/* $NetBSD: lfs_inode.c,v 1.56 2001/11/23 21:44:27 chs Exp $ */
/* $NetBSD: lfs_inode.c,v 1.57 2002/05/14 20:03:54 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.56 2001/11/23 21:44:27 chs Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.57 2002/05/14 20:03:54 perseant Exp $");
#if defined(_KERNEL_OPT)
#include "opt_quota.h"
@ -137,8 +137,8 @@ lfs_ifind(struct lfs *fs, ino_t ino, struct buf *bp)
dtosn(fs, fs->lfs_offset));
printf("block is 0x%x (seg %d)\n", dbtofsb(fs, bp->b_blkno),
dtosn(fs, dbtofsb(fs, bp->b_blkno)));
panic("lfs_ifind: dinode %u not found", ino);
/* NOTREACHED */
return NULL;
}
int
@ -154,6 +154,7 @@ lfs_update(void *v)
struct vnode *vp = ap->a_vp;
struct timespec ts;
struct lfs *fs = VFSTOUFS(vp->v_mount)->um_lfs;
int s;
if (vp->v_mount->mnt_flag & MNT_RDONLY)
return (0);
@ -166,6 +167,7 @@ lfs_update(void *v)
* will cause a panic. So, we must wait until any pending write
* for our inode completes, if we are called with UPDATE_WAIT set.
*/
s = splbio();
while ((ap->a_flags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT &&
WRITEINPROG(vp)) {
#ifdef DEBUG_LFS
@ -174,6 +176,7 @@ lfs_update(void *v)
#endif
tsleep(vp, (PRIBIO+1), "lfs_update", 0);
}
splx(s);
TIMEVAL_TO_TIMESPEC(&time, &ts);
LFS_ITIMES(ip,
ap->a_access ? ap->a_access : &ts,
@ -313,11 +316,15 @@ lfs_truncate(void *v)
* (We don't need to *hold* the seglock, though, because we already
* hold the inode lock; draining the seglock is sufficient.)
*/
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_seglock(fs, SEGM_PROT);
#else
if (ovp != fs->lfs_unlockvp) {
while (fs->lfs_seglock) {
tsleep(&fs->lfs_seglock, PRIBIO+1, "lfs_truncate", 0);
}
}
#endif
/*
* Shorten the size of the file. If the file is not being
@ -340,6 +347,9 @@ lfs_truncate(void *v)
error = VOP_BALLOC(ovp, length - 1, 1, ap->a_cred, aflags, &bp);
if (error) {
lfs_reserve(fs, ovp, -btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift));
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_segunlock(fs);
#endif
return (error);
}
obufsize = bp->b_bufsize;
@ -350,11 +360,10 @@ lfs_truncate(void *v)
memset((char *)bp->b_data + offset, 0,
(u_int)(size - offset));
allocbuf(bp, size);
if (bp->b_flags & B_DELWRI) {
if ((bp->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED)
locked_queue_bytes -= obufsize - bp->b_bufsize;
if ((bp->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED)
locked_queue_bytes -= obufsize - bp->b_bufsize;
if (bp->b_flags & B_DELWRI)
fs->lfs_avail += odb - btofsb(fs, size);
}
(void) VOP_BWRITE(bp);
}
uvm_vnp_setsize(ovp, length);
@ -494,6 +503,9 @@ done:
(void) chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
lfs_reserve(fs, ovp, -btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift));
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_segunlock(fs);
#endif
return (allerror);
}
@ -523,10 +535,10 @@ lfs_update_seguse(struct lfs *fs, long lastseg, size_t num)
{
SEGUSE *sup;
struct buf *bp;
int error;
if (lastseg < 0 || num == 0)
return 0;
LFS_SEGENTRY(sup, fs, lastseg, bp);
if (num > sup->su_nbytes) {
@ -536,7 +548,8 @@ lfs_update_seguse(struct lfs *fs, long lastseg, size_t num)
sup->su_nbytes = num;
}
sup->su_nbytes -= num;
return (VOP_BWRITE(bp)); /* Ifile */
error = LFS_BWRITE_LOG(bp); /* Ifile */
return error;
}
/*

File: lfs_segment.c

@ -1,4 +1,4 @@
/* $NetBSD: lfs_segment.c,v 1.73 2001/11/23 21:44:27 chs Exp $ */
/* $NetBSD: lfs_segment.c,v 1.74 2002/05/14 20:03:54 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.73 2001/11/23 21:44:27 chs Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.74 2002/05/14 20:03:54 perseant Exp $");
#define ivndebug(vp,str) printf("ino %d: %s\n",VTOI(vp)->i_number,(str))
@ -104,9 +104,14 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.73 2001/11/23 21:44:27 chs Exp $")
#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>
#include <uvm/uvm_extern.h>
extern int count_lock_queue(void);
extern struct simplelock vnode_free_list_slock; /* XXX */
static void lfs_cluster_callback(struct buf *);
static struct buf **lookahead_pagemove(struct buf **, int, size_t *);
/*
* Determine if it's OK to start a partial in this segment, or if we need
* to go on to a new segment.
@ -235,12 +240,14 @@ lfs_vflush(struct vnode *vp)
}
/* If the node is being written, wait until that is done */
s = splbio();
if (WRITEINPROG(vp)) {
#ifdef DEBUG_LFS
ivndebug(vp,"vflush/writeinprog");
#endif
tsleep(vp, PRIBIO+1, "lfs_vw", 0);
}
splx(s);
/* Protect against VXLOCK deadlock in vinvalbuf() */
lfs_seglock(fs, SEGM_SYNC);
@ -299,8 +306,7 @@ lfs_vflush(struct vnode *vp)
ivndebug(vp,"vflush/clean");
#endif
lfs_writevnodes(fs, vp->v_mount, sp, VN_CLEAN);
}
else if (lfs_dostats) {
} else if (lfs_dostats) {
if (vp->v_dirtyblkhd.lh_first || (VTOI(vp)->i_flag & IN_ALLMOD))
++lfs_stats.vflush_invoked;
#ifdef DEBUG_LFS
@ -334,6 +340,23 @@ lfs_vflush(struct vnode *vp)
if (sp->seg_flags & SEGM_CKP)
++lfs_stats.ncheckpoints;
}
/*
* If we were called from somewhere that has already held the seglock
* (e.g., lfs_markv()), the lfs_segunlock will not wait for
* the write to complete because we are still locked.
* Since lfs_vflush() must return the vnode with no dirty buffers,
* we must explicitly wait, if that is the case.
*
* We compare the iocount against 1, not 0, because it is
* artificially incremented by lfs_seglock().
*/
if (fs->lfs_seglock > 1) {
s = splbio();
while (fs->lfs_iocount > 1)
(void)tsleep(&fs->lfs_iocount, PRIBIO + 1,
"lfs_vflush", 0);
splx(s);
}
lfs_segunlock(fs);
CLR_FLUSHING(fs,vp);
@ -483,6 +506,7 @@ lfs_segwrite(struct mount *mp, int flags)
int do_ckp, did_ckp, error, i;
int writer_set = 0;
int dirty;
int redo;
fs = VFSTOUFS(mp)->um_lfs;
@ -598,7 +622,7 @@ lfs_segwrite(struct mount *mp, int flags)
--dirty;
}
if (dirty)
error = VOP_BWRITE(bp); /* Ifile */
error = LFS_BWRITE_LOG(bp); /* Ifile */
else
brelse(bp);
}
@ -610,18 +634,42 @@ lfs_segwrite(struct mount *mp, int flags)
vp = fs->lfs_ivnode;
vget(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
#ifdef DEBUG
LFS_ENTER_LOG("pretend", __FILE__, __LINE__, 0, 0);
#endif
fs->lfs_flags &= ~LFS_IFDIRTY;
ip = VTOI(vp);
if (vp->v_dirtyblkhd.lh_first != NULL)
/* if (vp->v_dirtyblkhd.lh_first != NULL) */
lfs_writefile(fs, sp, vp);
if (ip->i_flag & IN_ALLMOD)
++did_ckp;
(void) lfs_writeinode(fs, sp, ip);
redo = lfs_writeinode(fs, sp, ip);
vput(vp);
} while (lfs_writeseg(fs, sp) && do_ckp);
redo += lfs_writeseg(fs, sp);
redo += (fs->lfs_flags & LFS_IFDIRTY);
} while (redo && do_ckp);
/* The ifile should now be all clear */
if (do_ckp && vp->v_dirtyblkhd.lh_first) {
struct buf *bp;
int s, warned = 0, dopanic = 0;
s = splbio();
for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = bp->b_vnbufs.le_next) {
if (!(bp->b_flags & B_GATHERED)) {
if (!warned)
printf("lfs_segwrite: ifile still has dirty blocks?!\n");
++dopanic;
++warned;
printf("bp=%p, lbn %d, flags 0x%lx\n",
bp, bp->b_lblkno, bp->b_flags);
}
}
if (dopanic)
panic("dirty blocks");
splx(s);
}
LFS_CLR_UINO(ip, IN_ALLMOD);
} else {
(void) lfs_writeseg(fs, sp);
@ -688,8 +736,7 @@ lfs_writefile(struct lfs *fs, struct segment *sp, struct vnode *vp)
fip->fi_version = ifp->if_version;
brelse(bp);
if (sp->seg_flags & SEGM_CLEAN)
{
if (sp->seg_flags & SEGM_CLEAN) {
lfs_gather(fs, sp, vp, lfs_match_fake);
/*
* For a file being flushed, we need to write *all* blocks.
@ -780,7 +827,9 @@ lfs_writeinode(struct lfs *fs, struct segment *sp, struct inode *ip)
/* Update the inode times and copy the inode onto the inode page. */
TIMEVAL_TO_TIMESPEC(&time, &ts);
LFS_ITIMES(ip, &ts, &ts, &ts);
/* XXX kludge --- don't redirty the ifile just to put times on it */
if (ip->i_number != LFS_IFILE_INUM)
LFS_ITIMES(ip, &ts, &ts, &ts);
/*
* If this is the Ifile, and we've already written the Ifile in this
@ -873,7 +922,7 @@ lfs_writeinode(struct lfs *fs, struct segment *sp, struct inode *ip)
ip->i_number);
}
#endif
error = VOP_BWRITE(ibp); /* Ifile */
error = LFS_BWRITE_LOG(ibp); /* Ifile */
}
/*
@ -913,7 +962,9 @@ lfs_writeinode(struct lfs *fs, struct segment *sp, struct inode *ip)
sup->su_nbytes -= DINODE_SIZE;
redo_ifile =
(ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED));
error = VOP_BWRITE(bp); /* Ifile */
if (redo_ifile)
fs->lfs_flags |= LFS_IFDIRTY;
error = LFS_BWRITE_LOG(bp); /* Ifile */
}
return (redo_ifile);
}
@ -963,6 +1014,8 @@ lfs_gatherblock(struct segment *sp, struct buf *bp, int *sptr)
#endif
/* Insert into the buffer list, update the FINFO block. */
bp->b_flags |= B_GATHERED;
bp->b_flags &= ~B_DONE;
*sp->cbpp++ = bp;
sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno;
@ -992,8 +1045,13 @@ loop: for (bp = vp->v_dirtyblkhd.lh_first; bp && bp->b_vnbufs.le_next != NULL;
bp = bp->b_vnbufs.le_next);
for (; bp && bp != BEG_OF_LIST; bp = BACK_BUF(bp)) {
#endif /* LFS_NO_BACKBUF_HACK */
if ((bp->b_flags & (B_BUSY|B_GATHERED)) || !match(fs, bp))
if ((bp->b_flags & (B_BUSY|B_GATHERED)) || !match(fs, bp)) {
#ifdef DEBUG_LFS
if (vp == fs->lfs_ivnode && (bp->b_flags & (B_BUSY|B_GATHERED)) == B_BUSY)
printf("(%d:%lx)", bp->b_lblkno, bp->b_flags);
#endif
continue;
}
if (vp->v_type == VBLK) {
/* For block devices, just write the blocks. */
/* XXX Do we really need to even do this? */
@ -1187,7 +1245,9 @@ lfs_updatemeta(struct segment *sp)
(*sp->start_bpp)->b_lblkno, daddr);
#endif
sup->su_nbytes -= (*sp->start_bpp)->b_bcount;
error = VOP_BWRITE(bp); /* Ifile */
if (!(bp->b_flags & B_GATHERED))
fs->lfs_flags |= LFS_IFDIRTY;
error = LFS_BWRITE_LOG(bp); /* Ifile */
}
}
}
@ -1201,7 +1261,7 @@ lfs_initseg(struct lfs *fs)
struct segment *sp;
SEGUSE *sup;
SEGSUM *ssp;
struct buf *bp;
struct buf *bp, *sbp;
int repeat;
sp = fs->lfs_sp;
@ -1250,9 +1310,16 @@ lfs_initseg(struct lfs *fs)
/* Get a new buffer for SEGSUM and enter it into the buffer list. */
sp->cbpp = sp->bpp;
*sp->cbpp = lfs_newbuf(fs, VTOI(fs->lfs_ivnode)->i_devvp,
fsbtodb(fs, fs->lfs_offset), fs->lfs_sumsize);
sp->segsum = (*sp->cbpp)->b_data;
#ifdef LFS_MALLOC_SUMMARY
sbp = *sp->cbpp = lfs_newbuf(fs, VTOI(fs->lfs_ivnode)->i_devvp,
fsbtodb(fs, fs->lfs_offset), fs->lfs_sumsize);
sp->segsum = (*sp->cbpp)->b_data;
#else
sbp = *sp->cbpp = getblk(VTOI(fs->lfs_ivnode)->i_devvp,
fsbtodb(fs, fs->lfs_offset), NBPG, 0, 0);
memset(sbp->b_data, 0x5a, NBPG);
sp->segsum = (*sp->cbpp)->b_data + NBPG - fs->lfs_sumsize;
#endif
bzero(sp->segsum, fs->lfs_sumsize);
sp->start_bpp = ++sp->cbpp;
fs->lfs_offset += btofsb(fs, fs->lfs_sumsize);
@ -1272,6 +1339,10 @@ lfs_initseg(struct lfs *fs)
sp->seg_bytes_left -= fs->lfs_sumsize;
sp->sum_bytes_left = fs->lfs_sumsize - SEGSUM_SIZE(fs);
#ifndef LFS_MALLOC_SUMMARY
LFS_LOCK_BUF(sbp);
brelse(sbp);
#endif
return (repeat);
}
@ -1295,7 +1366,7 @@ lfs_newseg(struct lfs *fs)
sup->su_nbytes = 0;
sup->su_nsums = 0;
sup->su_ninos = 0;
(void) VOP_BWRITE(bp); /* Ifile */
(void) LFS_BWRITE_LOG(bp); /* Ifile */
LFS_CLEANERINFO(cip, fs, bp);
--cip->clean;
@ -1323,19 +1394,109 @@ lfs_newseg(struct lfs *fs)
}
}
static struct buf **
lookahead_pagemove(struct buf **bpp, int nblocks, size_t *size)
{
size_t maxsize;
#ifndef LFS_NO_PAGEMOVE
struct buf *bp;
#endif
maxsize = *size;
*size = 0;
#ifdef LFS_NO_PAGEMOVE
return bpp;
#else
while((bp = *bpp) != NULL && *size < maxsize && nblocks--) {
if(bp->b_flags & B_CALL)
return bpp;
if(bp->b_bcount % NBPG)
return bpp;
*size += bp->b_bcount;
++bpp;
}
return NULL;
#endif
}
#define BQUEUES 4 /* XXX */
#define BQ_EMPTY 3 /* XXX */
extern TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];
#define BUFHASH(dvp, lbn) \
(&bufhashtbl[((long)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
extern LIST_HEAD(bufhashhdr, buf) invalhash;
/*
* Insq/Remq for the buffer hash lists.
*/
#define binshash(bp, dp) LIST_INSERT_HEAD(dp, bp, b_hash)
#define bremhash(bp) LIST_REMOVE(bp, b_hash)
static struct buf *
lfs_newclusterbuf(struct lfs *fs, struct vnode *vp, daddr_t addr, int n)
{
struct lfs_cluster *cl;
struct buf **bpp, *bp;
int s;
cl = (struct lfs_cluster *)malloc(sizeof(*cl), M_SEGMENT, M_WAITOK);
bpp = (struct buf **)malloc(n*sizeof(*bpp), M_SEGMENT, M_WAITOK);
memset(cl,0,sizeof(*cl));
cl->fs = fs;
cl->bpp = bpp;
cl->bufcount = 0;
cl->bufsize = 0;
/* Get an empty buffer header, or maybe one with something on it */
s = splbio();
if((bp = bufqueues[BQ_EMPTY].tqh_first) != NULL) {
bremfree(bp);
/* clear out various other fields */
bp->b_flags = B_BUSY;
bp->b_dev = NODEV;
bp->b_blkno = bp->b_lblkno = 0;
bp->b_error = 0;
bp->b_resid = 0;
bp->b_bcount = 0;
/* nuke any credentials we were holding */
/* XXXXXX */
bremhash(bp);
/* disassociate us from our vnode, if we had one... */
if (bp->b_vp)
brelvp(bp);
}
splx(s);
while (!bp)
bp = getnewbuf(0, 0);
s = splbio();
bgetvp(vp, bp);
binshash(bp,&invalhash);
splx(s);
bp->b_bcount = 0;
bp->b_blkno = bp->b_lblkno = addr;
bp->b_flags |= B_CALL;
bp->b_iodone = lfs_cluster_callback;
cl->saveaddr = bp->b_saveaddr; /* XXX is this ever used? */
bp->b_saveaddr = (caddr_t)cl;
return bp;
}
int
lfs_writeseg(struct lfs *fs, struct segment *sp)
{
struct buf **bpp, *bp, *cbp, *newbp;
struct buf **bpp, *bp, *cbp, *newbp, **pmlastbpp;
SEGUSE *sup;
SEGSUM *ssp;
dev_t i_dev;
char *datap, *dp;
int do_again, i, nblocks, s;
size_t el_size;
#ifdef LFS_TRACK_IOS
int j;
#endif
struct lfs_cluster *cl;
int (*strategy)(void *);
struct vop_strategy_args vop_strategy_a;
u_short ninos;
@ -1343,6 +1504,9 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
char *p;
struct vnode *vp;
struct inode *ip;
size_t pmsize;
int use_pagemove;
daddr_t pseg_daddr;
daddr_t *daddrp;
int changed;
#if defined(DEBUG) && defined(LFS_PROPELLER)
@ -1353,7 +1517,8 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
if (propeller == 4)
propeller = 0;
#endif
pseg_daddr = (*(sp->bpp))->b_blkno;
/*
* If there are no buffers other than the segment summary to write
* and it is not a checkpoint, don't do anything. On a checkpoint,
@ -1402,7 +1567,7 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
fs->lfs_avail -= btofsb(fs, fs->lfs_sumsize);
do_again = !(bp->b_flags & B_GATHERED);
(void)VOP_BWRITE(bp); /* Ifile */
(void)LFS_BWRITE_LOG(bp); /* Ifile */
/*
* Mark blocks B_BUSY, to prevent then from being changed between
* the checksum computation and the actual write.
@ -1488,7 +1653,6 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
} else {
bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
B_GATHERED);
LFS_UNLOCK_BUF(bp);
if (bp->b_flags & B_CALL) {
lfs_freebuf(bp);
bp = NULL;
@ -1496,6 +1660,7 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
bremfree(bp);
bp->b_flags |= B_DONE;
reassignbuf(bp, bp->b_vp);
LFS_UNLOCK_BUF(bp);
brelse(bp);
}
}
@ -1533,6 +1698,10 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
ssp->ss_serial = ++fs->lfs_serial;
ssp->ss_ident = fs->lfs_ident;
}
#ifndef LFS_MALLOC_SUMMARY
/* Set the summary block busy too */
(*(sp->bpp))->b_flags |= B_BUSY;
#endif
ssp->ss_datasum = cksum(datap, (nblocks - 1) * el_size);
ssp->ss_sumsum =
cksum(&ssp->ss_datasum, fs->lfs_sumsize - sizeof(ssp->ss_sumsum));
@ -1548,51 +1717,85 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
strategy = devvp->v_op[VOFFSET(vop_strategy)];
/*
* When we simply write the blocks we lose a rotation for every block
* written. To avoid this problem, we allocate memory in chunks, copy
* the buffers into the chunk and write the chunk. CHUNKSIZE is the
* largest size I/O devices can handle.
* When the data is copied to the chunk, turn off the B_LOCKED bit
* and brelse the buffer (which will move them to the LRU list). Add
* the B_CALL flag to the buffer header so we can count I/O's for the
* checkpoints and so we can release the allocated memory.
*
* XXX
* This should be removed if the new virtual memory system allows us to
* easily make the buffers contiguous in kernel memory and if that's
* fast enough.
* When we simply write the blocks we lose a rotation for every block
* written. To avoid this problem, we use pagemove to cluster
* the buffers into a chunk and write the chunk. CHUNKSIZE is the
* largest size I/O devices can handle.
*
* XXX - right now MAXPHYS is only 64k; could it be larger?
*/
#define CHUNKSIZE MAXPHYS
if (devvp == NULL)
panic("devvp is NULL");
for (bpp = sp->bpp,i = nblocks; i;) {
cbp = lfs_newbuf(fs, devvp, (*bpp)->b_blkno, CHUNKSIZE);
for (bpp = sp->bpp, i = nblocks; i;) {
cbp = lfs_newclusterbuf(fs, devvp, (*bpp)->b_blkno, i);
cl = (struct lfs_cluster *)cbp->b_saveaddr;
cbp->b_dev = i_dev;
cbp->b_flags |= B_ASYNC | B_BUSY;
cbp->b_bcount = 0;
#ifdef DIAGNOSTIC
if (dtosn(fs, dbtofsb(fs, (*bpp)->b_blkno) + btofsb(fs, (*bpp)->b_bcount) - 1) !=
/*
* Find out if we can use pagemove to build the cluster,
* or if we are stuck using malloc/copy. If this is the
* first cluster, set the shift flag (see below).
*/
pmsize = CHUNKSIZE;
use_pagemove = 0;
if(bpp == sp->bpp) {
/* Summary blocks have to get special treatment */
pmlastbpp = lookahead_pagemove(bpp + 1, i - 1, &pmsize);
if(pmsize >= CHUNKSIZE - fs->lfs_sumsize ||
pmlastbpp == NULL) {
use_pagemove = 1;
cl->flags |= LFS_CL_SHIFT;
} else {
/*
* If we're not using pagemove, we have
* to copy the summary down to the bottom
* end of the block.
*/
#ifndef LFS_MALLOC_SUMMARY
memcpy((*bpp)->b_data, (*bpp)->b_data +
NBPG - fs->lfs_sumsize,
fs->lfs_sumsize);
#endif /* LFS_MALLOC_SUMMARY */
}
} else {
pmlastbpp = lookahead_pagemove(bpp, i, &pmsize);
if(pmsize >= CHUNKSIZE || pmlastbpp == NULL) {
use_pagemove = 1;
}
}
if(use_pagemove == 0) {
cl->flags |= LFS_CL_MALLOC;
cl->olddata = cbp->b_data;
cbp->b_data = malloc(CHUNKSIZE, M_SEGMENT, M_WAITOK);
}
#if defined(DEBUG) && defined(DIAGNOSTIC)
if(dtosn(fs, dbtofsb(fs, (*bpp)->b_blkno + btodb((*bpp)->b_bcount - 1))) !=
dtosn(fs, dbtofsb(fs, cbp->b_blkno))) {
printf("block at %x (%d), cbp at %x (%d)\n",
(*bpp)->b_blkno, dtosn(fs, dbtofsb(fs, (*bpp)->b_blkno)),
cbp->b_blkno, dtosn(fs, dbtofsb(fs, cbp->b_blkno)));
panic("lfs_writeseg: Segment overwrite");
}
#endif
/*
* Construct the cluster.
*/
s = splbio();
if (fs->lfs_iocount >= LFS_THROTTLE) {
tsleep(&fs->lfs_iocount, PRIBIO+1, "lfs throttle", 0);
while (fs->lfs_iocount >= LFS_THROTTLE) {
#ifdef DEBUG_LFS
printf("[%d]", fs->lfs_iocount);
#endif
tsleep(&fs->lfs_iocount, PRIBIO+1, "lfs_throttle", 0);
}
++fs->lfs_iocount;
#ifdef LFS_TRACK_IOS
for (j = 0; j < LFS_THROTTLE; j++) {
if (fs->lfs_pending[j] == LFS_UNUSED_DADDR) {
fs->lfs_pending[j] = dbtofsb(fs, cbp->b_blkno);
break;
}
}
#endif /* LFS_TRACK_IOS */
for (p = cbp->b_data; i && cbp->b_bcount < CHUNKSIZE; i--) {
bp = *bpp;
@ -1608,26 +1811,54 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
if ((bp->b_flags & (B_CALL|B_INVAL)) == (B_CALL|B_INVAL)) {
if (copyin(bp->b_saveaddr, p, bp->b_bcount))
panic("lfs_writeseg: copyin failed [2]");
} else
} else if (use_pagemove) {
pagemove(bp->b_data, p, bp->b_bcount);
cbp->b_bufsize += bp->b_bcount;
bp->b_bufsize -= bp->b_bcount;
} else {
bcopy(bp->b_data, p, bp->b_bcount);
p += bp->b_bcount;
cbp->b_bcount += bp->b_bcount;
LFS_UNLOCK_BUF(bp);
bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
B_GATHERED);
vp = bp->b_vp;
if (bp->b_flags & B_CALL) {
/* if B_CALL, it was created with newbuf */
lfs_freebuf(bp);
bp = NULL;
/* printf("copy in %p\n", bp->b_data); */
}
/*
* XXX If we are *not* shifting, the summary
* block is only fs->lfs_sumsize. Otherwise,
* it is NBPG but shifted.
*/
if(bpp == sp->bpp && !(cl->flags & LFS_CL_SHIFT)) {
p += fs->lfs_sumsize;
cbp->b_bcount += fs->lfs_sumsize;
cl->bufsize += fs->lfs_sumsize;
} else {
bremfree(bp);
bp->b_flags |= B_DONE;
if (vp)
reassignbuf(bp, vp);
brelse(bp);
p += bp->b_bcount;
cbp->b_bcount += bp->b_bcount;
cl->bufsize += bp->b_bcount;
}
bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI | B_DONE);
cl->bpp[cl->bufcount++] = bp;
vp = bp->b_vp;
++vp->v_numoutput;
/*
* Although it cannot be freed for reuse before the
* cluster is written to disk, this buffer does not
* need to be held busy. Therefore we unbusy it,
* while leaving it on the locked list. It will
* be freed or requeued by the callback depending
* on whether it has had B_DELWRI set again in the
* meantime.
*
* If we are using pagemove, we have to hold the block
* busy to prevent its contents from changing before
* it hits the disk, and invalidating the checksum.
*/
bp->b_flags &= ~(B_DELWRI | B_READ | B_ERROR);
#ifdef LFS_MNOBUSY
if (cl->flags & LFS_CL_MALLOC) {
if (!(bp->b_flags & B_CALL))
brelse(bp); /* Still B_LOCKED */
}
#endif
bpp++;
/*
@ -1641,10 +1872,10 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
* of blocks are present (traverse the dirty list?)
*/
if ((i == 1 ||
(i > 1 && vp && *bpp && (*bpp)->b_vp != vp)) &&
(bp = vp->v_dirtyblkhd.lh_first) != NULL &&
vp->v_mount == fs->lfs_ivnode->v_mount)
{
(i > 1 && vp && *bpp && (*bpp)->b_vp != vp)) &&
(bp = vp->v_dirtyblkhd.lh_first) != NULL &&
vp->v_mount == fs->lfs_ivnode->v_mount)
{
ip = VTOI(vp);
#ifdef DEBUG_LFS
printf("lfs_writeseg: marking ino %d\n",
@ -1660,29 +1891,21 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
++cbp->b_vp->v_numoutput;
splx(s);
/*
* XXXX This is a gross and disgusting hack. Since these
* buffers are physically addressed, they hang off the
* device vnode (devvp). As a result, they have no way
* of getting to the LFS superblock or lfs structure to
* keep track of the number of I/O's pending. So, I am
* going to stuff the fs into the saveaddr field of
* the buffer (yuk).
* In order to include the summary in a clustered block,
* it may be necessary to shift the block forward (since
* summary blocks are in general smaller than can be
* addressed by pagemove()).  After the write, the block
* will be corrected before disassembly.
*/
cbp->b_saveaddr = (caddr_t)fs;
if(cl->flags & LFS_CL_SHIFT) {
cbp->b_data += (NBPG - fs->lfs_sumsize);
cbp->b_bcount -= (NBPG - fs->lfs_sumsize);
}
vop_strategy_a.a_desc = VDESC(vop_strategy);
vop_strategy_a.a_bp = cbp;
(strategy)(&vop_strategy_a);
}
#if 1 || defined(DEBUG)
/*
* After doing a big write, we recalculate how many buffers are
* really still left on the locked queue.
*/
s = splbio();
lfs_countlocked(&locked_queue_count, &locked_queue_bytes);
splx(s);
wakeup(&locked_queue_count);
#endif /* 1 || DEBUG */
if (lfs_dostats) {
++lfs_stats.psegwrites;
lfs_stats.blocktot += nblocks - 1;
@ -1798,28 +2021,8 @@ lfs_match_tindir(struct lfs *fs, struct buf *bp)
void
lfs_callback(struct buf *bp)
{
struct lfs *fs;
#ifdef LFS_TRACK_IOS
int j;
#endif
fs = (struct lfs *)bp->b_saveaddr;
#ifdef DIAGNOSTIC
if (fs->lfs_iocount == 0)
panic("lfs_callback: zero iocount\n");
#endif
if (--fs->lfs_iocount < LFS_THROTTLE)
wakeup(&fs->lfs_iocount);
#ifdef LFS_TRACK_IOS
for (j = 0; j < LFS_THROTTLE; j++) {
if (fs->lfs_pending[j] == dbtofsb(fs, bp->b_blkno)) {
fs->lfs_pending[j] = LFS_UNUSED_DADDR;
wakeup(&(fs->lfs_pending[j]));
break;
}
}
#endif /* LFS_TRACK_IOS */
/* struct lfs *fs; */
/* fs = (struct lfs *)bp->b_saveaddr; */
lfs_freebuf(bp);
}
@ -1836,6 +2039,122 @@ lfs_supercallback(struct buf *bp)
lfs_freebuf(bp);
}
static void
lfs_cluster_callback(struct buf *bp)
{
struct lfs_cluster *cl;
struct lfs *fs;
struct buf *tbp;
struct vnode *vp;
int error=0;
char *cp;
extern int locked_queue_count;
extern long locked_queue_bytes;
if(bp->b_flags & B_ERROR)
error = bp->b_error;
cl = (struct lfs_cluster *)bp->b_saveaddr;
fs = cl->fs;
bp->b_saveaddr = cl->saveaddr;
/* If shifted, shift back now */
if(cl->flags & LFS_CL_SHIFT) {
bp->b_data -= (NBPG - fs->lfs_sumsize);
bp->b_bcount += (NBPG - fs->lfs_sumsize);
}
cp = (char *)bp->b_data + cl->bufsize;
/* Put the pages back, and release the buffer */
while(cl->bufcount--) {
tbp = cl->bpp[cl->bufcount];
if(!(cl->flags & LFS_CL_MALLOC)) {
cp -= tbp->b_bcount;
printf("pm(%p,%p,%lx)",cp,tbp->b_data,tbp->b_bcount);
pagemove(cp, tbp->b_data, tbp->b_bcount);
bp->b_bufsize -= tbp->b_bcount;
tbp->b_bufsize += tbp->b_bcount;
}
if(error) {
tbp->b_flags |= B_ERROR;
tbp->b_error = error;
}
/*
* We're done with tbp. If it has not been re-dirtied since
* the cluster was written, free it. Otherwise, keep it on
* the locked list to be written again.
*/
if ((tbp->b_flags & (B_LOCKED | B_DELWRI)) == B_LOCKED)
LFS_UNLOCK_BUF(tbp);
tbp->b_flags &= ~B_GATHERED;
LFS_BCLEAN_LOG(fs, tbp);
vp = tbp->b_vp;
/* Segment summary for a shifted cluster */
if(!cl->bufcount && (cl->flags & LFS_CL_SHIFT))
tbp->b_flags |= B_INVAL;
if(!(tbp->b_flags & B_CALL)) {
bremfree(tbp);
if(vp)
reassignbuf(tbp, vp);
tbp->b_flags |= B_ASYNC; /* for biodone */
}
#ifdef DIAGNOSTIC
if (tbp->b_flags & B_DONE) {
printf("blk %d biodone already (flags %lx)\n",
cl->bufcount, (long)tbp->b_flags);
}
#endif
if (tbp->b_flags & (B_BUSY | B_CALL)) {
biodone(tbp);
}
}
/* Fix up the cluster buffer, and release it */
if(!(cl->flags & LFS_CL_MALLOC) && bp->b_bufsize) {
printf("PM(%p,%p,%lx)", (char *)bp->b_data + bp->b_bcount,
(char *)bp->b_data, bp->b_bufsize);
pagemove((char *)bp->b_data + bp->b_bcount,
(char *)bp->b_data, bp->b_bufsize);
}
if(cl->flags & LFS_CL_MALLOC) {
free(bp->b_data, M_SEGMENT);
bp->b_data = cl->olddata;
}
bp->b_bcount = 0;
bp->b_iodone = NULL;
bp->b_flags &= ~B_DELWRI;
bp->b_flags |= B_DONE;
reassignbuf(bp, bp->b_vp);
brelse(bp);
free(cl->bpp, M_SEGMENT);
free(cl, M_SEGMENT);
#ifdef DIAGNOSTIC
if (fs->lfs_iocount == 0)
panic("lfs_callback: zero iocount\n");
#endif
if (--fs->lfs_iocount < LFS_THROTTLE)
wakeup(&fs->lfs_iocount);
#if 0
if (fs->lfs_iocount == 0) {
/*
* XXX - do we really want to do this in a callback?
*
* Vinvalbuf can move locked buffers off the locked queue
* and we have no way of knowing about this. So, after
* doing a big write, we recalculate how many buffers are
* really still left on the locked queue.
*/
lfs_countlocked(&locked_queue_count, &locked_queue_bytes, "lfs_cluster_callback");
wakeup(&locked_queue_count);
}
#endif
}
/*
* Shellsort (diminishing increment sort) from Data Structures and
* Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290;

File: lfs_subr.c

@ -1,4 +1,4 @@
/* $NetBSD: lfs_subr.c,v 1.21 2001/11/23 21:44:28 chs Exp $ */
/* $NetBSD: lfs_subr.c,v 1.22 2002/05/14 20:03:54 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.21 2001/11/23 21:44:28 chs Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.22 2002/05/14 20:03:54 perseant Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -175,9 +175,14 @@ lfs_segunlock(struct lfs *fs)
struct segment *sp;
unsigned long sync, ckp;
int s;
struct buf *bp;
struct vnode *vp;
struct mount *mp;
extern int lfs_dirvcount;
#ifdef LFS_MALLOC_SUMMARY
extern int locked_queue_count;
extern long locked_queue_bytes;
#endif
sp = fs->lfs_sp;
@ -207,8 +212,10 @@ lfs_segunlock(struct lfs *fs)
vp != NULL;
vp = vp->v_mntvnodes.le_next) {
#endif
if (vp->v_mount != mp)
if (vp->v_mount != mp) {
printf("lfs_segunlock: starting over\n");
goto loop;
}
if (vp->v_type == VNON)
continue;
if (lfs_vref(vp))
@ -239,7 +246,18 @@ lfs_segunlock(struct lfs *fs)
if (sp->bpp != sp->cbpp) {
/* Free allocated segment summary */
fs->lfs_offset -= btofsb(fs, fs->lfs_sumsize);
lfs_freebuf(*sp->bpp);
bp = *sp->bpp;
#ifdef LFS_MALLOC_SUMMARY
lfs_freebuf(bp);
#else
s = splbio();
bremfree(bp);
splx(s);
bp->b_flags |= B_DONE|B_INVAL;
bp->b_flags &= ~B_DELWRI;
reassignbuf(bp,bp->b_vp);
brelse(bp);
#endif
} else
printf ("unlock to 0 with no summary");
@ -254,7 +272,14 @@ lfs_segunlock(struct lfs *fs)
* sleep.
*/
s = splbio();
--fs->lfs_iocount;
if (--fs->lfs_iocount < LFS_THROTTLE)
wakeup(&fs->lfs_iocount);
if(fs->lfs_iocount == 0) {
lfs_countlocked(&locked_queue_count,
&locked_queue_bytes, "lfs_segunlock");
wakeup(&locked_queue_count);
wakeup(&fs->lfs_iocount);
}
/*
* We let checkpoints happen asynchronously. That means
* that during recovery, we have to roll forward between

File: lfs_syscalls.c

@ -1,4 +1,4 @@
/* $NetBSD: lfs_syscalls.c,v 1.64 2002/05/12 23:06:29 matt Exp $ */
/* $NetBSD: lfs_syscalls.c,v 1.65 2002/05/14 20:03:54 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.64 2002/05/12 23:06:29 matt Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.65 2002/05/14 20:03:54 perseant Exp $");
#define LFS /* for prototypes in syscallargs.h */
@ -100,7 +100,7 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.64 2002/05/12 23:06:29 matt Exp $
/* Max block count for lfs_markv() */
#define MARKV_MAXBLKCNT 65536
struct buf *lfs_fakebuf(struct vnode *, int, size_t, caddr_t);
struct buf *lfs_fakebuf(struct lfs *, struct vnode *, int, size_t, caddr_t);
int lfs_fasthashget(dev_t, ino_t, int *, struct vnode **);
int debug_cleaner = 0;
@ -258,9 +258,6 @@ lfs_markv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt)
#ifdef CHECK_COPYIN
int i;
#endif /* CHECK_COPYIN */
#ifdef LFS_TRACK_IOS
int j;
#endif
int numlocked = 0, numrefed = 0;
ino_t maxino;
@ -311,23 +308,6 @@ lfs_markv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt)
if (blkp->bi_daddr == LFS_FORCE_WRITE)
printf("lfs_markv: warning: force-writing ino %d lbn %d\n",
blkp->bi_inode, blkp->bi_lbn);
#ifdef LFS_TRACK_IOS
/*
* If there is I/O on this segment that is not yet complete,
* the cleaner probably does not have the right information.
* Send it packing.
*/
for (j = 0; j < LFS_THROTTLE; j++) {
if (fs->lfs_pending[j] != LFS_UNUSED_DADDR
&& dtosn(fs,fs->lfs_pending[j]) == dtosn(fs,blkp->bi_daddr)
&& blkp->bi_daddr != LFS_FORCE_WRITE)
{
printf("lfs_markv: attempt to clean pending segment? (#%d)\n",
dtosn(fs, fs->lfs_pending[j]));
/* return (EBUSY); */
}
}
#endif /* LFS_TRACK_IOS */
/* Bounds-check incoming data, avoid panic for failed VGET */
if (blkp->bi_inode <= 0 || blkp->bi_inode >= maxino) {
error = EINVAL;
@ -493,7 +473,7 @@ lfs_markv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt)
}
if (ip->i_number != LFS_IFILE_INUM && blkp->bi_lbn >= 0) {
/* Data Block */
bp = lfs_fakebuf(vp, blkp->bi_lbn,
bp = lfs_fakebuf(fs, vp, blkp->bi_lbn,
blkp->bi_size, blkp->bi_bp);
/* Pretend we used bread() to get it */
bp->b_blkno = fsbtodb(fs, blkp->bi_daddr);
@ -716,9 +696,6 @@ lfs_bmapv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt)
ufs_daddr_t v_daddr;
int cnt, error, need_unlock = 0;
int numlocked = 0, numrefed = 0;
#ifdef LFS_TRACK_IOS
int j;
#endif
lfs_cleaner_pid = p->p_pid;
@ -748,24 +725,6 @@ lfs_bmapv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt)
return (EBUSY);
}
#endif /* DEBUG */
#ifdef LFS_TRACK_IOS
/*
* If there is I/O on this segment that is not yet complete,
* the cleaner probably does not have the right information.
* Send it packing.
*/
for (j = 0; j < LFS_THROTTLE; j++) {
if (fs->lfs_pending[j] != LFS_UNUSED_DADDR
&& dtosn(fs,fs->lfs_pending[j]) == dtosn(fs,blkp->bi_daddr))
{
printf("lfs_bmapv: attempt to clean pending segment? (#%d)\n",
dtosn(fs, fs->lfs_pending[j]));
vfs_unbusy(mntp);
return (EBUSY);
}
}
#endif /* LFS_TRACK_IOS */
/*
* Get the IFILE entry (only once) and see if the file still
* exists.
@ -939,14 +898,23 @@ sys_lfs_segclean(struct proc *p, void *v, register_t *retval)
if ((error = vfs_busy(mntp, LK_NOWAIT, NULL)) != 0)
return (error);
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_seglock(fs, SEGM_PROT);
#endif
LFS_SEGENTRY(sup, fs, SCARG(uap, segment), bp);
if (sup->su_flags & SEGUSE_ACTIVE) {
brelse(bp);
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_segunlock(fs);
#endif
vfs_unbusy(mntp);
return (EBUSY);
}
if (!(sup->su_flags & SEGUSE_DIRTY)) {
brelse(bp);
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_segunlock(fs);
#endif
vfs_unbusy(mntp);
return (EALREADY);
}
@ -964,7 +932,7 @@ sys_lfs_segclean(struct proc *p, void *v, register_t *retval)
if (fs->lfs_dmeta < 0)
fs->lfs_dmeta = 0;
sup->su_flags &= ~SEGUSE_DIRTY;
(void) VOP_BWRITE(bp);
(void) LFS_BWRITE_LOG(bp);
LFS_CLEANERINFO(cip, fs, bp);
++cip->clean;
@ -972,8 +940,11 @@ sys_lfs_segclean(struct proc *p, void *v, register_t *retval)
fs->lfs_nclean = cip->clean;
cip->bfree = fs->lfs_bfree;
cip->avail = fs->lfs_avail - fs->lfs_ravail;
(void) VOP_BWRITE(bp);
(void) LFS_BWRITE_LOG(bp);
wakeup(&fs->lfs_avail);
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_segunlock(fs);
#endif
vfs_unbusy(mntp);
return (0);
@ -1100,10 +1071,11 @@ int
lfs_fastvget(struct mount *mp, ino_t ino, ufs_daddr_t daddr, struct vnode **vpp, struct dinode *dinp, int *need_unlock)
{
struct inode *ip;
struct dinode *dip;
struct vnode *vp;
struct ufsmount *ump;
dev_t dev;
int error;
int error, retries;
struct buf *bp;
struct lfs *fs;
@ -1179,6 +1151,8 @@ lfs_fastvget(struct mount *mp, ino_t ino, ufs_daddr_t daddr, struct vnode **vpp,
if (ip->i_number != ino)
panic("lfs_fastvget: I was fed the wrong inode!");
} else {
retries = 0;
again:
error = bread(ump->um_devvp, fsbtodb(fs, daddr), fs->lfs_ibsize,
NOCRED, &bp);
if (error) {
@ -1197,7 +1171,18 @@ lfs_fastvget(struct mount *mp, ino_t ino, ufs_daddr_t daddr, struct vnode **vpp,
*vpp = NULL;
return (error);
}
ip->i_din.ffs_din = *lfs_ifind(fs, ino, bp);
dip = lfs_ifind(ump->um_lfs, ino, bp);
if (dip == NULL) {
/* Assume write has not completed yet; try again */
bp->b_flags |= B_INVAL;
brelse(bp);
++retries;
if (retries > LFS_IFIND_RETRIES)
panic("lfs_fastvget: dinode not found");
printf("lfs_fastvget: dinode not found, retrying...\n");
goto again;
}
ip->i_din.ffs_din = *dip;
brelse(bp);
}
ip->i_ffs_effnlink = ip->i_ffs_nlink;
@ -1234,7 +1219,7 @@ lfs_fastvget(struct mount *mp, ino_t ino, ufs_daddr_t daddr, struct vnode **vpp,
}
struct buf *
lfs_fakebuf(struct vnode *vp, int lbn, size_t size, caddr_t uaddr)
lfs_fakebuf(struct lfs *fs, struct vnode *vp, int lbn, size_t size, caddr_t uaddr)
{
struct buf *bp;
int error;
@ -1251,7 +1236,12 @@ lfs_fakebuf(struct vnode *vp, int lbn, size_t size, caddr_t uaddr)
bp->b_flags |= B_INVAL;
bp->b_saveaddr = uaddr;
#endif
#if 0
bp->b_saveaddr = (caddr_t)fs;
s = splbio();
++fs->lfs_iocount;
splx(s);
#endif
bp->b_bufsize = size;
bp->b_bcount = size;
return (bp);

File: lfs_vfsops.c

@ -1,4 +1,4 @@
/* $NetBSD: lfs_vfsops.c,v 1.73 2002/05/12 23:06:29 matt Exp $ */
/* $NetBSD: lfs_vfsops.c,v 1.74 2002/05/14 20:03:54 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.73 2002/05/12 23:06:29 matt Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.74 2002/05/14 20:03:54 perseant Exp $");
#if defined(_KERNEL_OPT)
#include "opt_quota.h"
@ -165,6 +165,9 @@ lfs_init()
*/
pool_init(&lfs_inode_pool, sizeof(struct inode), 0, 0, 0,
"lfsinopl", &pool_allocator_nointr);
#ifdef DEBUG
memset(lfs_log, 0, sizeof(lfs_log));
#endif
}
void
@ -436,11 +439,11 @@ update_meta(struct lfs *fs, ino_t ino, int version, ufs_daddr_t lbn,
}
#endif
sup->su_nbytes -= size;
VOP_BWRITE(bp);
LFS_BWRITE_LOG(bp);
}
LFS_SEGENTRY(sup, fs, dtosn(fs, ndaddr), bp);
sup->su_nbytes += size;
VOP_BWRITE(bp);
LFS_BWRITE_LOG(bp);
/* Fix this so it can be released */
/* ip->i_lfs_effnblks = ip->i_ffs_blocks; */
@ -521,19 +524,19 @@ update_inoblk(struct lfs *fs, daddr_t offset, struct ucred *cred,
LFS_IENTRY(ifp, fs, dip->di_inumber, ibp);
daddr = ifp->if_daddr;
ifp->if_daddr = dbtofsb(fs, dbp->b_blkno);
error = VOP_BWRITE(ibp); /* Ifile */
error = LFS_BWRITE_LOG(ibp); /* Ifile */
/* And do segment accounting */
if (dtosn(fs, daddr) != dtosn(fs, dbtofsb(fs, dbp->b_blkno))) {
if (daddr > 0) {
LFS_SEGENTRY(sup, fs, dtosn(fs, daddr),
ibp);
sup->su_nbytes -= DINODE_SIZE;
VOP_BWRITE(ibp);
LFS_BWRITE_LOG(ibp);
}
LFS_SEGENTRY(sup, fs, dtosn(fs, dbtofsb(fs, dbp->b_blkno)),
ibp);
sup->su_nbytes += DINODE_SIZE;
VOP_BWRITE(ibp);
LFS_BWRITE_LOG(ibp);
}
}
}
@ -943,10 +946,6 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p)
fs->lfs_uinodes = 0;
fs->lfs_ravail = 0;
fs->lfs_sbactive = 0;
#ifdef LFS_TRACK_IOS
for (i = 0; i < LFS_THROTTLE; i++)
fs->lfs_pending[i] = LFS_UNUSED_DADDR;
#endif
/* Set up the ifile and lock aflags */
fs->lfs_doifile = 0;
@ -995,7 +994,6 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p)
}
fs->lfs_ivnode = vp;
VREF(vp);
vput(vp);
/*
* Roll forward.
@ -1030,7 +1028,7 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p)
if (!(sup->su_flags & SEGUSE_DIRTY))
--fs->lfs_nclean;
sup->su_flags |= SEGUSE_DIRTY;
(void) VOP_BWRITE(bp);
(void) LFS_BWRITE_LOG(bp);
while ((offset = check_segsum(fs, offset, cred, CHECK_CKSUM,
&flags, p)) > 0)
{
@ -1040,7 +1038,7 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p)
if (!(sup->su_flags & SEGUSE_DIRTY))
--fs->lfs_nclean;
sup->su_flags |= SEGUSE_DIRTY;
(void) VOP_BWRITE(bp);
(void) LFS_BWRITE_LOG(bp);
}
#ifdef DEBUG_LFS_RFW
@ -1126,7 +1124,7 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p)
cip->dirty = fs->lfs_nseg - fs->lfs_nclean;
cip->avail = fs->lfs_avail;
cip->bfree = fs->lfs_bfree;
(void) VOP_BWRITE(bp); /* Ifile */
(void) LFS_BWRITE_LOG(bp); /* Ifile */
/*
* Mark the current segment as ACTIVE, since we're going to
@ -1134,7 +1132,22 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p)
*/
LFS_SEGENTRY(sup, fs, dtosn(fs, fs->lfs_offset), bp);
sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
(void) VOP_BWRITE(bp); /* Ifile */
(void) LFS_BWRITE_LOG(bp); /* Ifile */
/* Now that roll-forward is done, unlock the Ifile */
vput(vp);
/* Comment on ifile size if it is too large */
if (fs->lfs_ivnode->v_size / fs->lfs_bsize > LFS_MAX_BUFS) {
fs->lfs_flags |= LFS_WARNED;
printf("lfs_mountfs: please consider increasing NBUF to at least %lld\n",
(long long)(fs->lfs_ivnode->v_size / fs->lfs_bsize) * (nbuf / LFS_MAX_BUFS));
}
if (fs->lfs_ivnode->v_size > LFS_MAX_BYTES) {
fs->lfs_flags |= LFS_WARNED;
printf("lfs_mountfs: please consider increasing BUFPAGES to at least %lld\n",
(long long)fs->lfs_ivnode->v_size * bufpages / LFS_MAX_BYTES);
}
return (0);
out:
@ -1198,6 +1211,20 @@ lfs_unmount(struct mount *mp, int mntflags, struct proc *p)
lfs_writesuper(fs, fs->lfs_sboffs[0]);
lfs_writesuper(fs, fs->lfs_sboffs[1]);
/* Comment on ifile size if it has become too large */
if (!(fs->lfs_flags & LFS_WARNED)) {
if (fs->lfs_ivnode->v_size / fs->lfs_bsize > LFS_MAX_BUFS)
printf("lfs_unmount: please consider increasing"
" NBUF to at least %lld\n",
(fs->lfs_ivnode->v_size / fs->lfs_bsize) *
(long long)(nbuf / LFS_MAX_BUFS));
if (fs->lfs_ivnode->v_size > LFS_MAX_BYTES)
printf("lfs_unmount: please consider increasing"
" BUFPAGES to at least %lld\n",
(long long)fs->lfs_ivnode->v_size *
bufpages / LFS_MAX_BYTES);
}
/* Finish with the Ifile, now that we're done with it */
vrele(fs->lfs_ivnode);
vgone(fs->lfs_ivnode);
@ -1300,6 +1327,7 @@ int
lfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
{
struct lfs *fs;
struct dinode *dip;
struct inode *ip;
struct buf *bp;
struct ifile *ifp;
@ -1307,7 +1335,7 @@ lfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
struct ufsmount *ump;
ufs_daddr_t daddr;
dev_t dev;
int error;
int error, retries;
struct timespec ts;
ump = VFSTOUFS(mp);
@ -1379,8 +1407,10 @@ lfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
ip->i_lfs = ump->um_lfs;
/* Read in the disk contents for the inode, copy into the inode. */
retries = 0;
again:
error = bread(ump->um_devvp, fsbtodb(fs, daddr),
(fs->lfs_version == 1 ? fs->lfs_bsize : fs->lfs_fsize),
(fs->lfs_version == 1 ? fs->lfs_bsize : fs->lfs_ibsize),
NOCRED, &bp);
if (error) {
/*
@ -1394,7 +1424,45 @@ lfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
*vpp = NULL;
return (error);
}
ip->i_din.ffs_din = *lfs_ifind(fs, ino, bp);
dip = lfs_ifind(fs, ino, bp);
if (dip == NULL) {
/* Assume write has not completed yet; try again */
bp->b_flags |= B_INVAL;
brelse(bp);
++retries;
if (retries > LFS_IFIND_RETRIES) {
#ifdef DEBUG
/* If the seglock is held look at the bpp to see
what is there anyway */
if (fs->lfs_seglock > 0) {
struct buf **bpp;
struct dinode *dp;
int i;
for (bpp = fs->lfs_sp->bpp;
bpp != fs->lfs_sp->cbpp; ++bpp) {
if ((*bpp)->b_vp == fs->lfs_ivnode &&
bpp != fs->lfs_sp->bpp) {
/* Inode block */
printf("block 0x%x: ", (*bpp)->b_blkno);
dp = (struct dinode *)(*bpp)->b_data;
for (i = 0; i < INOPB(fs); i++)
if (dp[i].di_u.inumber)
printf("%d ", dp[i].di_u.inumber);
printf("\n");
}
}
}
#endif
panic("lfs_vget: dinode not found");
}
printf("lfs_vget: dinode %d not found, retrying...\n", ino);
(void)tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs ifind", 1);
goto again;
}
ip->i_din.ffs_din = *dip;
ip->i_ffs_effnlink = ip->i_ffs_nlink;
ip->i_lfs_effnblks = ip->i_ffs_blocks;
if (fs->lfs_version > 1) {

File: lfs_vnops.c

@ -1,4 +1,4 @@
/* $NetBSD: lfs_vnops.c,v 1.62 2002/04/27 01:00:46 perseant Exp $ */
/* $NetBSD: lfs_vnops.c,v 1.63 2002/05/14 20:03:55 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.62 2002/04/27 01:00:46 perseant Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.63 2002/05/14 20:03:55 perseant Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -300,11 +300,29 @@ lfs_fsync(void *v)
simple_lock(&vp->v_interlock);
error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo),
round_page(ap->a_offhi), PGO_CLEANIT | PGO_SYNCIO);
if (error) {
if (error)
return error;
error = VOP_UPDATE(vp, NULL, NULL,
(ap->a_flags & FSYNC_WAIT) != 0 ? UPDATE_WAIT : 0);
#ifdef DEBUG
/*
* If we were called from vinvalbuf and lfs_update
* didn't flush all our buffers, we're in trouble.
*/
if ((ap->a_flags & FSYNC_WAIT) && vp->v_dirtyblkhd.lh_first != NULL) {
struct buf *bp;
bp = vp->v_dirtyblkhd.lh_first;
printf("lfs_fsync: ino %d failed to sync", VTOI(vp)->i_number);
printf("lfs_fsync: iocount = %d\n", VTOI(vp)->i_lfs->lfs_iocount);
printf("lfs_fsync: flags are 0x%x, numoutput=%d\n",
VTOI(vp)->i_flag, vp->v_numoutput);
printf("lfs_fsync: writecount=%ld\n", vp->v_writecount);
printf("lfs_fsync: first bp: %p, flags=0x%lx, lbn=%d\n",
bp, bp->b_flags, bp->b_lblkno);
}
return (VOP_UPDATE(vp, NULL, NULL,
(ap->a_flags & FSYNC_WAIT) != 0 ? UPDATE_WAIT : 0));
#endif
return error;
}
/*
@ -358,7 +376,7 @@ lfs_set_dirop(struct vnode *vp)
lfs_check(vp, LFS_UNUSED_LBN, 0);
while (fs->lfs_writer || lfs_dirvcount > LFS_MAXDIROP) {
if (fs->lfs_writer)
tsleep(&fs->lfs_dirops, PRIBIO + 1, "lfs_dirop", 0);
tsleep(&fs->lfs_dirops, PRIBIO + 1, "lfs_sdirop", 0);
if (lfs_dirvcount > LFS_MAXDIROP && fs->lfs_dirops == 0) {
++fs->lfs_writer;
lfs_flush(fs, 0);