Add an lfs_strategy() that checks to make sure we're not trying to read
where the cleaner is trying to write, instead of tying up the "live"
buffers (or pages).

Fix a bug in the LFS_UBC case where oversized buffers would not be
checksummed correctly, causing uncleanable segments.

Make sure that wakeup(fs->lfs_iocount) is done if fs->lfs_iocount is 1
as well as 0, since we wait in some places for it to drop to 1.

Activate all pages that make it into lfs_gop_write without the segment
lock held, since they must have been dirtied very recently, even if
PG_DELWRI is not set.
commit 4b4f884b89
parent 385f6e3afe
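As an aside, here is a minimal standalone sketch of the collision-avoidance scheme the message above describes. Every name below (clean_state, record_partial_segment, block_in_active_interval, segment_of) and the toy segment size are invented for illustration only; in the actual change, the cleaner records interval starts in fs->lfs_cleanint[] from lfs_initseg() while holding the segment lock, and the new lfs_strategy() in lfs_vnops.c performs the check and sleeps on the seglock until the cleaner is done.

#include <stdbool.h>
#include <stdint.h>

#define MAX_CLEAN_INTERVALS 64		/* mirrors LFS_MAX_CLEANIND */
#define TOY_SEG_BLOCKS 1024		/* invented segment size for this sketch */

struct clean_state {
	int32_t	start[MAX_CLEAN_INTERVALS];	/* first block address of each
						 * partial segment the cleaner
						 * has written so far */
	int	n;				/* number of recorded intervals */
};

/* Stand-in for dtosn(): which segment a block address falls in. */
static int
segment_of(int32_t blkno)
{
	return blkno / TOY_SEG_BLOCKS;
}

/* Cleaner side: record the start of each partial segment it writes. */
static void
record_partial_segment(struct clean_state *cs, int32_t start_blkno)
{
	if (cs->n < MAX_CLEAN_INTERVALS)
		cs->start[cs->n++] = start_blkno;
	/* (the kernel instead drains pending I/O and resets the index) */
}

/*
 * Reader side: a block collides if it lies in the same segment as a
 * recorded interval and at or past that interval's start; the caller
 * should then sleep until the cleaner releases the segment lock.
 */
static bool
block_in_active_interval(const struct clean_state *cs, int32_t blkno)
{
	int i;

	for (i = 0; i < cs->n; i++)
		if (segment_of(blkno) == segment_of(cs->start[i]) &&
		    blkno >= cs->start[i])
			return true;
	return false;
}

int
main(void)
{
	struct clean_state cs = { .n = 0 };

	record_partial_segment(&cs, 2048);	/* cleaner writing in segment 2 */
	return block_in_active_interval(&cs, 3000) ? 0 : 1;	/* 3000 collides */
}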
lfs.h
@@ -1,4 +1,4 @@
-/* $NetBSD: lfs.h,v 1.54 2003/03/02 04:34:30 perseant Exp $ */
+/* $NetBSD: lfs.h,v 1.55 2003/03/08 02:55:47 perseant Exp $ */
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -154,7 +154,7 @@ typedef struct lfs_res_blk {
 	(((uvmexp.active + uvmexp.inactive + uvmexp.free) * uvmexp.filemax) >> 8)
 
 #define LFS_IS_MALLOC_BUF(bp) (((bp)->b_flags & B_CALL) && \
-	((bp)->b_iodone == lfs_callback || (bp)->b_iodone == lfs_fakebuf_iodone))
+	(bp)->b_iodone == lfs_callback)
 
 #define LFS_LOCK_BUF(bp) do { \
 	if (((bp)->b_flags & (B_LOCKED | B_CALL)) == 0) { \
@@ -534,6 +534,9 @@ struct lfs {
 	struct pool lfs_bpppool;		/* Pool for bpp */
 	struct pool lfs_segpool;		/* Pool for struct segment */
 #endif /* KERNEL */
+#define LFS_MAX_CLEANIND 64
+	int32_t lfs_cleanint[LFS_MAX_CLEANIND];	/* Active cleaning intervals */
+	int lfs_cleanind;			/* Index into intervals */
 };
 
 /*
lfs_extern.h
@@ -1,4 +1,4 @@
-/* $NetBSD: lfs_extern.h,v 1.44 2003/02/25 23:12:07 perseant Exp $ */
+/* $NetBSD: lfs_extern.h,v 1.45 2003/03/08 02:55:48 perseant Exp $ */
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -248,6 +248,7 @@ int lfsfifo_close(void *);
 int	lfs_fcntl	(void *);
 int	lfs_inactive	(void *);
 int	lfs_reclaim	(void *);
+int	lfs_strategy	(void *);
 int	lfs_write	(void *);
 int	lfs_getpages	(void *);
 int	lfs_putpages	(void *);
lfs_segment.c
@@ -1,4 +1,4 @@
-/* $NetBSD: lfs_segment.c,v 1.106 2003/03/04 19:19:43 perseant Exp $ */
+/* $NetBSD: lfs_segment.c,v 1.107 2003/03/08 02:55:48 perseant Exp $ */
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -71,7 +71,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.106 2003/03/04 19:19:43 perseant Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.107 2003/03/08 02:55:48 perseant Exp $");
 
 #define ivndebug(vp,str) printf("ino %d: %s\n",VTOI(vp)->i_number,(str))
 
@@ -1459,6 +1459,7 @@ lfs_initseg(struct lfs *fs)
 	sp = fs->lfs_sp;
 
 	repeat = 0;
 
 	/* Advance to the next segment. */
 	if (!LFS_PARTIAL_FITS(fs)) {
 		/* lfs_avail eats the remaining space */
@@ -1497,6 +1498,18 @@ lfs_initseg(struct lfs *fs)
 	}
 	fs->lfs_lastpseg = fs->lfs_offset;
 
+	/* Record first address of this partial segment */
+	if (sp->seg_flags & SEGM_CLEAN) {
+		fs->lfs_cleanint[fs->lfs_cleanind] = fs->lfs_offset;
+		if (++fs->lfs_cleanind >= LFS_MAX_CLEANIND) {
+			/* "1" is the artificial inc in lfs_seglock */
+			while (fs->lfs_iocount > 1) {
+				tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs_initseg", 0);
+			}
+			fs->lfs_cleanind = 0;
+		}
+	}
+
 	sp->fs = fs;
 	sp->ibp = NULL;
 	sp->idp = NULL;
@@ -1952,7 +1965,7 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
 	/* Set the summary block busy too */
 	(*(sp->bpp))->b_flags |= B_BUSY;
 #endif
-	ssp->ss_datasum = cksum(datap, (nblocks - 1) * el_size);
+	ssp->ss_datasum = cksum(datap, dp - datap);
 	ssp->ss_sumsum =
 	    cksum(&ssp->ss_datasum, fs->lfs_sumsize - sizeof(ssp->ss_sumsum));
 	pool_put(&fs->lfs_bpppool, datap);
@@ -2311,7 +2324,7 @@ lfs_super_aiodone(struct buf *bp)
 	fs = (struct lfs *)bp->b_saveaddr;
 	fs->lfs_sbactive = 0;
 	wakeup(&fs->lfs_sbactive);
-	if (--fs->lfs_iocount == 0)
+	if (--fs->lfs_iocount <= 1)
 		wakeup(&fs->lfs_iocount);
 	lfs_freebuf(fs, bp);
 }
@@ -2451,7 +2464,7 @@ lfs_cluster_aiodone(struct buf *bp)
 	if (fs->lfs_iocount == 0)
 		panic("lfs_cluster_aiodone: zero iocount");
 #endif
-	if (--fs->lfs_iocount == 0)
+	if (--fs->lfs_iocount <= 1)
 		wakeup(&fs->lfs_iocount);
 
 	pool_put(&fs->lfs_bpppool, cl->bpp);
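A note on the ss_datasum change in lfs_writeseg() above: once a single (malloced, oversized) buffer can span several filesystem blocks, it contributes several checksum words to the gather area, so a byte count derived from the number of buffers no longer covers everything that was gathered, while the pointer difference dp - datap does. The toy program below is only a sketch of that idea, not the kernel code; the names and sizes (FS_BSIZE, EL_SIZE, gather(), nbufs) are invented, and the kernel's exact length expression differs.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define FS_BSIZE 512			/* toy filesystem block size */
#define EL_SIZE  sizeof(uint32_t)	/* toy checksum element size */

/* Gather one checksum word per filesystem block a buffer covers. */
static char *
gather(char *dp, const char *buf, size_t bcount)
{
	size_t off;

	for (off = 0; off < bcount; off += FS_BSIZE) {
		memcpy(dp, buf + off, EL_SIZE);
		dp += EL_SIZE;
	}
	return dp;
}

int
main(void)
{
	char oversized[2 * FS_BSIZE] = { 0 };	/* one buffer, two fs blocks */
	char normal[FS_BSIZE] = { 0 };
	char area[16 * EL_SIZE];
	char *datap = area, *dp = area;
	int nbufs = 2;				/* buffers gathered, not blocks */

	dp = gather(dp, oversized, sizeof(oversized));	/* adds 2 words */
	dp = gather(dp, normal, sizeof(normal));	/* adds 1 word */

	/* A per-buffer count misses the extra word; dp - datap does not. */
	printf("per-buffer count: %zu bytes, actual gather: %zu bytes\n",
	    nbufs * EL_SIZE, (size_t)(dp - datap));
	return 0;
}

A checksum computed over too few bytes would not match what verification later expects, which is consistent with the "uncleanable segments" symptom mentioned in the commit message.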
lfs_subr.c
@@ -1,4 +1,4 @@
-/* $NetBSD: lfs_subr.c,v 1.35 2003/03/04 19:15:26 perseant Exp $ */
+/* $NetBSD: lfs_subr.c,v 1.36 2003/03/08 02:55:49 perseant Exp $ */
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -71,7 +71,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.35 2003/03/04 19:15:26 perseant Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.36 2003/03/08 02:55:49 perseant Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -333,7 +333,8 @@ lfs_seglock(struct lfs *fs, unsigned long flags)
 
 	fs->lfs_seglock = 1;
 	fs->lfs_lockpid = curproc->p_pid;
 
+	fs->lfs_cleanind = 0;
 
 	/* Drain fragment size changes out */
 	lockmgr(&fs->lfs_fraglock, LK_EXCLUSIVE, 0);
@@ -455,7 +456,13 @@ lfs_segunlock(struct lfs *fs)
 
 		pool_put(&fs->lfs_bpppool, sp->bpp);
 		sp->bpp = NULL;
-		/* The sync case holds a reference in `sp' to be freed below */
+
+		/*
+		 * If we're not sync, we're done with sp, get rid of it.
+		 * Otherwise, we keep a local copy around but free
+		 * fs->lfs_sp so another process can use it (we have to
+		 * wait but they don't have to wait for us).
+		 */
 		if (!sync)
 			pool_put(&fs->lfs_segpool, sp);
 		fs->lfs_sp = NULL;
@@ -469,8 +476,9 @@ lfs_segunlock(struct lfs *fs)
 			lfs_countlocked(&locked_queue_count,
 			    &locked_queue_bytes, "lfs_segunlock");
 			wakeup(&locked_queue_count);
-			wakeup(&fs->lfs_iocount);
 		}
+		if (fs->lfs_iocount <= 1)
+			wakeup(&fs->lfs_iocount);
 		/*
 		 * If we're not checkpointing, we don't have to block
 		 * other processes to wait for a synchronous write
@@ -498,6 +506,7 @@ lfs_segunlock(struct lfs *fs)
 		}
 		if (sync)
 			pool_put(&fs->lfs_segpool, sp);
+
 		if (ckp) {
 			fs->lfs_nactive = 0;
 			/* If we *know* everything's on disk, write both sbs */
lfs_syscalls.c
@@ -1,4 +1,4 @@
-/* $NetBSD: lfs_syscalls.c,v 1.84 2003/02/24 08:42:49 perseant Exp $ */
+/* $NetBSD: lfs_syscalls.c,v 1.85 2003/03/08 02:55:49 perseant Exp $ */
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -71,7 +71,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.84 2003/02/24 08:42:49 perseant Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.85 2003/03/08 02:55:49 perseant Exp $");
 
 #define LFS		/* for prototypes in syscallargs.h */
 
@@ -1224,57 +1224,25 @@ lfs_fastvget(struct mount *mp, ino_t ino, daddr_t daddr, struct vnode **vpp, str
 	return (0);
 }
 
-void
-lfs_fakebuf_iodone(struct buf *bp)
-{
-	struct buf *obp = bp->b_saveaddr;
-
-	if (!(obp->b_flags & (B_DELWRI | B_DONE)))
-		obp->b_flags |= B_INVAL;
-	bp->b_saveaddr = (caddr_t)(VTOI(obp->b_vp)->i_lfs);
-	brelse(obp);
-	lfs_callback(bp);
-}
-
 /*
  * Make up a "fake" cleaner buffer, copy the data from userland into it.
  */
 struct buf *
 lfs_fakebuf(struct lfs *fs, struct vnode *vp, int lbn, size_t size, caddr_t uaddr)
 {
 	struct buf *bp;
 	int error;
-
-	struct buf *obp;
 
 	KASSERT(VTOI(vp)->i_number != LFS_IFILE_INUM);
 
-	/*
-	 * make corresponding buffer busy to avoid
-	 * reading blocks that isn't written yet.
-	 * it's needed because we'll update metadatas in lfs_updatemeta
-	 * before data pointed by them is actually written to disk.
-	 *
-	 * XXX no need to allocbuf.
-	 *
-	 * XXX this can cause buf starvation.
-	 */
-	obp = getblk(vp, lbn, size, 0, 0);
-	if (obp == NULL)
-		panic("lfs_fakebuf: getblk failed");
-
 	bp = lfs_newbuf(VTOI(vp)->i_lfs, vp, lbn, size, LFS_NB_CLEAN);
 	error = copyin(uaddr, bp->b_data, size);
 	if (error) {
 		lfs_freebuf(fs, bp);
		return NULL;
 	}
-	bp->b_saveaddr = obp;
 	KDASSERT(bp->b_iodone == lfs_callback);
-	bp->b_iodone = lfs_fakebuf_iodone;
 
-#ifdef DIAGNOSTIC
-	if (obp->b_flags & B_GATHERED)
-		panic("lfs_fakebuf: gathered bp: %p, ino=%u, lbn=%d",
-		    bp, VTOI(vp)->i_number, lbn);
-#endif
+#if 0
+	bp->b_saveaddr = (caddr_t)fs;
+	++fs->lfs_iocount;
lfs_vfsops.c
@@ -1,4 +1,4 @@
-/* $NetBSD: lfs_vfsops.c,v 1.102 2003/03/02 04:34:31 perseant Exp $ */
+/* $NetBSD: lfs_vfsops.c,v 1.103 2003/03/08 02:55:49 perseant Exp $ */
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -71,7 +71,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.102 2003/03/02 04:34:31 perseant Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.103 2003/03/08 02:55:49 perseant Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_quota.h"
@@ -1811,17 +1811,23 @@ lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
 	for (i = 0; i < npages; i++) {
 		pg = pgs[i];
 
-		if (pg->flags & PG_WANTED)
-			wakeup(pg);
 		if (pg->flags & PG_PAGEOUT)
 			uvmexp.paging--;
 		if (pg->flags & PG_DELWRI) {
 			uvm_pageunwire(pg);
-			uvm_pageactivate(pg);
 		}
-		pg->flags &= ~(PG_BUSY|PG_CLEAN|PG_WANTED|PG_DELWRI|PG_PAGEOUT|PG_RELEASED);
-		UVM_PAGE_OWN(pg, NULL);
+		uvm_pageactivate(pg);
+		pg->flags &= ~(PG_CLEAN|PG_DELWRI|PG_PAGEOUT|PG_RELEASED);
+#ifdef DEBUG_LFS
+		printf("pg[%d]->flags = %x\n", i, pg->flags);
+		printf("pg[%d]->pqflags = %x\n", i, pg->pqflags);
+		printf("pg[%d]->uanon = %p\n", i, pg->uanon);
+		printf("pg[%d]->uobject = %p\n", i, pg->uobject);
+		printf("pg[%d]->wire_count = %d\n", i, pg->wire_count);
+		printf("pg[%d]->loan_count = %d\n", i, pg->loan_count);
+#endif
 	}
+	/* uvm_pageunbusy takes care of PG_BUSY, PG_WANTED */
 	uvm_page_unbusy(pgs, npages);
 	uvm_unlock_pageq();
 	simple_unlock(&vp->v_interlock);
lfs_vnops.c
@@ -1,4 +1,4 @@
-/* $NetBSD: lfs_vnops.c,v 1.93 2003/03/04 19:19:43 perseant Exp $ */
+/* $NetBSD: lfs_vnops.c,v 1.94 2003/03/08 02:55:50 perseant Exp $ */
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -71,7 +71,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.93 2003/03/04 19:19:43 perseant Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.94 2003/03/08 02:55:50 perseant Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -152,7 +152,7 @@ const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {
 	{ &vop_lock_desc, ufs_lock },			/* lock */
 	{ &vop_unlock_desc, ufs_unlock },		/* unlock */
 	{ &vop_bmap_desc, ufs_bmap },			/* bmap */
-	{ &vop_strategy_desc, ufs_strategy },		/* strategy */
+	{ &vop_strategy_desc, lfs_strategy },		/* strategy */
 	{ &vop_print_desc, ufs_print },			/* print */
 	{ &vop_islocked_desc, ufs_islocked },		/* islocked */
 	{ &vop_pathconf_desc, ufs_pathconf },		/* pathconf */
@@ -1011,6 +1011,97 @@ lfs_reclaim(void *v)
 	return (0);
 }
 
+/*
+ * Read a block from, or write a block to, a storage device.
+ * In order to avoid reading blocks that are in the process of being
+ * written by the cleaner---and hence are not mutexed by the normal
+ * buffer cache / page cache mechanisms---check for collisions before
+ * reading.
+ *
+ * We inline ufs_strategy to make sure that the VOP_BMAP occurs *before*
+ * the active cleaner test.
+ *
+ * XXX This code assumes that lfs_markv makes synchronous checkpoints.
+ */
+int
+lfs_strategy(void *v)
+{
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap = v;
+	struct buf *bp;
+	struct lfs *fs;
+	struct vnode *vp;
+	struct inode *ip;
+	daddr_t tbn;
+	int i, sn, error, slept;
+
+	bp = ap->a_bp;
+	vp = bp->b_vp;
+	ip = VTOI(vp);
+	fs = ip->i_lfs;
+
+	if (vp->v_type == VBLK || vp->v_type == VCHR)
+		panic("lfs_strategy: spec");
+	KASSERT(bp->b_bcount != 0);
+	if (bp->b_blkno == bp->b_lblkno) {
+		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
+		    NULL);
+		if (error) {
+			bp->b_error = error;
+			bp->b_flags |= B_ERROR;
+			biodone(bp);
+			return (error);
+		}
+		if ((long)bp->b_blkno == -1) /* no valid data */
+			clrbuf(bp);
+	}
+	if ((long)bp->b_blkno < 0) { /* block is not on disk */
+		biodone(bp);
+		return (0);
+	}
+
+	/* XXX simplelock seglock */
+	slept = 1;
+	while (slept && bp->b_flags & B_READ && fs->lfs_seglock) {
+		/*
+		 * Look through list of intervals.
+		 * There will only be intervals to look through
+		 * if the cleaner holds the seglock.
+		 * Since the cleaner is synchronous, we can trust
+		 * the list of intervals to be current.
+		 */
+		tbn = dbtofsb(fs, bp->b_blkno);
+		sn = dtosn(fs, tbn);
+		slept = 0;
+		for (i = 0; i < fs->lfs_cleanind; i++) {
+			if (sn == dtosn(fs, fs->lfs_cleanint[i]) &&
+			    tbn >= fs->lfs_cleanint[i]) {
+#ifdef DEBUG_LFS
+				printf("lfs_strategy: ino %d lbn %" PRId64
+				    " ind %d sn %d fsb %" PRIx32
+				    " given sn %d fsb %" PRIx64 "\n",
+				    ip->i_number, bp->b_lblkno, i,
+				    dtosn(fs, fs->lfs_cleanint[i]),
+				    fs->lfs_cleanint[i], sn, tbn);
+				printf("lfs_strategy: sleeping on ino %d lbn %"
+				    PRId64 "\n", ip->i_number, bp->b_lblkno);
+#endif
+				tsleep(&fs->lfs_seglock, PRIBIO+1,
+				    "lfs_strategy", 0);
+				/* Things may be different now; start over. */
+				slept = 1;
+				break;
+			}
+		}
+	}
+
+	vp = ip->i_devvp;
+	bp->b_dev = vp->v_rdev;
+	VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
+	return (0);
+}
+
 static void
 lfs_flush_dirops(struct lfs *fs)
 {