Add an lfs_strategy() that checks to make sure we're not trying to read
where the cleaner is trying to write, instead of tying up the "live"
buffers (or pages).

Fix a bug in the LFS_UBC case where oversized buffers would not be
checksummed correctly, causing uncleanable segments.

Make sure that wakeup(&fs->lfs_iocount) is done if fs->lfs_iocount is 1
as well as 0, since we wait in some places for it to drop to 1.

Activate all pages that make it into lfs_gop_write without the segment
lock held, since they must have been dirtied very recently, even if
PG_DELWRI is not set.

Author: perseant
Date:   2003-03-08 02:55:47 +00:00
Parent: 385f6e3afe
Commit: 4b4f884b89

7 changed files with 151 additions and 60 deletions
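
The heart of the change is the collision test lfs_strategy() runs against
the cleaner's active write intervals before letting a read proceed.  Below
is a minimal userland sketch of that predicate: the struct mirrors the
lfs_cleanint/lfs_cleanind fields this commit adds to struct lfs, and
fsb_to_segment() is a hypothetical stand-in for the dtosn() macro.

#include <stdint.h>

#define LFS_MAX_CLEANIND 64

struct cleanlist {
	int32_t	cleanint[LFS_MAX_CLEANIND];	/* start fsb of each interval */
	int	cleanind;			/* number of valid entries */
};

/* Hypothetical stand-in for dtosn(): map a filesystem block to a segment. */
int
fsb_to_segment(int32_t fsb)
{
	return fsb / 1024;	/* assume 1024 fsb per segment */
}

/*
 * Nonzero if reading block tbn could collide with the cleaner's writes:
 * same segment as a recorded interval start, at or past that start.
 * This is the same shape as the loop in lfs_strategy() below.
 */
int
read_collides(const struct cleanlist *cl, int32_t tbn)
{
	int i, sn = fsb_to_segment(tbn);

	for (i = 0; i < cl->cleanind; i++)
		if (sn == fsb_to_segment(cl->cleanint[i]) &&
		    tbn >= cl->cleanint[i])
			return 1;
	return 0;
}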

Index: sys/ufs/lfs/lfs.h

@@ -1,4 +1,4 @@
/* $NetBSD: lfs.h,v 1.54 2003/03/02 04:34:30 perseant Exp $ */
/* $NetBSD: lfs.h,v 1.55 2003/03/08 02:55:47 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -154,7 +154,7 @@ typedef struct lfs_res_blk {
(((uvmexp.active + uvmexp.inactive + uvmexp.free) * uvmexp.filemax) >> 8)
#define LFS_IS_MALLOC_BUF(bp) (((bp)->b_flags & B_CALL) && \
((bp)->b_iodone == lfs_callback || (bp)->b_iodone == lfs_fakebuf_iodone))
(bp)->b_iodone == lfs_callback)
#define LFS_LOCK_BUF(bp) do { \
if (((bp)->b_flags & (B_LOCKED | B_CALL)) == 0) { \
@@ -534,6 +534,9 @@ struct lfs {
struct pool lfs_bpppool; /* Pool for bpp */
struct pool lfs_segpool; /* Pool for struct segment */
#endif /* KERNEL */
#define LFS_MAX_CLEANIND 64
int32_t lfs_cleanint[LFS_MAX_CLEANIND]; /* Active cleaning intervals */
int lfs_cleanind; /* Index into intervals */
};
/*

Index: sys/ufs/lfs/lfs_extern.h

@@ -1,4 +1,4 @@
/* $NetBSD: lfs_extern.h,v 1.44 2003/02/25 23:12:07 perseant Exp $ */
/* $NetBSD: lfs_extern.h,v 1.45 2003/03/08 02:55:48 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -248,6 +248,7 @@ int lfsfifo_close(void *);
int lfs_fcntl (void *);
int lfs_inactive (void *);
int lfs_reclaim (void *);
int lfs_strategy (void *);
int lfs_write (void *);
int lfs_getpages (void *);
int lfs_putpages (void *);

Index: sys/ufs/lfs/lfs_segment.c

@@ -1,4 +1,4 @@
/* $NetBSD: lfs_segment.c,v 1.106 2003/03/04 19:19:43 perseant Exp $ */
/* $NetBSD: lfs_segment.c,v 1.107 2003/03/08 02:55:48 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.106 2003/03/04 19:19:43 perseant Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.107 2003/03/08 02:55:48 perseant Exp $");
#define ivndebug(vp,str) printf("ino %d: %s\n",VTOI(vp)->i_number,(str))
@@ -1459,6 +1459,7 @@ lfs_initseg(struct lfs *fs)
sp = fs->lfs_sp;
repeat = 0;
/* Advance to the next segment. */
if (!LFS_PARTIAL_FITS(fs)) {
/* lfs_avail eats the remaining space */
@@ -1497,6 +1498,18 @@ lfs_initseg(struct lfs *fs)
}
fs->lfs_lastpseg = fs->lfs_offset;
/* Record first address of this partial segment */
if (sp->seg_flags & SEGM_CLEAN) {
fs->lfs_cleanint[fs->lfs_cleanind] = fs->lfs_offset;
if (++fs->lfs_cleanind >= LFS_MAX_CLEANIND) {
/* "1" is the artificial inc in lfs_seglock */
while (fs->lfs_iocount > 1) {
tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs_initseg", 0);
}
fs->lfs_cleanind = 0;
}
}
sp->fs = fs;
sp->ibp = NULL;
sp->idp = NULL;
@@ -1952,7 +1965,7 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
/* Set the summary block busy too */
(*(sp->bpp))->b_flags |= B_BUSY;
#endif
ssp->ss_datasum = cksum(datap, (nblocks - 1) * el_size);
ssp->ss_datasum = cksum(datap, dp - datap);
ssp->ss_sumsum =
cksum(&ssp->ss_datasum, fs->lfs_sumsize - sizeof(ssp->ss_sumsum));
pool_put(&fs->lfs_bpppool, datap);
@@ -2311,7 +2324,7 @@ lfs_super_aiodone(struct buf *bp)
fs = (struct lfs *)bp->b_saveaddr;
fs->lfs_sbactive = 0;
wakeup(&fs->lfs_sbactive);
if (--fs->lfs_iocount == 0)
if (--fs->lfs_iocount <= 1)
wakeup(&fs->lfs_iocount);
lfs_freebuf(fs, bp);
}
@@ -2451,7 +2464,7 @@ lfs_cluster_aiodone(struct buf *bp)
if (fs->lfs_iocount == 0)
panic("lfs_cluster_aiodone: zero iocount");
#endif
if (--fs->lfs_iocount == 0)
if (--fs->lfs_iocount <= 1)
wakeup(&fs->lfs_iocount);
pool_put(&fs->lfs_bpppool, cl->bpp);
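
The ss_datasum change above is the oversized-buffer checksum fix called
out in the log: dp is the cursor one past the last checksum sample
actually filled in, so dp - datap is the right length however many
samples each buffer contributed.  A hedged toy model follows; EL_SIZE,
bufblocks, and the fill loop are illustrative stand-ins, not the
kernel's actual code.

#include <stdint.h>
#include <stddef.h>
#include <string.h>

#define EL_SIZE sizeof(uint32_t)	/* one checksum sample per disk block */

/*
 * Toy model of the summary-sample fill in lfs_writeseg.  bufblocks[i]
 * says how many disk blocks buffer i covers; an oversized buffer
 * covers more than one, so the sample count is not simply nblocks - 1.
 */
size_t
fill_samples(char *datap, const size_t *bufblocks, int nbufs)
{
	char *dp = datap;
	uint32_t sample = 0;
	int i;
	size_t b;

	for (i = 0; i < nbufs; i++)
		for (b = 0; b < bufblocks[i]; b++) {
			memcpy(dp, &sample, EL_SIZE);	/* record one sample */
			dp += EL_SIZE;
			sample++;
		}
	/*
	 * The old length, (nblocks - 1) * el_size, assumed a fixed
	 * sample count; dp - datap measures what was actually written.
	 */
	return (size_t)(dp - datap);
}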

Index: sys/ufs/lfs/lfs_subr.c

@@ -1,4 +1,4 @@
/* $NetBSD: lfs_subr.c,v 1.35 2003/03/04 19:15:26 perseant Exp $ */
/* $NetBSD: lfs_subr.c,v 1.36 2003/03/08 02:55:49 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.35 2003/03/04 19:15:26 perseant Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.36 2003/03/08 02:55:49 perseant Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -333,7 +333,8 @@ lfs_seglock(struct lfs *fs, unsigned long flags)
fs->lfs_seglock = 1;
fs->lfs_lockpid = curproc->p_pid;
fs->lfs_cleanind = 0;
/* Drain fragment size changes out */
lockmgr(&fs->lfs_fraglock, LK_EXCLUSIVE, 0);
@@ -455,7 +456,13 @@ lfs_segunlock(struct lfs *fs)
pool_put(&fs->lfs_bpppool, sp->bpp);
sp->bpp = NULL;
/* The sync case holds a reference in `sp' to be freed below */
/*
* If we're not sync, we're done with sp, get rid of it.
* Otherwise, we keep a local copy around but free
* fs->lfs_sp so another process can use it (we have to
* wait but they don't have to wait for us).
*/
if (!sync)
pool_put(&fs->lfs_segpool, sp);
fs->lfs_sp = NULL;
@@ -469,8 +476,9 @@ lfs_segunlock(struct lfs *fs)
lfs_countlocked(&locked_queue_count,
&locked_queue_bytes, "lfs_segunlock");
wakeup(&locked_queue_count);
wakeup(&fs->lfs_iocount);
}
if (fs->lfs_iocount <= 1)
wakeup(&fs->lfs_iocount);
/*
* If we're not checkpointing, we don't have to block
* other processes to wait for a synchronous write
@@ -498,6 +506,7 @@ lfs_segunlock(struct lfs *fs)
}
if (sync)
pool_put(&fs->lfs_segpool, sp);
if (ckp) {
fs->lfs_nactive = 0;
/* If we *know* everything's on disk, write both sbs */
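
The iocount changes here and in lfs_segment.c share one rationale from
the log: the segment-lock holder keeps an artificial reference of its
own, so it drains fs->lfs_iocount to 1 rather than 0, and completion
paths must therefore wake sleepers at 1 as well as at 0.  A userland
model of the handshake, with pthreads standing in for tsleep()/wakeup()
and all names illustrative:

#include <pthread.h>

struct iocount {
	pthread_mutex_t	mtx;
	pthread_cond_t	cv;
	int		count;	/* starts at 1: the seglock holder's own ref */
};

/* I/O completion side: the fix is waking at <= 1, not only at == 0. */
void
io_done(struct iocount *ic)
{
	pthread_mutex_lock(&ic->mtx);
	if (--ic->count <= 1)
		pthread_cond_broadcast(&ic->cv);
	pthread_mutex_unlock(&ic->mtx);
}

/* Seglock holder: "drained" means only our artificial ref remains. */
void
drain_to_own_ref(struct iocount *ic)
{
	pthread_mutex_lock(&ic->mtx);
	while (ic->count > 1)
		pthread_cond_wait(&ic->cv, &ic->mtx);
	pthread_mutex_unlock(&ic->mtx);
}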

Index: sys/ufs/lfs/lfs_syscalls.c

@@ -1,4 +1,4 @@
/* $NetBSD: lfs_syscalls.c,v 1.84 2003/02/24 08:42:49 perseant Exp $ */
/* $NetBSD: lfs_syscalls.c,v 1.85 2003/03/08 02:55:49 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.84 2003/02/24 08:42:49 perseant Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.85 2003/03/08 02:55:49 perseant Exp $");
#define LFS /* for prototypes in syscallargs.h */
@@ -1224,57 +1224,25 @@ lfs_fastvget(struct mount *mp, ino_t ino, daddr_t daddr, struct vnode **vpp, str
return (0);
}
void
lfs_fakebuf_iodone(struct buf *bp)
{
struct buf *obp = bp->b_saveaddr;
if (!(obp->b_flags & (B_DELWRI | B_DONE)))
obp->b_flags |= B_INVAL;
bp->b_saveaddr = (caddr_t)(VTOI(obp->b_vp)->i_lfs);
brelse(obp);
lfs_callback(bp);
}
/*
* Make up a "fake" cleaner buffer, copy the data from userland into it.
*/
struct buf *
lfs_fakebuf(struct lfs *fs, struct vnode *vp, int lbn, size_t size, caddr_t uaddr)
{
struct buf *bp;
int error;
struct buf *obp;
KASSERT(VTOI(vp)->i_number != LFS_IFILE_INUM);
/*
* make corresponding buffer busy to avoid
* reading blocks that isn't written yet.
* it's needed because we'll update metadatas in lfs_updatemeta
* before data pointed by them is actually written to disk.
*
* XXX no need to allocbuf.
*
* XXX this can cause buf starvation.
*/
obp = getblk(vp, lbn, size, 0, 0);
if (obp == NULL)
panic("lfs_fakebuf: getblk failed");
bp = lfs_newbuf(VTOI(vp)->i_lfs, vp, lbn, size, LFS_NB_CLEAN);
error = copyin(uaddr, bp->b_data, size);
if (error) {
lfs_freebuf(fs, bp);
return NULL;
}
bp->b_saveaddr = obp;
KDASSERT(bp->b_iodone == lfs_callback);
bp->b_iodone = lfs_fakebuf_iodone;
#ifdef DIAGNOSTIC
if (obp->b_flags & B_GATHERED)
panic("lfs_fakebuf: gathered bp: %p, ino=%u, lbn=%d",
bp, VTOI(vp)->i_number, lbn);
#endif
#if 0
bp->b_saveaddr = (caddr_t)fs;
++fs->lfs_iocount;

Index: sys/ufs/lfs/lfs_vfsops.c

@@ -1,4 +1,4 @@
/* $NetBSD: lfs_vfsops.c,v 1.102 2003/03/02 04:34:31 perseant Exp $ */
/* $NetBSD: lfs_vfsops.c,v 1.103 2003/03/08 02:55:49 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.102 2003/03/02 04:34:31 perseant Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.103 2003/03/08 02:55:49 perseant Exp $");
#if defined(_KERNEL_OPT)
#include "opt_quota.h"
@@ -1811,17 +1811,23 @@ lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
for (i = 0; i < npages; i++) {
pg = pgs[i];
if (pg->flags & PG_WANTED)
wakeup(pg);
if (pg->flags & PG_PAGEOUT)
uvmexp.paging--;
if (pg->flags & PG_DELWRI) {
uvm_pageunwire(pg);
uvm_pageactivate(pg);
}
pg->flags &= ~(PG_BUSY|PG_CLEAN|PG_WANTED|PG_DELWRI|PG_PAGEOUT|PG_RELEASED);
UVM_PAGE_OWN(pg, NULL);
uvm_pageactivate(pg);
pg->flags &= ~(PG_CLEAN|PG_DELWRI|PG_PAGEOUT|PG_RELEASED);
#ifdef DEBUG_LFS
printf("pg[%d]->flags = %x\n", i, pg->flags);
printf("pg[%d]->pqflags = %x\n", i, pg->pqflags);
printf("pg[%d]->uanon = %p\n", i, pg->uanon);
printf("pg[%d]->uobject = %p\n", i, pg->uobject);
printf("pg[%d]->wire_count = %d\n", i, pg->wire_count);
printf("pg[%d]->loan_count = %d\n", i, pg->loan_count);
#endif
}
/* uvm_pageunbusy takes care of PG_BUSY, PG_WANTED */
uvm_page_unbusy(pgs, npages);
uvm_unlock_pageq();
simple_unlock(&vp->v_interlock);

Index: sys/ufs/lfs/lfs_vnops.c

@@ -1,4 +1,4 @@
/* $NetBSD: lfs_vnops.c,v 1.93 2003/03/04 19:19:43 perseant Exp $ */
/* $NetBSD: lfs_vnops.c,v 1.94 2003/03/08 02:55:50 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.93 2003/03/04 19:19:43 perseant Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.94 2003/03/08 02:55:50 perseant Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -152,7 +152,7 @@ const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {
{ &vop_lock_desc, ufs_lock }, /* lock */
{ &vop_unlock_desc, ufs_unlock }, /* unlock */
{ &vop_bmap_desc, ufs_bmap }, /* bmap */
{ &vop_strategy_desc, ufs_strategy }, /* strategy */
{ &vop_strategy_desc, lfs_strategy }, /* strategy */
{ &vop_print_desc, ufs_print }, /* print */
{ &vop_islocked_desc, ufs_islocked }, /* islocked */
{ &vop_pathconf_desc, ufs_pathconf }, /* pathconf */
@@ -1011,6 +1011,97 @@ lfs_reclaim(void *v)
return (0);
}
/*
* Read a block from, or write a block to, a storage device.
* In order to avoid reading blocks that are in the process of being
* written by the cleaner---and hence are not mutexed by the normal
* buffer cache / page cache mechanisms---check for collisions before
* reading.
*
* We inline ufs_strategy to make sure that the VOP_BMAP occurs *before*
* the active cleaner test.
*
* XXX This code assumes that lfs_markv makes synchronous checkpoints.
*/
int
lfs_strategy(void *v)
{
struct vop_strategy_args /* {
struct buf *a_bp;
} */ *ap = v;
struct buf *bp;
struct lfs *fs;
struct vnode *vp;
struct inode *ip;
daddr_t tbn;
int i, sn, error, slept;
bp = ap->a_bp;
vp = bp->b_vp;
ip = VTOI(vp);
fs = ip->i_lfs;
if (vp->v_type == VBLK || vp->v_type == VCHR)
panic("lfs_strategy: spec");
KASSERT(bp->b_bcount != 0);
if (bp->b_blkno == bp->b_lblkno) {
error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
NULL);
if (error) {
bp->b_error = error;
bp->b_flags |= B_ERROR;
biodone(bp);
return (error);
}
if ((long)bp->b_blkno == -1) /* no valid data */
clrbuf(bp);
}
if ((long)bp->b_blkno < 0) { /* block is not on disk */
biodone(bp);
return (0);
}
/* XXX simplelock seglock */
slept = 1;
while (slept && bp->b_flags & B_READ && fs->lfs_seglock) {
/*
* Look through list of intervals.
* There will only be intervals to look through
* if the cleaner holds the seglock.
* Since the cleaner is synchronous, we can trust
* the list of intervals to be current.
*/
tbn = dbtofsb(fs, bp->b_blkno);
sn = dtosn(fs, tbn);
slept = 0;
for (i = 0; i < fs->lfs_cleanind; i++) {
if (sn == dtosn(fs, fs->lfs_cleanint[i]) &&
tbn >= fs->lfs_cleanint[i]) {
#ifdef DEBUG_LFS
printf("lfs_strategy: ino %d lbn %" PRId64
" ind %d sn %d fsb %" PRIx32
" given sn %d fsb %" PRIx64 "\n",
ip->i_number, bp->b_lblkno, i,
dtosn(fs, fs->lfs_cleanint[i]),
fs->lfs_cleanint[i], sn, tbn);
printf("lfs_strategy: sleeping on ino %d lbn %"
PRId64 "\n", ip->i_number, bp->b_lblkno);
#endif
tsleep(&fs->lfs_seglock, PRIBIO+1,
"lfs_strategy", 0);
/* Things may be different now; start over. */
slept = 1;
break;
}
}
}
vp = ip->i_devvp;
bp->b_dev = vp->v_rdev;
VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
return (0);
}
static void
lfs_flush_dirops(struct lfs *fs)
{