Add code to UBCify LFS.  This is still behind "#ifdef LFS_UBC" for now
(there are still some details to work out), but expect that to go
away soon.  To support these basic changes (creation of lfs_putpages,
lfs_gop_write, mods to lfs_balloc) several other changes were made, to
wit:

* Create a writer daemon kernel thread whose purpose is to handle page
  writes for the pagedaemon, but which also takes over some of the
  functions of lfs_check().  This thread is started the first time an
  LFS is mounted.

* Add a "flags" parameter to GOP_SIZE.  Current values are
  GOP_SIZE_READ, meaning that the call should return the size of the
  in-core version of the file, and GOP_SIZE_WRITE, meaning that it
  should return the on-disk size.  One of GOP_SIZE_READ or
  GOP_SIZE_WRITE must be specified.

* Instead of using malloc(...M_WAITOK) for everything, reserve enough
  resources to get by and use malloc(...M_NOWAIT), using the reserves if
  necessary.  Use the pool subsystem for structures small enough that
  this is feasible.  This also obsoletes LFS_THROTTLE.

And a few that are not strictly necessary:

* Move the LFS inode extensions into a separately allocated
  structure, getting closer to LFS as an LKM.  "Welcome to 1.6O."

* Unify GOP_ALLOC between FFS and LFS.

* Update LFS copyright headers to correct values.

* Actually cast to unsigned in lfs_shellsort, like the comment says.

* Keep track of which segments were empty before the previous
  checkpoint; any segment that passes two checkpoints both dirty and
  empty can be summarily cleaned, and now is.  Right now lfs_segclean
  still works, but it should be turned into a no-op
  compatibility syscall.
This commit is contained in:
perseant 2003-02-17 23:48:08 +00:00
parent 0cfe2d1c69
commit b397c875ae
34 changed files with 2401 additions and 783 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: print.c,v 1.13 2003/01/24 21:55:05 fvdl Exp $ */
/* $NetBSD: print.c,v 1.14 2003/02/17 23:48:08 perseant Exp $ */
/*-
* Copyright (c) 1992, 1993
@ -38,7 +38,7 @@
#if 0
static char sccsid[] = "from: @(#)print.c 8.1 (Berkeley) 6/4/93";
#else
__RCSID("$NetBSD: print.c,v 1.13 2003/01/24 21:55:05 fvdl Exp $");
__RCSID("$NetBSD: print.c,v 1.14 2003/02/17 23:48:08 perseant Exp $");
#endif
#endif /* not lint */
@ -261,7 +261,7 @@ dump_super(struct lfs *lfsp)
syslog(LOG_DEBUG, "Checkpoint Info\n");
syslog(LOG_DEBUG, "%s%d\t%s0x%X\t%s%d\n",
"free ", lfsp->lfs_free,
"freehd ", lfsp->lfs_freehd,
"idaddr ", lfsp->lfs_idaddr,
"ifile ", lfsp->lfs_ifile);
syslog(LOG_DEBUG, "%s%d\t%s%d\t%s%d\n",

View File

@ -1,4 +1,4 @@
/* $NetBSD: inode.c,v 1.14 2003/01/24 21:55:10 fvdl Exp $ */
/* $NetBSD: inode.c,v 1.15 2003/02/17 23:48:09 perseant Exp $ */
/*
* Copyright (c) 1997, 1998
@ -348,8 +348,8 @@ lfs_ginode(ino_t inumber)
if (reply("free")) {
ifp = lfs_ientry(inumber, &bp);
ifp->if_daddr = LFS_UNUSED_DADDR;
ifp->if_nextfree = sblock.lfs_free;
sblock.lfs_free = inumber;
ifp->if_nextfree = sblock.lfs_freehd;
sblock.lfs_freehd = inumber;
sbdirty();
dirty(bp);
bp->b_flags &= ~B_INUSE;
@ -700,8 +700,8 @@ clri(struct inodesc *idesc, char *type, int flag)
ifp = lfs_ientry(idesc->id_number, &bp);
ifp->if_daddr = LFS_UNUSED_DADDR;
ifp->if_nextfree = sblock.lfs_free;
sblock.lfs_free = idesc->id_number;
ifp->if_nextfree = sblock.lfs_freehd;
sblock.lfs_freehd = idesc->id_number;
sbdirty();
dirty(bp);
bp->b_flags &= ~B_INUSE;

View File

@ -1,4 +1,4 @@
/* $NetBSD: pass0.c,v 1.12 2003/01/24 21:55:10 fvdl Exp $ */
/* $NetBSD: pass0.c,v 1.13 2003/02/17 23:48:09 perseant Exp $ */
/*
* Copyright (c) 1998 Konrad E. Schroder.
@ -86,7 +86,7 @@ pass0()
memset(visited, 0, maxino * sizeof(ino_t));
plastino = 0;
ino = sblock.lfs_free;
ino = sblock.lfs_freehd;
while (ino) {
if (ino >= maxino) {
printf("! Ino %d out of range (last was %d)\n", ino,
@ -115,7 +115,7 @@ pass0()
ino, (long long)daddr);
if (preen || reply("FIX") == 1) {
if (plastino == 0) {
sblock.lfs_free = nextino;
sblock.lfs_freehd = nextino;
sbdirty();
} else {
ifp = lfs_ientry(plastino, &bp);
@ -145,8 +145,8 @@ pass0()
pwarn("! Ino %d free, but not on the free list\n", ino);
if (preen || reply("FIX") == 1) {
ifp->if_nextfree = sblock.lfs_free;
sblock.lfs_free = ino;
ifp->if_nextfree = sblock.lfs_freehd;
sblock.lfs_freehd = ino;
sbdirty();
dirty(bp);
}

View File

@ -1,6 +1,6 @@
#!/bin/sh
#
# $NetBSD: osrelease.sh,v 1.90 2003/02/01 06:26:30 thorpej Exp $
# $NetBSD: osrelease.sh,v 1.91 2003/02/17 23:48:09 perseant Exp $
#
# Copyright (c) 1997 The NetBSD Foundation, Inc.
# All rights reserved.
@ -42,7 +42,7 @@
# sys/sys/param.h: __NetBSD_Version__
# share/tmac/doc-common: ds oS
#
release=1.6N
release=1.6O
case $1 in
-s)

View File

@ -1,4 +1,4 @@
/* $NetBSD: genfs_node.h,v 1.3 2001/12/18 07:49:36 chs Exp $ */
/* $NetBSD: genfs_node.h,v 1.4 2003/02/17 23:48:10 perseant Exp $ */
/*
* Copyright (c) 2001 Chuck Silvers.
@ -36,18 +36,22 @@
struct vm_page;
struct genfs_ops {
void (*gop_size)(struct vnode *, off_t, off_t *);
void (*gop_size)(struct vnode *, off_t, off_t *, int);
int (*gop_alloc)(struct vnode *, off_t, off_t, int, struct ucred *);
int (*gop_write)(struct vnode *, struct vm_page **, int, int);
};
#define GOP_SIZE(vp, size, eobp) \
(*VTOG(vp)->g_op->gop_size)((vp), (size), (eobp))
#define GOP_SIZE(vp, size, eobp, flags) \
(*VTOG(vp)->g_op->gop_size)((vp), (size), (eobp), (flags))
#define GOP_ALLOC(vp, off, len, flags, cred) \
(*VTOG(vp)->g_op->gop_alloc)((vp), (off), (len), (flags), (cred))
#define GOP_WRITE(vp, pgs, npages, flags) \
(*VTOG(vp)->g_op->gop_write)((vp), (pgs), (npages), (flags))
/* Flags to GOP_SIZE */
#define GOP_SIZE_READ 0x1 /* Advise how many pages to read/create */
#define GOP_SIZE_WRITE 0x2 /* Tell how many pages to write */
struct genfs_node {
struct genfs_ops *g_op; /* ops vector */
struct lock g_glock; /* getpages lock */
@ -55,7 +59,7 @@ struct genfs_node {
#define VTOG(vp) ((struct genfs_node *)(vp)->v_data)
void genfs_size(struct vnode *, off_t, off_t *);
void genfs_size(struct vnode *, off_t, off_t *, int);
void genfs_node_init(struct vnode *, struct genfs_ops *);
int genfs_gop_write(struct vnode *, struct vm_page **, int, int);
int genfs_compat_gop_write(struct vnode *, struct vm_page **, int, int);

View File

@ -1,4 +1,4 @@
/* $NetBSD: genfs_vnops.c,v 1.71 2003/02/05 21:38:42 pk Exp $ */
/* $NetBSD: genfs_vnops.c,v 1.72 2003/02/17 23:48:11 perseant Exp $ */
/*
* Copyright (c) 1982, 1986, 1989, 1993
@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.71 2003/02/05 21:38:42 pk Exp $");
__KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.72 2003/02/17 23:48:11 perseant Exp $");
#include "opt_nfsserver.h"
@ -495,11 +495,11 @@ genfs_getpages(void *v)
error = 0;
origoffset = ap->a_offset;
orignpages = *ap->a_count;
GOP_SIZE(vp, vp->v_size, &diskeof);
GOP_SIZE(vp, vp->v_size, &diskeof, GOP_SIZE_READ);
if (flags & PGO_PASTEOF) {
newsize = MAX(vp->v_size,
origoffset + (orignpages << PAGE_SHIFT));
GOP_SIZE(vp, newsize, &memeof);
GOP_SIZE(vp, newsize, &memeof, GOP_SIZE_READ);
} else {
memeof = diskeof;
}
@ -1139,8 +1139,13 @@ genfs_putpages(void *v)
yield = (l->l_cpu->ci_schedstate.spc_flags &
SPCF_SHOULDYIELD) && !pagedaemon;
if (pg->flags & PG_BUSY || yield) {
KASSERT(!pagedaemon);
UVMHIST_LOG(ubchist, "busy %p", pg,0,0,0);
if (flags & PGO_BUSYFAIL && pg->flags & PG_BUSY) {
UVMHIST_LOG(ubchist, "busyfail %p", pg, 0,0,0);
error = EDEADLK;
break;
}
KASSERT(!pagedaemon);
if (by_list) {
TAILQ_INSERT_BEFORE(pg, &curmp, listq);
UVMHIST_LOG(ubchist, "curmp next %p",
@ -1381,7 +1386,7 @@ genfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x",
vp, pgs, npages, flags);
GOP_SIZE(vp, vp->v_size, &eof);
GOP_SIZE(vp, vp->v_size, &eof, GOP_SIZE_WRITE);
if (vp->v_type == VREG) {
fs_bshift = vp->v_mount->mnt_fs_bshift;
dev_bshift = vp->v_mount->mnt_dev_bshift;
@ -1523,7 +1528,7 @@ genfs_node_init(struct vnode *vp, struct genfs_ops *ops)
}
void
genfs_size(struct vnode *vp, off_t size, off_t *eobp)
genfs_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
{
int bsize;

View File

@ -1,4 +1,4 @@
/* $NetBSD: nfs_node.c,v 1.60 2003/02/15 18:00:25 drochner Exp $ */
/* $NetBSD: nfs_node.c,v 1.61 2003/02/17 23:48:12 perseant Exp $ */
/*
* Copyright (c) 1989, 1993
@ -39,7 +39,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nfs_node.c,v 1.60 2003/02/15 18:00:25 drochner Exp $");
__KERNEL_RCSID(0, "$NetBSD: nfs_node.c,v 1.61 2003/02/17 23:48:12 perseant Exp $");
#include "opt_nfs.h"
@ -80,7 +80,7 @@ extern int prtactive;
#define nfs_hash(x,y) hash32_buf((x), (y), HASH32_BUF_INIT)
void nfs_gop_size(struct vnode *, off_t, off_t *);
void nfs_gop_size(struct vnode *, off_t, off_t *, int);
int nfs_gop_alloc(struct vnode *, off_t, off_t, int, struct ucred *);
int nfs_gop_write(struct vnode *, struct vm_page **, int, int);
@ -315,8 +315,11 @@ nfs_reclaim(v)
}
void
nfs_gop_size(struct vnode *vp, off_t size, off_t *eobp)
nfs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
{
KASSERT(flags & (GOP_SIZE_READ | GOP_SIZE_WRITE));
KASSERT((flags & (GOP_SIZE_READ | GOP_SIZE_WRITE))
!= (GOP_SIZE_READ | GOP_SIZE_WRITE));
*eobp = MAX(size, vp->v_size);
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: param.h,v 1.159 2003/02/01 06:26:30 thorpej Exp $ */
/* $NetBSD: param.h,v 1.160 2003/02/17 23:48:13 perseant Exp $ */
/*-
* Copyright (c) 1982, 1986, 1989, 1993
@ -67,7 +67,7 @@
* Don't forget to change conf/osrelease.sh too.
*/
#define __NetBSD_Version__ 106140000 /* NetBSD 1.6N */
#define __NetBSD_Version__ 106150000 /* NetBSD 1.6O */
/*
* Historical NetBSD #define

View File

@ -1,4 +1,4 @@
/* $NetBSD: ffs_extern.h,v 1.25 2003/01/24 21:55:22 fvdl Exp $ */
/* $NetBSD: ffs_extern.h,v 1.26 2003/02/17 23:48:14 perseant Exp $ */
/*-
* Copyright (c) 1991, 1993, 1994
@ -151,7 +151,7 @@ int ffs_fsync __P((void *));
int ffs_reclaim __P((void *));
int ffs_getpages __P((void *));
int ffs_putpages __P((void *));
void ffs_gop_size __P((struct vnode *, off_t, off_t *));
void ffs_gop_size __P((struct vnode *, off_t, off_t *, int));
__END_DECLS

View File

@ -1,4 +1,4 @@
/* $NetBSD: ffs_vfsops.c,v 1.106 2003/01/24 21:55:23 fvdl Exp $ */
/* $NetBSD: ffs_vfsops.c,v 1.107 2003/02/17 23:48:14 perseant Exp $ */
/*
* Copyright (c) 1989, 1991, 1993, 1994
@ -36,7 +36,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.106 2003/01/24 21:55:23 fvdl Exp $");
__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.107 2003/02/17 23:48:14 perseant Exp $");
#if defined(_KERNEL_OPT)
#include "opt_ffs.h"
@ -117,7 +117,7 @@ struct vfsops ffs_vfsops = {
struct genfs_ops ffs_genfsops = {
ffs_gop_size,
ffs_gop_alloc,
ufs_gop_alloc,
genfs_gop_write,
};

View File

@ -1,4 +1,4 @@
/* $NetBSD: ffs_vnops.c,v 1.54 2003/02/05 21:38:44 pk Exp $ */
/* $NetBSD: ffs_vnops.c,v 1.55 2003/02/17 23:48:15 perseant Exp $ */
/*
* Copyright (c) 1982, 1986, 1989, 1993
@ -36,7 +36,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.54 2003/02/05 21:38:44 pk Exp $");
__KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.55 2003/02/17 23:48:15 perseant Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -567,12 +567,16 @@ ffs_putpages(void *v)
*/
void
ffs_gop_size(struct vnode *vp, off_t size, off_t *eobp)
ffs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
{
struct inode *ip = VTOI(vp);
struct fs *fs = ip->i_fs;
daddr_t olbn, nlbn;
KASSERT(flags & (GOP_SIZE_READ | GOP_SIZE_WRITE));
KASSERT((flags & (GOP_SIZE_READ | GOP_SIZE_WRITE))
!= (GOP_SIZE_READ | GOP_SIZE_WRITE));
olbn = lblkno(fs, ip->i_ffs_size);
nlbn = lblkno(fs, size);
if (nlbn < NDADDR && olbn <= nlbn) {

View File

@ -1,4 +1,19 @@
# $NetBSD: TODO,v 1.5 2001/07/13 20:30:22 perseant Exp $
# $NetBSD: TODO,v 1.6 2003/02/17 23:48:16 perseant Exp $
- Lock audit. Need to check locking for multiprocessor case in particular.
- Get rid of the syscalls: make them into ioctl calls instead. This would
allow LFS to be loaded as a module. We would then ideally have an
in-kernel cleaner that runs if no userland cleaner has asserted itself.
- Get rid of lfs_segclean(); the kernel should clean a dirty segment IFF it
has passed two checkpoints containing zero live bytes.
- Now that our cache is basically all of physical memory, we need to make
sure that segwrite is not starving other important things. Need a way
to prioritize which blocks are most important to write, and write only
those before giving up the seglock to do the rest. How does this change
our notion of what a checkpoint is?
- Investigate alternate inode locking strategy: Inode locks are useful
for locking against simultaneous changes to inode size (balloc,
@ -11,12 +26,6 @@
- Fully working fsck_lfs. (Really, need a general-purpose external
partial-segment writer.)
- Inode blocks are currently the same size as the fs block size; but all
the ones I've seen are mostly empty, and this will be especially true
if atime information is kept in the ifile instead of the inode. Could
we shrink the inode block size to DEV_BSIZE? Or parametrize it at fs
creation time?
- Get rid of DEV_BSIZE, pay attention to the media block size at mount time.
- More fs ops need to call lfs_imtime. Which ones? (Blackwell et al., 1995)

View File

@ -1,7 +1,7 @@
/* $NetBSD: lfs.h,v 1.45 2003/01/29 13:14:33 yamt Exp $ */
/* $NetBSD: lfs.h,v 1.46 2003/02/17 23:48:16 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@ -95,11 +95,44 @@
#define BW_CLEAN 1
#define MIN_FREE_SEGS 2
#define LFS_MAX_ACTIVE 10
#define LFS_MAXDIROP (desiredvnodes >> 2)
#ifndef LFS_ATIME_IFILE
# define LFS_ATIME_IFILE 0
#endif
/* Local definition for LFS's usage of PG_PAGER1 */
#define PG_DELWRI PG_PAGER1
/* Types for lfs_newbuf and lfs_malloc */
#define LFS_NB_UNKNOWN -1
#define LFS_NB_SUMMARY 0
#define LFS_NB_SBLOCK 1
#define LFS_NB_IBLOCK 2
#define LFS_NB_CLUSTER 3
#define LFS_NB_CLEAN 4
#define LFS_NB_COUNT 5 /* always last */
/* Number of reserved memory blocks of each type */
#define LFS_N_SUMMARIES 2
#define LFS_N_SBLOCKS 1 /* Always 1, to throttle superblock writes */
#define LFS_N_IBLOCKS 16 /* In theory ssize/bsize; in practice around 2 */
#define LFS_N_CLUSTERS 16 /* In theory ssize/MAXPHYS */
#define LFS_N_CLEAN 0
/* Total count of "large" (non-pool) types */
#define LFS_N_TOTAL (LFS_N_SUMMARIES + LFS_N_SBLOCKS + LFS_N_IBLOCKS + LFS_N_CLUSTERS + LFS_N_CLEAN)
/* Counts for pool types */
#define LFS_N_CL LFS_N_CLUSTERS
#define LFS_N_BPP 2
#define LFS_N_SEG 2
/*
 * Structure to keep reserved blocks.
 *
 * One res_t describes a single reserved memory block.  struct lfs refers
 * to these through its lfs_resblk pointer ("Reserved memory for pageout")
 * and through the lfs_reshash[] array of LIST_HEADs of this type.
 */
typedef struct lfs_res_blk {
void *p; /* presumably the reserved memory itself -- TODO confirm against the allocator */
LIST_ENTRY(lfs_res_blk) res; /* list linkage; lfs_reshash[] heads lists of lfs_res_blk */
char inuse; /* presumably nonzero while the block is handed out -- TODO confirm */
} res_t;
/*
* #define WRITE_THRESHHOLD ((nbuf >> 1) - 10)
* #define WAIT_THRESHHOLD (nbuf - (nbuf >> 2) - 10)
@ -109,8 +142,17 @@
/* These are new ... is LFS taking up too much memory in its buffers? */
#define LFS_MAX_BYTES (((bufpages >> 2) - 10) * NBPG)
#define LFS_WAIT_BYTES (((bufpages >> 1) - (bufpages >> 3) - 10) * NBPG)
#define LFS_MAX_DIROP ((desiredvnodes >> 2) + (desiredvnodes >> 3))
#define LFS_BUFWAIT 2
#define LFS_MAX_PAGES \
(((uvmexp.active + uvmexp.inactive + uvmexp.free) * uvmexp.filemin) >> 8)
#define LFS_WAIT_PAGES \
(((uvmexp.active + uvmexp.inactive + uvmexp.free) * uvmexp.filemax) >> 8)
#define LFS_IS_MALLOC_BUF(bp) (((bp)->b_flags & B_CALL) && \
((bp)->b_iodone == lfs_callback || (bp)->b_iodone == lfs_fakebuf_iodone))
#define LFS_LOCK_BUF(bp) do { \
if (((bp)->b_flags & (B_LOCKED | B_CALL)) == 0) { \
++locked_queue_count; \
@ -237,7 +279,21 @@ extern struct lfs_log_entry lfs_log[LFS_LOGLENGTH];
(ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); \
} while (0)
#define WRITEINPROG(vp) (vp->v_dirtyblkhd.lh_first && !(VTOI(vp)->i_flag & \
/*
* How to find out whether a vnode had dirty buffers or pages,
* to know whether it needs to retain IN_MODIFIED after a write.
*/
#ifdef LFS_UBC
int lfs_checkifempty(struct vnode *);
# define VPISEMPTY(vp) lfs_checkifempty(vp)
#else
# define VPISEMPTY(vp) ((vp)->v_dirtyblkhd.lh_first == NULL)
#endif
/*
* WRITEINPROG does not use VPISEMPTY because any dirty pages will
* have been given buffer headers, if they are "in progress".
*/
#define WRITEINPROG(vp) ((vp)->v_dirtyblkhd.lh_first && !(VTOI(vp)->i_flag & \
(IN_MODIFIED | IN_ACCESSED | IN_CLEANING)))
/* Here begins the berkeley code */
@ -257,6 +313,7 @@ struct segusage {
#define SEGUSE_DIRTY 0x02 /* segment has data in it */
#define SEGUSE_SUPERBLOCK 0x04 /* segment contains a superblock */
#define SEGUSE_ERROR 0x08 /* cleaner: do not clean segment */
#define SEGUSE_EMPTY 0x10 /* segment is empty */
u_int32_t su_flags; /* 12: segment flags */
u_int64_t su_lastmod; /* 16: last modified timestamp */
};
@ -304,7 +361,7 @@ struct dlfs {
u_int32_t dlfs_frag; /* 28: number of frags in a block in fs */
/* Checkpoint region. */
u_int32_t dlfs_free; /* 32: start of the free list */
u_int32_t dlfs_freehd; /* 32: start of the free list */
u_int32_t dlfs_bfree; /* 36: number of free disk blocks */
u_int32_t dlfs_nfiles; /* 40: number of allocated inodes */
int32_t dlfs_avail; /* 44: blocks available for writing */
@ -371,9 +428,6 @@ struct dlfs {
u_int32_t dlfs_cksum; /* 508: checksum for superblock checking */
};
/* Maximum number of io's we can have pending at once */
#define LFS_THROTTLE 32 /* XXX should be better paramtrized - ? */
/* In-memory super block. */
struct lfs {
struct dlfs lfs_dlfs; /* on-disk parameters */
@ -385,7 +439,7 @@ struct lfs {
#define lfs_bsize lfs_dlfs.dlfs_bsize
#define lfs_fsize lfs_dlfs.dlfs_fsize
#define lfs_frag lfs_dlfs.dlfs_frag
#define lfs_free lfs_dlfs.dlfs_free
#define lfs_freehd lfs_dlfs.dlfs_freehd
#define lfs_bfree lfs_dlfs.dlfs_bfree
#define lfs_nfiles lfs_dlfs.dlfs_nfiles
#define lfs_avail lfs_dlfs.dlfs_avail
@ -455,20 +509,26 @@ struct lfs {
#define LFS_WARNED 0x04
int8_t lfs_flags; /* currently unused flag */
u_int16_t lfs_activesb; /* toggle between superblocks */
#ifdef LFS_TRACK_IOS
daddr_t lfs_pending[LFS_THROTTLE]; /* daddrs of pending writes */
#endif /* LFS_TRACK_IOS */
daddr_t lfs_sbactive; /* disk address of in-progress sb write */
struct vnode *lfs_flushvp; /* vnode being flushed */
struct vnode *lfs_unlockvp; /* being inactivated in lfs_segunlock */
u_int32_t lfs_diropwait; /* # procs waiting on dirop flush */
size_t lfs_devbsize; /* Device block size */
size_t lfs_devbshift; /* Device block shift */
struct lock lfs_freelock;
struct lock lfs_fraglock;
pid_t lfs_rfpid; /* Process ID of roll-forward agent */
int lfs_nadirop; /* number of active dirop nodes */
long lfs_ravail; /* blocks pre-reserved for writing */
res_t *lfs_resblk; /* Reserved memory for pageout */
TAILQ_HEAD(, inode) lfs_dchainhd; /* dirop vnodes */
TAILQ_HEAD(, inode) lfs_pchainhd; /* paging vnodes */
#define LFS_RESHASH_WIDTH 17
LIST_HEAD(, lfs_res_blk) lfs_reshash[LFS_RESHASH_WIDTH];
int lfs_pdflush; /* pagedaemon wants us to flush */
u_int32_t **lfs_suflags; /* Segment use flags */
struct pool lfs_clpool; /* Pool for struct lfs_cluster */
struct pool lfs_bpppool; /* Pool for bpp */
struct pool lfs_segpool; /* Pool for struct segment */
};
/*
@ -659,14 +719,14 @@ struct segsum {
#define LFS_GET_HEADFREE(FS, CIP, BP, FREEP) do { \
if ((FS)->lfs_version > 1) { \
LFS_CLEANERINFO((CIP), (FS), (BP)); \
(FS)->lfs_free = (CIP)->free_head; \
(FS)->lfs_freehd = (CIP)->free_head; \
brelse(BP); \
} \
*(FREEP) = (FS)->lfs_free; \
*(FREEP) = (FS)->lfs_freehd; \
} while (0)
#define LFS_PUT_HEADFREE(FS, CIP, BP, VAL) do { \
(FS)->lfs_free = (VAL); \
(FS)->lfs_freehd = (VAL); \
if ((FS)->lfs_version > 1) { \
LFS_CLEANERINFO((CIP), (FS), (BP)); \
(CIP)->free_head = (VAL); \
@ -721,6 +781,15 @@ struct segsum {
(SP) = (SEGUSE *)(BP)->b_data + ((IN) % (F)->lfs_sepb); \
} while(0)
/*
 * Write back a segment usage entry.
 *
 * SP is the SEGUSE entry, F the struct lfs, IN the segment number used to
 * index the in-core flag table, and BP the buffer holding the entry.
 *
 * SEGUSE_EMPTY is set in SP->su_flags when su_nbytes is zero and cleared
 * otherwise; the resulting flags are then copied into
 * F->lfs_suflags[F->lfs_activesb][IN] before the buffer is handed to
 * LFS_BWRITE_LOG.  SP and F are each evaluated more than once, so do not
 * pass expressions with side effects.
 */
#define LFS_WRITESEGENTRY(SP, F, IN, BP) do { \
if ((SP)->su_nbytes == 0) \
(SP)->su_flags |= SEGUSE_EMPTY; \
else \
(SP)->su_flags &= ~SEGUSE_EMPTY; \
(F)->lfs_suflags[(F)->lfs_activesb][(IN)] = (SP)->su_flags; \
LFS_BWRITE_LOG(BP); \
} while(0)
/* Determine if a buffer belongs to the ifile */
#define IS_IFILE(bp) (VTOI(bp->b_vp)->i_number == LFS_IFILE_INUM)
@ -773,15 +842,16 @@ struct segment {
#define SEGM_CLEAN 0x02 /* cleaner call; don't sort */
#define SEGM_SYNC 0x04 /* wait for segment */
#define SEGM_PROT 0x08 /* don't inactivate at segunlock */
#define SEGM_PAGEDAEMON 0x10 /* pagedaemon called us */
u_int16_t seg_flags; /* run-time flags for this segment */
u_int32_t seg_iocount; /* number of ios pending */
int ndupino; /* number of duplicate inodes */
};
struct lfs_cluster {
size_t bufsize; /* Size of kept data */
struct buf **bpp; /* Array of kept buffers */
int bufcount; /* Number of kept buffers */
size_t bufsize; /* Size of kept data */
#define LFS_CL_MALLOC 0x00000001
#define LFS_CL_SHIFT 0x00000002
#define LFS_CL_SYNC 0x00000004
@ -789,9 +859,25 @@ struct lfs_cluster {
struct lfs *fs; /* LFS that this belongs to */
struct segment *seg; /* Segment structure, for LFS_CL_SYNC */
void *saveaddr; /* Original contents of saveaddr */
char *olddata; /* Original b_data, if LFS_CL_MALLOC */
char *olddata; /* Original b_data, if LFS_CL_MALLOC */
};
/*
 * LFS inode extensions; moved from <ufs/ufs/inode.h> so that file didn't
 * have to change every time LFS changed.
 *
 * The extension lives in a separately allocated structure reached through
 * the inode's inode_ext.lfs pointer; the i_lfs_* macros below hide that
 * indirection so they can be used like ordinary inode members.
 */
struct lfs_inode_ext {
off_t lfs_osize; /* size of file on disk */
u_int32_t lfs_effnblocks; /* number of blocks when i/o completes */
size_t lfs_fragsize[NDADDR]; /* size of on-disk direct blocks */
TAILQ_ENTRY(inode) lfs_dchain; /* Dirop chain. */
TAILQ_ENTRY(inode) lfs_pchain; /* Paging chain. */
};
/*
 * Accessors for the extension fields.  They expand to a dereference of
 * inode_ext.lfs, so that pointer must be valid before any of them is used.
 * NOTE(review): no accessor for lfs_pchain is visible here -- confirm
 * whether one is defined elsewhere or still needs to be added.
 */
#define i_lfs_osize inode_ext.lfs->lfs_osize
#define i_lfs_effnblks inode_ext.lfs->lfs_effnblocks
#define i_lfs_fragsize inode_ext.lfs->lfs_fragsize
#define i_lfs_dchain inode_ext.lfs->lfs_dchain
/*
* Macros for determining free space on the disk, with the variable metadata
* of segment summaries and inode blocks taken into account.

View File

@ -1,7 +1,7 @@
/* $NetBSD: lfs_alloc.c,v 1.62 2003/01/27 23:17:56 yamt Exp $ */
/* $NetBSD: lfs_alloc.c,v 1.63 2003/02/17 23:48:16 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.62 2003/01/27 23:17:56 yamt Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.63 2003/02/17 23:48:16 perseant Exp $");
#if defined(_KERNEL_OPT)
#include "opt_quota.h"
@ -85,7 +85,6 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.62 2003/01/27 23:17:56 yamt Exp $");
#include <sys/vnode.h>
#include <sys/syslog.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/proc.h>
@ -99,6 +98,8 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.62 2003/01/27 23:17:56 yamt Exp $");
extern int lfs_dirvcount;
extern struct lock ufs_hashlock;
extern struct simplelock lfs_subsys_lock;
extern int lfs_subsys_pages;
static int extend_ifile(struct lfs *, struct ucred *);
static int lfs_ialloc(struct lfs *, struct vnode *, ino_t, int, struct vnode **);
@ -207,6 +208,7 @@ lfs_rf_valloc(struct lfs *fs, ino_t ino, int version, struct proc *p,
(void)lfs_vunref(vp);
--lfs_dirvcount;
vp->v_flag &= ~VDIROP;
TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
--fs->lfs_nadirop;
ip->i_flag &= ~IN_ADIROP;
}
@ -245,7 +247,7 @@ extend_ifile(struct lfs *fs, struct ucred *cred)
LFS_GET_HEADFREE(fs, cip, cbp, &oldlast);
LFS_PUT_HEADFREE(fs, cip, cbp, i);
#ifdef DIAGNOSTIC
if (fs->lfs_free == LFS_UNUSED_INUM)
if (fs->lfs_freehd == LFS_UNUSED_INUM)
panic("inode 0 allocated [2]");
#endif /* DIAGNOSTIC */
max = i + fs->lfs_ifpb;
@ -300,21 +302,7 @@ lfs_valloc(void *v)
return EROFS;
*ap->a_vpp = NULL;
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_seglock(fs, SEGM_PROT);
#else
if (fs->lfs_version == 1) {
/*
* Use lfs_seglock here, instead of fs->lfs_freelock, to
* ensure that the free list is not changed in between
* the time that the ifile blocks are written to disk
* and the time that the superblock is written to disk.
*/
lfs_seglock(fs, SEGM_PROT);
} else {
lockmgr(&fs->lfs_freelock, LK_EXCLUSIVE, 0);
}
#endif
/* Get the head of the freelist. */
LFS_GET_HEADFREE(fs, cip, cbp, &new_ino);
@ -345,33 +333,20 @@ lfs_valloc(void *v)
brelse(bp);
/* Extend IFILE so that the next lfs_valloc will succeed. */
if (fs->lfs_free == LFS_UNUSED_INUM) {
if (fs->lfs_freehd == LFS_UNUSED_INUM) {
if ((error = extend_ifile(fs, ap->a_cred)) != 0) {
LFS_PUT_HEADFREE(fs, cip, cbp, new_ino);
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_segunlock(fs);
#else
if (fs->lfs_version == 1)
lfs_segunlock(fs);
else
lockmgr(&fs->lfs_freelock, LK_RELEASE, 0);
#endif
return error;
}
}
#ifdef DIAGNOSTIC
if (fs->lfs_free == LFS_UNUSED_INUM)
if (fs->lfs_freehd == LFS_UNUSED_INUM)
panic("inode 0 allocated [3]");
#endif /* DIAGNOSTIC */
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_segunlock(fs);
#else
if (fs->lfs_version == 1)
lfs_segunlock(fs);
else
lockmgr(&fs->lfs_freelock, LK_RELEASE, 0);
#endif
return lfs_ialloc(fs, ap->a_pvp, new_ino, new_gen, ap->a_vpp);
}
@ -417,17 +392,16 @@ lfs_ialloc(struct lfs *fs, struct vnode *pvp, ino_t new_ino, int new_gen,
uvm_vnp_setsize(vp, 0);
*vpp = vp;
#if 1
if (!(vp->v_flag & VDIROP)) {
(void)lfs_vref(vp);
++lfs_dirvcount;
TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain);
}
vp->v_flag |= VDIROP;
if (!(ip->i_flag & IN_ADIROP))
++fs->lfs_nadirop;
ip->i_flag |= IN_ADIROP;
#endif
genfs_node_init(vp, &lfs_genfsops);
VREF(ip->i_devvp);
/* Set superblock modified bit and increment file count. */
@ -439,17 +413,13 @@ lfs_ialloc(struct lfs *fs, struct vnode *pvp, ino_t new_ino, int new_gen,
/*
* Put the new inum back on the free list.
*/
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_seglock(fs, SEGM_PROT);
#endif
LFS_IENTRY(ifp, fs, new_ino, bp);
ifp->if_daddr = LFS_UNUSED_DADDR;
LFS_GET_HEADFREE(fs, cip, cbp, &(ifp->if_nextfree));
LFS_PUT_HEADFREE(fs, cip, cbp, new_ino);
(void) LFS_BWRITE_LOG(bp); /* Ifile */
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_segunlock(fs);
#endif
*vpp = NULLVP;
return (error);
@ -470,6 +440,7 @@ lfs_vcreate(struct mount *mp, ino_t ino, struct vnode *vp)
/* Initialize the inode. */
ip = pool_get(&lfs_inode_pool, PR_WAITOK);
ip->inode_ext.lfs = pool_get(&lfs_inoext_pool, PR_WAITOK);
vp->v_data = ip;
ip->i_vnode = vp;
ip->i_devvp = ump->um_devvp;
@ -487,8 +458,6 @@ lfs_vcreate(struct mount *mp, ino_t ino, struct vnode *vp)
ip->i_ffs_blocks = 0;
ip->i_lfs_effnblks = 0;
ip->i_flag = 0;
/* Why was IN_MODIFIED ever set here? */
/* LFS_SET_UINO(ip, IN_CHANGE | IN_MODIFIED); */
#ifdef DEBUG_LFS_VNLOCK
if (ino == LFS_IFILE_INUM)
@ -531,18 +500,12 @@ lfs_vfree(void *v)
tsleep(vp, (PRIBIO+1), "lfs_vfree", 0);
splx(s);
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_seglock(fs, SEGM_PROT); /* XXX */;
#else
if (fs->lfs_version == 1)
lfs_seglock(fs, SEGM_PROT);
else
lockmgr(&fs->lfs_freelock, LK_EXCLUSIVE, 0);
#endif
lfs_seglock(fs, SEGM_PROT);
if (vp->v_flag & VDIROP) {
--lfs_dirvcount;
vp->v_flag &= ~VDIROP;
TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
wakeup(&lfs_dirvcount);
lfs_vunref(vp);
}
@ -597,20 +560,14 @@ lfs_vfree(void *v)
}
#endif
sup->su_nbytes -= DINODE_SIZE;
(void) LFS_BWRITE_LOG(bp); /* Ifile */
LFS_WRITESEGENTRY(sup, fs, dtosn(fs, old_iaddr), bp); /* Ifile */
}
/* Set superblock modified bit and decrement file count. */
fs->lfs_fmod = 1;
--fs->lfs_nfiles;
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_segunlock(fs);
#else
if (fs->lfs_version == 1)
lfs_segunlock(fs);
else
lockmgr(&fs->lfs_freelock, LK_RELEASE, 0);
#endif
return (0);
}

View File

@ -1,7 +1,7 @@
/* $NetBSD: lfs_balloc.c,v 1.35 2003/01/24 21:55:26 fvdl Exp $ */
/* $NetBSD: lfs_balloc.c,v 1.36 2003/02/17 23:48:16 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.35 2003/01/24 21:55:26 fvdl Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.36 2003/02/17 23:48:16 perseant Exp $");
#if defined(_KERNEL_OPT)
#include "opt_quota.h"
@ -96,6 +96,10 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.35 2003/01/24 21:55:26 fvdl Exp $")
#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>
#include <uvm/uvm.h>
extern int lfs_subsys_pages;
int lfs_fragextend(struct vnode *, int, int, daddr_t, struct buf **, struct ucred *);
/*
@ -127,7 +131,7 @@ lfs_balloc(void *v)
int offset;
u_long iosize;
daddr_t daddr, idaddr;
struct buf *ibp, *bp;
struct buf *ibp, *bp, **bpp;
struct inode *ip;
struct lfs *fs;
struct indir indirs[NIADDR+2], *idp;
@ -141,8 +145,9 @@ lfs_balloc(void *v)
offset = blkoff(fs, ap->a_startoffset);
iosize = ap->a_size;
lbn = lblkno(fs, ap->a_startoffset);
(void)lfs_check(vp, lbn, 0);
/* (void)lfs_check(vp, lbn, 0); */
bpp = ap->a_bpp;
/*
* Three cases: it's a block beyond the end of file, it's a block in
* the file that may or may not have been assigned a disk address or
@ -159,7 +164,8 @@ lfs_balloc(void *v)
* to rewrite it.
*/
*ap->a_bpp = NULL;
if (bpp)
*bpp = NULL;
/* Check for block beyond end of file and fragment extension needed. */
lastblock = lblkno(fs, ip->i_ffs_size);
@ -167,13 +173,15 @@ lfs_balloc(void *v)
osize = blksize(fs, ip, lastblock);
if (osize < fs->lfs_bsize && osize > 0) {
if ((error = lfs_fragextend(vp, osize, fs->lfs_bsize,
lastblock, &bp,
lastblock,
(bpp ? &bp : NULL),
ap->a_cred)))
return (error);
ip->i_ffs_size = (lastblock + 1) * fs->lfs_bsize;
uvm_vnp_setsize(vp, ip->i_ffs_size);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
(void) VOP_BWRITE(bp);
if (bpp)
(void) VOP_BWRITE(bp);
}
}
@ -192,25 +200,30 @@ lfs_balloc(void *v)
/* Brand new block or fragment */
frags = numfrags(fs, nsize);
bb = fragstofsb(fs, frags);
*ap->a_bpp = bp = getblk(vp, lbn, nsize, 0, 0);
if (bpp) {
*ap->a_bpp = bp = getblk(vp, lbn, nsize, 0, 0);
bp->b_blkno = UNWRITTEN;
}
if (ap->a_flags & B_CLRBUF)
clrbuf(bp);
ip->i_lfs_effnblks += bb;
ip->i_lfs->lfs_bfree -= bb;
ip->i_ffs_db[lbn] = bp->b_blkno = UNWRITTEN;
ip->i_ffs_db[lbn] = UNWRITTEN;
} else {
if (nsize <= osize) {
/* No need to extend */
if ((error = bread(vp, lbn, osize, NOCRED, &bp)))
if (bpp && (error = bread(vp, lbn, osize, NOCRED, &bp)))
return error;
} else {
/* Extend existing block */
if ((error =
lfs_fragextend(vp, osize, nsize, lbn, &bp,
lfs_fragextend(vp, osize, nsize, lbn,
(bpp ? &bp : NULL),
ap->a_cred)))
return error;
}
*ap->a_bpp = bp;
if (bpp)
*bpp = bp;
}
return 0;
}
@ -279,10 +292,11 @@ lfs_balloc(void *v)
/*
* Get the existing block from the cache.
* Get the existing block from the cache, if requested.
*/
frags = fsbtofrags(fs, bb);
*ap->a_bpp = bp = getblk(vp, lbn, blksize(fs, ip, lbn), 0, 0);
if (bpp)
*bpp = bp = getblk(vp, lbn, blksize(fs, ip, lbn), 0, 0);
/*
* The block we are writing may be a brand new block
@ -293,11 +307,13 @@ lfs_balloc(void *v)
* disk address UNWRITTEN.
*/
if (daddr == UNASSIGNED) {
if (ap->a_flags & B_CLRBUF)
clrbuf(bp);
if (bpp) {
if (ap->a_flags & B_CLRBUF)
clrbuf(bp);
/* Note the new address */
bp->b_blkno = UNWRITTEN;
/* Note the new address */
bp->b_blkno = UNWRITTEN;
}
switch (num) {
case 0:
@ -316,7 +332,7 @@ lfs_balloc(void *v)
((int32_t *)ibp->b_data)[idp->in_off] = UNWRITTEN;
VOP_BWRITE(ibp);
}
} else if (!(bp->b_flags & (B_DONE|B_DELWRI))) {
} else if (bpp && !(bp->b_flags & (B_DONE|B_DELWRI))) {
/*
* Not a brand new block, also not in the cache;
* read it in from disk.
@ -356,26 +372,35 @@ lfs_fragextend(struct vnode *vp, int osize, int nsize, daddr_t lbn, struct buf *
error = 0;
/*
* Get the seglock so we don't enlarge blocks or change the segment
* accounting information while a segment is being written.
* Get the seglock so we don't enlarge blocks while a segment
* is being written. If we're called with bpp==NULL, though,
* we are only pretending to change a buffer, so we don't have to
* lock.
*/
top:
#ifdef LFS_MALLOC_SEGLOCK
lfs_seglock(fs, SEGM_PROT);
#else
lockmgr(&fs->lfs_fraglock, LK_SHARED, 0);
#endif
if (bpp) {
lockmgr(&fs->lfs_fraglock, LK_SHARED, 0);
}
if (!ISSPACE(fs, bb, cred)) {
error = ENOSPC;
goto out;
}
if ((error = bread(vp, lbn, osize, NOCRED, bpp))) {
/*
* If we are not asked to actually return the block, all we need
* to do is allocate space for it. UBC will handle dirtying the
* appropriate things and making sure it all goes to disk.
* Don't bother to read in that case.
*/
if (bpp && (error = bread(vp, lbn, osize, NOCRED, bpp))) {
brelse(*bpp);
goto out;
}
#ifdef QUOTA
if ((error = chkdq(ip, bb, cred, 0))) {
brelse(*bpp);
if (bpp)
brelse(*bpp);
goto out;
}
#endif
@ -386,17 +411,14 @@ lfs_fragextend(struct vnode *vp, int osize, int nsize, daddr_t lbn, struct buf *
* release both and start over after waiting.
*/
if ((*bpp)->b_flags & B_DELWRI) {
if (bpp && ((*bpp)->b_flags & B_DELWRI)) {
if (!lfs_fits(fs, bb)) {
brelse(*bpp);
if (bpp)
brelse(*bpp);
#ifdef QUOTA
chkdq(ip, -bb, cred, 0);
#endif
#ifdef LFS_FRAGSIZE_SEGLOCK
lfs_segunlock(fs);
#else
lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0);
#endif
lfs_availwait(fs, bb);
goto top;
}
@ -407,24 +429,24 @@ lfs_fragextend(struct vnode *vp, int osize, int nsize, daddr_t lbn, struct buf *
ip->i_lfs_effnblks += bb;
ip->i_flag |= IN_CHANGE | IN_UPDATE;
LFS_DEBUG_COUNTLOCKED("frag1");
if (bpp) {
LFS_DEBUG_COUNTLOCKED("frag1");
obufsize = (*bpp)->b_bufsize;
allocbuf(*bpp, nsize);
obufsize = (*bpp)->b_bufsize;
allocbuf(*bpp, nsize);
/* Adjust locked-list accounting */
if (((*bpp)->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED)
locked_queue_bytes += (*bpp)->b_bufsize - obufsize;
/* Adjust locked-list accounting */
if (((*bpp)->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED)
locked_queue_bytes += (*bpp)->b_bufsize - obufsize;
LFS_DEBUG_COUNTLOCKED("frag2");
LFS_DEBUG_COUNTLOCKED("frag2");
bzero((char *)((*bpp)->b_data) + osize, (u_int)(nsize - osize));
bzero((char *)((*bpp)->b_data) + osize, (u_int)(nsize - osize));
}
out:
#ifdef LFS_FRAGSIZE_SEGLOCK
lfs_segunlock(fs);
#else
lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0);
#endif
if (bpp) {
lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0);
}
return (error);
}

View File

@ -1,7 +1,7 @@
/* $NetBSD: lfs_bio.c,v 1.57 2003/02/05 21:38:45 pk Exp $ */
/* $NetBSD: lfs_bio.c,v 1.58 2003/02/17 23:48:17 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.57 2003/02/05 21:38:45 pk Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.58 2003/02/17 23:48:17 perseant Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -86,10 +86,11 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.57 2003/02/05 21:38:45 pk Exp $");
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
#include <sys/malloc.h>
#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>
#include <uvm/uvm.h>
/* Macros to clear/set/test flags. */
# define SET(t, f) (t) |= (f)
# define CLR(t, f) (t) &= ~(f)
@ -102,11 +103,14 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.57 2003/02/05 21:38:45 pk Exp $");
* No write cost accounting is done.
* This is almost certainly wrong for synchronous operations and NFS.
*/
int locked_queue_count = 0; /* XXX Count of locked-down buffers. */
long locked_queue_bytes = 0L; /* XXX Total size of locked buffers. */
int locked_queue_count = 0; /* Count of locked-down buffers. */
long locked_queue_bytes = 0L; /* Total size of locked buffers. */
int lfs_subsys_pages = 0L; /* Total number LFS-written pages */
int lfs_writing = 0; /* Set if already kicked off a writer
because of buffer space */
struct simplelock lfs_subsys_lock; /* Lock on subsys_pages */
extern int lfs_dostats;
extern int lfs_do_flush;
/*
* reserved number/bytes of locked buffers
@ -402,7 +406,7 @@ lfs_bwrite_ext(struct buf *bp, int flags)
int fsb, s;
KASSERT(bp->b_flags & B_BUSY);
KASSERT(flags & BW_CLEAN || !(bp->b_flags & B_CALL));
KASSERT(flags & BW_CLEAN || !LFS_IS_MALLOC_BUF(bp));
/*
* Don't write *any* blocks if we're mounted read-only.
@ -411,7 +415,7 @@ lfs_bwrite_ext(struct buf *bp, int flags)
if (VTOI(bp->b_vp)->i_lfs->lfs_ronly) {
bp->b_flags &= ~(B_DELWRI | B_READ | B_ERROR);
LFS_UNLOCK_BUF(bp);
if (bp->b_flags & B_CALL)
if (LFS_IS_MALLOC_BUF(bp))
bp->b_flags &= ~B_BUSY;
else
brelse(bp);
@ -465,28 +469,26 @@ lfs_bwrite_ext(struct buf *bp, int flags)
void
lfs_flush_fs(struct lfs *fs, int flags)
{
if (fs->lfs_ronly == 0 && fs->lfs_dirops == 0)
{
/* disallow dirops during flush */
fs->lfs_writer++;
if (fs->lfs_ronly)
return;
/*
* We set the queue to 0 here because we
* are about to write all the dirty
* buffers we have. If more come in
* while we're writing the segment, they
* may not get written, so we want the
* count to reflect these new writes
* after the segwrite completes.
*/
if (lfs_dostats)
++lfs_stats.flush_invoked;
lfs_segwrite(fs->lfs_ivnode->v_mount, flags);
/* disallow dirops during flush */
fs->lfs_writer++;
/* XXX KS - allow dirops again */
if (--fs->lfs_writer == 0)
wakeup(&fs->lfs_dirops);
/* drain dirops */
while (fs->lfs_dirops > 0) {
++fs->lfs_diropwait;
tsleep(&fs->lfs_writer, PRIBIO+1, "fldirop", 0);
--fs->lfs_diropwait;
}
if (lfs_dostats)
++lfs_stats.flush_invoked;
lfs_segwrite(fs->lfs_ivnode->v_mount, flags);
/* allow dirops again */
if (--fs->lfs_writer == 0)
wakeup(&fs->lfs_dirops);
}
/*
@ -512,6 +514,9 @@ lfs_flush(struct lfs *fs, int flags)
}
lfs_writing = 1;
lfs_subsys_pages = 0; /* XXXUBC need a better way to count this */
wakeup(&lfs_subsys_pages);
simple_lock(&mountlist_slock);
for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
@ -525,7 +530,6 @@ lfs_flush(struct lfs *fs, int flags)
vfs_unbusy(mp);
}
simple_unlock(&mountlist_slock);
LFS_DEBUG_COUNTLOCKED("flush");
lfs_writing = 0;
@ -562,25 +566,40 @@ lfs_check(struct vnode *vp, daddr_t blkno, int flags)
while (fs->lfs_dirops > 0 &&
(locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS ||
locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES ||
lfs_dirvcount > LFS_MAXDIROP || fs->lfs_diropwait > 0))
lfs_subsys_pages > LFS_MAX_PAGES ||
lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0))
{
++fs->lfs_diropwait;
tsleep(&fs->lfs_writer, PRIBIO+1, "bufdirop", 0);
--fs->lfs_diropwait;
}
#ifdef DEBUG_LFS_FLUSH
if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS)
printf("lqc = %d, max %d\n", locked_queue_count + INOCOUNT(fs),
LFS_MAX_BUFS);
if (locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES)
printf("lqb = %ld, max %d\n", locked_queue_bytes + INOBYTES(fs),
LFS_MAX_BYTES);
if (lfs_subsys_pages > LFS_MAX_PAGES)
printf("lssp = %d, max %d\n", lfs_subsys_pages, LFS_MAX_PAGES);
if (lfs_dirvcount > LFS_MAX_DIROP)
printf("ldvc = %d, max %d\n", lfs_dirvcount, LFS_MAX_DIROP);
if (fs->lfs_diropwait > 0)
printf("ldvw = %d\n", fs->lfs_diropwait);
#endif
if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS ||
locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES ||
lfs_dirvcount > LFS_MAXDIROP || fs->lfs_diropwait > 0)
lfs_subsys_pages > LFS_MAX_PAGES ||
lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0)
{
++fs->lfs_writer;
lfs_flush(fs, flags);
if (--fs->lfs_writer == 0)
wakeup(&fs->lfs_dirops);
}
while (locked_queue_count + INOCOUNT(fs) > LFS_WAIT_BUFS
|| locked_queue_bytes + INOBYTES(fs) > LFS_WAIT_BYTES)
while (locked_queue_count + INOCOUNT(fs) > LFS_WAIT_BUFS ||
locked_queue_bytes + INOBYTES(fs) > LFS_WAIT_BYTES ||
lfs_subsys_pages > LFS_WAIT_PAGES ||
lfs_dirvcount > LFS_MAX_DIROP)
{
if (lfs_dostats)
++lfs_stats.wait_exceeded;
@ -601,10 +620,7 @@ lfs_check(struct vnode *vp, daddr_t blkno, int flags)
if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS ||
locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES)
{
++fs->lfs_writer;
lfs_flush(fs, flags | SEGM_CKP);
if (--fs->lfs_writer == 0)
wakeup(&fs->lfs_dirops);
}
}
return (error);
@ -613,15 +629,8 @@ lfs_check(struct vnode *vp, daddr_t blkno, int flags)
/*
* Allocate a new buffer header.
*/
#ifdef MALLOCLOG
# define DOMALLOC(S, T, F) _malloc((S), (T), (F), file, line)
struct buf *
lfs_newbuf_malloclog(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size, char *file, int line)
#else
# define DOMALLOC(S, T, F) malloc((S), (T), (F))
struct buf *
lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size)
#endif
lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size, int type)
{
struct buf *bp;
size_t nbytes;
@ -629,11 +638,13 @@ lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size)
nbytes = roundup(size, fsbtob(fs, 1));
bp = DOMALLOC(sizeof(struct buf), M_SEGMENT, M_WAITOK);
bzero(bp, sizeof(struct buf));
s = splbio();
bp = pool_get(&bufpool, PR_WAITOK);
splx(s);
memset(bp, 0, sizeof(struct buf));
if (nbytes) {
bp->b_data = DOMALLOC(nbytes, M_SEGMENT, M_WAITOK);
bzero(bp->b_data, nbytes);
bp->b_data = lfs_malloc(fs, nbytes, type);
/* memset(bp->b_data, 0, nbytes); */
}
#ifdef DIAGNOSTIC
if (vp == NULL)
@ -659,27 +670,20 @@ lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size)
return (bp);
}
#ifdef MALLOCLOG
# define DOFREE(A, T) _free((A), (T), file, line)
void
lfs_freebuf_malloclog(struct buf *bp, char *file, int line)
#else
# define DOFREE(A, T) free((A), (T))
void
lfs_freebuf(struct buf *bp)
#endif
lfs_freebuf(struct lfs *fs, struct buf *bp)
{
int s;
s = splbio();
if (bp->b_vp)
brelvp(bp);
splx(s);
if (!(bp->b_flags & B_INVAL)) { /* B_INVAL indicates a "fake" buffer */
DOFREE(bp->b_data, M_SEGMENT);
lfs_free(fs, bp->b_data, LFS_NB_UNKNOWN);
bp->b_data = NULL;
}
DOFREE(bp, M_SEGMENT);
pool_put(&bufpool, bp);
splx(s);
}
/*
@ -707,7 +711,7 @@ lfs_countlocked(int *count, long *bytes, char *msg)
for (bp = bufqueues[BQ_LOCKED].tqh_first; bp;
bp = bp->b_freelist.tqe_next) {
if (bp->b_flags & B_CALL) /* Malloced buffer */
if (bp->b_flags & B_CALL)
continue;
n++;
size += bp->b_bufsize;

View File

@ -1,7 +1,7 @@
/* $NetBSD: lfs_cksum.c,v 1.20 2002/06/16 00:13:15 perseant Exp $ */
/* $NetBSD: lfs_cksum.c,v 1.21 2003/02/17 23:48:18 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
* Copyright (c) 1999, 2000, 2001, 2002 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_cksum.c,v 1.20 2002/06/16 00:13:15 perseant Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_cksum.c,v 1.21 2003/02/17 23:48:18 perseant Exp $");
#include <sys/param.h>
#ifdef _KERNEL

View File

@ -1,7 +1,7 @@
/* $NetBSD: lfs_debug.c,v 1.19 2003/01/29 13:14:34 yamt Exp $ */
/* $NetBSD: lfs_debug.c,v 1.20 2003/02/17 23:48:18 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@ -73,7 +73,7 @@
#ifdef DEBUG
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_debug.c,v 1.19 2003/01/29 13:14:34 yamt Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_debug.c,v 1.20 2003/02/17 23:48:18 perseant Exp $");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
@ -167,7 +167,7 @@ lfs_dump_super(struct lfs *lfsp)
printf("Checkpoint Info\n");
printf("%s%d\t%s%x\t%s%d\n",
"free ", lfsp->lfs_free,
"freehd ", lfsp->lfs_freehd,
"idaddr ", lfsp->lfs_idaddr,
"ifile ", lfsp->lfs_ifile);
printf("%s%x\t%s%d\t%s%x\t%s%x\t%s%x\t%s%x\n",

View File

@ -1,7 +1,7 @@
/* $NetBSD: lfs_extern.h,v 1.38 2003/02/01 18:34:14 tron Exp $ */
/* $NetBSD: lfs_extern.h,v 1.39 2003/02/17 23:48:18 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@ -88,7 +88,7 @@ MALLOC_DECLARE(M_SEGMENT);
#define LFS_WRITEINDIR 1 /* flush indirect blocks on non-checkpoint writes */
#define LFS_CLEAN_VNHEAD 2 /* put prev unrefed cleaned vnodes on head of free list */
#define LFS_DOSTATS 3
#define LFS_STATS 4
#define LFS_MAXPAGES 4
#define LFS_MAXID 5
#define LFS_NAMES { \
@ -96,7 +96,7 @@ MALLOC_DECLARE(M_SEGMENT);
{ "flushindir", CTLTYPE_INT }, \
{ "clean_vnhead", CTLTYPE_INT }, \
{ "dostats", CTLTYPE_INT }, \
{ "stats", CTLTYPE_STRUCT }, \
{ "maxpages", CTLTYPE_INT }, \
}
struct fid;
@ -117,7 +117,8 @@ struct segment;
struct ucred;
extern int lfs_allclean_wakeup;
extern struct pool lfs_inode_pool; /* memory pool for inodes */
extern struct pool lfs_inode_pool; /* memory pool for inodes */
extern struct pool lfs_inoext_pool; /* memory pool for inode extension */
__BEGIN_DECLS
/* lfs_alloc.c */
@ -130,16 +131,8 @@ int lfs_fits(struct lfs *, int);
void lfs_flush_fs(struct lfs *, int);
void lfs_flush(struct lfs *, int);
int lfs_check(struct vnode *, daddr_t, int);
#ifdef MALLOCLOG
void lfs_freebuf_malloclog(struct buf *, char *, int);
struct buf *lfs_newbuf_malloclog(struct lfs *, struct vnode *,
daddr_t, size_t, char *, int);
#define lfs_freebuf(BP) lfs_freebuf_malloclog((BP), __FILE__, __LINE__)
#define lfs_newbuf(F, V, A, S) lfs_newbuf_malloclog((F),(V),(A),(S),__FILE__,__LINE__)
#else
void lfs_freebuf(struct buf *);
struct buf *lfs_newbuf(struct lfs *, struct vnode *, daddr_t, size_t);
#endif
void lfs_freebuf(struct lfs *, struct buf *);
struct buf *lfs_newbuf(struct lfs *, struct vnode *, daddr_t, size_t, int);
void lfs_countlocked(int *, long *, char *);
int lfs_reserve(struct lfs *, struct vnode *, struct vnode *, int);
@ -169,6 +162,7 @@ void lfs_writefile(struct lfs *, struct segment *, struct vnode *);
int lfs_writeinode(struct lfs *, struct segment *, struct inode *);
int lfs_gatherblock(struct segment *, struct buf *, int *);
int lfs_gather(struct lfs *, struct segment *, struct vnode *, int (*match )(struct lfs *, struct buf *));
void lfs_update_single(struct lfs *, struct segment *, daddr_t, int32_t, int, int);
void lfs_updatemeta(struct segment *);
int lfs_initseg(struct lfs *);
void lfs_newseg(struct lfs *);
@ -187,12 +181,17 @@ void lfs_vunref(struct vnode *);
void lfs_vunref_head(struct vnode *);
/* lfs_subr.c */
void lfs_seglock(struct lfs *, unsigned long);
void lfs_setup_resblks(struct lfs *);
void lfs_free_resblks(struct lfs *);
void *lfs_malloc(struct lfs *, size_t, int);
void lfs_free(struct lfs *, void *, int);
int lfs_seglock(struct lfs *, unsigned long);
void lfs_segunlock(struct lfs *);
/* lfs_syscalls.c */
int lfs_fastvget(struct mount *, ino_t, daddr_t, struct vnode **, struct dinode *);
struct buf *lfs_fakebuf(struct lfs *, struct vnode *, int, size_t, caddr_t);
int lfs_do_segclean(struct lfs *, unsigned long);
/* lfs_vfsops.c */
void lfs_init(void);
@ -200,7 +199,6 @@ void lfs_reinit(void);
void lfs_done(void);
int lfs_mountroot(void);
int lfs_mount(struct mount *, const char *, void *, struct nameidata *, struct proc *);
int lfs_mountfs(struct vnode *, struct mount *, struct proc *);
int lfs_unmount(struct mount *, int, struct proc *);
int lfs_statfs(struct mount *, struct statfs *, struct proc *);
int lfs_sync(struct mount *, int, struct ucred *, struct proc *);
@ -213,6 +211,10 @@ int lfs_sysctl(int *, u_int, void *, size_t *, void *, size_t, struct proc *);
void lfs_unmark_vnode(struct vnode *);
void lfs_itimes(struct inode *, struct timespec *, struct timespec *,
struct timespec *);
int lfs_gop_alloc(struct vnode *, off_t, off_t, int, struct ucred *);
void lfs_gop_size(struct vnode *, off_t, off_t *, int);
int lfs_putpages_ext(void *, int);
int lfs_gatherpages(struct vnode *);
int lfs_balloc (void *);
int lfs_valloc (void *);
@ -230,6 +232,7 @@ int lfs_read (void *);
int lfs_remove (void *);
int lfs_rmdir (void *);
int lfs_link (void *);
int lfs_mmap (void *);
int lfs_rename (void *);
int lfs_getattr (void *);
int lfs_setattr (void *);

View File

@ -1,7 +1,7 @@
/* $NetBSD: lfs_inode.c,v 1.63 2003/01/25 16:40:29 fvdl Exp $ */
/* $NetBSD: lfs_inode.c,v 1.64 2003/02/17 23:48:18 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.63 2003/01/25 16:40:29 fvdl Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.64 2003/02/17 23:48:18 perseant Exp $");
#if defined(_KERNEL_OPT)
#include "opt_quota.h"
@ -231,6 +231,9 @@ lfs_truncate(void *v)
struct proc *a_p;
} */ *ap = v;
struct vnode *ovp = ap->a_vp;
#ifdef LFS_UBC
struct genfs_node *gp = VTOG(ovp);
#endif
daddr_t lastblock;
struct inode *oip;
daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
@ -247,6 +250,7 @@ lfs_truncate(void *v)
long lastseg;
size_t bc;
int obufsize, odb;
int usepc, needunlock;
if (length < 0)
return (EINVAL);
@ -282,6 +286,10 @@ lfs_truncate(void *v)
fs = oip->i_lfs;
lfs_imtime(fs);
osize = oip->i_ffs_size;
needunlock = usepc = 0;
#ifdef LFS_UBC
usepc = (ovp->v_type == VREG && osize > length && ovp != fs->lfs_ivnode);
#endif
/*
* Lengthen the size of the file. We must ensure that the
@ -313,18 +321,7 @@ lfs_truncate(void *v)
if ((error = lfs_reserve(fs, ovp, NULL,
btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift))) != 0)
return (error);
/*
* Make sure no writes to this inode can happen while we're
* truncating. Otherwise, blocks which are accounted for on the
* inode *and* which have been created for cleaning can coexist,
* and cause an overcounting.
*/
#ifdef LFS_FRAGSIZE_SEGLOCK
lfs_seglock(fs, SEGM_PROT);
#else
lockmgr(&fs->lfs_fraglock, LK_SHARED, 0);
#endif
/*
* Shorten the size of the file. If the file is not being
* truncated to a block boundary, the contents of the
@ -338,7 +335,12 @@ lfs_truncate(void *v)
bc = 0;
if (offset == 0) {
oip->i_ffs_size = length;
} else {
} else
#ifdef LFS_UBC
if (!usepc)
#endif
{
lockmgr(&fs->lfs_fraglock, LK_SHARED, 0);
lbn = lblkno(fs, length);
aflags = B_CLRBUF;
if (ap->a_flags & IO_SYNC)
@ -347,11 +349,7 @@ lfs_truncate(void *v)
if (error) {
lfs_reserve(fs, ovp, NULL,
-btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift));
#ifdef LFS_FRAGSIZE_SEGLOCK
lfs_segunlock(fs);
#else
lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0);
#endif
return (error);
}
obufsize = bp->b_bufsize;
@ -367,7 +365,45 @@ lfs_truncate(void *v)
if (bp->b_flags & B_DELWRI)
fs->lfs_avail += odb - btofsb(fs, size);
(void) VOP_BWRITE(bp);
lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0);
}
#ifdef LFS_UBC
/*
* When truncating a regular file down to a non-block-aligned size,
* we must zero the part of last block which is past the new EOF.
* We must synchronously flush the zeroed pages to disk
* since the new pages will be invalidated as soon as we
* inform the VM system of the new, smaller size.
* We must do this before acquiring the GLOCK, since fetching
* the pages will acquire the GLOCK internally.
* So there is a window where another thread could see a whole
* zeroed page past EOF, but that's life.
*/
else { /* vp->v_type == VREG && length < osize && offset != 0 */
voff_t eoz;
aflags = ap->a_flags & IO_SYNC ? B_SYNC : 0;
error = ufs_balloc_range(ovp, length - 1, 1, ap->a_cred,
aflags);
if (error) {
return error;
}
size = blksize(fs, oip, lblkno(fs, length));
eoz = MIN(lblktosize(fs, lblkno(fs, length)) + size, osize);
uvm_vnp_zerorange(ovp, length, eoz - length);
simple_lock(&ovp->v_interlock);
error = VOP_PUTPAGES(ovp, trunc_page(length), round_page(eoz),
PGO_CLEANIT | PGO_DEACTIVATE | PGO_SYNCIO);
if (error) {
return error;
}
}
lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL);
#endif
oip->i_ffs_size = length;
uvm_vnp_setsize(ovp, length);
/*
* Calculate index into inode's block list of
@ -428,6 +464,10 @@ lfs_truncate(void *v)
goto done;
}
if (!usepc) {
lockmgr(&fs->lfs_fraglock, LK_SHARED, 0);
needunlock = 1;
}
/*
* All whole direct blocks or frags.
*/
@ -516,10 +556,10 @@ done:
#endif
lfs_reserve(fs, ovp, NULL,
-btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift));
#ifdef LFS_FRAGSIZE_SEGLOCK
lfs_segunlock(fs);
#else
lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0);
if (needunlock)
lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0);
#ifdef LFS_UBC
lockmgr(&gp->g_glock, LK_RELEASE, NULL);
#endif
return (allerror);
}
@ -550,7 +590,6 @@ lfs_update_seguse(struct lfs *fs, long lastseg, size_t num)
{
SEGUSE *sup;
struct buf *bp;
int error;
if (lastseg < 0 || num == 0)
return 0;
@ -563,8 +602,9 @@ lfs_update_seguse(struct lfs *fs, long lastseg, size_t num)
sup->su_nbytes = num;
}
sup->su_nbytes -= num;
error = LFS_BWRITE_LOG(bp); /* Ifile */
return error;
LFS_WRITESEGENTRY(sup, fs, lastseg, bp);
return 0;
}
/*
@ -707,6 +747,8 @@ lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn,
/*
* Destroy any in core blocks past the truncation length.
* Inlined from vtruncbuf, so that lfs_avail could be updated.
* We take the fraglock to prevent cleaning from occurring while we are
* invalidating blocks.
*/
static int
lfs_vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo)
@ -714,10 +756,19 @@ lfs_vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo)
struct buf *bp, *nbp;
int s, error;
struct lfs *fs;
voff_t off;
off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
simple_lock(&vp->v_interlock);
error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
if (error) {
return error;
}
fs = VTOI(vp)->i_lfs;
s = splbio();
lockmgr(&fs->lfs_fraglock, LK_SHARED, 0);
restart:
for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
nbp = LIST_NEXT(bp, b_vnbufs);
@ -729,6 +780,7 @@ restart:
"lfs_vtruncbuf", slptimeo);
if (error) {
splx(s);
lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0);
return (error);
}
goto restart;
@ -753,6 +805,7 @@ restart:
"lfs_vtruncbuf", slptimeo);
if (error) {
splx(s);
lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0);
return (error);
}
goto restart;
@ -768,6 +821,7 @@ restart:
}
splx(s);
lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0);
return (0);
}

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,7 @@
/* $NetBSD: lfs_subr.c,v 1.30 2003/01/29 13:14:35 yamt Exp $ */
/* $NetBSD: lfs_subr.c,v 1.31 2003/02/17 23:48:20 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.30 2003/01/29 13:14:35 yamt Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.31 2003/02/17 23:48:20 perseant Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -86,6 +86,8 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.30 2003/01/29 13:14:35 yamt Exp $");
#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>
#include <uvm/uvm.h>
/*
* Return buffer with the contents of block "offset" from the beginning of
* directory "ip". If "res" is non-zero, fill it in with a pointer to the
@ -122,12 +124,177 @@ lfs_blkatoff(void *v)
return (0);
}
#ifdef LFS_DEBUG_MALLOC
/*
 * Printable name of each reserve-block type, indexed by the same "type"
 * value that is passed to lfs_malloc().  Only used by the debugging
 * printfs in lfs_malloc().
 */
char *lfs_res_names[LFS_NB_COUNT] = {
"summary",
"superblock",
"ifile block",
"cluster",
"clean",
};
#endif
/*
 * Number of reserve blocks set aside for each type, indexed by type.
 * lfs_malloc() sums the entries that precede a type to find where that
 * type's reserves start in fs->lfs_resblk, so the order here has to match
 * the order in which lfs_setup_resblks() fills that array (summaries,
 * superblocks, ifile blocks, clusters, cleaner blocks).
 */
int lfs_res_qty[LFS_NB_COUNT] = {
LFS_N_SUMMARIES,
LFS_N_SBLOCKS,
LFS_N_IBLOCKS,
LFS_N_CLUSTERS,
LFS_N_CLEAN,
};
void
lfs_setup_resblks(struct lfs *fs)
{
int i, j;
int maxbpp;
fs->lfs_resblk = (res_t *)malloc(LFS_N_TOTAL * sizeof(res_t), M_SEGMENT,
M_WAITOK);
for (i = 0; i < LFS_N_TOTAL; i++) {
fs->lfs_resblk[i].inuse = 0;
fs->lfs_resblk[i].p = NULL;
}
for (i = 0; i < LFS_RESHASH_WIDTH; i++)
LIST_INIT(fs->lfs_reshash + i);
/*
* These types of allocations can be larger than a page,
* so we can't use the pool subsystem for them.
*/
for (i = 0, j = 0; j < LFS_N_SUMMARIES; j++, i++)
fs->lfs_resblk[i].p = malloc(fs->lfs_sumsize, M_SEGMENT,
M_WAITOK);
for (j = 0; j < LFS_N_SBLOCKS; j++, i++)
fs->lfs_resblk[i].p = malloc(LFS_SBPAD, M_SEGMENT, M_WAITOK);
for (j = 0; j < LFS_N_IBLOCKS; j++, i++)
fs->lfs_resblk[i].p = malloc(fs->lfs_bsize, M_SEGMENT, M_WAITOK);
for (j = 0; j < LFS_N_CLUSTERS; j++, i++)
fs->lfs_resblk[i].p = malloc(MAXPHYS, M_SEGMENT, M_WAITOK);
for (j = 0; j < LFS_N_CLEAN; j++, i++)
fs->lfs_resblk[i].p = malloc(MAXPHYS, M_SEGMENT, M_WAITOK);
/*
* Initialize pools for small types (XXX is BPP small?)
*/
maxbpp = ((fs->lfs_sumsize - SEGSUM_SIZE(fs)) / sizeof(int32_t) + 2);
maxbpp = MIN(maxbpp, fs->lfs_ssize / fs->lfs_fsize + 2);
pool_init(&fs->lfs_bpppool, maxbpp * sizeof(struct buf *), 0, 0,
LFS_N_BPP, "lfsbpppl", &pool_allocator_nointr);
pool_init(&fs->lfs_clpool, sizeof(struct lfs_cluster), 0, 0,
LFS_N_CL, "lfsclpl", &pool_allocator_nointr);
pool_init(&fs->lfs_segpool, sizeof(struct segment), 0, 0,
LFS_N_SEG, "lfssegpool", &pool_allocator_nointr);
}
void
lfs_free_resblks(struct lfs *fs)
{
int i;
pool_destroy(&fs->lfs_bpppool);
pool_destroy(&fs->lfs_segpool);
pool_destroy(&fs->lfs_clpool);
for (i = 0; i < LFS_N_TOTAL; i++) {
while(fs->lfs_resblk[i].inuse)
tsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0);
if (fs->lfs_resblk[i].p != NULL)
free(fs->lfs_resblk[i].p, M_SEGMENT);
}
free(fs->lfs_resblk, M_SEGMENT);
}
/*
 * Hash a memory address into a bucket index for fs->lfs_reshash:
 * drop the two low-order bits of the address, truncate to unsigned int,
 * and reduce modulo LFS_RESHASH_WIDTH.
 */
static unsigned int
lfs_mhash(void *vp)
{
	unsigned long addr = (unsigned long)vp;

	return (unsigned int)(addr >> 2) % LFS_RESHASH_WIDTH;
}
/*
 * Return memory of the given size for the given purpose, or use one of a
 * number of spare last-resort buffers, if malloc returns NULL.
 *
 *	fs	file system whose reserve blocks may be used
 *	size	number of bytes wanted
 *	type	reserve-block type; indexes lfs_res_qty[]
 *
 * Never returns NULL: if the type has no reserves this is a plain
 * malloc(M_WAITOK); otherwise it sleeps until a reserve block of the
 * requested type is released by lfs_free().  The result must be returned
 * with lfs_free(), which tells reserve blocks from malloc'ed memory.
 *
 * NOTE(review): a reserve block is handed out without looking at "size";
 * presumably callers never ask for more than the per-type size allocated
 * in lfs_setup_resblks() -- confirm.
 */
void *
lfs_malloc(struct lfs *fs, size_t size, int type)
{
	struct lfs_res_blk *re;
	void *r;
	int i, s, start;
	unsigned int h;

	/* If no mem allocated for this type, it just waits */
	if (lfs_res_qty[type] == 0)
		return malloc(size, M_SEGMENT, M_WAITOK);

	/* Otherwise try a quick malloc, and if it works, great */
	if ((r = malloc(size, M_SEGMENT, M_NOWAIT)) != NULL)
		return r;

	/*
	 * If malloc returned NULL, we are forced to use one of our
	 * reserve blocks.  We have on hand at least one summary block,
	 * at least one cluster block, at least one superblock,
	 * and several indirect blocks.
	 */
	/* skip over blocks of other types */
	for (i = 0, start = 0; i < type; i++)
		start += lfs_res_qty[i];

	/*
	 * lfs_free() marks a block free and issues its wakeup() at splbio.
	 * Stay at splbio from the scan through the tsleep() so that a
	 * release arriving after an unsuccessful scan cannot slip in before
	 * we are asleep and leave us waiting for a wakeup that already
	 * happened.
	 */
	s = splbio();
	while (r == NULL) {
		for (i = 0; i < lfs_res_qty[type]; i++) {
			re = fs->lfs_resblk + start + i;
			if (re->inuse)
				continue;

			/* Claim it and make it findable by lfs_free(). */
			re->inuse = 1;
			r = re->p;
			h = lfs_mhash(r);
			LIST_INSERT_HEAD(&fs->lfs_reshash[h], re, res);
			splx(s);
			return r;
		}
#ifdef LFS_DEBUG_MALLOC
		printf("sleeping on %s (%d)\n", lfs_res_names[type], lfs_res_qty[type]);
#endif
		tsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0);
#ifdef LFS_DEBUG_MALLOC
		printf("done sleeping on %s\n", lfs_res_names[type]);
#endif
	}
	/* NOTREACHED */
	splx(s);
	return r;
}
void
lfs_free(struct lfs *fs, void *p, int type)
{
int s;
unsigned int h;
res_t *re;
h = lfs_mhash(p);
s = splbio();
LIST_FOREACH(re, &fs->lfs_reshash[h], res) {
if (re->p == p) {
LIST_REMOVE(re, res);
re->inuse = 0;
wakeup(&fs->lfs_resblk);
splx(s);
return;
}
}
splx(s);
/*
* If we didn't find it, free it.
*/
free(p, M_SEGMENT);
}
/*
* lfs_seglock --
* Single thread the segment writer.
*/
void
int
lfs_seglock(struct lfs *fs, unsigned long flags)
{
struct segment *sp;
@ -136,8 +303,10 @@ lfs_seglock(struct lfs *fs, unsigned long flags)
if (fs->lfs_lockpid == curproc->p_pid) {
++fs->lfs_seglock;
fs->lfs_sp->seg_flags |= flags;
return;
} else while (fs->lfs_seglock)
return 0;
} else if (flags & SEGM_PAGEDAEMON)
return EWOULDBLOCK;
else while (fs->lfs_seglock)
(void)tsleep(&fs->lfs_seglock, PRIBIO + 1,
"lfs seglock", 0);
}
@ -148,10 +317,8 @@ lfs_seglock(struct lfs *fs, unsigned long flags)
/* Drain fragment size changes out */
lockmgr(&fs->lfs_fraglock, LK_EXCLUSIVE, 0);
sp = fs->lfs_sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
sp->bpp = malloc(((fs->lfs_sumsize - SEGSUM_SIZE(fs)) /
sizeof(int32_t) + 1) * sizeof(struct buf *),
M_SEGMENT, M_WAITOK);
sp = fs->lfs_sp = pool_get(&fs->lfs_segpool, PR_WAITOK);
sp->bpp = pool_get(&fs->lfs_bpppool, PR_WAITOK);
sp->seg_flags = flags;
sp->vp = NULL;
sp->seg_iocount = 0;
@ -164,8 +331,70 @@ lfs_seglock(struct lfs *fs, unsigned long flags)
* the writes we intend to do.
*/
++fs->lfs_iocount;
return 0;
}
static void lfs_unmark_dirop(struct lfs *);

/*
 * Walk the file system's dirop chain (fs->lfs_dchainhd) and take off it
 * every inode that no longer has IN_ADIROP set: the global dirop vnode
 * count is decremented, VDIROP is cleared, sleepers on lfs_dirvcount are
 * woken and the vnode is vrele'd.  Vnodes that are locked by some other
 * process are left alone.
 */
static void
lfs_unmark_dirop(struct lfs *fs)
{
	extern int lfs_dirvcount;
	struct inode *cur, *next;
	struct vnode *vp;
	int locked_by_other;

	for (cur = TAILQ_FIRST(&fs->lfs_dchainhd); cur != NULL; cur = next) {
		/* Fetch the successor first; cur may leave the chain below. */
		next = TAILQ_NEXT(cur, i_lfs_dchain);
		vp = ITOV(cur);

		locked_by_other = VOP_ISLOCKED(vp) &&
		    vp->v_lock.lk_lockholder != curproc->p_pid;
		if (locked_by_other)
			continue;

		if (VTOI(vp)->i_flag & IN_ADIROP)
			continue;

		--lfs_dirvcount;
		vp->v_flag &= ~VDIROP;
		TAILQ_REMOVE(&fs->lfs_dchainhd, cur, i_lfs_dchain);
		wakeup(&lfs_dirvcount);

		/* lfs_unlockvp names the vnode for the duration of vrele(). */
		fs->lfs_unlockvp = vp;
		vrele(vp);
		fs->lfs_unlockvp = NULL;
	}
}
#ifndef LFS_NO_AUTO_SEGCLEAN
static void
lfs_auto_segclean(struct lfs *fs)
{
int i, error;
/*
* Now that we've swapped lfs_activesb, but while we still
* hold the segment lock, run through the segment list marking
* the empty ones clean.
* XXX - do we really need to do them all at once?
*/
for (i = 0; i < fs->lfs_nseg; i++) {
if ((fs->lfs_suflags[0][i] &
(SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) ==
(SEGUSE_DIRTY | SEGUSE_EMPTY) &&
(fs->lfs_suflags[1][i] &
(SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) ==
(SEGUSE_DIRTY | SEGUSE_EMPTY)) {
if ((error = lfs_do_segclean(fs, i)) != 0) {
#ifdef DEBUG
printf("lfs_auto_segclean: lfs_do_segclean returned %d for seg %d\n", error, i);
#endif /* DEBUG */
}
}
fs->lfs_suflags[1 - fs->lfs_activesb][i] =
fs->lfs_suflags[fs->lfs_activesb][i];
}
}
#endif /* LFS_AUTO_SEGCLEAN */
/*
* lfs_segunlock --
* Single thread the segment writer.
@ -176,9 +405,6 @@ lfs_segunlock(struct lfs *fs)
struct segment *sp;
unsigned long sync, ckp;
struct buf *bp;
struct vnode *vp, *nvp;
struct mount *mp;
extern int lfs_dirvcount;
#ifdef LFS_MALLOC_SUMMARY
extern int locked_queue_count;
extern long locked_queue_bytes;
@ -186,63 +412,9 @@ lfs_segunlock(struct lfs *fs)
sp = fs->lfs_sp;
if (fs->lfs_seglock == 1 && !(sp->seg_flags & SEGM_PROT)) {
mp = fs->lfs_ivnode->v_mount;
/*
* Go through and unmark all DIROP vnodes, possibly
* calling VOP_INACTIVE (through vrele). This is
* delayed until now in order not to accidentally
* write a DIROP node through lfs_flush.
*/
#ifndef LFS_NO_BACKVP_HACK
/* BEGIN HACK */
#define VN_OFFSET (((caddr_t)&LIST_NEXT(vp, v_mntvnodes)) - (caddr_t)vp)
#define BACK_VP(VP) ((struct vnode *)(((caddr_t)(VP)->v_mntvnodes.le_prev) - VN_OFFSET))
#define BEG_OF_VLIST ((struct vnode *)(((caddr_t)&LIST_FIRST(&mp->mnt_vnodelist)) - VN_OFFSET))
/* Find last vnode. */
loop: for (vp = LIST_FIRST(&mp->mnt_vnodelist);
vp && LIST_NEXT(vp, v_mntvnodes) != NULL;
vp = LIST_NEXT(vp, v_mntvnodes));
for (; vp && vp != BEG_OF_VLIST; vp = nvp) {
nvp = BACK_VP(vp);
#else
loop:
for (vp = LIST_FIRST(&mp->mnt_vnodelist);
vp != NULL;
vp = nvp) {
nvp = LIST_NEXT(vp, v_mntvnodes);
#endif
if (vp->v_mount != mp) {
printf("lfs_segunlock: starting over\n");
goto loop;
}
if (vp->v_type == VNON)
continue;
if (lfs_vref(vp))
continue;
if (VOP_ISLOCKED(vp) &&
vp->v_lock.lk_lockholder != curproc->p_pid) {
lfs_vunref(vp);
continue;
}
if ((vp->v_flag & VDIROP) &&
!(VTOI(vp)->i_flag & IN_ADIROP)) {
--lfs_dirvcount;
vp->v_flag &= ~VDIROP;
wakeup(&lfs_dirvcount);
fs->lfs_unlockvp = vp;
lfs_vunref(vp);
vrele(vp);
fs->lfs_unlockvp = NULL;
} else {
lfs_vunref(vp);
}
}
}
if (fs->lfs_seglock == 1) {
if ((sp->seg_flags & SEGM_PROT) == 0)
lfs_unmark_dirop(fs);
sync = sp->seg_flags & SEGM_SYNC;
ckp = sp->seg_flags & SEGM_CKP;
if (sp->bpp != sp->cbpp) {
@ -250,7 +422,7 @@ lfs_segunlock(struct lfs *fs)
fs->lfs_offset -= btofsb(fs, fs->lfs_sumsize);
bp = *sp->bpp;
#ifdef LFS_MALLOC_SUMMARY
lfs_freebuf(bp);
lfs_freebuf(fs, bp);
#else
s = splbio();
bremfree(bp);
@ -263,11 +435,11 @@ lfs_segunlock(struct lfs *fs)
} else
printf ("unlock to 0 with no summary");
free(sp->bpp, M_SEGMENT);
pool_put(&fs->lfs_bpppool, sp->bpp);
sp->bpp = NULL;
/* The sync case holds a reference in `sp' to be freed below */
if (!sync)
free(sp, M_SEGMENT);
pool_put(&fs->lfs_segpool, sp);
fs->lfs_sp = NULL;
/*
@ -275,9 +447,7 @@ lfs_segunlock(struct lfs *fs)
* At the moment, the user's process hangs around so we can
* sleep.
*/
if (--fs->lfs_iocount < LFS_THROTTLE)
wakeup(&fs->lfs_iocount);
if(fs->lfs_iocount == 0) {
if (--fs->lfs_iocount == 0) {
lfs_countlocked(&locked_queue_count,
&locked_queue_bytes, "lfs_segunlock");
wakeup(&locked_queue_count);
@ -309,15 +479,18 @@ lfs_segunlock(struct lfs *fs)
/* printf("sleeping on iocount %x == %d\n", sp, sp->seg_iocount); */
}
if (sync)
free(sp, M_SEGMENT);
pool_put(&fs->lfs_segpool, sp);
if (ckp) {
fs->lfs_nactive = 0;
/* If we *know* everything's on disk, write both sbs */
/* XXX should wait for this one */
if (sync)
lfs_writesuper(fs,fs->lfs_sboffs[fs->lfs_activesb]);
lfs_writesuper(fs, fs->lfs_sboffs[fs->lfs_activesb]);
lfs_writesuper(fs, fs->lfs_sboffs[1 - fs->lfs_activesb]);
#ifndef LFS_NO_AUTO_SEGCLEAN
lfs_auto_segclean(fs);
#endif
fs->lfs_activesb = 1 - fs->lfs_activesb;
lfs_writesuper(fs,fs->lfs_sboffs[fs->lfs_activesb]);
--fs->lfs_seglock;
fs->lfs_lockpid = 0;
wakeup(&fs->lfs_seglock);

View File

@ -1,7 +1,7 @@
/* $NetBSD: lfs_syscalls.c,v 1.79 2003/01/24 21:55:28 fvdl Exp $ */
/* $NetBSD: lfs_syscalls.c,v 1.80 2003/02/17 23:48:20 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.79 2003/01/24 21:55:28 fvdl Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.80 2003/02/17 23:48:20 perseant Exp $");
#define LFS /* for prototypes in syscallargs.h */
@ -107,6 +107,9 @@ int verbose_debug = 0;
pid_t lfs_cleaner_pid = 0;
extern int lfs_subsys_pages;
extern struct simplelock lfs_subsys_lock;
/*
* Definitions for the buffer free lists.
*/
@ -578,7 +581,7 @@ lfs_markv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt)
s = splbio();
for (bp = bufqueues[BQ_LOCKED].tqh_first; bp; bp = nbp) {
nbp = bp->b_freelist.tqe_next;
if (bp->b_flags & B_CALL) {
if (LFS_IS_MALLOC_BUF(bp)) {
if (bp->b_flags & B_BUSY) { /* not bloody likely */
bp->b_flags |= B_WANTED;
tsleep(bp, PRIBIO+1, "markv", 0);
@ -878,15 +881,12 @@ sys_lfs_segclean(struct lwp *l, void *v, register_t *retval)
syscallarg(fsid_t *) fsidp;
syscallarg(u_long) segment;
} */ *uap = v;
struct proc *p = l->l_proc;
CLEANERINFO *cip;
SEGUSE *sup;
struct buf *bp;
struct mount *mntp;
struct lfs *fs;
struct mount *mntp;
fsid_t fsid;
int error;
unsigned long segnum;
struct proc *p = l->l_proc;
if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
return (error);
@ -899,39 +899,44 @@ sys_lfs_segclean(struct lwp *l, void *v, register_t *retval)
fs = VFSTOUFS(mntp)->um_lfs;
segnum = SCARG(uap, segment);
if (dtosn(fs, fs->lfs_curseg) == segnum)
return (EBUSY);
if ((error = vfs_busy(mntp, LK_NOWAIT, NULL)) != 0)
return (error);
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_seglock(fs, SEGM_PROT);
#endif
error = lfs_do_segclean(fs, segnum);
lfs_segunlock(fs);
vfs_unbusy(mntp);
return error;
}
/*
* Actually mark the segment clean.
* Must be called with the segment lock held.
*/
int
lfs_do_segclean(struct lfs *fs, unsigned long segnum)
{
struct buf *bp;
CLEANERINFO *cip;
SEGUSE *sup;
if (dtosn(fs, fs->lfs_curseg) == segnum) {
return (EBUSY);
}
LFS_SEGENTRY(sup, fs, segnum, bp);
if (sup->su_nbytes) {
printf("lfs_segclean: not cleaning segment %lu: %d live bytes\n",
segnum, sup->su_nbytes);
brelse(bp);
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_segunlock(fs);
#endif
vfs_unbusy(mntp);
return (EBUSY);
}
if (sup->su_flags & SEGUSE_ACTIVE) {
brelse(bp);
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_segunlock(fs);
#endif
vfs_unbusy(mntp);
return (EBUSY);
}
if (!(sup->su_flags & SEGUSE_DIRTY)) {
brelse(bp);
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_segunlock(fs);
#endif
vfs_unbusy(mntp);
return (EALREADY);
}
@ -948,7 +953,7 @@ sys_lfs_segclean(struct lwp *l, void *v, register_t *retval)
if (fs->lfs_dmeta < 0)
fs->lfs_dmeta = 0;
sup->su_flags &= ~SEGUSE_DIRTY;
(void) LFS_BWRITE_LOG(bp);
LFS_WRITESEGENTRY(sup, fs, segnum, bp);
LFS_CLEANERINFO(cip, fs, bp);
++cip->clean;
@ -958,10 +963,6 @@ sys_lfs_segclean(struct lwp *l, void *v, register_t *retval)
cip->avail = fs->lfs_avail - fs->lfs_ravail;
(void) LFS_BWRITE_LOG(bp);
wakeup(&fs->lfs_avail);
#ifdef LFS_AGGRESSIVE_SEGLOCK
lfs_segunlock(fs);
#endif
vfs_unbusy(mntp);
return (0);
}
@ -1228,6 +1229,7 @@ lfs_fakebuf_iodone(struct buf *bp)
if (!(obp->b_flags & (B_DELWRI | B_DONE)))
obp->b_flags |= B_INVAL;
bp->b_saveaddr = (caddr_t)(VTOI(obp->b_vp)->i_lfs);
brelse(obp);
lfs_callback(bp);
}
@ -1256,11 +1258,10 @@ lfs_fakebuf(struct lfs *fs, struct vnode *vp, int lbn, size_t size, caddr_t uadd
if (obp == NULL)
panic("lfs_fakebuf: getblk failed");
#ifndef ALLOW_VFLUSH_CORRUPTION
bp = lfs_newbuf(VTOI(vp)->i_lfs, vp, lbn, size);
bp = lfs_newbuf(VTOI(vp)->i_lfs, vp, lbn, size, LFS_NB_CLEAN);
error = copyin(uaddr, bp->b_data, size);
if (error) {
lfs_freebuf(bp);
lfs_freebuf(fs, bp);
return NULL;
}
bp->b_saveaddr = obp;
@ -1272,11 +1273,6 @@ lfs_fakebuf(struct lfs *fs, struct vnode *vp, int lbn, size_t size, caddr_t uadd
panic("lfs_fakebuf: gathered bp: %p, ino=%u, lbn=%d",
bp, VTOI(vp)->i_number, lbn);
#endif
#else
bp = lfs_newbuf(VTOI(vp)->i_lfs, vp, lbn, 0);
bp->b_flags |= B_INVAL;
bp->b_saveaddr = uaddr;
#endif
#if 0
bp->b_saveaddr = (caddr_t)fs;
++fs->lfs_iocount;

View File

@ -1,7 +1,7 @@
/* $NetBSD: lfs_vfsops.c,v 1.90 2003/01/29 13:14:36 yamt Exp $ */
/* $NetBSD: lfs_vfsops.c,v 1.91 2003/02/17 23:48:21 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.90 2003/01/29 13:14:36 yamt Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.91 2003/02/17 23:48:21 perseant Exp $");
#if defined(_KERNEL_OPT)
#include "opt_quota.h"
@ -84,6 +84,7 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.90 2003/01/29 13:14:36 yamt Exp $")
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kthread.h>
#include <sys/buf.h>
#include <sys/device.h>
#include <sys/mbuf.h>
@ -105,14 +106,32 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.90 2003/01/29 13:14:36 yamt Exp $")
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
#include <uvm/uvm.h>
#include <uvm/uvm_stat.h>
#include <uvm/uvm_pager.h>
#include <uvm/uvm_pdaemon.h>
#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>
int lfs_mountfs(struct vnode *, struct mount *, struct proc *);
#ifdef LFS_UBC
#include <miscfs/genfs/genfs.h>
#include <miscfs/genfs/genfs_node.h>
static int lfs_gop_write(struct vnode *, struct vm_page **, int, int);
#endif
static int lfs_mountfs(struct vnode *, struct mount *, struct proc *);
extern const struct vnodeopv_desc lfs_vnodeop_opv_desc;
extern const struct vnodeopv_desc lfs_specop_opv_desc;
extern const struct vnodeopv_desc lfs_fifoop_opv_desc;
extern int lfs_subsys_pages;
extern int locked_queue_count;
extern long locked_queue_bytes;
extern struct simplelock lfs_subsys_lock;
int lfs_writer_daemon = 0;
int lfs_do_flush = 0;
const struct vnodeopv_desc * const lfs_vnodeopv_descs[] = {
&lfs_vnodeop_opv_desc,
@ -143,15 +162,95 @@ struct vfsops lfs_vfsops = {
};
/*
 * genfs operations vector for LFS, written as a positional initializer.
 * With LFS_UBC defined the three slots are lfs_gop_size, ufs_gop_alloc
 * and lfs_gop_write; without it the first two slots are left NULL and
 * the third is genfs_compat_gop_write.
 * NOTE(review): the slot order is presumably size, alloc, write (going by
 * the function names) -- confirm against the struct genfs_ops declaration.
 */
struct genfs_ops lfs_genfsops = {
#ifdef LFS_UBC
lfs_gop_size,
ufs_gop_alloc,
lfs_gop_write,
#else
NULL,
NULL,
genfs_compat_gop_write,
#endif
};
struct pool lfs_inode_pool;
struct pool lfs_inode_pool, lfs_inoext_pool;
extern int locked_queue_count;
extern long locked_queue_bytes;
/*
 * The writer daemon.  UVM keeps track of how many dirty pages we are holding
 * in lfs_subsys_pages; the daemon flushes the filesystem when this value
 * crosses the (user-defined) threshold LFS_MAX_PAGES.
 *
 * The thread records its pid in lfs_writer_daemon and then loops forever:
 *  1. sleep on &lfs_writer_daemon until somebody calls wakeup() on it;
 *  2. (LFS_PD only) walk the mount list and flush every LFS that has
 *     lfs_pdflush set or a non-empty lfs_pchainhd queue;
 *  3. call lfs_flush(NULL, 0) for as long as lfs_do_flush is set or any of
 *     locked_queue_count, locked_queue_bytes or lfs_subsys_pages exceeds
 *     its LFS_MAX_* limit;
 *  4. wake up anyone sleeping on &lfs_subsys_pages.
 *
 * arg is unused.  This function does not return.
 */
static void
lfs_writerd(void *arg)
{
#ifdef LFS_PD
	struct mount *mp, *nmp;
	struct lfs *fs;
#endif

	lfs_writer_daemon = curproc->p_pid;

	for (;;) {
		tsleep(&lfs_writer_daemon, PVM, "lfswriter", 0);

#ifdef LFS_PD
		/*
		 * Look through the list of LFSs to see if any of them
		 * have requested pageouts.
		 */
		simple_lock(&mountlist_slock);
		for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
		     mp = nmp) {
			if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
				nmp = mp->mnt_list.cqe_next;
				continue;
			}
			/*
			 * vfs_busy() was handed mountlist_slock as its
			 * interlock; on success the lock is no longer held
			 * (which is why it is retaken unconditionally before
			 * advancing to the next mount).  So the flush must
			 * neither release nor reacquire it here: releasing
			 * would unlock a lock we do not own, and reacquiring
			 * would turn the simple_lock() below into a recursive
			 * acquisition.
			 */
			if (strncmp(&mp->mnt_stat.f_fstypename[0], MOUNT_LFS,
				    MFSNAMELEN) == 0) {
				fs = ((struct ufsmount *)mp->mnt_data)->ufsmount_u.lfs;
				if (fs->lfs_pdflush ||
				    !TAILQ_EMPTY(&fs->lfs_pchainhd)) {
					fs->lfs_pdflush = 0;
					lfs_flush_fs(fs, 0);
				}
			}

			/* Retake the list lock before following the link. */
			simple_lock(&mountlist_slock);
			nmp = mp->mnt_list.cqe_next;
			vfs_unbusy(mp);
		}
		simple_unlock(&mountlist_slock);
#endif /* LFS_PD */

		/*
		 * If global state wants a flush, flush everything.
		 */
		while (lfs_do_flush || locked_queue_count > LFS_MAX_BUFS ||
			locked_queue_bytes > LFS_MAX_BYTES ||
			lfs_subsys_pages > LFS_MAX_PAGES) {

#ifdef DEBUG_LFS_FLUSH
			if (lfs_do_flush)
				printf("daemon: lfs_do_flush\n");
			if (locked_queue_count > LFS_MAX_BUFS)
				printf("daemon: lqc = %d, max %d\n",
					locked_queue_count, LFS_MAX_BUFS);
			if (locked_queue_bytes > LFS_MAX_BYTES)
				printf("daemon: lqb = %ld, max %d\n",
					locked_queue_bytes, LFS_MAX_BYTES);
			if (lfs_subsys_pages > LFS_MAX_PAGES)
				printf("daemon: lssp = %d, max %d\n",
					lfs_subsys_pages, LFS_MAX_PAGES);
#endif /* DEBUG_LFS_FLUSH */
			lfs_flush(NULL, 0);
			lfs_do_flush = 0;
		}
		wakeup(&lfs_subsys_pages);
	}
	/* NOTREACHED */
}
/*
* Initialize the filesystem, most work done by ufs_init.
@ -166,9 +265,12 @@ lfs_init()
*/
pool_init(&lfs_inode_pool, sizeof(struct inode), 0, 0, 0,
"lfsinopl", &pool_allocator_nointr);
pool_init(&lfs_inoext_pool, sizeof(struct lfs_inode_ext), 8, 0, 0,
"lfsinoextpl", &pool_allocator_nointr);
#ifdef DEBUG
memset(lfs_log, 0, sizeof(lfs_log));
#endif
simple_lock_init(&lfs_subsys_lock);
}
void
@ -452,11 +554,11 @@ update_meta(struct lfs *fs, ino_t ino, int version, daddr_t lbn,
}
#endif
sup->su_nbytes -= size;
LFS_BWRITE_LOG(bp);
LFS_WRITESEGENTRY(sup, fs, dtosn(fs, dbtofsb(fs, odaddr)), bp);
}
LFS_SEGENTRY(sup, fs, dtosn(fs, ndaddr), bp);
sup->su_nbytes += size;
LFS_BWRITE_LOG(bp);
LFS_WRITESEGENTRY(sup, fs, dtosn(fs, ndaddr), bp);
/* Fix this so it can be released */
/* ip->i_lfs_effnblks = ip->i_ffs_blocks; */
@ -544,12 +646,16 @@ update_inoblk(struct lfs *fs, daddr_t offset, struct ucred *cred,
LFS_SEGENTRY(sup, fs, dtosn(fs, daddr),
ibp);
sup->su_nbytes -= DINODE_SIZE;
LFS_BWRITE_LOG(ibp);
LFS_WRITESEGENTRY(sup, fs,
dtosn(fs, daddr),
ibp);
}
LFS_SEGENTRY(sup, fs, dtosn(fs, dbtofsb(fs, dbp->b_blkno)),
ibp);
sup->su_nbytes += DINODE_SIZE;
LFS_BWRITE_LOG(ibp);
LFS_WRITESEGENTRY(sup, fs,
dtosn(fs, dbtofsb(fs, dbp->b_blkno)),
ibp);
}
}
}
@ -969,7 +1075,7 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p)
fs->lfs_dirops = 0;
fs->lfs_nadirop = 0;
fs->lfs_seglock = 0;
lockinit(&fs->lfs_freelock, PINOD, "lfs_freelock", 0, 0);
fs->lfs_pdflush = 0;
lockinit(&fs->lfs_fraglock, PINOD, "lfs_fraglock", 0, 0);
/* Set the file system readonly/modify bits. */
@ -985,6 +1091,7 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p)
mp->mnt_stat.f_iosize = fs->lfs_bsize;
mp->mnt_maxsymlinklen = fs->lfs_maxsymlinklen;
mp->mnt_flag |= MNT_LOCAL;
mp->mnt_fs_bshift = fs->lfs_bshift;
ump->um_flags = 0;
ump->um_mountp = mp;
ump->um_dev = dev;
@ -997,6 +1104,16 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p)
ump->um_quotas[i] = NULLVP;
devvp->v_specmountpoint = mp;
/* Set up reserved memory for pageout */
lfs_setup_resblks(fs);
/* Set up vdirop tailq */
TAILQ_INIT(&fs->lfs_dchainhd);
/* and paging tailq */
TAILQ_INIT(&fs->lfs_pchainhd);
#if 0 /* XXXDEBUG */
fs->lfs_lastwrit = dbtofsb(fs, fs->lfs_offset - 1);
#endif
/*
* We use the ifile vnode for almost every operation. Instead of
* retrieving it from the hash table each time we retrieve it here,
@ -1012,6 +1129,32 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p)
fs->lfs_ivnode = vp;
VREF(vp);
/* Set up segment usage flags for the autocleaner. */
fs->lfs_suflags = (u_int32_t **)malloc(2 * sizeof(u_int32_t *),
M_SEGMENT, M_WAITOK);
fs->lfs_suflags[0] = (u_int32_t *)malloc(fs->lfs_nseg * sizeof(u_int32_t),
M_SEGMENT, M_WAITOK);
fs->lfs_suflags[1] = (u_int32_t *)malloc(fs->lfs_nseg * sizeof(u_int32_t),
M_SEGMENT, M_WAITOK);
memset(fs->lfs_suflags[1], 0, fs->lfs_nseg * sizeof(u_int32_t));
for (i = 0; i < fs->lfs_nseg; i++) {
LFS_SEGENTRY(sup, fs, i, bp);
if (!ronly && sup->su_nbytes == 0 &&
!(sup->su_flags & SEGUSE_EMPTY)) {
sup->su_flags |= SEGUSE_EMPTY;
fs->lfs_suflags[0][i] = sup->su_flags;
LFS_WRITESEGENTRY(sup, fs, i, bp);
} else if (!ronly && !(sup->su_nbytes == 0) &&
(sup->su_flags & SEGUSE_EMPTY)) {
sup->su_flags &= ~SEGUSE_EMPTY;
fs->lfs_suflags[0][i] = sup->su_flags;
LFS_WRITESEGENTRY(sup, fs, i, bp);
} else {
fs->lfs_suflags[0][i] = sup->su_flags;
brelse(bp);
}
}
/*
* Roll forward.
*
@ -1045,7 +1188,7 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p)
if (!(sup->su_flags & SEGUSE_DIRTY))
--fs->lfs_nclean;
sup->su_flags |= SEGUSE_DIRTY;
(void) LFS_BWRITE_LOG(bp);
LFS_WRITESEGENTRY(sup, fs, dtosn(fs, offset), bp);
while ((offset = check_segsum(fs, offset, cred, CHECK_CKSUM,
&flags, p)) > 0)
{
@ -1055,7 +1198,8 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p)
if (!(sup->su_flags & SEGUSE_DIRTY))
--fs->lfs_nclean;
sup->su_flags |= SEGUSE_DIRTY;
(void) LFS_BWRITE_LOG(bp);
LFS_WRITESEGENTRY(sup, fs, dtosn(fs, oldoffset),
bp);
}
#ifdef DEBUG_LFS_RFW
@ -1149,7 +1293,7 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p)
*/
LFS_SEGENTRY(sup, fs, dtosn(fs, fs->lfs_offset), bp);
sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
(void) LFS_BWRITE_LOG(bp); /* Ifile */
LFS_WRITESEGENTRY(sup, fs, dtosn(fs, fs->lfs_offset), bp); /* Ifile */
/* Now that roll-forward is done, unlock the Ifile */
vput(vp);
@ -1180,6 +1324,12 @@ out:
free(ump, M_UFSMNT);
mp->mnt_data = NULL;
}
/* Start the pagedaemon-anticipating daemon */
if (lfs_writer_daemon == 0 &&
kthread_create1(lfs_writerd, NULL, NULL, "lfs_writer") != 0)
panic("fork lfs_writer");
return (error);
}
@ -1259,12 +1409,18 @@ lfs_unmount(struct mount *mp, int mntflags, struct proc *p)
ronly ? FREAD : FREAD|FWRITE, NOCRED, p);
vput(ump->um_devvp);
/* XXX KS - wake up the cleaner so it can die */
/* wake up the cleaner so it can die */
wakeup(&fs->lfs_nextseg);
wakeup(&lfs_allclean_wakeup);
/* Free per-mount data structures */
free(fs->lfs_suflags[0], M_SEGMENT);
free(fs->lfs_suflags[1], M_SEGMENT);
free(fs->lfs_suflags, M_SEGMENT);
lfs_free_resblks(fs);
free(fs, M_UFSMNT);
free(ump, M_UFSMNT);
mp->mnt_data = NULL;
mp->mnt_flag &= ~MNT_LOCAL;
return (error);
@ -1586,11 +1742,251 @@ lfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, si
if (lfs_dostats == 0)
memset(&lfs_stats,0,sizeof(lfs_stats));
return 0;
case LFS_STATS:
return (sysctl_rdstruct(oldp, oldlenp, newp,
&lfs_stats, sizeof(lfs_stats)));
default:
return (EOPNOTSUPP);
}
/* NOTREACHED */
}
#ifdef LFS_UBC
/*
 * lfs_gop_write functions exactly like genfs_gop_write, except that
 * (1) it requires the seglock to be held by its caller, and sp->fip
 *     to be properly initialized (it will return without re-initializing
 *     sp->fip, and without calling lfs_writeseg).
 * (2) it uses the remaining space in the segment, rather than VOP_BMAP,
 *     to determine how large a block it can write at once (though it does
 *     still use VOP_BMAP to find holes in the file);
 * (3) it calls lfs_gatherblock instead of VOP_STRATEGY on its blocks
 *     (leaving lfs_writeseg to deal with the cluster blocks, so we might
 *     now have clusters of clusters, ick.)
 *
 * Arguments:
 *   vp     - vnode whose pages are being written
 *   pgs    - array of npages pages; pgs[0]->offset is the starting offset
 *   npages - number of entries in pgs
 *   flags  - only passed through to genfs_compat_gop_write for the Ifile
 *
 * Returns whatever genfs_compat_gop_write returns when vp is the Ifile,
 * EAGAIN when the segment lock is not held by the current process (the
 * pages are then made dirty and active again), and 0 otherwise.  A
 * VOP_BMAP failure does not change the return value; it is recorded in
 * the master buffer (B_ERROR / b_error) instead.
 */
static int
lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
{
	int i, s, error, run;
	int fs_bshift, dev_bshift;
	vaddr_t kva;
	off_t eof, offset, startoffset;
	size_t bytes, iobytes, skipbytes;
	daddr_t lbn, blkno;
	struct vm_page *pg;
	struct buf *mbp, *bp;
	struct vnode *devvp;
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;
	struct segment *sp = fs->lfs_sp;
	UVMHIST_FUNC("lfs_gop_write"); UVMHIST_CALLED(ubchist);

	/* The Ifile lives in the buffer cache */
	if (vp == fs->lfs_ivnode)
		return genfs_compat_gop_write(vp, pgs, npages, flags);

	/*
	 * Sometimes things slip past the filters in lfs_putpages,
	 * and the pagedaemon tries to write pages---problem is
	 * that the pagedaemon never acquires the segment lock.
	 *
	 * Unbusy and unclean the pages, and put them on the ACTIVE
	 * queue under the hypothesis that they couldn't have got here
	 * unless they were modified *quite* recently.
	 *
	 * XXXUBC that last statement is an oversimplification of course.
	 */
	if (!(fs->lfs_seglock) || fs->lfs_lockpid != curproc->p_pid) {
		simple_lock(&vp->v_interlock);
#ifdef DEBUG
		printf("lfs_gop_write: seglock not held\n");
#endif
		uvm_lock_pageq();
		for (i = 0; i < npages; i++) {
			if (pgs[i]->flags & PG_WANTED)
				wakeup(pgs[i]);
			if (pgs[i]->flags & PG_PAGEOUT)
				uvmexp.paging--;
			pgs[i]->flags &= ~(PG_BUSY|PG_CLEAN|PG_WANTED|PG_DELWRI|PG_PAGEOUT|PG_RELEASED);
			/*
			 * Drop ownership of the page being processed; the
			 * local `pg' has not been assigned yet at this point.
			 */
			UVM_PAGE_OWN(pgs[i], NULL);
			uvm_pageactivate(pgs[i]);
		}
		uvm_page_unbusy(pgs, npages);
		uvm_unlock_pageq();
		simple_unlock(&vp->v_interlock);
		return EAGAIN;
	}

	UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x",
	    vp, pgs, npages, flags);

	GOP_SIZE(vp, vp->v_size, &eof, GOP_SIZE_WRITE);

	if (vp->v_type == VREG) {
		fs_bshift = vp->v_mount->mnt_fs_bshift;
		dev_bshift = vp->v_mount->mnt_dev_bshift;
	} else {
		fs_bshift = DEV_BSHIFT;
		dev_bshift = DEV_BSHIFT;
	}
	error = 0;
	pg = pgs[0];
	startoffset = pg->offset;
	/* Never write past the on-disk end of file. */
	bytes = MIN(npages << PAGE_SHIFT, eof - startoffset);
	skipbytes = 0;

	KASSERT(bytes != 0);

	/* Swap PG_DELWRI for PG_PAGEOUT */
	for (i = 0; i < npages; i++)
		if (pgs[i]->flags & PG_DELWRI) {
			KASSERT(!(pgs[i]->flags & PG_PAGEOUT));
			pgs[i]->flags &= ~PG_DELWRI;
			pgs[i]->flags |= PG_PAGEOUT;
			uvmexp.paging++;
		}

	/*
	 * Check to make sure we're starting on a block boundary.
	 * We'll check later to make sure we always write entire
	 * blocks (or fragments).
	 */
	if (startoffset & fs->lfs_bmask)
		printf("%" PRId64 " & %" PRId64 " = %" PRId64 "\n",
			startoffset, fs->lfs_bmask,
			startoffset & fs->lfs_bmask);
	KASSERT((startoffset & fs->lfs_bmask) == 0);
	if (bytes & fs->lfs_ffmask) {
		printf("lfs_gop_write: asked to write %ld bytes\n", (long)bytes);
		panic("lfs_gop_write: non-integer blocks");
	}

	kva = uvm_pagermapin(pgs, npages,
	    UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK);

	s = splbio();
	simple_lock(&global_v_numoutput_slock);
	vp->v_numoutput += 2; /* one for biodone, one for aiodone */
	simple_unlock(&global_v_numoutput_slock);
	mbp = pool_get(&bufpool, PR_WAITOK);
	splx(s);

	/* Size the clear by the object actually being cleared. */
	memset(mbp, 0, sizeof(*mbp));
	UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
	    vp, mbp, vp->v_numoutput, bytes);
	mbp->b_bufsize = npages << PAGE_SHIFT;
	mbp->b_data = (void *)kva;
	mbp->b_resid = mbp->b_bcount = bytes;
	mbp->b_flags = B_BUSY|B_WRITE|B_AGE|B_CALL;
	mbp->b_iodone = uvm_aio_biodone;
	mbp->b_vp = vp;
	LIST_INIT(&mbp->b_dep);

	bp = NULL;
	for (offset = startoffset;
	    bytes > 0;
	    offset += iobytes, bytes -= iobytes) {
		lbn = offset >> fs_bshift;
		error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
		if (error) {
			UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0);
			/* Give up on everything that is left. */
			skipbytes += bytes;
			bytes = 0;
			break;
		}

		iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
		    bytes);
		if (blkno == (daddr_t)-1) {
			/* A hole in the file: nothing to write here. */
			skipbytes += iobytes;
			continue;
		}

		/*
		 * Discover how much we can really pack into this buffer.
		 */
#ifdef LFS_UBC_BIGBUFS
		/* If no room in the current segment, finish it up */
		if (sp->sum_bytes_left < sizeof(int32_t) ||
		    sp->seg_bytes_left < MIN(iobytes, (1 << fs->lfs_bshift))) {
			int version;

			lfs_updatemeta(sp);

			version = sp->fip->fi_version;
			(void) lfs_writeseg(fs, sp);

			sp->fip->fi_version = version;
			sp->fip->fi_ino = ip->i_number;
			/* Add the current file to the segment summary. */
			++((SEGSUM *)(sp->segsum))->ss_nfinfo;
			sp->sum_bytes_left -= FINFOSIZE;
		}
		iobytes = MIN(iobytes, ((sp->seg_bytes_left >> fs_bshift) << fs_bshift));
#else
		iobytes = MIN(iobytes, (1 << fs_bshift));
		if (iobytes != blksize(fs, ip, lblkno(fs, offset))) {
			printf("iobytes = %" PRId64 ", blk = %" PRId64 "\n",
				(int64_t)iobytes,
				(int64_t)blksize(fs, ip, lblkno(fs, offset)));
		}
		KASSERT(iobytes == blksize(fs, ip, lblkno(fs, offset)));
#endif
		KASSERT(iobytes > 0);

		/* if it's really one i/o, don't make a second buf */
		if (offset == startoffset && iobytes == bytes) {
			bp = mbp;
			/* printf("bp is mbp\n"); */
			/* correct overcount if there is no second buffer */
			s = splbio();
			simple_lock(&global_v_numoutput_slock);
			--vp->v_numoutput;
			simple_unlock(&global_v_numoutput_slock);
			splx(s);
		} else {
			/* printf("bp is not mbp\n"); */
			s = splbio();
			bp = pool_get(&bufpool, PR_WAITOK);
			UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
			    vp, bp, vp->v_numoutput, 0);
			memset(bp, 0, sizeof(*bp));
			splx(s);
			bp->b_data = (char *)kva +
			    (vaddr_t)(offset - pg->offset);
			bp->b_resid = bp->b_bcount = iobytes;
			bp->b_flags = B_BUSY|B_WRITE|B_CALL;
			bp->b_iodone = uvm_aio_biodone1;
			LIST_INIT(&bp->b_dep);
		}

		/* XXX This is silly ... is this necessary? */
		bp->b_vp = NULL;
		s = splbio();
		bgetvp(vp, bp);
		splx(s);

		bp->b_lblkno = lblkno(fs, offset);
		bp->b_private = mbp;
		if (devvp->v_type == VBLK) {
			bp->b_dev = devvp->v_rdev;
		}
		VOP_BWRITE(bp);
		while(lfs_gatherblock(sp, bp, NULL))
			;
	}

	if (skipbytes) {
		UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
		s = splbio();
		if (error) {
			mbp->b_flags |= B_ERROR;
			mbp->b_error = error;
		}
		mbp->b_resid -= skipbytes;
		/* Nothing was handed on for writing: finish the master buf. */
		if (mbp->b_resid == 0) {
			biodone(mbp);
		}
		splx(s);
	}
	UVMHIST_LOG(ubchist, "returning 0", 0,0,0,0);
	return (0);
}
#endif /* LFS_UBC */

View File

@ -1,7 +1,7 @@
/* $NetBSD: lfs_vnops.c,v 1.83 2003/02/03 00:32:35 perseant Exp $ */
/* $NetBSD: lfs_vnops.c,v 1.84 2003/02/17 23:48:22 perseant Exp $ */
/*-
* Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
* Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.83 2003/02/03 00:32:35 perseant Exp $");
__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.84 2003/02/17 23:48:22 perseant Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -97,9 +97,19 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.83 2003/02/03 00:32:35 perseant Exp
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
#include <uvm/uvm.h>
#ifdef LFS_UBC
# include <uvm/uvm_pmap.h>
# include <uvm/uvm_stat.h>
# include <uvm/uvm_pager.h>
#endif
#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>
extern int lfs_writer_daemon;
extern int lfs_subsys_pages;
/* Global vfs data structures for lfs. */
int (**lfs_vnodeop_p)(void *);
const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {
@ -121,7 +131,11 @@ const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {
{ &vop_poll_desc, ufs_poll }, /* poll */
{ &vop_kqfilter_desc, genfs_kqfilter }, /* kqfilter */
{ &vop_revoke_desc, ufs_revoke }, /* revoke */
#ifdef LFS_UBC
{ &vop_mmap_desc, lfs_mmap }, /* mmap */
#else
{ &vop_mmap_desc, ufs_mmap }, /* mmap */
#endif
{ &vop_fsync_desc, lfs_fsync }, /* fsync */
{ &vop_seek_desc, ufs_seek }, /* seek */
{ &vop_remove_desc, lfs_remove }, /* remove */
@ -150,7 +164,11 @@ const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {
{ &vop_truncate_desc, lfs_truncate }, /* truncate */
{ &vop_update_desc, lfs_update }, /* update */
{ &vop_bwrite_desc, lfs_bwrite }, /* bwrite */
#ifdef LFS_UBC
{ &vop_getpages_desc, genfs_getpages }, /* getpages */
#else
{ &vop_getpages_desc, lfs_getpages }, /* getpages */
#endif
{ &vop_putpages_desc, lfs_putpages }, /* putpages */
{ NULL, NULL }
};
@ -293,37 +311,46 @@ lfs_fsync(void *v)
struct proc *a_p;
} */ *ap = v;
struct vnode *vp = ap->a_vp;
int error;
/* Ignore the trickle syncer */
if (ap->a_flags & FSYNC_LAZY)
int error, wait;
/*
* Trickle sync checks for need to do a checkpoint after possible
* activity from the pagedaemon.
*/
if (ap->a_flags & FSYNC_LAZY) {
wakeup(&lfs_writer_daemon);
return 0;
simple_lock(&vp->v_interlock);
error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo),
round_page(ap->a_offhi), PGO_CLEANIT | PGO_SYNCIO);
if (error)
return error;
error = VOP_UPDATE(vp, NULL, NULL,
(ap->a_flags & FSYNC_WAIT) != 0 ? UPDATE_WAIT : 0);
#ifdef DEBUG
/*
* If we were called from vinvalbuf and lfs_update
* didn't flush all our buffers, we're in trouble.
*/
if ((ap->a_flags & FSYNC_WAIT) && LIST_FIRST(&vp->v_dirtyblkhd) != NULL) {
struct buf *bp;
bp = LIST_FIRST(&vp->v_dirtyblkhd);
printf("lfs_fsync: ino %d failed to sync", VTOI(vp)->i_number);
printf("lfs_fsync: iocount = %d\n", VTOI(vp)->i_lfs->lfs_iocount);
printf("lfs_fsync: flags are 0x%x, numoutput=%d\n",
VTOI(vp)->i_flag, vp->v_numoutput);
printf("lfs_fsync: writecount=%ld\n", vp->v_writecount);
printf("lfs_fsync: first bp: %p, flags=0x%lx, lbn=%" PRId64 "\n",
bp, bp->b_flags, bp->b_lblkno);
}
wait = (ap->a_flags & FSYNC_WAIT);
do {
#ifdef DEBUG
struct buf *bp;
#endif
simple_lock(&vp->v_interlock);
error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo),
round_page(ap->a_offhi),
PGO_CLEANIT | (wait ? PGO_SYNCIO : 0));
if (error)
return error;
error = VOP_UPDATE(vp, NULL, NULL, wait ? UPDATE_WAIT : 0);
if (wait && error == 0 && !VPISEMPTY(vp)) {
#ifdef DEBUG
printf("lfs_fsync: reflushing ino %d\n",
VTOI(vp)->i_number);
printf("vflags %x iflags %x npages %d\n",
vp->v_flag, VTOI(vp)->i_flag,
vp->v_uobj.uo_npages);
LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs)
printf("%" PRId64 " (%lx)", bp->b_lblkno,
bp->b_flags);
printf("\n");
#endif
VTOI(vp)->i_flag |= IN_MODIFIED;
}
} while (wait && error == 0 && !VPISEMPTY(vp));
return error;
}
@ -361,6 +388,7 @@ lfs_inactive(void *v)
#define SET_DIROP2(vp, vp2) lfs_set_dirop((vp), (vp2))
static int lfs_set_dirop(struct vnode *, struct vnode *);
extern int lfs_dirvcount;
extern int lfs_do_flush;
#define NRESERVE(fs) (btofsb(fs, (NIADDR + 3 + (2 * NIADDR + 3)) << fs->lfs_bshift))
@ -383,17 +411,15 @@ lfs_set_dirop(struct vnode *vp, struct vnode *vp2)
if (fs->lfs_dirops == 0)
lfs_check(vp, LFS_UNUSED_LBN, 0);
while (fs->lfs_writer || lfs_dirvcount > LFS_MAXDIROP) {
while (fs->lfs_writer || lfs_dirvcount > LFS_MAX_DIROP) {
if (fs->lfs_writer)
tsleep(&fs->lfs_dirops, PRIBIO + 1, "lfs_sdirop", 0);
if (lfs_dirvcount > LFS_MAXDIROP && fs->lfs_dirops == 0) {
++fs->lfs_writer;
lfs_flush(fs, 0);
if (--fs->lfs_writer == 0)
wakeup(&fs->lfs_dirops);
if (lfs_dirvcount > LFS_MAX_DIROP && fs->lfs_dirops == 0) {
wakeup(&lfs_writer_daemon);
preempt(NULL);
}
if (lfs_dirvcount > LFS_MAXDIROP) {
if (lfs_dirvcount > LFS_MAX_DIROP) {
#ifdef DEBUG_LFS
printf("lfs_set_dirop: sleeping with dirops=%d, "
"dirvcount=%d\n", fs->lfs_dirops,
@ -438,15 +464,19 @@ unreserve:
}
#define MARK_VNODE(dvp) do { \
struct inode *_ip = VTOI(dvp); \
struct lfs *_fs = _ip->i_lfs; \
\
if (!((dvp)->v_flag & VDIROP)) { \
(void)lfs_vref(dvp); \
++lfs_dirvcount; \
TAILQ_INSERT_TAIL(&_fs->lfs_dchainhd, _ip, i_lfs_dchain); \
} \
(dvp)->v_flag |= VDIROP; \
if (!(VTOI(dvp)->i_flag & IN_ADIROP)) { \
++VTOI(dvp)->i_lfs->lfs_nadirop; \
if (!(_ip->i_flag & IN_ADIROP)) { \
++_fs->lfs_nadirop; \
} \
VTOI(dvp)->i_flag |= IN_ADIROP; \
_ip->i_flag |= IN_ADIROP; \
} while (0)
#define UNMARK_VNODE(vp) lfs_unmark_vnode(vp)
@ -656,22 +686,24 @@ lfs_rmdir(void *v)
struct vnode *a_vp;
struct componentname *a_cnp;
} */ *ap = v;
struct vnode *vp;
int error;
vp = ap->a_vp;
if ((error = SET_DIROP2(ap->a_dvp, ap->a_vp)) != 0) {
vrele(ap->a_dvp);
if (ap->a_vp != ap->a_dvp)
VOP_UNLOCK(ap->a_dvp, 0);
vput(ap->a_vp);
vput(vp);
return error;
}
MARK_VNODE(ap->a_dvp);
MARK_VNODE(ap->a_vp);
MARK_VNODE(vp);
error = ufs_rmdir(ap);
UNMARK_VNODE(ap->a_dvp);
UNMARK_VNODE(ap->a_vp);
UNMARK_VNODE(vp);
SET_ENDOP2(VTOI(ap->a_dvp)->i_lfs, ap->a_dvp, ap->a_vp, "rmdir");
SET_ENDOP2(VTOI(ap->a_dvp)->i_lfs, ap->a_dvp, vp, "rmdir");
return (error);
}
@ -844,7 +876,7 @@ lfs_getattr(void *v)
vap->va_blocksize = MAXBSIZE;
else
vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
vap->va_bytes = fsbtob(fs, (u_quad_t)ip->i_ffs_blocks);
vap->va_bytes = fsbtob(fs, (u_quad_t)ip->i_lfs_effnblks);
vap->va_type = vp->v_type;
vap->va_filerev = ip->i_modrev;
return (0);
@ -964,18 +996,22 @@ lfs_reclaim(void *v)
struct proc *a_p;
} */ *ap = v;
struct vnode *vp = ap->a_vp;
struct inode *ip = VTOI(vp);
int error;
KASSERT(VTOI(vp)->i_ffs_nlink == VTOI(vp)->i_ffs_effnlink);
KASSERT(ip->i_ffs_nlink == ip->i_ffs_effnlink);
LFS_CLR_UINO(VTOI(vp), IN_ALLMOD);
LFS_CLR_UINO(ip, IN_ALLMOD);
if ((error = ufs_reclaim(vp, ap->a_p)))
return (error);
pool_put(&lfs_inoext_pool, ip->inode_ext.lfs);
ip->inode_ext.lfs = NULL;
pool_put(&lfs_inode_pool, vp->v_data);
vp->v_data = NULL;
return (0);
}
#ifndef LFS_UBC
int
lfs_getpages(void *v)
{
@ -1004,3 +1040,645 @@ lfs_putpages(void *v)
error = genfs_putpages(v);
return error;
}
#else /* LFS_UBC */
/*
 * Make sure that for all pages in every block in the given range,
 * either all are dirty or all are clean.
 *
 * The range is walked one filesystem block at a time, either by following
 * the vnode's page list (when the object holds few pages relative to the
 * range) or by looking up each offset.  Every resident page of a block is
 * briefly marked PG_BUSY, write-protected and tested for modification; if
 * any page of the block is dirty, PG_CLEAN is cleared on all of them.  When
 * flags contains PGO_FREE, dirty pages are additionally dequeued, flagged
 * PG_DELWRI and counted in lfs_subsys_pages.
 *
 * Putting the vnode on the paging chain and marking it IN_PAGING when
 * something is dirty is not enabled yet (see the "notyet" section below).
 *
 * Arguments:
 *   fs          - filesystem; supplies block size and block mask
 *   vp          - vnode whose pages are examined
 *   startoffset - start of the range (used for the offset walk)
 *   endoffset   - end of the range
 *   blkeof      - the offset walk stops at MIN(blkeof, endoffset)
 *   flags       - only PGO_FREE is examined
 *
 * Returns the accumulated dirty-page count (nonzero if anything was dirty).
 *
 * Presumably called with vp->v_interlock held: it is dropped while waiting
 * for a busy page and retaken afterwards -- confirm against callers.
 */
static int
check_dirty(struct lfs *fs, struct vnode *vp,
	    off_t startoffset, off_t endoffset, off_t blkeof,
	    int flags)
{
	int by_list;
	struct vm_page *curpg, *pgs[MAXBSIZE / PAGE_SIZE], *pg;
	struct lwp *l = curlwp ? curlwp : &lwp0;
	off_t soff;
	voff_t off;
	int i, dirty, tdirty, nonexistent, any_dirty;
	int pages_per_block = fs->lfs_bsize >> PAGE_SHIFT;

  top:
	by_list = (vp->v_uobj.uo_npages <=
		   ((endoffset - startoffset) >> PAGE_SHIFT) *
		   UVM_PAGE_HASH_PENALTY);
	any_dirty = 0;

	if (by_list) {
		curpg = TAILQ_FIRST(&vp->v_uobj.memq);
		PHOLD(l);
	} else {
		soff = startoffset;
	}
	while (by_list || soff < MIN(blkeof, endoffset)) {
		if (by_list) {
			/* Skip ahead to a page that starts a block. */
			if (pages_per_block > 1) {
				while (curpg && (curpg->offset & fs->lfs_bmask))
					curpg = TAILQ_NEXT(curpg, listq);
			}
			if (curpg == NULL)
				break;
			soff = curpg->offset;
		}

		/*
		 * Mark all pages in extended range busy; find out if any
		 * of them are dirty.
		 */
		nonexistent = dirty = 0;
		for (i = 0; i == 0 || i < pages_per_block; i++) {
			if (by_list && pages_per_block <= 1) {
				pgs[i] = pg = curpg;
			} else {
				off = soff + (i << PAGE_SHIFT);
				pgs[i] = pg = uvm_pagelookup(&vp->v_uobj, off);
				if (pg == NULL) {
					++nonexistent;
					continue;
				}
			}
			KASSERT(pg != NULL);
			while (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &vp->v_interlock, 0,
						    "lfsput", 0);
				simple_lock(&vp->v_interlock);
				if (by_list) {
					/*
					 * The page list may have changed
					 * while we slept, so start over.
					 * Drop the hold taken at "top" first;
					 * it is taken again there if the list
					 * walk is chosen again, and must not
					 * accumulate across restarts.
					 *
					 * NOTE(review): pages of this block
					 * already marked busy above are still
					 * busy at this point -- confirm that
					 * a restart cannot wait on them.
					 */
					PRELE(l);
					goto top;
				}
			}
			pg->flags |= PG_BUSY;
			UVM_PAGE_OWN(pg, "lfs_putpages");

			pmap_page_protect(pg, VM_PROT_NONE);
			tdirty = (pmap_clear_modify(pg) ||
				  (pg->flags & PG_CLEAN) == 0);
			dirty += tdirty;
		}
		/* Nothing resident in this block: move on to the next. */
		if (pages_per_block > 0 && nonexistent >= pages_per_block) {
			if (by_list) {
				curpg = TAILQ_NEXT(curpg, listq);
			} else {
				soff += fs->lfs_bsize;
			}
			continue;
		}

		any_dirty += dirty;
		KASSERT(nonexistent == 0);

		/*
		 * If any are dirty make all dirty; unbusy them,
		 * but if we were asked to clean, take them off
		 * of their queue so the pagedaemon doesn't bother
		 * us about them while they're on their way to disk.
		 *
		 * (XXXUBC the page is now on *no* page queue.)
		 */
		for (i = 0; i == 0 || i < pages_per_block; i++) {
			pg = pgs[i];
			KASSERT(!((pg->flags & PG_CLEAN) && (pg->flags & PG_DELWRI)));
			if (dirty) {
				pg->flags &= ~PG_CLEAN;
				if (flags & PGO_FREE) {
					/* XXXUBC need better way to update */
					lfs_subsys_pages += MIN(1, pages_per_block);
					uvm_lock_pageq();
					UVM_PAGE_OWN(pg, NULL);
					uvm_pagedequeue(pg);
					/* Suspended write flag */
					pg->flags |= PG_DELWRI;
					uvm_unlock_pageq();
				}
			} else {
				UVM_PAGE_OWN(pg, NULL);
			}
			if (pg->flags & PG_WANTED)
				wakeup(pg);
			pg->flags &= ~(PG_WANTED|PG_BUSY);
			/* UVM_PAGE_OWN(pg, NULL); */
		}

		if (by_list) {
			curpg = TAILQ_NEXT(curpg, listq);
		} else {
			soff += MAX(PAGE_SIZE, fs->lfs_bsize);
		}
	}
	if (by_list) {
		PRELE(l);
	}

	/*
	 * If any pages were dirty, mark this inode as "pageout requested",
	 * and put it on the paging queue.
	 * XXXUBC locking (check locking on dchainhd too)
	 */
#ifdef notyet
	if (any_dirty) {
		if (!(ip->i_flags & IN_PAGING)) {
			ip->i_flags |= IN_PAGING;
			TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain);
		}
	}
#endif
	return any_dirty;
}
/*
 * lfs_putpages functions like genfs_putpages except that
 *
 * (1) It needs to bounds-check the incoming requests to ensure that
 *     they are block-aligned; if they are not, expand the range and
 *     do the right thing in case, e.g., the requested range is clean
 *     but the expanded range is dirty.
 * (2) It needs to explicitly send blocks to be written when it is done.
 *     VOP_PUTPAGES is not ever called with the seglock held, so
 *     we simply take the seglock and let lfs_segunlock wait for us.
 *     XXX Actually we can be called with the seglock held, if we have
 *     XXX to flush a vnode while lfs_markv is in operation.  As of this
 *     XXX writing we panic in this case.
 *
 * Assumptions:
 *
 * (1) The caller does not hold any pages in this vnode busy.  If it does,
 *     there is a danger that when we expand the page range and busy the
 *     pages we will deadlock.
 * (2) We are called with vp->v_interlock held; we must return with it
 *     released.  Every early return below that does not hand off to
 *     genfs_putpages() therefore drops the interlock itself.
 * (3) We don't absolutely have to free pages right away, provided that
 *     the request does not have PGO_SYNCIO.  When the pagedaemon gives
 *     us a request with PGO_FREE, we take the pages out of the paging
 *     queue and wake up the writer, which will handle freeing them for us.
 *
 *     We ensure that for any filesystem block, all pages for that
 *     block are either resident or not, even if those pages are higher
 *     than EOF; that means that we will be getting requests to free
 *     "unused" pages above EOF all the time, and should ignore them.
 *
 * Returns 0 when there was nothing to do, EWOULDBLOCK when the pagedaemon
 * asked us to clean dirty pages (the writer daemon is woken instead), the
 * error from lfs_seglock() if the seglock could not be taken, and
 * otherwise whatever genfs_putpages() returned.
 */
int
lfs_putpages(void *v)
{
	int error;
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;
	off_t origoffset, startoffset, endoffset, origendoffset, blkeof;
	off_t max_endoffset;
	int pages_per_block;
	int s, sync, dirty, pagedaemon;
	UVMHIST_FUNC("lfs_putpages"); UVMHIST_CALLED(ubchist);

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_lfs;
	sync = (ap->a_flags & PGO_SYNCIO);
	pagedaemon = (curproc == uvm.pagedaemon_proc);

	/* Putpages does nothing for metadata. */
	if (vp == fs->lfs_ivnode || vp->v_type != VREG) {
		simple_unlock(&vp->v_interlock);
		return 0;
	}

	/*
	 * If there are no pages, don't do anything.
	 * (If there are no dirty buffers either, take the vnode off
	 * the syncer worklist while we are here.)
	 */
	if (vp->v_uobj.uo_npages == 0) {
		s = splbio();
		if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
		    (vp->v_flag & VONWORKLST)) {
			vp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(vp, v_synclist);
		}
		splx(s);
		simple_unlock(&vp->v_interlock);
		return 0;
	}

	blkeof = blkroundup(fs, ip->i_ffs_size);

	/*
	 * Ignore requests to free pages past EOF but in the same block
	 * as EOF, unless the request is synchronous. (XXX why sync?)
	 * XXXUBC Make these pages look "active" so the pagedaemon won't
	 * XXXUBC bother us with them again.
	 */
	if (!sync && ap->a_offlo >= ip->i_ffs_size && ap->a_offlo < blkeof) {
		ap->a_offlo = blkeof;
		if (ap->a_offhi > 0 && ap->a_offhi <= ap->a_offlo) {
			simple_unlock(&vp->v_interlock);
			return 0;
		}
	}

	/*
	 * Extend page range to start and end at block boundaries.
	 * (For the purposes of VOP_PUTPAGES, fragments don't exist.)
	 */
	pages_per_block = fs->lfs_bsize >> PAGE_SHIFT;
	origoffset = ap->a_offlo;
	origendoffset = ap->a_offhi;
	startoffset = origoffset & ~(fs->lfs_bmask);
	max_endoffset = (trunc_page(LLONG_MAX) >> fs->lfs_bshift)
	    << fs->lfs_bshift;

	if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) {
		/* An end offset of zero, or PGO_ALLPAGES, means "to the end". */
		endoffset = max_endoffset;
		origendoffset = endoffset;
	} else {
		origendoffset = round_page(ap->a_offhi);
		endoffset = round_page(blkroundup(fs, origendoffset));
	}

	KASSERT(startoffset > 0 || endoffset >= startoffset);
	if (startoffset == endoffset) {
		/* Nothing to do, why were we called? */
		simple_unlock(&vp->v_interlock);
#ifdef DEBUG
		printf("lfs_putpages: startoffset = endoffset = %" PRId64 "\n",
		    startoffset);
#endif
		return 0;
	}

	ap->a_offlo = startoffset;
	ap->a_offhi = endoffset;

	/* Not asked to clean: genfs_putpages can handle it by itself. */
	if (!(ap->a_flags & PGO_CLEANIT))
		return genfs_putpages(v);

	/*
	 * Make sure that all pages in any given block are dirty, or
	 * none of them are.  Find out if any of the pages we've been
	 * asked about are dirty.  If none are dirty, send them on
	 * through genfs_putpages(), albeit with adjusted offsets.
	 * XXXUBC I am assuming here that they can't be dirtied in
	 * XXXUBC the meantime, but I bet that's wrong.
	 */
	dirty = check_dirty(fs, vp, startoffset, endoffset, blkeof, ap->a_flags);
	if (!dirty)
		return genfs_putpages(v);

	/*
	 * Dirty and asked to clean.
	 *
	 * Pagedaemon can't actually write LFS pages; wake up
	 * the writer to take care of that.  The writer will
	 * notice the pager inode queue and act on that.
	 *
	 * We are not handing off to genfs_putpages() here, so we have
	 * to release the interlock ourselves before returning.
	 */
	if (pagedaemon) {
		++fs->lfs_pdflush;
		wakeup(&lfs_writer_daemon);
		simple_unlock(&vp->v_interlock);
		return EWOULDBLOCK;
	}

	/*
	 * If this is a file created in a recent dirop, we can't flush its
	 * inode until the dirop is complete.  Drain dirops, then flush the
	 * filesystem (taking care of any other pending dirops while we're
	 * at it).
	 */
	if ((ap->a_flags & (PGO_CLEANIT|PGO_LOCKED)) == PGO_CLEANIT &&
	    (vp->v_flag & VDIROP)) {
		int locked;

		/* printf("putpages to clean VDIROP, flushing\n"); */
		while (fs->lfs_dirops > 0) {
			++fs->lfs_diropwait;
			tsleep(&fs->lfs_writer, PRIBIO+1, "ppdirop", 0);
			--fs->lfs_diropwait;
		}
		++fs->lfs_writer;

		/*
		 * Drop the vnode lock (if we are the holder) and the
		 * interlock across the flush, then take them back.
		 */
		locked = VOP_ISLOCKED(vp) && /* XXX */
		    vp->v_lock.lk_lockholder == curproc->p_pid;
		if (locked)
			VOP_UNLOCK(vp, 0);
		simple_unlock(&vp->v_interlock);

		lfs_flush_fs(fs, sync ? SEGM_SYNC : 0);

		simple_lock(&vp->v_interlock);
		if (locked)
			VOP_LOCK(vp, LK_EXCLUSIVE);
		if (--fs->lfs_writer == 0)
			wakeup(&fs->lfs_dirops);

		/* XXX the flush should have taken care of this one too! */
	}

	/*
	 * This is it.  We are going to write some pages.  From here on
	 * down it's all just mechanics.
	 *
	 * If there is more than one page per block, we don't want to get
	 * caught locking them backwards; so set PGO_BUSYFAIL to avoid
	 * deadlocks.  Also, don't let genfs_putpages wait;
	 * lfs_segunlock will wait for us, if need be.
	 */
	ap->a_flags &= ~PGO_SYNCIO;
	if (pages_per_block > 1)
		ap->a_flags |= PGO_BUSYFAIL;

	/*
	 * If we've already got the seglock, flush the node and return.
	 * The FIP has already been set up for us by lfs_writefile,
	 * and FIP cleanup and lfs_updatemeta will also be done there,
	 * unless genfs_putpages returns EDEADLK; then we must flush
	 * what we have, and correct FIP and segment header accounting.
	 */
	if (ap->a_flags & PGO_LOCKED) {
		sp = fs->lfs_sp;
		sp->vp = vp;

		/*
		 * XXXUBC
		 * There is some danger here that we might run out of
		 * buffers if we flush too much at once.  If the number
		 * of dirty buffers is too great, we should cut the range
		 * down and write in chunks.
		 *
		 * NOTE(review): the unlock/relock at the bottom of this loop
		 * presumes the interlock is still held when genfs_putpages()
		 * returns EDEADLK -- confirm against genfs_putpages().
		 */
		while ((error = genfs_putpages(v)) == EDEADLK) {
#ifdef DEBUG_LFS
			printf("lfs_putpages: genfs_putpages returned EDEADLK"
			    " ino %d off %x (seg %d)\n",
			    ip->i_number, fs->lfs_offset,
			    dtosn(fs, fs->lfs_offset));
#endif
			/* Write gathered pages */
			lfs_updatemeta(sp);
			(void) lfs_writeseg(fs, sp);

			/* Reinitialize brand new FIP and add us to it */
			sp->vp = vp;
			sp->fip->fi_version = ip->i_ffs_gen;
			sp->fip->fi_ino = ip->i_number;
			/* Add us to the new segment summary. */
			++((SEGSUM *)(sp->segsum))->ss_nfinfo;
			sp->sum_bytes_left -=
			    sizeof(struct finfo) - sizeof(int32_t);

			/* Give the write a chance to complete */
			simple_unlock(&vp->v_interlock);
			preempt(NULL);
			simple_lock(&vp->v_interlock);
		}
		return error;
	}

	/*
	 * Take the seglock, because we are going to be writing pages.
	 * If that fails we bail out without calling genfs_putpages(),
	 * so the interlock has to be released here.
	 */
	if ((error = lfs_seglock(fs, SEGM_PROT | (sync ? SEGM_SYNC : 0))) != 0) {
		simple_unlock(&vp->v_interlock);
		return error;
	}

	/*
	 * VOP_PUTPAGES should not be called while holding the seglock.
	 * XXX fix lfs_markv, or do this properly.
	 */
	KASSERT(fs->lfs_seglock == 1);

	/*
	 * We assume we're being called with sp->fip pointing at blank space.
	 * Account for a new FIP in the segment header, and set sp->vp.
	 * (This should duplicate the setup at the top of lfs_writefile().)
	 */
	sp = fs->lfs_sp;
	if (sp->seg_bytes_left < fs->lfs_bsize ||
	    sp->sum_bytes_left < sizeof(struct finfo))
		(void) lfs_writeseg(fs, fs->lfs_sp);

	sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(int32_t);
	++((SEGSUM *)(sp->segsum))->ss_nfinfo;
	sp->vp = vp;

	if (vp->v_flag & VDIROP)
		((SEGSUM *)(sp->segsum))->ss_flags |= (SS_DIROP|SS_CONT);

	sp->fip->fi_nblocks = 0;
	sp->fip->fi_ino = ip->i_number;
	sp->fip->fi_version = ip->i_ffs_gen;

	/*
	 * Loop through genfs_putpages until all pages are gathered.
	 */
	/*
	 * There is some danger here that we might run out of
	 * buffers if we flush too much at once.  If the number
	 * of dirty buffers is too great, then, cut the range down
	 * and write in chunks.
	 *
	 * XXXUBC this assumes a uniform dirtying of the pages
	 * XXXUBC across the address space
	 * XXXXXX do this
	 */
	while ((error = genfs_putpages(v)) == EDEADLK) {
#ifdef DEBUG_LFS
		printf("lfs_putpages: genfs_putpages returned EDEADLK [2]"
		    " ino %d off %x (seg %d)\n",
		    ip->i_number, fs->lfs_offset,
		    dtosn(fs, fs->lfs_offset));
#endif
		/* Write gathered pages */
		lfs_updatemeta(sp);
		(void) lfs_writeseg(fs, sp);

		/*
		 * Reinitialize brand new FIP and add us to it.
		 * (This should duplicate the fixup in lfs_gatherpages().)
		 */
		sp->vp = vp;
		sp->fip->fi_version = ip->i_ffs_gen;
		sp->fip->fi_ino = ip->i_number;
		/* Add us to the new segment summary. */
		++((SEGSUM *)(sp->segsum))->ss_nfinfo;
		sp->sum_bytes_left -=
		    sizeof(struct finfo) - sizeof(int32_t);

		/* Give the write a chance to complete */
		simple_unlock(&vp->v_interlock);
		preempt(NULL);
		simple_lock(&vp->v_interlock);
	}

	/*
	 * Blocks are now gathered into a segment waiting to be written.
	 * All that's left to do is update metadata, and write them.
	 */
	lfs_updatemeta(fs->lfs_sp);
	fs->lfs_sp->vp = NULL;
	lfs_writeseg(fs, fs->lfs_sp);

	/*
	 * Clean up FIP.
	 * (This should duplicate cleanup at the end of lfs_writefile().)
	 */
	if (sp->fip->fi_nblocks != 0) {
		/* Step past the FINFO we just filled in. */
		sp->fip = (FINFO*)((caddr_t)sp->fip + sizeof(struct finfo) +
		    sizeof(int32_t) * (sp->fip->fi_nblocks - 1));
		sp->start_lbp = &sp->fip->fi_blocks[0];
	} else {
		/* Nothing was gathered; give back the FINFO we reserved. */
		sp->sum_bytes_left += sizeof(FINFO) - sizeof(int32_t);
		--((SEGSUM *)(sp->segsum))->ss_nfinfo;
	}

	/*
	 * XXX - with the malloc/copy writeseg, the pages are freed by now
	 * even if we don't wait (e.g. if we hold a nested lock).  This
	 * will not be true if we stop using malloc/copy.
	 */
	KASSERT(fs->lfs_sp->seg_flags & SEGM_PROT);
	lfs_segunlock(fs);

	/*
	 * Wait for v_numoutput to drop to zero.  The seglock should
	 * take care of this, but there is a slight possibility that
	 * aiodoned might not have got around to our buffers yet.
	 */
	if (sync) {
		s = splbio();
		simple_lock(&global_v_numoutput_slock);
		while(vp->v_numoutput > 0) {
#ifdef DEBUG
			printf("ino %d sleeping on num %d\n",
			    ip->i_number, vp->v_numoutput);
#endif
			vp->v_flag |= VBWAIT;
			simple_unlock(&global_v_numoutput_slock);
			tsleep(&vp->v_numoutput, PRIBIO + 1, "lfs_vn", 0);
			simple_lock(&global_v_numoutput_slock);
		}
		simple_unlock(&global_v_numoutput_slock);
		splx(s);
	}
	return error;
}
/*
 * Find out whether the vnode has any blocks or pages waiting to be written.
 * We used to just check LIST_EMPTY(&vp->v_dirtyblkhd), but there is not
 * presently as simple a mechanism for the page cache.
 *
 * Returns nonzero if nothing is waiting to be written, zero otherwise.
 * Takes and releases vp->v_interlock internally (except on the
 * non-regular-file / Ifile shortcut, which does not lock at all).
 */
int
lfs_checkifempty(struct vnode *vp)
{
	struct vm_page *pg;
	struct buf *bp;
	int r, s;

	/* Non-regular files and the Ifile: the dirty buffer list decides. */
	if (vp->v_type != VREG || VTOI(vp)->i_number == LFS_IFILE_INUM)
		return LIST_EMPTY(&vp->v_dirtyblkhd);

	/*
	 * For vnodes with pages it is a little more complex.
	 * Pages that have been written (i.e. are "clean" for our purposes)
	 * might be in seemingly dirty buffers, so we have to troll
	 * looking for indirect block buffers as well as pages.
	 */
	simple_lock(&vp->v_interlock);
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp;
	    bp = LIST_NEXT(bp, b_vnbufs)) {
		if (bp->b_lblkno < 0) {
			/*
			 * A dirty buffer with a negative logical block
			 * number: not empty.  Drop the interlock before
			 * returning, as the normal exit path does.
			 */
			splx(s);
			simple_unlock(&vp->v_interlock);
			return 0;
		}
	}
	splx(s);

	/*
	 * Run through the page list to find dirty pages.
	 * Right now I just walk the memq.
	 */
	pg = TAILQ_FIRST(&vp->v_uobj.memq);
	r = 1;
	while(pg) {
		if ((pg->flags & PG_CLEAN) == 0 || pmap_is_modified(pg)) {
			r = 0;
			break;
		}
		pg = TAILQ_NEXT(pg, listq);
	}
#if 0
	if (r != !(vp->v_flag & VONWORKLST)) {
		printf("nope, VONWORKLST isn't good enough!\n");
	}
#endif
	simple_unlock(&vp->v_interlock);
	return r;
}
/*
 * Compute, in *eobp, the end of the last block that should be written for
 * this file when a write ends at "size".
 *
 * Exactly one of GOP_SIZE_READ or GOP_SIZE_WRITE must be set in "flags".
 * With GOP_SIZE_WRITE the answer is rounded up to a fragment when the block
 * holding "size" is a direct block (below NDADDR) that is not before the
 * block holding the inode's current size; in every other case the answer
 * is rounded up to a full block.
 */
void
lfs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
{
	struct inode *ip;
	struct lfs *fs;
	daddr_t cur_lbn, new_lbn;

	ip = VTOI(vp);
	fs = ip->i_lfs;

	KASSERT(flags & (GOP_SIZE_READ | GOP_SIZE_WRITE));
	KASSERT((flags & (GOP_SIZE_READ | GOP_SIZE_WRITE))
		!= (GOP_SIZE_READ | GOP_SIZE_WRITE));

	cur_lbn = lblkno(fs, ip->i_ffs_size);
	new_lbn = lblkno(fs, size);

	if (flags & GOP_SIZE_WRITE) {
		if (new_lbn < NDADDR && cur_lbn <= new_lbn) {
			/* Fragment granularity applies. */
			*eobp = fragroundup(fs, size);
			return;
		}
	}

	/* Everything else is handled in whole blocks. */
	*eobp = blkroundup(fs, size);
}
#ifdef DEBUG
void lfs_dump_vop(void *);

/*
 * Debugging aid: given a pointer to a putpages-style vnode operation
 * argument structure, print the vnode it refers to followed by that
 * vnode's dinode.
 */
void
lfs_dump_vop(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *args = v;
	struct vnode *vp = args->a_vp;

	vfs_vnode_print(vp, 0, printf);
	lfs_dump_dinode(&VTOI(vp)->i_din.ffs_din);
}
#endif
int
lfs_mmap(void *v)
{
struct vop_mmap_args /* {
const struct vnodeop_desc *a_desc;
struct vnode *a_vp;
int a_fflags;
struct ucred *a_cred;
struct proc *a_p;
} */ *ap = v;
if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM)
return EOPNOTSUPP;
return ufs_mmap(v);
}
#endif /* LFS_UBC */

View File

@ -1,4 +1,4 @@
/* $NetBSD: inode.h,v 1.32 2003/01/24 21:55:29 fvdl Exp $ */
/* $NetBSD: inode.h,v 1.33 2003/02/17 23:48:23 perseant Exp $ */
/*
* Copyright (c) 1982, 1989, 1993
@ -58,11 +58,7 @@ struct ext2fs_inode_ext {
daddr_t ext2fs_last_blk; /* last block allocated on disk */
};
struct lfs_inode_ext {
off_t lfs_osize; /* size of file on disk */
u_int32_t lfs_effnblocks; /* number of blocks when i/o completes */
size_t lfs_fragsize[NDADDR]; /* size of on-disk direct blocks */
};
struct lfs_inode_ext;
/*
* The inode is used to describe each active (or recently active) file in the
@ -111,13 +107,10 @@ struct inode {
union {
/* Other extensions could go here... */
struct ext2fs_inode_ext e2fs;
struct lfs_inode_ext lfs;
struct lfs_inode_ext *lfs;
} inode_ext;
#define i_e2fs_last_lblk inode_ext.e2fs.ext2fs_last_lblk
#define i_e2fs_last_blk inode_ext.e2fs.ext2fs_last_blk
#define i_lfs_effnblks inode_ext.lfs.lfs_effnblocks
#define i_lfs_fragsize inode_ext.lfs.lfs_fragsize
#define i_lfs_osize inode_ext.lfs.lfs_osize
/*
* The on-disk dinode itself.
*/
@ -179,6 +172,7 @@ struct inode {
#define IN_CLEANING 0x0100 /* LFS: file is being cleaned */
#define IN_ADIROP 0x0200 /* LFS: dirop in progress */
#define IN_SPACECOUNTED 0x0400 /* Blocks to be freed in free count. */
#define IN_PAGING 0x1000 /* LFS: file is on paging queue */
#if defined(_KERNEL)
/*

View File

@ -1,4 +1,4 @@
/* $NetBSD: ufs_extern.h,v 1.30 2003/01/24 21:55:30 fvdl Exp $ */
/* $NetBSD: ufs_extern.h,v 1.31 2003/02/17 23:48:23 perseant Exp $ */
/*-
* Copyright (c) 1991, 1993, 1994
@ -167,6 +167,7 @@ void ufs_vinit __P((struct mount *, int (**) __P((void *)),
int (**) __P((void *)), struct vnode **));
int ufs_makeinode __P((int, struct vnode *, struct vnode **,
struct componentname *));
int ufs_gop_alloc __P((struct vnode *, off_t, off_t, int, struct ucred *));
/*
* Soft dependency function prototypes.

View File

@ -1,4 +1,4 @@
/* $NetBSD: ufs_inode.c,v 1.33 2002/01/26 08:32:05 chs Exp $ */
/* $NetBSD: ufs_inode.c,v 1.34 2003/02/17 23:48:23 perseant Exp $ */
/*
* Copyright (c) 1991, 1993
@ -41,7 +41,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.33 2002/01/26 08:32:05 chs Exp $");
__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.34 2003/02/17 23:48:23 perseant Exp $");
#include "opt_quota.h"
@ -192,10 +192,10 @@ ufs_balloc_range(vp, off, len, cred, flags)
vp, off, len, vp->v_size);
oldeof = vp->v_size;
GOP_SIZE(vp, oldeof, &oldeob);
GOP_SIZE(vp, oldeof, &oldeob, GOP_SIZE_WRITE);
neweof = MAX(vp->v_size, off + len);
GOP_SIZE(vp, neweof, &neweob);
GOP_SIZE(vp, neweof, &neweob, GOP_SIZE_WRITE);
error = 0;
uobj = &vp->v_uobj;

View File

@ -1,4 +1,4 @@
/* $NetBSD: ufs_readwrite.c,v 1.47 2003/01/24 21:55:30 fvdl Exp $ */
/* $NetBSD: ufs_readwrite.c,v 1.48 2003/02/17 23:48:23 perseant Exp $ */
/*-
* Copyright (c) 1993
@ -36,7 +36,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.47 2003/01/24 21:55:30 fvdl Exp $");
__KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.48 2003/02/17 23:48:23 perseant Exp $");
#ifdef LFS_READWRITE
#define BLKSIZE(a, b, c) blksize(a, b, c)
@ -110,9 +110,13 @@ READ(void *v)
goto out;
}
#ifndef LFS_READWRITE
#ifdef LFS_READWRITE
# ifdef LFS_UBC
usepc = (vp->v_type == VREG && ip->i_number != LFS_IFILE_INUM);
# endif
#else /* !LFS_READWRITE */
usepc = vp->v_type == VREG;
#endif
#endif /* !LFS_READWRITE */
if (usepc) {
while (uio->uio_resid > 0) {
bytelen = MIN(ip->i_ffs_size - uio->uio_offset,
@ -278,9 +282,14 @@ WRITE(void *v)
bsize = fs->fs_bsize;
error = 0;
#ifndef LFS_READWRITE
#ifdef LFS_READWRITE
# ifdef LFS_UBC
async = TRUE;
usepc = vp->v_type == VREG;
#endif
# endif
#else /* !LFS_READWRITE */
usepc = vp->v_type == VREG;
#endif /* !LFS_READWRITE */
if (!usepc) {
goto bcache;
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: ufs_vnops.c,v 1.89 2002/12/31 15:00:18 yamt Exp $ */
/* $NetBSD: ufs_vnops.c,v 1.90 2003/02/17 23:48:23 perseant Exp $ */
/*
* Copyright (c) 1982, 1986, 1989, 1993, 1995
@ -41,7 +41,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.89 2002/12/31 15:00:18 yamt Exp $");
__KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.90 2003/02/17 23:48:23 perseant Exp $");
#include "opt_quota.h"
#include "fs_lfs.h"
@ -73,6 +73,8 @@ __KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.89 2002/12/31 15:00:18 yamt Exp $");
#include <ufs/ext2fs/ext2fs_extern.h>
#include <ufs/lfs/lfs_extern.h>
#include <uvm/uvm.h>
static int ufs_chmod(struct vnode *, int, struct ucred *, struct proc *);
static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *,
struct proc *);
@ -2071,3 +2073,49 @@ ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
vput(tvp);
return (error);
}
/*
 * Allocate len bytes at offset off.
 *
 * The range is first extended downward to the start of the filesystem
 * block containing "off", then handed to VOP_BALLOC() one block-sized
 * piece at a time (the final piece may be shorter).  After each
 * successful piece the inode size is raised if the piece ends beyond it.
 * Returns 0, or the first error reported by VOP_BALLOC().
 */
int
ufs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags,
    struct ucred *cred)
{
	struct inode *ip = VTOI(vp);
	int error, slop, bshift, chunk;
	UVMHIST_FUNC("ufs_gop_alloc"); UVMHIST_CALLED(ubchist);

	bshift = vp->v_mount->mnt_fs_bshift;
	chunk = 1 << bshift;

	/* Pull the start back to a block boundary and lengthen to match. */
	slop = off & (chunk - 1);
	off -= slop;
	len += slop;

	error = 0;
	for (; len > 0; off += chunk, len -= chunk) {
		/* Never ask for more than what is left. */
		if (len < chunk)
			chunk = len;

		error = VOP_BALLOC(vp, off, chunk, cred, flags, NULL);
		if (error)
			break;

		/*
		 * increase file size now, VOP_BALLOC() requires that
		 * EOF be up-to-date before each call.
		 */
		if (ip->i_ffs_size < off + chunk) {
			UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x",
			    vp, ip->i_ffs_size, off + chunk, 0);
			ip->i_ffs_size = off + chunk;
		}
	}

	return error;
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: uvm_page.c,v 1.83 2003/02/01 06:23:55 thorpej Exp $ */
/* $NetBSD: uvm_page.c,v 1.84 2003/02/17 23:48:24 perseant Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.83 2003/02/01 06:23:55 thorpej Exp $");
__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.84 2003/02/17 23:48:24 perseant Exp $");
#include "opt_uvmhist.h"
@ -1254,7 +1254,7 @@ uvm_pagefree(pg)
if (pg->flags & PG_WANTED) {
wakeup(pg);
}
pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED);
pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1);
#ifdef UVM_PAGE_TRKOWN
pg->owner_tag = NULL;
#endif

View File

@ -1,4 +1,4 @@
/* $NetBSD: uvm_pager.h,v 1.25 2002/03/25 02:08:10 chs Exp $ */
/* $NetBSD: uvm_pager.h,v 1.26 2003/02/17 23:48:24 perseant Exp $ */
/*
*
@ -124,6 +124,7 @@ struct uvm_pagerops {
#define PGO_ALLPAGES 0x010 /* flush whole object/get all pages */
#define PGO_LOCKED 0x040 /* fault data structures are locked [get] */
#define PGO_BUSYFAIL 0x080 /* fail if a page is busy [put] */
#define PGO_OVERWRITE 0x200 /* pages will be overwritten before unlocked */
#define PGO_PASTEOF 0x400 /* allow allocation of pages past EOF */

View File

@ -1,4 +1,4 @@
/* $NetBSD: dumplfs.c,v 1.21 2003/01/28 07:44:54 mrg Exp $ */
/* $NetBSD: dumplfs.c,v 1.22 2003/02/17 23:48:25 perseant Exp $ */
/*-
* Copyright (c) 1991, 1993
@ -45,7 +45,7 @@ __COPYRIGHT(
#if 0
static char sccsid[] = "@(#)dumplfs.c 8.5 (Berkeley) 5/24/95";
#else
__RCSID("$NetBSD: dumplfs.c,v 1.21 2003/01/28 07:44:54 mrg Exp $");
__RCSID("$NetBSD: dumplfs.c,v 1.22 2003/02/17 23:48:25 perseant Exp $");
#endif
#endif /* not lint */
@ -678,7 +678,7 @@ dump_super(struct lfs *lfsp)
(void)printf(" Checkpoint Info\n");
(void)printf(" %s%-10d %s0x%-8x %s%-10d\n",
"free ", lfsp->lfs_free,
"freehd ", lfsp->lfs_freehd,
"idaddr ", lfsp->lfs_idaddr,
"ifile ", lfsp->lfs_ifile);
(void)printf(" %s%-10d %s%-10d %s%-10d\n",

View File

@ -1,4 +1,4 @@
/* $NetBSD: quotaon.c,v 1.17 2002/07/20 08:40:20 grant Exp $ */
/* $NetBSD: quotaon.c,v 1.18 2003/02/17 23:48:25 perseant Exp $ */
/*
* Copyright (c) 1980, 1990, 1993
@ -46,7 +46,7 @@ __COPYRIGHT("@(#) Copyright (c) 1980, 1990, 1993\n\
#if 0
static char sccsid[] = "@(#)quotaon.c 8.1 (Berkeley) 6/6/93";
#else
__RCSID("$NetBSD: quotaon.c,v 1.17 2002/07/20 08:40:20 grant Exp $");
__RCSID("$NetBSD: quotaon.c,v 1.18 2003/02/17 23:48:25 perseant Exp $");
#endif
#endif /* not lint */
@ -128,7 +128,8 @@ main(argc, argv)
}
setfsent();
while ((fs = getfsent()) != NULL) {
if (strcmp(fs->fs_vfstype, "ffs") ||
if ((strcmp(fs->fs_vfstype, "ffs") &&
strcmp(fs->fs_vfstype, "lfs")) ||
strcmp(fs->fs_type, FSTAB_RW))
continue;
if (aflag) {