Merge the simonb-wapbl branch. From the original branch commit:

Add Wasabi Systems' WAPBL (Write Ahead Physical Block Logging)
   journaling code.  Originally written by Darrin B. Jewell while
   at Wasabi and updated to -current by Antti Kantee, Andy Doran,
   Greg Oster and Simon Burge.

OK'd by core@, releng@.
This commit is contained in:
simonb 2008-07-31 05:38:04 +00:00
parent d1dfd4fec7
commit 36d65f1138
53 changed files with 6837 additions and 296 deletions

View File

@ -1,4 +1,4 @@
# $NetBSD: mi,v 1.1170 2008/07/29 13:17:40 pooka Exp $ # $NetBSD: mi,v 1.1171 2008/07/31 05:38:04 simonb Exp $
# #
# Note: don't delete entries from here - mark them as "obsolete" instead. # Note: don't delete entries from here - mark them as "obsolete" instead.
# #
@ -2069,6 +2069,7 @@
./usr/include/sys/vnode_if.h comp-c-include ./usr/include/sys/vnode_if.h comp-c-include
./usr/include/sys/vsio.h comp-obsolete obsolete ./usr/include/sys/vsio.h comp-obsolete obsolete
./usr/include/sys/wait.h comp-c-include ./usr/include/sys/wait.h comp-c-include
./usr/include/sys/wapbl.h comp-c-include
./usr/include/sys/wdog.h comp-c-include ./usr/include/sys/wdog.h comp-c-include
./usr/include/sysexits.h comp-c-include ./usr/include/sysexits.h comp-c-include
./usr/include/syslog.h comp-c-include ./usr/include/syslog.h comp-c-include
@ -2100,6 +2101,7 @@
./usr/include/ufs/ufs/quota.h comp-c-include ./usr/include/ufs/ufs/quota.h comp-c-include
./usr/include/ufs/ufs/ufs_bswap.h comp-c-include ./usr/include/ufs/ufs/ufs_bswap.h comp-c-include
./usr/include/ufs/ufs/ufs_extern.h comp-c-include ./usr/include/ufs/ufs/ufs_extern.h comp-c-include
./usr/include/ufs/ufs/ufs_wapbl.h comp-c-include
./usr/include/ufs/ufs/ufsmount.h comp-c-include ./usr/include/ufs/ufs/ufsmount.h comp-c-include
./usr/include/ulimit.h comp-c-include ./usr/include/ulimit.h comp-c-include
./usr/include/unctrl.h comp-c-include ./usr/include/unctrl.h comp-c-include

View File

@ -1,4 +1,4 @@
/* $NetBSD: mntopts.h,v 1.10 2006/10/31 08:12:46 mjf Exp $ */ /* $NetBSD: mntopts.h,v 1.11 2008/07/31 05:38:04 simonb Exp $ */
/*- /*-
* Copyright (c) 1994 * Copyright (c) 1994
@ -55,6 +55,7 @@ struct mntopt {
#define MOPT_NOATIME { "atime", 1, MNT_NOATIME, 0 } #define MOPT_NOATIME { "atime", 1, MNT_NOATIME, 0 }
#define MOPT_SYMPERM { "symperm", 0, MNT_SYMPERM, 0 } #define MOPT_SYMPERM { "symperm", 0, MNT_SYMPERM, 0 }
#define MOPT_SOFTDEP { "softdep", 0, MNT_SOFTDEP, 0 } #define MOPT_SOFTDEP { "softdep", 0, MNT_SOFTDEP, 0 }
#define MOPT_LOG { "log", 0, MNT_LOG, 0 }
#define MOPT_IGNORE { "hidden", 0, MNT_IGNORE, 0 } #define MOPT_IGNORE { "hidden", 0, MNT_IGNORE, 0 }
/* Control flags. */ /* Control flags. */

View File

@ -1,4 +1,4 @@
# $NetBSD: Makefile,v 1.35 2008/05/04 15:37:19 tsutsui Exp $ # $NetBSD: Makefile,v 1.36 2008/07/31 05:38:04 simonb Exp $
# @(#)Makefile 8.2 (Berkeley) 4/27/95 # @(#)Makefile 8.2 (Berkeley) 4/27/95
.include <bsd.own.mk> .include <bsd.own.mk>
@ -19,6 +19,10 @@ SRCS+= progress.c
.PATH: ${NETBSDSRCDIR}/sys/ufs/ffs ${FSCK} .PATH: ${NETBSDSRCDIR}/sys/ufs/ffs ${FSCK}
SRCS+= vfs_wapbl.c wapbl.c
.PATH: ${NETBSDSRCDIR}/sys/kern
CPPFLAGS+=-DWAPBL_DEBUG_PRINT=0
LDADD+=-lutil LDADD+=-lutil
DPADD+=${LIBUTIL} DPADD+=${LIBUTIL}

View File

@ -1,4 +1,4 @@
/* $NetBSD: extern.h,v 1.22 2005/06/27 01:25:35 christos Exp $ */ /* $NetBSD: extern.h,v 1.23 2008/07/31 05:38:04 simonb Exp $ */
/* /*
* Copyright (c) 1994 James A. Jegers * Copyright (c) 1994 James A. Jegers
@ -27,7 +27,7 @@
void adjust(struct inodesc *, int); void adjust(struct inodesc *, int);
daddr_t allocblk(long); daddr_t allocblk(long);
ino_t allocdir(ino_t, ino_t, int); ino_t allocdir(ino_t, ino_t, int);
ino_t allocino(ino_t request, int type); ino_t allocino(ino_t, int);
void blkerror(ino_t, const char *, daddr_t); void blkerror(ino_t, const char *, daddr_t);
int bread(int, char *, daddr_t, long); int bread(int, char *, daddr_t, long);
void bufinit(void); void bufinit(void);
@ -82,7 +82,12 @@ void setinodebuf(ino_t);
int setup(const char *); int setup(const char *);
void voidquit(int); void voidquit(int);
void swap_cg(struct cg *, struct cg *); void replay_wapbl(void);
void copyback_cg(struct bufarea *); void cleanup_wapbl(void);
void sb_oldfscompat_write(struct fs *, struct fs *); int read_wapbl(char *, long, daddr_t);
void sb_oldfscompat_read(struct fs *, struct fs **); int is_journal_inode(ino_t);
void swap_cg(struct cg *, struct cg *);
void copyback_cg(struct bufarea *);
void sb_oldfscompat_write(struct fs *, struct fs *);
void sb_oldfscompat_read(struct fs *, struct fs **);

View File

@ -1,4 +1,4 @@
.\" $NetBSD: fsck_ffs.8,v 1.40 2005/01/19 16:41:04 wiz Exp $ .\" $NetBSD: fsck_ffs.8,v 1.41 2008/07/31 05:38:04 simonb Exp $
.\" .\"
.\" Copyright (c) 1980, 1989, 1991, 1993 .\" Copyright (c) 1980, 1989, 1991, 1993
.\" The Regents of the University of California. All rights reserved. .\" The Regents of the University of California. All rights reserved.
@ -198,7 +198,7 @@ possible without user interaction.
Conversion in preen mode is best used when all the file systems Conversion in preen mode is best used when all the file systems
are being converted at once. are being converted at once.
The format of a file system can be determined from the The format of a file system can be determined from the
second line of output from third line of output from
.Xr dumpfs 8 . .Xr dumpfs 8 .
.It Fl d .It Fl d
Print debugging output. Print debugging output.

View File

@ -1,4 +1,4 @@
/* $NetBSD: pass4.c,v 1.24 2008/02/23 21:41:48 christos Exp $ */ /* $NetBSD: pass4.c,v 1.25 2008/07/31 05:38:04 simonb Exp $ */
/* /*
* Copyright (c) 1980, 1986, 1993 * Copyright (c) 1980, 1986, 1993
@ -34,7 +34,7 @@
#if 0 #if 0
static char sccsid[] = "@(#)pass4.c 8.4 (Berkeley) 4/28/95"; static char sccsid[] = "@(#)pass4.c 8.4 (Berkeley) 4/28/95";
#else #else
__RCSID("$NetBSD: pass4.c,v 1.24 2008/02/23 21:41:48 christos Exp $"); __RCSID("$NetBSD: pass4.c,v 1.25 2008/07/31 05:38:04 simonb Exp $");
#endif #endif
#endif /* not lint */ #endif /* not lint */
@ -89,7 +89,14 @@ pass4(void)
case DFOUND: case DFOUND:
n = info->ino_linkcnt; n = info->ino_linkcnt;
if (n) { if (n) {
adjust(&idesc, (short)n); if (is_journal_inode(inumber)) {
if (debug)
printf(
"skipping unreferenced journal inode %" PRId64 "\n", inumber);
break;
} else {
adjust(&idesc, (short)n);
}
break; break;
} }
for (zlnp = zlnhead; zlnp; zlnp = zlnp->next) for (zlnp = zlnhead; zlnp; zlnp = zlnp->next)

View File

@ -1,4 +1,4 @@
/* $NetBSD: setup.c,v 1.82 2008/02/23 21:41:48 christos Exp $ */ /* $NetBSD: setup.c,v 1.83 2008/07/31 05:38:04 simonb Exp $ */
/* /*
* Copyright (c) 1980, 1986, 1993 * Copyright (c) 1980, 1986, 1993
@ -34,7 +34,7 @@
#if 0 #if 0
static char sccsid[] = "@(#)setup.c 8.10 (Berkeley) 5/9/95"; static char sccsid[] = "@(#)setup.c 8.10 (Berkeley) 5/9/95";
#else #else
__RCSID("$NetBSD: setup.c,v 1.82 2008/02/23 21:41:48 christos Exp $"); __RCSID("$NetBSD: setup.c,v 1.83 2008/07/31 05:38:04 simonb Exp $");
#endif #endif
#endif /* not lint */ #endif /* not lint */
@ -159,6 +159,25 @@ setup(const char *dev)
doskipclean = 0; doskipclean = 0;
pwarn("USING ALTERNATE SUPERBLOCK AT %d\n", bflag); pwarn("USING ALTERNATE SUPERBLOCK AT %d\n", bflag);
} }
if (sblock->fs_flags & FS_DOWAPBL) {
if (preen) {
if (!quiet)
pwarn("file system is journaled; not checking\n");
return (-1);
}
if (!quiet)
pwarn("** File system is journaled; replaying journal\n");
replay_wapbl();
doskipclean = 0;
sblock->fs_flags &= ~FS_DOWAPBL;
sbdirty();
/* Although we may have updated the superblock from the
* journal, we are still going to do a full check, so we
* don't bother to re-read the superblock from the journal.
* XXX, instead we could re-read the superblock and then not
* force doskipclean = 0
*/
}
if (debug) if (debug)
printf("clean = %d\n", sblock->fs_clean); printf("clean = %d\n", sblock->fs_clean);
if (doswap) if (doswap)
@ -218,6 +237,13 @@ setup(const char *dev)
/* /*
* Check and potentially fix certain fields in the super block. * Check and potentially fix certain fields in the super block.
*/ */
if (sblock->fs_flags & ~(FS_KNOWN_FLAGS)) {
pfatal("UNKNOWN FLAGS=0x%08x IN SUPERBLOCK", sblock->fs_flags);
if (reply("CLEAR") == 1) {
sblock->fs_flags &= FS_KNOWN_FLAGS;
sbdirty();
}
}
if (sblock->fs_optim != FS_OPTTIME && sblock->fs_optim != FS_OPTSPACE) { if (sblock->fs_optim != FS_OPTTIME && sblock->fs_optim != FS_OPTSPACE) {
pfatal("UNDEFINED OPTIMIZATION IN SUPERBLOCK"); pfatal("UNDEFINED OPTIMIZATION IN SUPERBLOCK");
if (reply("SET TO DEFAULT") == 1) { if (reply("SET TO DEFAULT") == 1) {

View File

@ -1,4 +1,4 @@
/* $NetBSD: utilities.c,v 1.55 2008/02/23 21:41:48 christos Exp $ */ /* $NetBSD: utilities.c,v 1.56 2008/07/31 05:38:04 simonb Exp $ */
/* /*
* Copyright (c) 1980, 1986, 1993 * Copyright (c) 1980, 1986, 1993
@ -34,7 +34,7 @@
#if 0 #if 0
static char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95"; static char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95";
#else #else
__RCSID("$NetBSD: utilities.c,v 1.55 2008/02/23 21:41:48 christos Exp $"); __RCSID("$NetBSD: utilities.c,v 1.56 2008/07/31 05:38:04 simonb Exp $");
#endif #endif
#endif /* not lint */ #endif /* not lint */
@ -322,6 +322,7 @@ ckfini(void)
if (debug) if (debug)
printf("cache missed %ld of %ld (%d%%)\n", diskreads, printf("cache missed %ld of %ld (%d%%)\n", diskreads,
totalreads, (int)(diskreads * 100 / totalreads)); totalreads, (int)(diskreads * 100 / totalreads));
cleanup_wapbl();
(void)close(fsreadfd); (void)close(fsreadfd);
(void)close(fswritefd); (void)close(fswritefd);
} }
@ -335,7 +336,8 @@ bread(int fd, char *buf, daddr_t blk, long size)
offset = blk; offset = blk;
offset *= dev_bsize; offset *= dev_bsize;
if (pread(fd, buf, (int)size, offset) == size) if ((pread(fd, buf, (int)size, offset) == size) &&
read_wapbl(buf, size, blk) == 0)
return (0); return (0);
rwerror("READ", blk); rwerror("READ", blk);
errs = 0; errs = 0;

202
sbin/fsck_ffs/wapbl.c Normal file
View File

@ -0,0 +1,202 @@
/* $NetBSD: wapbl.c,v 1.2 2008/07/31 05:38:04 simonb Exp $ */
/*-
* Copyright (c) 2005,2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/* This file contains fsck support for wapbl
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: wapbl.c,v 1.2 2008/07/31 05:38:04 simonb Exp $");
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <ufs/ufs/dinode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_wapbl.h>
#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
#include <sys/wapbl.h>
#include "fsck.h"
#include "fsutil.h"
#include "extern.h"
#include "exitvalues.h"
/*
 * Write hook for the journal code: write `len' bytes from `data' to
 * block `pbn' through fsck's write descriptor (fswritefd).
 *
 * `devvp' is accepted only to match the expected signature and is not
 * used here.  Always returns 0; the outcome of bwrite() is not examined.
 * NOTE(review): presumably called from the shared replay code in
 * vfs_wapbl.c -- confirm.
 */
int
wapbl_write(void *data, size_t len, struct vnode *devvp, daddr_t pbn)
{

	/* len is a size_t, so it must be printed with %zu, not %zd. */
	WAPBL_PRINTF(WAPBL_PRINT_IO,
	    ("wapbl_write: %zu bytes at block %"PRId64" on fd 0x%x\n",
	    len, pbn, fswritefd));
	bwrite(fswritefd, data, pbn, len);
	return 0;
}
/*
 * Read hook for the journal code: read `len' bytes into `data' from
 * block `pbn' through fsck's read descriptor (fsreadfd).
 *
 * `devvp' is accepted only to match the expected signature and is not
 * used here.  Always returns 0: the int result of bread() is discarded,
 * so a failed read is still reported to the caller as success.
 * NOTE(review): bread() itself calls rwerror() on failure; confirm that
 * not propagating the error from here is intended.
 */
int
wapbl_read(void *data, size_t len, struct vnode *devvp, daddr_t pbn)
{

	/* len is a size_t, so it must be printed with %zu, not %zd. */
	WAPBL_PRINTF(WAPBL_PRINT_IO,
	    ("wapbl_read: %zu bytes at block %"PRId64" on fd 0x%x\n",
	    len, pbn, fsreadfd));
	(void)bread(fsreadfd, data, pbn, len);
	return 0;
}
struct wapbl_replay *wapbl_replay;

/*
 * Ask the operator whether to carry on after a fatal journal problem;
 * exit with FSCK_EXIT_CHECK_FAILED if the answer is no.
 */
static void
journal_continue_or_exit(void)
{

	if (reply("CONTINUE") == 0)
		exit(FSCK_EXIT_CHECK_FAILED);
}

/*
 * Locate the journal described by the superblock, open it for replay
 * and, unless -n (nflag) is in effect, write the journaled blocks back.
 *
 * Every failure (bad journal version, bad location, unreadable journal,
 * failed write-back) is reported with pfatal() and the operator is asked
 * whether to continue; declining exits fsck.  On a successful write-back
 * the replay is stopped; with nflag set it is left open.
 */
void
replay_wapbl(void)
{
	uint64_t addr, count, blksize;
	int error, i;

	/* Journal debug output follows fsck's own debug level. */
	if (debug) {
		wapbl_debug_print = WAPBL_PRINT_ERROR | WAPBL_PRINT_REPLAY;
		if (debug > 1)
			wapbl_debug_print |= WAPBL_PRINT_IO;
	}

	if (sblock->fs_journal_version != UFS_WAPBL_VERSION) {
		pfatal("INVALID JOURNAL VERSION %d",
		    sblock->fs_journal_version);
		journal_continue_or_exit();
		return;
	}

	/* Pick the locator triple that matches the journal's location. */
	if (sblock->fs_journal_location ==
	    UFS_WAPBL_JOURNALLOC_END_PARTITION) {
		addr = sblock->fs_journallocs[UFS_WAPBL_EPART_ADDR];
		count = sblock->fs_journallocs[UFS_WAPBL_EPART_COUNT];
		blksize = sblock->fs_journallocs[UFS_WAPBL_EPART_BLKSZ];
	} else if (sblock->fs_journal_location ==
	    UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM) {
		addr = sblock->fs_journallocs[UFS_WAPBL_INFS_ADDR];
		count = sblock->fs_journallocs[UFS_WAPBL_INFS_COUNT];
		blksize = sblock->fs_journallocs[UFS_WAPBL_INFS_BLKSZ];
	} else {
		if (sblock->fs_journal_location == UFS_WAPBL_JOURNALLOC_NONE)
			pfatal("INVALID JOURNAL LOCATION 'NONE'");
		else
			pfatal("INVALID JOURNAL LOCATION %d",
			    sblock->fs_journal_location);
		journal_continue_or_exit();
		return;
	}

	error = wapbl_replay_start(&wapbl_replay, 0, addr, count, blksize);
	if (error) {
		pfatal("UNABLE TO READ JOURNAL FOR REPLAY");
		journal_continue_or_exit();
		return;
	}

	if (!nflag) {
		error = wapbl_replay_write(wapbl_replay, 0);
		if (error) {
			pfatal("UNABLE TO REPLAY JOURNAL BLOCKS");
			journal_continue_or_exit();
		} else {
			wapbl_replay_stop(wapbl_replay);
		}
	}

	/* Report (debug only) the inodes recorded in the journal. */
	for (i = 0; i < wapbl_replay->wr_inodescnt; i++) {
		WAPBL_PRINTF(WAPBL_PRINT_REPLAY,("wapbl_replay: "
		    "not cleaning inode %"PRIu32" mode %"PRIo32"\n",
		    wapbl_replay->wr_inodes[i].wr_inumber,
		    wapbl_replay->wr_inodes[i].wr_imode));
	}
}
/*
 * Release the journal replay state, if any: stop the replay when it is
 * still open, free it, and reset the global pointer.
 */
void
cleanup_wapbl(void)
{
	struct wapbl_replay *wr = wapbl_replay;

	if (wr == NULL)
		return;

	if (wapbl_replay_isopen(wr))
		wapbl_replay_stop(wr);
	wapbl_replay_free(wr);
	wapbl_replay = NULL;
}
/*
 * Pass a freshly read block through the open journal replay.
 *
 * Returns 0 without touching `buf' when no replay exists or it is not
 * open; otherwise returns the result of wapbl_replay_read().
 * NOTE(review): presumably that call overlays journaled contents onto
 * `buf' -- confirm against vfs_wapbl.c.
 */
int
read_wapbl(char *buf, long size, daddr_t blk)
{

	if (wapbl_replay != NULL && wapbl_replay_isopen(wapbl_replay))
		return wapbl_replay_read(wapbl_replay, buf, blk, size);
	return 0;
}
/*
 * Return 1 if `ino' is the in-filesystem journal inode, 0 otherwise.
 *
 * The inode must carry the SF_LOG flag, and the superblock must describe
 * a journal of the supported version, located in the file system, whose
 * recorded inode number equals `ino'.
 */
int
is_journal_inode(ino_t ino)
{
	union dinode *dp = ginode(ino);

	if ((iswap32(DIP(dp, flags)) & SF_LOG) == 0)
		return 0;
	if (sblock->fs_journal_version != UFS_WAPBL_VERSION)
		return 0;
	if (sblock->fs_journal_location != UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM)
		return 0;
	if (sblock->fs_journallocs[UFS_WAPBL_INFS_INO] != ino)
		return 0;
	return 1;
}

View File

@ -1,4 +1,4 @@
# $NetBSD: Makefile,v 1.22 2008/05/04 15:37:19 tsutsui Exp $ # $NetBSD: Makefile,v 1.23 2008/07/31 05:38:04 simonb Exp $
# @(#)Makefile 8.1 (Berkeley) 6/5/93 # @(#)Makefile 8.1 (Berkeley) 6/5/93
.include <bsd.own.mk> .include <bsd.own.mk>
@ -16,6 +16,10 @@ FSCK_FFS=${NETBSDSRCDIR}/sbin/fsck_ffs
CPPFLAGS+= -I${FSCK} -I${FSCK_FFS} CPPFLAGS+= -I${FSCK} -I${FSCK_FFS}
.PATH: ${FSCK} ${FSCK_FFS} ${NETBSDSRCDIR}/sys/ufs/ffs .PATH: ${FSCK} ${FSCK_FFS} ${NETBSDSRCDIR}/sys/ufs/ffs
SRCS+= vfs_wapbl.c wapbl.c
.PATH: ${NETBSDSRCDIR}/sys/kern
CPPFLAGS+=-DWAPBL_DEBUG_PRINT=0
LDADD+= -lutil -ledit -ltermcap LDADD+= -lutil -ledit -ltermcap
.ifndef HOSTPROG .ifndef HOSTPROG
DPADD+= ${LIBUTIL} ${LIBEDIT} ${LIBTERMCAP} DPADD+= ${LIBUTIL} ${LIBEDIT} ${LIBTERMCAP}

View File

@ -1,4 +1,4 @@
/* $NetBSD: mount_ffs.c,v 1.23 2008/07/20 01:20:22 lukem Exp $ */ /* $NetBSD: mount_ffs.c,v 1.24 2008/07/31 05:38:04 simonb Exp $ */
/*- /*-
* Copyright (c) 1993, 1994 * Copyright (c) 1993, 1994
@ -39,7 +39,7 @@ __COPYRIGHT("@(#) Copyright (c) 1993, 1994\
#if 0 #if 0
static char sccsid[] = "@(#)mount_ufs.c 8.4 (Berkeley) 4/26/95"; static char sccsid[] = "@(#)mount_ufs.c 8.4 (Berkeley) 4/26/95";
#else #else
__RCSID("$NetBSD: mount_ffs.c,v 1.23 2008/07/20 01:20:22 lukem Exp $"); __RCSID("$NetBSD: mount_ffs.c,v 1.24 2008/07/31 05:38:04 simonb Exp $");
#endif #endif
#endif /* not lint */ #endif /* not lint */
@ -70,6 +70,7 @@ static const struct mntopt mopts[] = {
MOPT_NODEVMTIME, MOPT_NODEVMTIME,
MOPT_FORCE, MOPT_FORCE,
MOPT_SOFTDEP, MOPT_SOFTDEP,
MOPT_LOG,
MOPT_GETARGS, MOPT_GETARGS,
MOPT_NULL, MOPT_NULL,
}; };

View File

@ -1,4 +1,4 @@
.\" $NetBSD: tunefs.8,v 1.36 2004/12/20 10:28:47 hubertf Exp $ .\" $NetBSD: tunefs.8,v 1.37 2008/07/31 05:38:04 simonb Exp $
.\" .\"
.\" Copyright (c) 1983, 1991, 1993 .\" Copyright (c) 1983, 1991, 1993
.\" The Regents of the University of California. All rights reserved. .\" The Regents of the University of California. All rights reserved.
@ -41,6 +41,7 @@
.Op Fl e Ar maxbpg .Op Fl e Ar maxbpg
.Op Fl g Ar avgfilesize .Op Fl g Ar avgfilesize
.Op Fl h Ar avgfpdir .Op Fl h Ar avgfpdir
.Op Fl l Ar logsize
.Op Fl m Ar minfree .Op Fl m Ar minfree
.Bk -words .Bk -words
.\" .Op Fl n Ar soft_dependency_enabling .\" .Op Fl n Ar soft_dependency_enabling
@ -97,6 +98,13 @@ this parameter should be set higher.
This specifies the expected average file size. This specifies the expected average file size.
.It Fl h Ar avgfpdir .It Fl h Ar avgfpdir
This specifies the expected number of files per directory. This specifies the expected number of files per directory.
.It Fl l Ar logsize
This value specifies the size of the in-filesystem journaling log file.
The default journaling log file size is described in
.Xr wapbl 4 .
Specifying a size of zero will cause the in-filesystem journaling log file
to be removed the next time the filesystem is mounted.
The size of an existing in-filesystem journaling log file cannot be changed.
.It Fl m Ar minfree .It Fl m Ar minfree
This value specifies the percentage of space held back This value specifies the percentage of space held back
from normal users; the minimum free space threshold. from normal users; the minimum free space threshold.
@ -145,6 +153,7 @@ or
.Li time . .Li time .
.El .El
.Sh SEE ALSO .Sh SEE ALSO
.Xr wapbl 4 ,
.Xr fs 5 , .Xr fs 5 ,
.Xr dumpfs 8 , .Xr dumpfs 8 ,
.Xr fsck_ffs 8 , .Xr fsck_ffs 8 ,

View File

@ -1,4 +1,4 @@
/* $NetBSD: tunefs.c,v 1.34 2008/07/20 01:20:23 lukem Exp $ */ /* $NetBSD: tunefs.c,v 1.35 2008/07/31 05:38:04 simonb Exp $ */
/* /*
* Copyright (c) 1983, 1993 * Copyright (c) 1983, 1993
@ -39,7 +39,7 @@ __COPYRIGHT("@(#) Copyright (c) 1983, 1993\
#if 0 #if 0
static char sccsid[] = "@(#)tunefs.c 8.3 (Berkeley) 5/3/95"; static char sccsid[] = "@(#)tunefs.c 8.3 (Berkeley) 5/3/95";
#else #else
__RCSID("$NetBSD: tunefs.c,v 1.34 2008/07/20 01:20:23 lukem Exp $"); __RCSID("$NetBSD: tunefs.c,v 1.35 2008/07/31 05:38:04 simonb Exp $");
#endif #endif
#endif /* not lint */ #endif /* not lint */
@ -48,9 +48,9 @@ __RCSID("$NetBSD: tunefs.c,v 1.34 2008/07/20 01:20:23 lukem Exp $");
*/ */
#include <sys/param.h> #include <sys/param.h>
#include <ufs/ufs/dinode.h>
#include <ufs/ffs/fs.h> #include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h> #include <ufs/ffs/ffs_extern.h>
#include <ufs/ufs/ufs_wapbl.h>
#include <machine/bswap.h> #include <machine/bswap.h>
@ -85,15 +85,16 @@ static off_t sblock_try[] = SBLOCKSEARCH;
static void bwrite(daddr_t, char *, int, const char *); static void bwrite(daddr_t, char *, int, const char *);
static void bread(daddr_t, char *, int, const char *); static void bread(daddr_t, char *, int, const char *);
static int getnum(const char *, const char *, int, int); static void change_log_info(long long);
static void getsb(struct fs *, const char *); static void getsb(struct fs *, const char *);
static int openpartition(const char *, int, char *, size_t); static int openpartition(const char *, int, char *, size_t);
static void show_log_info(void);
static void usage(void); static void usage(void);
int int
main(int argc, char *argv[]) main(int argc, char *argv[])
{ {
#define OPTSTRINGBASE "AFNe:g:h:m:o:" #define OPTSTRINGBASE "AFNe:g:h:l:m:o:"
#ifdef TUNEFS_SOFTDEP #ifdef TUNEFS_SOFTDEP
int softdep; int softdep;
#define OPTSTRING OPTSTRINGBASE ## "n:" #define OPTSTRING OPTSTRINGBASE ## "n:"
@ -105,10 +106,12 @@ main(int argc, char *argv[])
char device[MAXPATHLEN]; char device[MAXPATHLEN];
int maxbpg, minfree, optim; int maxbpg, minfree, optim;
int avgfilesize, avgfpdir; int avgfilesize, avgfpdir;
long long logfilesize;
Aflag = Fflag = Nflag = 0; Aflag = Fflag = Nflag = 0;
maxbpg = minfree = optim = -1; maxbpg = minfree = optim = -1;
avgfilesize = avgfpdir = -1; avgfilesize = avgfpdir = -1;
logfilesize = -1;
#ifdef TUNEFS_SOFTDEP #ifdef TUNEFS_SOFTDEP
softdep = -1; softdep = -1;
#endif #endif
@ -131,25 +134,30 @@ main(int argc, char *argv[])
break; break;
case 'e': case 'e':
maxbpg = getnum(optarg, maxbpg = strsuftoll(
"maximum blocks per file in a cylinder group", "maximum blocks per file in a cylinder group",
1, INT_MAX); optarg, 1, INT_MAX);
break; break;
case 'g': case 'g':
avgfilesize = getnum(optarg, avgfilesize = strsuftoll("average file size", optarg,
"average file size", 1, INT_MAX);
break;
case 'h':
avgfpdir = getnum(optarg,
"expected number of files per directory",
1, INT_MAX); 1, INT_MAX);
break; break;
case 'h':
avgfpdir = strsuftoll(
"expected number of files per directory",
optarg, 1, INT_MAX);
break;
case 'l':
logfilesize = strsuftoll("journal log file size",
optarg, 0, INT_MAX);
break;
case 'm': case 'm':
minfree = getnum(optarg, minfree = strsuftoll("minimum percentage of free space",
"minimum percentage of free space", 0, 99); optarg, 0, 99);
break; break;
#ifdef TUNEFS_SOFTDEP #ifdef TUNEFS_SOFTDEP
@ -254,6 +262,9 @@ main(int argc, char *argv[])
CHANGEVAL(sblock.fs_avgfpdir, avgfpdir, CHANGEVAL(sblock.fs_avgfpdir, avgfpdir,
"expected number of files per directory", ""); "expected number of files per directory", "");
if (logfilesize >= 0)
change_log_info(logfilesize);
if (Nflag) { if (Nflag) {
fprintf(stdout, "tunefs: current settings of %s\n", special); fprintf(stdout, "tunefs: current settings of %s\n", special);
fprintf(stdout, "\tmaximum contiguous block count %d\n", fprintf(stdout, "\tmaximum contiguous block count %d\n",
@ -274,6 +285,7 @@ main(int argc, char *argv[])
fprintf(stdout, fprintf(stdout,
"\texpected number of files per directory: %d\n", "\texpected number of files per directory: %d\n",
sblock.fs_avgfpdir); sblock.fs_avgfpdir);
show_log_info();
fprintf(stdout, "tunefs: no changes made\n"); fprintf(stdout, "tunefs: no changes made\n");
exit(0); exit(0);
} }
@ -290,20 +302,123 @@ main(int argc, char *argv[])
exit(0); exit(0);
} }
static int static void
getnum(const char *num, const char *desc, int min, int max) show_log_info(void)
{ {
long n; const char *loc;
char *ep; uint64_t size, blksize;
int print;
n = strtol(num, &ep, 10); switch (sblock.fs_journal_location) {
if (ep[0] != '\0') case UFS_WAPBL_JOURNALLOC_NONE:
errx(1, "Invalid number `%s' for %s", num, desc); print = blksize = 0;
if ((int) n < min) /* nothing */
errx(1, "%s `%s' too small (minimum is %d)", desc, num, min); break;
if ((int) n > max) case UFS_WAPBL_JOURNALLOC_END_PARTITION:
errx(1, "%s `%s' too large (maximum is %d)", desc, num, max); loc = "end of partition";
return ((int)n); size = sblock.fs_journallocs[UFS_WAPBL_EPART_COUNT];
blksize = sblock.fs_journallocs[UFS_WAPBL_EPART_BLKSZ];
print = 1;
break;
case UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM:
loc = "in filesystem";
size = sblock.fs_journallocs[UFS_WAPBL_INFS_COUNT];
blksize = sblock.fs_journallocs[UFS_WAPBL_INFS_BLKSZ];
print = 1;
break;
default:
loc = "unknown";
size = blksize = 0;
print = 1;
break;
}
if (print) {
fprintf(stdout, "\tjournal log file location: %s\n", loc);
fprintf(stdout, "\tjournal log file size: %" PRIu64 "\n",
size * blksize);
fprintf(stdout, "\tjournal log flags:");
if (sblock.fs_journal_flags & UFS_WAPBL_FLAGS_CREATE_LOG)
fprintf(stdout, " clear-log");
if (sblock.fs_journal_flags & UFS_WAPBL_FLAGS_CLEAR_LOG)
fprintf(stdout, " clear-log");
fprintf(stdout, "\n");
}
}
/*
 * Record a request to create or remove the in-filesystem journal log in
 * the in-core superblock; only the superblock fields are updated here.
 *
 * logfilesize: requested log size; 0 requests removal of the log.
 *
 * Exits via errx() when the existing journal is not an in-filesystem
 * one, or when a different non-zero size is requested for an existing
 * log.
 */
static void
change_log_info(long long logfilesize)
{
	/*
	 * NOTES:
	 *  - only operate on in-filesystem log sizes
	 *  - can't change size of existing log
	 *  - if current is same, no action
	 *  - if current is zero and new is non-zero, set flag to create log
	 *    on next mount
	 *  - if current is non-zero and new is zero, set flag to clear log
	 *    on next mount
	 */
	int can_modify;
	uint64_t old_size;

	switch (sblock.fs_journal_location) {
	case UFS_WAPBL_JOURNALLOC_END_PARTITION:
		can_modify = 0;
		old_size = sblock.fs_journallocs[UFS_WAPBL_EPART_COUNT] *
		    sblock.fs_journallocs[UFS_WAPBL_EPART_BLKSZ];
		break;

	case UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM:
		can_modify = 1;
		old_size = sblock.fs_journallocs[UFS_WAPBL_INFS_COUNT] *
		    sblock.fs_journallocs[UFS_WAPBL_INFS_BLKSZ];
		break;

	case UFS_WAPBL_JOURNALLOC_NONE:
		/*
		 * No journal yet.  This must not be rejected below, or the
		 * "current is zero, new is non-zero" case from the notes
		 * above (create a log on next mount) could never be reached.
		 */
		can_modify = 1;
		old_size = 0;
		break;

	default:
		/* Unknown location: leave it alone. */
		can_modify = 0;
		old_size = 0;
		break;
	}

	if (!can_modify)
		errx(1, "Can't change size of non-in-filesystem log");

	if (old_size == (uint64_t)logfilesize && logfilesize > 0) {
		/* no action */
		warnx("log file size remains unchanged at %lld", logfilesize);
		return;
	}

	if (logfilesize == 0) {
		/*
		 * Don't clear out the locators - the kernel might need
		 * these to find the log!  Just set the "clear the log"
		 * flag and let the kernel do the rest.
		 */
		sblock.fs_journal_flags |= UFS_WAPBL_FLAGS_CLEAR_LOG;
		sblock.fs_journal_flags &= ~UFS_WAPBL_FLAGS_CREATE_LOG;
		warnx("log file size cleared from %" PRIu64 "", old_size);
		return;
	}

	if (old_size == 0) {
		/* create new log of desired size next mount */
		sblock.fs_journal_location = UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM;
		sblock.fs_journallocs[UFS_WAPBL_INFS_ADDR] = 0;
		sblock.fs_journallocs[UFS_WAPBL_INFS_COUNT] = logfilesize;
		sblock.fs_journallocs[UFS_WAPBL_INFS_BLKSZ] = 0;
		sblock.fs_journallocs[UFS_WAPBL_INFS_INO] = 0;
		sblock.fs_journal_flags |= UFS_WAPBL_FLAGS_CREATE_LOG;
		sblock.fs_journal_flags &= ~UFS_WAPBL_FLAGS_CLEAR_LOG;
		warnx("log file size set to %lld", logfilesize);
	} else {
		errx(1,
		    "Can't change existing log size from %" PRIu64 " to %lld",
		    old_size, logfilesize);
	}
}
static void static void
@ -315,6 +430,7 @@ usage(void)
fprintf(stderr, "\t-e maximum blocks per file in a cylinder group\n"); fprintf(stderr, "\t-e maximum blocks per file in a cylinder group\n");
fprintf(stderr, "\t-g average file size\n"); fprintf(stderr, "\t-g average file size\n");
fprintf(stderr, "\t-h expected number of files per directory\n"); fprintf(stderr, "\t-h expected number of files per directory\n");
fprintf(stderr, "\t-l journal log file size (`0' to clear journal)\n");
fprintf(stderr, "\t-m minimum percentage of free space\n"); fprintf(stderr, "\t-m minimum percentage of free space\n");
#ifdef TUNEFS_SOFTDEP #ifdef TUNEFS_SOFTDEP
fprintf(stderr, "\t-n soft dependencies (`enable' or `disable')\n"); fprintf(stderr, "\t-n soft dependencies (`enable' or `disable')\n");

View File

@ -1,4 +1,4 @@
# $NetBSD: files,v 1.910 2008/07/16 20:06:19 pooka Exp $ # $NetBSD: files,v 1.911 2008/07/31 05:38:04 simonb Exp $
# @(#)files.newconf 7.5 (Berkeley) 5/10/93 # @(#)files.newconf 7.5 (Berkeley) 5/10/93
@ -110,6 +110,10 @@ defflag opt_fileassoc.h FILEASSOC
defflag opt_gre.h GRE_DEBUG defflag opt_gre.h GRE_DEBUG
# Write Ahead Physical Block Logging
defflag opt_wapbl.h WAPBL WAPBL_DEBUG
defparam opt_wapbl.h WAPBL_DEBUG_PRINT
# compatibility options # compatibility options
# #
defflag opt_compat_netbsd.h COMPAT_40 defflag opt_compat_netbsd.h COMPAT_40
@ -1475,6 +1479,7 @@ file kern/vfs_subr.c
file kern/vfs_syscalls.c file kern/vfs_syscalls.c
file kern/vfs_trans.c file kern/vfs_trans.c
file kern/vfs_vnops.c file kern/vfs_vnops.c
file kern/vfs_wapbl.c wapbl
file kern/vfs_xattr.c file kern/vfs_xattr.c
file kern/vnode_if.c file kern/vnode_if.c
file miscfs/deadfs/dead_vnops.c file miscfs/deadfs/dead_vnops.c

View File

@ -1,4 +1,4 @@
/* $NetBSD: init_main.c,v 1.360 2008/06/18 09:06:27 yamt Exp $ */ /* $NetBSD: init_main.c,v 1.361 2008/07/31 05:38:05 simonb Exp $ */
/*- /*-
* Copyright (c) 2008 The NetBSD Foundation, Inc. * Copyright (c) 2008 The NetBSD Foundation, Inc.
@ -97,7 +97,7 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.360 2008/06/18 09:06:27 yamt Exp $"); __KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.361 2008/07/31 05:38:05 simonb Exp $");
#include "opt_ipsec.h" #include "opt_ipsec.h"
#include "opt_ntp.h" #include "opt_ntp.h"
@ -108,6 +108,7 @@ __KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.360 2008/06/18 09:06:27 yamt Exp $")
#include "opt_fileassoc.h" #include "opt_fileassoc.h"
#include "opt_ktrace.h" #include "opt_ktrace.h"
#include "opt_pax.h" #include "opt_pax.h"
#include "opt_wapbl.h"
#include "rnd.h" #include "rnd.h"
#include "sysmon_envsys.h" #include "sysmon_envsys.h"
@ -192,6 +193,9 @@ __KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.360 2008/06/18 09:06:27 yamt Exp $")
#include <sys/ktrace.h> #include <sys/ktrace.h>
#endif #endif
#include <sys/kauth.h> #include <sys/kauth.h>
#ifdef WAPBL
#include <sys/wapbl.h>
#endif
#include <net80211/ieee80211_netbsd.h> #include <net80211/ieee80211_netbsd.h>
#include <sys/syscall.h> #include <sys/syscall.h>
@ -570,6 +574,11 @@ main(void)
/* Initialize the UUID system calls. */ /* Initialize the UUID system calls. */
uuid_init(); uuid_init();
#ifdef WAPBL
/* Initialize write-ahead physical block logging. */
wapbl_init();
#endif
/* /*
* Create process 1 (init(8)). We do this now, as Unix has * Create process 1 (init(8)). We do this now, as Unix has
* historically had init be process 1, and changing this would * historically had init be process 1, and changing this would

View File

@ -1,4 +1,4 @@
/* $NetBSD: vfs_bio.c,v 1.207 2008/07/14 16:22:42 hannken Exp $ */ /* $NetBSD: vfs_bio.c,v 1.208 2008/07/31 05:38:05 simonb Exp $ */
/*- /*-
* Copyright (c) 2007, 2008 The NetBSD Foundation, Inc. * Copyright (c) 2007, 2008 The NetBSD Foundation, Inc.
@ -6,6 +6,8 @@
* *
* This code is derived from software contributed to The NetBSD Foundation * This code is derived from software contributed to The NetBSD Foundation
* by Andrew Doran. * by Andrew Doran.
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions
@ -107,7 +109,7 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.207 2008/07/14 16:22:42 hannken Exp $"); __KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.208 2008/07/31 05:38:05 simonb Exp $");
#include "fs_ffs.h" #include "fs_ffs.h"
#include "opt_bufcache.h" #include "opt_bufcache.h"
@ -126,6 +128,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.207 2008/07/14 16:22:42 hannken Exp $"
#include <sys/fstrans.h> #include <sys/fstrans.h>
#include <sys/intr.h> #include <sys/intr.h>
#include <sys/cpu.h> #include <sys/cpu.h>
#include <sys/wapbl.h>
#include <uvm/uvm.h> #include <uvm/uvm.h>
@ -714,8 +717,23 @@ bread(struct vnode *vp, daddr_t blkno, int size, kauth_cred_t cred,
/* Wait for the read to complete, and return result. */ /* Wait for the read to complete, and return result. */
error = biowait(bp); error = biowait(bp);
if (error == 0 && (flags & B_MODIFY) != 0) if (error == 0 && (flags & B_MODIFY) != 0) /* XXXX before the next code block or after? */
error = fscow_run(bp, true); error = fscow_run(bp, true);
if (!error) {
struct mount *mp = wapbl_vptomp(vp);
if (mp && mp->mnt_wapbl_replay &&
WAPBL_REPLAY_ISOPEN(mp)) {
error = WAPBL_REPLAY_READ(mp, bp->b_data, bp->b_blkno,
bp->b_bcount);
if (error) {
mutex_enter(&bufcache_lock);
SET(bp->b_cflags, BC_INVAL);
mutex_exit(&bufcache_lock);
}
}
}
return error; return error;
} }
@ -793,6 +811,13 @@ bwrite(buf_t *bp)
mp = NULL; mp = NULL;
} }
if (mp && mp->mnt_wapbl) {
if (bp->b_iodone != mp->mnt_wapbl_op->wo_wapbl_biodone) {
bdwrite(bp);
return 0;
}
}
/* /*
* Remember buffer type, to switch on it later. If the write was * Remember buffer type, to switch on it later. If the write was
* synchronous, but the file system was mounted with MNT_ASYNC, * synchronous, but the file system was mounted with MNT_ASYNC,
@ -897,6 +922,14 @@ bdwrite(buf_t *bp)
return; return;
} }
if (wapbl_vphaswapbl(bp->b_vp)) {
struct mount *mp = wapbl_vptomp(bp->b_vp);
if (bp->b_iodone != mp->mnt_wapbl_op->wo_wapbl_biodone) {
WAPBL_ADD_BUF(mp, bp);
}
}
/* /*
* If the block hasn't been seen before: * If the block hasn't been seen before:
* (1) Mark it as having been seen, * (1) Mark it as having been seen,
@ -1028,6 +1061,16 @@ brelsel(buf_t *bp, int set)
if (bioopsp != NULL) if (bioopsp != NULL)
(*bioopsp->io_deallocate)(bp); (*bioopsp->io_deallocate)(bp);
if (ISSET(bp->b_flags, B_LOCKED)) {
if (wapbl_vphaswapbl(vp = bp->b_vp)) {
struct mount *mp = wapbl_vptomp(vp);
KASSERT(bp->b_iodone
!= mp->mnt_wapbl_op->wo_wapbl_biodone);
WAPBL_REMOVE_BUF(mp, bp);
}
}
mutex_enter(bp->b_objlock); mutex_enter(bp->b_objlock);
CLR(bp->b_oflags, BO_DONE|BO_DELWRI); CLR(bp->b_oflags, BO_DONE|BO_DELWRI);
if ((vp = bp->b_vp) != NULL) { if ((vp = bp->b_vp) != NULL) {
@ -1224,19 +1267,22 @@ geteblk(int size)
int int
allocbuf(buf_t *bp, int size, int preserve) allocbuf(buf_t *bp, int size, int preserve)
{ {
vsize_t oldsize, desired_size;
void *addr; void *addr;
vsize_t oldsize, desired_size;
int oldcount;
int delta; int delta;
desired_size = buf_roundsize(size); desired_size = buf_roundsize(size);
if (desired_size > MAXBSIZE) if (desired_size > MAXBSIZE)
printf("allocbuf: buffer larger than MAXBSIZE requested"); printf("allocbuf: buffer larger than MAXBSIZE requested");
oldcount = bp->b_bcount;
bp->b_bcount = size; bp->b_bcount = size;
oldsize = bp->b_bufsize; oldsize = bp->b_bufsize;
if (oldsize == desired_size) if (oldsize == desired_size)
return 0; goto out;
/* /*
* If we want a buffer of a different size, re-allocate the * If we want a buffer of a different size, re-allocate the
@ -1274,6 +1320,11 @@ allocbuf(buf_t *bp, int size, int preserve)
} }
} }
mutex_exit(&bufcache_lock); mutex_exit(&bufcache_lock);
out:
if (wapbl_vphaswapbl(bp->b_vp))
WAPBL_RESIZE_BUF(wapbl_vptomp(bp->b_vp), bp, oldsize, oldcount);
return 0; return 0;
} }

View File

@ -1,4 +1,4 @@
/* $NetBSD: vfs_lookup.c,v 1.108 2008/05/06 18:43:44 ad Exp $ */ /* $NetBSD: vfs_lookup.c,v 1.109 2008/07/31 05:38:05 simonb Exp $ */
/* /*
* Copyright (c) 1982, 1986, 1989, 1993 * Copyright (c) 1982, 1986, 1989, 1993
@ -37,7 +37,7 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.108 2008/05/06 18:43:44 ad Exp $"); __KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.109 2008/07/31 05:38:05 simonb Exp $");
#include "opt_magiclinks.h" #include "opt_magiclinks.h"
@ -956,8 +956,10 @@ relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
if (cnp->cn_nameptr[0] == '\0') if (cnp->cn_nameptr[0] == '\0')
panic("relookup: null name"); panic("relookup: null name");
#ifdef ohcrap
if (cnp->cn_flags & ISDOTDOT) if (cnp->cn_flags & ISDOTDOT)
panic("relookup: lookup on dot-dot"); panic("relookup: lookup on dot-dot");
#endif
/* /*
* We now have a segment name to search for, and a directory to search. * We now have a segment name to search for, and a directory to search.

View File

@ -1,4 +1,4 @@
/* $NetBSD: vfs_subr.c,v 1.354 2008/07/27 15:08:37 pooka Exp $ */ /* $NetBSD: vfs_subr.c,v 1.355 2008/07/31 05:38:05 simonb Exp $ */
/*- /*-
* Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc. * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc.
@ -81,7 +81,7 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.354 2008/07/27 15:08:37 pooka Exp $"); __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.355 2008/07/31 05:38:05 simonb Exp $");
#include "opt_ddb.h" #include "opt_ddb.h"
#include "opt_compat_netbsd.h" #include "opt_compat_netbsd.h"
@ -106,6 +106,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.354 2008/07/27 15:08:37 pooka Exp $")
#include <sys/kauth.h> #include <sys/kauth.h>
#include <sys/atomic.h> #include <sys/atomic.h>
#include <sys/kthread.h> #include <sys/kthread.h>
#include <sys/wapbl.h>
#include <miscfs/specfs/specdev.h> #include <miscfs/specfs/specdev.h>
#include <miscfs/syncfs/syncfs.h> #include <miscfs/syncfs/syncfs.h>
@ -1804,8 +1805,13 @@ vclean(vnode_t *vp, int flags)
*/ */
if (flags & DOCLOSE) { if (flags & DOCLOSE) {
error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0); error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
if (error != 0) if (error != 0) {
/* XXX, fix vn_start_write's grab of mp and use that. */
if (wapbl_vphaswapbl(vp))
WAPBL_DISCARD(wapbl_vptomp(vp));
error = vinvalbuf(vp, 0, NOCRED, l, 0, 0); error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
}
KASSERT(error == 0); KASSERT(error == 0);
KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) { if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {

View File

@ -1,4 +1,4 @@
/* $NetBSD: vfs_syscalls.c,v 1.369 2008/06/24 11:21:46 ad Exp $ */ /* $NetBSD: vfs_syscalls.c,v 1.370 2008/07/31 05:38:05 simonb Exp $ */
/*- /*-
* Copyright (c) 2008 The NetBSD Foundation, Inc. * Copyright (c) 2008 The NetBSD Foundation, Inc.
@ -63,7 +63,7 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.369 2008/06/24 11:21:46 ad Exp $"); __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.370 2008/07/31 05:38:05 simonb Exp $");
#include "opt_compat_netbsd.h" #include "opt_compat_netbsd.h"
#include "opt_compat_43.h" #include "opt_compat_43.h"
@ -208,12 +208,13 @@ mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
mp->mnt_flag &= mp->mnt_flag &=
~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP); MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
MNT_LOG);
mp->mnt_flag |= flags & mp->mnt_flag |= flags &
(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
MNT_IGNORE); MNT_LOG | MNT_IGNORE);
error = VFS_MOUNT(mp, path, data, data_len); error = VFS_MOUNT(mp, path, data, data_len);
@ -367,7 +368,7 @@ mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops,
(MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
MNT_IGNORE | MNT_RDONLY); MNT_LOG | MNT_IGNORE | MNT_RDONLY);
error = VFS_MOUNT(mp, path, data, data_len); error = VFS_MOUNT(mp, path, data, data_len);
mp->mnt_flag &= ~MNT_OP_FLAGS; mp->mnt_flag &= ~MNT_OP_FLAGS;

View File

@ -1,4 +1,4 @@
/* $NetBSD: vfs_vnops.c,v 1.158 2008/06/02 16:08:41 ad Exp $ */ /* $NetBSD: vfs_vnops.c,v 1.159 2008/07/31 05:38:05 simonb Exp $ */
/* /*
* Copyright (c) 1982, 1986, 1989, 1993 * Copyright (c) 1982, 1986, 1989, 1993
@ -37,7 +37,7 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.158 2008/06/02 16:08:41 ad Exp $"); __KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.159 2008/07/31 05:38:05 simonb Exp $");
#include "fs_union.h" #include "fs_union.h"
#include "veriexec.h" #include "veriexec.h"
@ -61,6 +61,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.158 2008/06/02 16:08:41 ad Exp $");
#include <sys/fstrans.h> #include <sys/fstrans.h>
#include <sys/atomic.h> #include <sys/atomic.h>
#include <sys/filedesc.h> #include <sys/filedesc.h>
#include <sys/wapbl.h>
#include <miscfs/specfs/specdev.h> #include <miscfs/specfs/specdev.h>
@ -692,6 +693,11 @@ vn_lock(struct vnode *vp, int flags)
LK_CANRECURSE)) LK_CANRECURSE))
== 0); == 0);
#ifdef DIAGNOSTIC
if (wapbl_vphaswapbl(vp))
WAPBL_JUNLOCK_ASSERT(wapbl_vptomp(vp));
#endif
do { do {
/* /*
* XXX PR 37706 forced unmount of file systems is unsafe. * XXX PR 37706 forced unmount of file systems is unsafe.

2783
sys/kern/vfs_wapbl.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
/* $NetBSD: vnode_if.c,v 1.76 2008/01/25 14:32:46 ad Exp $ */ /* $NetBSD: vnode_if.c,v 1.77 2008/07/31 05:38:05 simonb Exp $ */
/* /*
* Warning: DO NOT EDIT! This file is automatically generated! * Warning: DO NOT EDIT! This file is automatically generated!
@ -40,7 +40,7 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vnode_if.c,v 1.76 2008/01/25 14:32:46 ad Exp $"); __KERNEL_RCSID(0, "$NetBSD: vnode_if.c,v 1.77 2008/07/31 05:38:05 simonb Exp $");
#include "opt_vnode_lockdebug.h" #include "opt_vnode_lockdebug.h"
@ -802,6 +802,7 @@ VOP_FSYNC(struct vnode *vp,
mpsafe = (vp->v_vflag & VV_MPSAFE); mpsafe = (vp->v_vflag & VV_MPSAFE);
if (!mpsafe) { KERNEL_LOCK(1, curlwp); } if (!mpsafe) { KERNEL_LOCK(1, curlwp); }
error = (VCALL(vp, VOFFSET(vop_fsync), &a)); error = (VCALL(vp, VOFFSET(vop_fsync), &a));
if (!mpsafe) { KERNEL_UNLOCK_ONE(curlwp); } if (!mpsafe) { KERNEL_UNLOCK_ONE(curlwp); }
return error; return error;
} }

View File

@ -1,4 +1,4 @@
/* $NetBSD: genfs_io.c,v 1.8 2008/06/04 12:41:40 ad Exp $ */ /* $NetBSD: genfs_io.c,v 1.9 2008/07/31 05:38:05 simonb Exp $ */
/* /*
* Copyright (c) 1982, 1986, 1989, 1993 * Copyright (c) 1982, 1986, 1989, 1993
@ -31,7 +31,7 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.8 2008/06/04 12:41:40 ad Exp $"); __KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.9 2008/07/31 05:38:05 simonb Exp $");
#include <sys/param.h> #include <sys/param.h>
#include <sys/systm.h> #include <sys/systm.h>
@ -589,8 +589,22 @@ loopdone:
*/ */
if (!error && sawhole && blockalloc) { if (!error && sawhole && blockalloc) {
error = GOP_ALLOC(vp, startoffset, npages << PAGE_SHIFT, 0, /*
cred); * XXX: This assumes that we come here only via
* the mmio path
*/
if (vp->v_mount->mnt_wapbl && write) {
error = WAPBL_BEGIN(vp->v_mount);
}
if (!error) {
error = GOP_ALLOC(vp, startoffset,
npages << PAGE_SHIFT, 0, cred);
if (vp->v_mount->mnt_wapbl && write) {
WAPBL_END(vp->v_mount);
}
}
UVMHIST_LOG(ubchist, "gop_alloc off 0x%x/0x%x -> %d", UVMHIST_LOG(ubchist, "gop_alloc off 0x%x/0x%x -> %d",
startoffset, npages << PAGE_SHIFT, error,0); startoffset, npages << PAGE_SHIFT, error,0);
if (!error) { if (!error) {

View File

@ -1,4 +1,4 @@
# $NetBSD: Makefile,v 1.4 2008/07/29 13:17:42 pooka Exp $ # $NetBSD: Makefile,v 1.5 2008/07/31 05:38:05 simonb Exp $
# #
.PATH: ${.CURDIR}/../../../../ufs/ffs .PATH: ${.CURDIR}/../../../../ufs/ffs
@ -7,9 +7,9 @@ LIB= rumpfs_ffs
SRCS= ffs_alloc.c ffs_balloc.c ffs_bswap.c ffs_inode.c \ SRCS= ffs_alloc.c ffs_balloc.c ffs_bswap.c ffs_inode.c \
ffs_softdep.stub.c ffs_subr.c ffs_tables.c ffs_vfsops.c \ ffs_softdep.stub.c ffs_subr.c ffs_tables.c ffs_vfsops.c \
ffs_vnops.c ffs_snapshot.c ffs_vnops.c ffs_snapshot.c ffs_wapbl.c
CPPFLAGS+= -DFFS_NO_SNAPSHOT -DFFS_EI CPPFLAGS+= -DFFS_NO_SNAPSHOT -DFFS_EI -DWAPBL
CFLAGS+= -Wno-pointer-sign CFLAGS+= -Wno-pointer-sign
.include <bsd.lib.mk> .include <bsd.lib.mk>

View File

@ -1,4 +1,4 @@
# $NetBSD: Makefile,v 1.5 2008/07/29 13:17:47 pooka Exp $ # $NetBSD: Makefile,v 1.6 2008/07/31 05:38:05 simonb Exp $
# #
.PATH: ${.CURDIR}/../../../../ufs/ufs .PATH: ${.CURDIR}/../../../../ufs/ufs
@ -6,9 +6,9 @@
LIB= rumpfs_ufs LIB= rumpfs_ufs
SRCS= ufs_bmap.c ufs_dirhash.c ufs_ihash.c ufs_inode.c ufs_lookup.c \ SRCS= ufs_bmap.c ufs_dirhash.c ufs_ihash.c ufs_inode.c ufs_lookup.c \
ufs_vfsops.c ufs_vnops.c ufs_vfsops.c ufs_vnops.c ufs_wapbl.c
CPPFLAGS+= -DUFS_DIRHASH -DFFS_EI CPPFLAGS+= -DUFS_DIRHASH -DFFS_EI -DWAPBL
.include <bsd.lib.mk> .include <bsd.lib.mk>
.include <bsd.klinks.mk> .include <bsd.klinks.mk>

View File

@ -1,4 +1,4 @@
# $NetBSD: Makefile.rumpkern,v 1.2 2008/07/30 01:32:47 oster Exp $ # $NetBSD: Makefile.rumpkern,v 1.3 2008/07/31 05:38:05 simonb Exp $
# #
.include "${RUMPTOP}/Makefile.rump" .include "${RUMPTOP}/Makefile.rump"
@ -26,7 +26,7 @@ SRCS+= clock_subr.c kern_descrip.c kern_stub.c param.c \
subr_bufq.c subr_hash.c subr_prf2.c subr_specificdata.c \ subr_bufq.c subr_hash.c subr_prf2.c subr_specificdata.c \
subr_time.c subr_workqueue.c sys_descrip.c sys_generic.c vfs_bio.c \ subr_time.c subr_workqueue.c sys_descrip.c sys_generic.c vfs_bio.c \
vfs_cache.c vfs_getcwd.c vfs_hooks.c vfs_init.c vfs_lookup.c \ vfs_cache.c vfs_getcwd.c vfs_hooks.c vfs_init.c vfs_lookup.c \
vfs_subr.c vfs_vnops.c vfs_syscalls.c vnode_if.c \ vfs_subr.c vfs_vnops.c vfs_syscalls.c vfs_wapbl.c vnode_if.c \
subr_kobj.c kern_module.c subr_kobj.c kern_module.c
# sys/miscfs # sys/miscfs

View File

@ -1,4 +1,4 @@
/* $NetBSD: rump.c,v 1.48 2008/07/29 13:17:47 pooka Exp $ */ /* $NetBSD: rump.c,v 1.49 2008/07/31 05:38:05 simonb Exp $ */
/* /*
* Copyright (c) 2007 Antti Kantee. All Rights Reserved. * Copyright (c) 2007 Antti Kantee. All Rights Reserved.
@ -32,6 +32,7 @@
#include <sys/filedesc.h> #include <sys/filedesc.h>
#include <sys/kauth.h> #include <sys/kauth.h>
#include <sys/kmem.h> #include <sys/kmem.h>
#include <sys/module.h>
#include <sys/mount.h> #include <sys/mount.h>
#include <sys/namei.h> #include <sys/namei.h>
#include <sys/queue.h> #include <sys/queue.h>
@ -39,7 +40,7 @@
#include <sys/select.h> #include <sys/select.h>
#include <sys/vnode.h> #include <sys/vnode.h>
#include <sys/vfs_syscalls.h> #include <sys/vfs_syscalls.h>
#include <sys/module.h> #include <sys/wapbl.h>
#include <miscfs/specfs/specdev.h> #include <miscfs/specfs/specdev.h>
@ -135,6 +136,7 @@ rump_init()
module_init(); module_init();
vfsinit(); vfsinit();
bufinit(); bufinit();
wapbl_init();
rumpvfs_init(); rumpvfs_init();

View File

@ -1,4 +1,4 @@
# $NetBSD: Makefile,v 1.109 2008/06/04 14:31:15 ad Exp $ # $NetBSD: Makefile,v 1.110 2008/07/31 05:38:05 simonb Exp $
.include <bsd.sys.mk> .include <bsd.sys.mk>
@ -19,12 +19,13 @@ INCS= acct.h agpio.h aio.h ansi.h ataio.h atomic.h audioio.h \
joystick.h \ joystick.h \
kcore.h kgdb.h kmem.h ksem.h ksyms.h ktrace.h \ kcore.h kgdb.h kmem.h ksem.h ksyms.h ktrace.h \
lkm.h localedef.h lock.h lockf.h lwp.h lwpctl.h \ lkm.h localedef.h lock.h lockf.h lwp.h lwpctl.h \
malloc.h mallocvar.h mbuf.h md4.h \ malloc.h mallocvar.h mbuf.h md4.h md5.h midiio.h \
md5.h midiio.h mman.h module.h mount.h mqueue.h msg.h msgbuf.h mtio.h mutex.h \ mman.h module.h mount.h mqueue.h msg.h msgbuf.h mtio.h mutex.h \
namei.h null.h \ namei.h null.h \
param.h pipe.h pmc.h poll.h pool.h power.h proc.h \ param.h pipe.h pmc.h poll.h pool.h power.h proc.h \
protosw.h pset.h ptrace.h queue.h \ protosw.h pset.h ptrace.h queue.h \
ras.h rb.h reboot.h radioio.h resource.h resourcevar.h rmd160.h rnd.h rwlock.h \ ras.h rb.h reboot.h radioio.h resource.h resourcevar.h rmd160.h \
rnd.h rwlock.h \
scanio.h sched.h scsiio.h select.h selinfo.h sem.h sha1.h sha2.h \ scanio.h sched.h scsiio.h select.h selinfo.h sem.h sha1.h sha2.h \
shm.h siginfo.h signal.h signalvar.h sigtypes.h simplelock.h \ shm.h siginfo.h signal.h signalvar.h sigtypes.h simplelock.h \
sleepq.h socket.h \ sleepq.h socket.h \
@ -36,7 +37,7 @@ INCS= acct.h agpio.h aio.h ansi.h ataio.h atomic.h audioio.h \
ttydefaults.h ttydev.h types.h \ ttydefaults.h ttydev.h types.h \
ucontext.h ucred.h uio.h un.h unistd.h unpcb.h user.h utsname.h uuid.h \ ucontext.h ucred.h uio.h un.h unistd.h unpcb.h user.h utsname.h uuid.h \
vadvise.h verified_exec.h vmmeter.h vnode.h vnode_if.h \ vadvise.h verified_exec.h vmmeter.h vnode.h vnode_if.h \
wait.h wdog.h wait.h wapbl.h wdog.h
INCSYMLINKS=\ INCSYMLINKS=\
sys/exec_elf.h /usr/include/elf.h \ sys/exec_elf.h /usr/include/elf.h \

View File

@ -1,4 +1,4 @@
/* $NetBSD: buf.h,v 1.109 2008/06/09 15:42:01 ad Exp $ */ /* $NetBSD: buf.h,v 1.110 2008/07/31 05:38:05 simonb Exp $ */
/*- /*-
* Copyright (c) 1999, 2000, 2007 The NetBSD Foundation, Inc. * Copyright (c) 1999, 2000, 2007 The NetBSD Foundation, Inc.
@ -162,6 +162,7 @@ struct buf {
LIST_ENTRY(buf) b_hash; /* c: hash chain */ LIST_ENTRY(buf) b_hash; /* c: hash chain */
LIST_ENTRY(buf) b_vnbufs; /* c: associated vnode */ LIST_ENTRY(buf) b_vnbufs; /* c: associated vnode */
TAILQ_ENTRY(buf) b_freelist; /* c: position if not active */ TAILQ_ENTRY(buf) b_freelist; /* c: position if not active */
LIST_ENTRY(buf) b_wapbllist; /* c: transaction buffer list */
daddr_t b_lblkno; /* c: logical block number */ daddr_t b_lblkno; /* c: logical block number */
int b_freelistindex;/* c: free list index (BQ_) */ int b_freelistindex;/* c: free list index (BQ_) */
u_int b_cflags; /* c: BC_* flags */ u_int b_cflags; /* c: BC_* flags */
@ -244,6 +245,7 @@ do { \
#define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */ #define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */
#define B_SYNC 0x02 /* Do all allocations synchronously. */ #define B_SYNC 0x02 /* Do all allocations synchronously. */
#define B_METAONLY 0x04 /* Return indirect block buffer. */ #define B_METAONLY 0x04 /* Return indirect block buffer. */
#define B_CONTIG 0x08 /* Allocate file contiguously. */
/* Flags to bread(), breadn() and breada(). */ /* Flags to bread(), breadn() and breada(). */
#define B_MODIFY 0x01 /* Hint: caller might modify buffer */ #define B_MODIFY 0x01 /* Hint: caller might modify buffer */

View File

@ -1,4 +1,4 @@
/* $NetBSD: fstypes.h,v 1.23 2008/05/06 18:43:45 ad Exp $ */ /* $NetBSD: fstypes.h,v 1.24 2008/07/31 05:38:05 simonb Exp $ */
/* /*
* Copyright (c) 1989, 1991, 1993 * Copyright (c) 1989, 1991, 1993
@ -87,7 +87,6 @@ typedef struct fhandle fhandle_t;
#define __MNT_UNUSED2 0x00200000 #define __MNT_UNUSED2 0x00200000
#define __MNT_UNUSED3 0x00800000 #define __MNT_UNUSED3 0x00800000
#define __MNT_UNUSED4 0x01000000 #define __MNT_UNUSED4 0x01000000
#define __MNT_UNUSED5 0x02000000
#define MNT_RDONLY 0x00000001 /* read only filesystem */ #define MNT_RDONLY 0x00000001 /* read only filesystem */
#define MNT_SYNCHRONOUS 0x00000002 /* file system written synchronously */ #define MNT_SYNCHRONOUS 0x00000002 /* file system written synchronously */
@ -98,6 +97,7 @@ typedef struct fhandle fhandle_t;
#define MNT_ASYNC 0x00000040 /* file system written asynchronously */ #define MNT_ASYNC 0x00000040 /* file system written asynchronously */
#define MNT_NOCOREDUMP 0x00008000 /* don't write core dumps to this FS */ #define MNT_NOCOREDUMP 0x00008000 /* don't write core dumps to this FS */
#define MNT_IGNORE 0x00100000 /* don't show entry in df */ #define MNT_IGNORE 0x00100000 /* don't show entry in df */
#define MNT_LOG 0x02000000 /* Use logging */
#define MNT_NOATIME 0x04000000 /* Never update access times in fs */ #define MNT_NOATIME 0x04000000 /* Never update access times in fs */
#define MNT_SYMPERM 0x20000000 /* recognize symlink permission */ #define MNT_SYMPERM 0x20000000 /* recognize symlink permission */
#define MNT_NODEVMTIME 0x40000000 /* Never update mod times for devs */ #define MNT_NODEVMTIME 0x40000000 /* Never update mod times for devs */
@ -116,7 +116,8 @@ typedef struct fhandle fhandle_t;
{ MNT_NOATIME, 0, "noatime" }, \ { MNT_NOATIME, 0, "noatime" }, \
{ MNT_SYMPERM, 0, "symperm" }, \ { MNT_SYMPERM, 0, "symperm" }, \
{ MNT_NODEVMTIME, 0, "nodevmtime" }, \ { MNT_NODEVMTIME, 0, "nodevmtime" }, \
{ MNT_SOFTDEP, 0, "soft dependencies" }, { MNT_SOFTDEP, 0, "soft dependencies" }, \
{ MNT_LOG, 0, "log" },
/* /*
* exported mount flags. * exported mount flags.
@ -176,7 +177,8 @@ typedef struct fhandle fhandle_t;
MNT_EXPUBLIC | \ MNT_EXPUBLIC | \
MNT_LOCAL | \ MNT_LOCAL | \
MNT_QUOTA | \ MNT_QUOTA | \
MNT_ROOTFS) MNT_ROOTFS | \
MNT_LOG)
/* /*
* External filesystem control flags. * External filesystem control flags.
@ -223,7 +225,7 @@ typedef struct fhandle fhandle_t;
"\35MNT_EXPUBLIC" \ "\35MNT_EXPUBLIC" \
"\34MNT_EXNORESPORT" \ "\34MNT_EXNORESPORT" \
"\33MNT_NOATIME" \ "\33MNT_NOATIME" \
"\32MNT_UNUSED" \ "\32MNT_LOG" \
"\31MNT_UNUSED" \ "\31MNT_UNUSED" \
"\30MNT_UNUSED" \ "\30MNT_UNUSED" \
"\27MNT_GETARGS" \ "\27MNT_GETARGS" \

View File

@ -1,4 +1,4 @@
/* $NetBSD: mount.h,v 1.180 2008/07/30 18:10:38 pooka Exp $ */ /* $NetBSD: mount.h,v 1.181 2008/07/31 05:38:05 simonb Exp $ */
/* /*
* Copyright (c) 1989, 1991, 1993 * Copyright (c) 1989, 1991, 1993
@ -121,6 +121,11 @@ struct mount {
specificdata_reference specificdata_reference
mnt_specdataref; /* subsystem specific data */ mnt_specdataref; /* subsystem specific data */
kmutex_t mnt_updating; /* to serialize updates */ kmutex_t mnt_updating; /* to serialize updates */
struct wapbl_ops
*mnt_wapbl_op; /* logging ops */
struct wapbl *mnt_wapbl; /* log info */
struct wapbl_replay
*mnt_wapbl_replay; /* replay support XXX: what? */
}; };
/* /*
@ -278,6 +283,45 @@ int fsname##_extattrctl(struct mount *, int, struct vnode *, int, \
const char *); \ const char *); \
int fsname##_suspendctl(struct mount *, int) int fsname##_suspendctl(struct mount *, int)
/*
* This operations vector is so wapbl can be wrapped into a filesystem lkm.
* XXX Eventually, we want to move this functionality
* down into the filesystems themselves so that this isn't needed.
*
* A mount refers to one of these through its mnt_wapbl_op member. Every
* hook except wo_wapbl_biodone is reached through the WAPBL_*() wrapper
* macro named in its comment; the wrapper passes the mount's mnt_wapbl
* (or mnt_wapbl_replay for the two replay hooks) as the first argument.
*/
struct wapbl_ops {
/* WAPBL_DISCARD(): used by vclean() when vinvalbuf(V_SAVE) fails. */
void (*wo_wapbl_discard)(struct wapbl *);
/* WAPBL_REPLAY_ISOPEN(): result is tested as a boolean in bread(). */
int (*wo_wapbl_replay_isopen)(struct wapbl_replay *);
/*
* WAPBL_REPLAY_READ(): bread() passes the buffer's b_data, b_blkno and
* b_bcount after a successful read; a non-zero return is treated as an
* error and the buffer is marked BC_INVAL.
*/
int (*wo_wapbl_replay_read)(struct wapbl_replay *, void *, daddr_t, long);
/* WAPBL_ADD_BUF(): called from bdwrite(). */
void (*wo_wapbl_add_buf)(struct wapbl *, struct buf *);
/* WAPBL_REMOVE_BUF(): called from brelsel() for B_LOCKED buffers. */
void (*wo_wapbl_remove_buf)(struct wapbl *, struct buf *);
/*
* WAPBL_RESIZE_BUF(): called from allocbuf() with the buffer's previous
* b_bufsize and b_bcount as the two long arguments, in that order.
*/
void (*wo_wapbl_resize_buf)(struct wapbl *, struct buf *, long, long);
/* WAPBL_BEGIN(): the string/int pair is the caller's __FILE__/__LINE__. */
int (*wo_wapbl_begin)(struct wapbl *, const char *, int);
/* WAPBL_END(): paired with a successful WAPBL_BEGIN(). */
void (*wo_wapbl_end)(struct wapbl *);
/* WAPBL_JUNLOCK_ASSERT(): used by vn_lock() under DIAGNOSTIC. */
void (*wo_wapbl_junlock_assert)(struct wapbl *);
/*
* No wrapper macro. bwrite(), bdwrite() and brelsel() compare a
* buffer's b_iodone against this pointer before handing the buffer to
* the log. NOTE(review): presumably buffers carrying this callback are
* writes issued by the log itself -- confirm in vfs_wapbl.c.
*/
void (*wo_wapbl_biodone)(struct buf *);
};
/*
* Wrappers that dispatch through a mount's wapbl_ops vector.
*
* Each macro dereferences (MP)->mnt_wapbl_op and passes (MP)->mnt_wapbl
* (the two REPLAY macros pass (MP)->mnt_wapbl_replay instead) as the
* first argument of the hook. Note that:
* - MP is evaluated more than once, so it must be free of side effects;
* - nothing here checks MP or mnt_wapbl_op for NULL, that is up to the
* caller;
* - the expansions are plain call expressions, so the macros with an int
* hook (REPLAY_ISOPEN, REPLAY_READ, BEGIN) yield that hook's return
* value.
*/
#define WAPBL_DISCARD(MP) \
(*(MP)->mnt_wapbl_op->wo_wapbl_discard)((MP)->mnt_wapbl)
#define WAPBL_REPLAY_ISOPEN(MP) \
(*(MP)->mnt_wapbl_op->wo_wapbl_replay_isopen)((MP)->mnt_wapbl_replay)
#define WAPBL_REPLAY_READ(MP, DATA, BLK, LEN) \
(*(MP)->mnt_wapbl_op->wo_wapbl_replay_read)((MP)->mnt_wapbl_replay, \
(DATA), (BLK), (LEN))
#define WAPBL_ADD_BUF(MP, BP) \
(*(MP)->mnt_wapbl_op->wo_wapbl_add_buf)((MP)->mnt_wapbl, (BP))
#define WAPBL_REMOVE_BUF(MP, BP) \
(*(MP)->mnt_wapbl_op->wo_wapbl_remove_buf)((MP)->mnt_wapbl, (BP))
#define WAPBL_RESIZE_BUF(MP, BP, OLDSZ, OLDCNT) \
(*(MP)->mnt_wapbl_op->wo_wapbl_resize_buf)((MP)->mnt_wapbl, (BP), \
(OLDSZ), (OLDCNT))
/* Passes the call site's __FILE__ and __LINE__ to the begin hook. */
#define WAPBL_BEGIN(MP) \
(*(MP)->mnt_wapbl_op->wo_wapbl_begin)((MP)->mnt_wapbl, \
__FILE__, __LINE__)
#define WAPBL_END(MP) \
(*(MP)->mnt_wapbl_op->wo_wapbl_end)((MP)->mnt_wapbl)
#define WAPBL_JUNLOCK_ASSERT(MP) \
(*(MP)->mnt_wapbl_op->wo_wapbl_junlock_assert)((MP)->mnt_wapbl)
struct vfs_hooks { struct vfs_hooks {
void (*vh_unmount)(struct mount *); void (*vh_unmount)(struct mount *);
LIST_ENTRY(vfs_hooks) vfs_hooks_list; LIST_ENTRY(vfs_hooks) vfs_hooks_list;

View File

@ -1,4 +1,4 @@
/* $NetBSD: stat.h,v 1.56 2007/10/19 15:58:52 christos Exp $ */ /* $NetBSD: stat.h,v 1.57 2008/07/31 05:38:06 simonb Exp $ */
/*- /*-
* Copyright (c) 1982, 1986, 1989, 1993 * Copyright (c) 1982, 1986, 1989, 1993
@ -214,6 +214,7 @@ struct stat {
#define SF_APPEND 0x00040000 /* writes to file may only append */ #define SF_APPEND 0x00040000 /* writes to file may only append */
/* SF_NOUNLINK 0x00100000 [NOT IMPLEMENTED] */ /* SF_NOUNLINK 0x00100000 [NOT IMPLEMENTED] */
#define SF_SNAPSHOT 0x00200000 /* snapshot inode */ #define SF_SNAPSHOT 0x00200000 /* snapshot inode */
#define SF_LOG 0x00400000 /* WAPBL log file inode */
#ifdef _KERNEL #ifdef _KERNEL
/* /*

View File

@ -1,4 +1,4 @@
/* $NetBSD: statvfs.h,v 1.14 2008/04/28 20:24:11 martin Exp $ */ /* $NetBSD: statvfs.h,v 1.15 2008/07/31 05:38:06 simonb Exp $ */
/*- /*-
* Copyright (c) 2004 The NetBSD Foundation, Inc. * Copyright (c) 2004 The NetBSD Foundation, Inc.
@ -118,6 +118,7 @@ struct statvfs {
#define ST_SYMPERM MNT_SYMPERM #define ST_SYMPERM MNT_SYMPERM
#define ST_NODEVMTIME MNT_NODEVMTIME #define ST_NODEVMTIME MNT_NODEVMTIME
#define ST_SOFTDEP MNT_SOFTDEP #define ST_SOFTDEP MNT_SOFTDEP
#define ST_LOG MNT_LOG
#define ST_EXRDONLY MNT_EXRDONLY #define ST_EXRDONLY MNT_EXRDONLY
#define ST_EXPORTED MNT_EXPORTED #define ST_EXPORTED MNT_EXPORTED

View File

@ -1,4 +1,4 @@
/* $NetBSD: vnode.h,v 1.196 2008/06/24 11:21:46 ad Exp $ */ /* $NetBSD: vnode.h,v 1.197 2008/07/31 05:38:06 simonb Exp $ */
/*- /*-
* Copyright (c) 2008 The NetBSD Foundation, Inc. * Copyright (c) 2008 The NetBSD Foundation, Inc.
@ -296,6 +296,7 @@ struct vattr {
#define IO_NORMAL 0x00800 /* operate on regular data */ #define IO_NORMAL 0x00800 /* operate on regular data */
#define IO_EXT 0x01000 /* operate on extended attributes */ #define IO_EXT 0x01000 /* operate on extended attributes */
#define IO_DIRECT 0x02000 /* direct I/O hint */ #define IO_DIRECT 0x02000 /* direct I/O hint */
#define IO_JOURNALLOCKED 0x04000 /* journal is already locked */
#define IO_ADV_MASK 0x00003 /* access pattern hint */ #define IO_ADV_MASK 0x00003 /* access pattern hint */
#define IO_ADV_SHIFT 0 #define IO_ADV_SHIFT 0
@ -342,6 +343,7 @@ extern const int vttoif_tab[];
#define FSYNC_DATAONLY 0x0002 /* fsync: hint: sync file data only */ #define FSYNC_DATAONLY 0x0002 /* fsync: hint: sync file data only */
#define FSYNC_RECLAIM 0x0004 /* fsync: hint: vnode is being reclaimed */ #define FSYNC_RECLAIM 0x0004 /* fsync: hint: vnode is being reclaimed */
#define FSYNC_LAZY 0x0008 /* fsync: lazy sync (trickle) */ #define FSYNC_LAZY 0x0008 /* fsync: lazy sync (trickle) */
#define FSYNC_NOLOG 0x0010 /* fsync: do not flush the log */
#define FSYNC_CACHE 0x0100 /* fsync: flush disk caches too */ #define FSYNC_CACHE 0x0100 /* fsync: flush disk caches too */
#define FSYNC_VFS 0x0200 /* fsync: via FSYNC_VFS() */ #define FSYNC_VFS 0x0200 /* fsync: via FSYNC_VFS() */

381
sys/sys/wapbl.h Normal file
View File

@ -0,0 +1,381 @@
/* $NetBSD: wapbl.h,v 1.2 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 2003,2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _SYS_WAPBL_H
#define _SYS_WAPBL_H
#include <sys/mutex.h>
#include <miscfs/specfs/specdev.h>
/* This header file describes the api and data structures for
* write ahead physical block logging (WAPBL) support.
*/
#if defined(_KERNEL_OPT)
#include "opt_wapbl.h"
#endif
#ifdef WAPBL_DEBUG
#ifndef WAPBL_DEBUG_PRINT
#define WAPBL_DEBUG_PRINT (WAPBL_PRINT_REPLAY | WAPBL_PRINT_OPEN)
#endif
#if 0
#define WAPBL_DEBUG_BUFBYTES
#define WAPBL_DEBUG_SERIALIZE
#endif
#endif
#ifdef WAPBL_DEBUG_PRINT
/*
 * Debug message categories.  Each value is a distinct bit so that
 * categories can be OR'ed together, both in wapbl_debug_print and in
 * the mask argument of WAPBL_PRINTF().
 */
enum {
	WAPBL_PRINT_OPEN = 0x1,
	WAPBL_PRINT_FLUSH = 0x2,
	WAPBL_PRINT_TRUNCATE = 0x4,
	WAPBL_PRINT_TRANSACTION = 0x8,
	WAPBL_PRINT_BUFFER = 0x10,
	WAPBL_PRINT_BUFFER2 = 0x20,
	WAPBL_PRINT_ALLOC = 0x40,
	WAPBL_PRINT_INODE = 0x80,
	WAPBL_PRINT_WRITE = 0x100,
	WAPBL_PRINT_IO = 0x200,
	WAPBL_PRINT_REPLAY = 0x400,
	WAPBL_PRINT_ERROR = 0x800,
	WAPBL_PRINT_DISCARD = 0x1000,
	WAPBL_PRINT_BIODONE = 0x2000,
};
/*
 * Print a debug message if any bit of `mask' is set in wapbl_debug_print.
 * `a' is a complete, parenthesized printf() argument list, e.g.
 *
 *	WAPBL_PRINTF(WAPBL_PRINT_OPEN, ("opened %d\n", n));
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement: a bare `if' here would capture the `else' of an
 * enclosing if/else at the call site.
 */
#define WAPBL_PRINTF(mask, a) do {					\
	if (wapbl_debug_print & (mask))					\
		printf a;						\
} while (/*CONSTCOND*/0)
/* Mask of the WAPBL_PRINT_* categories that are currently enabled. */
extern int wapbl_debug_print;
#else
/* Debug output is compiled out: the macro expands to nothing. */
#define WAPBL_PRINTF(mask, a)
#endif
/****************************************************************/
/* The WAPBL journal layout.
*
* The journal consists of a header followed by a circular buffer
* region. The circular data area is described by the header
* wc_circ_off, wc_circ_size, wc_head and wc_tail fields as bytes
* from the start of the journal header. New records are inserted
* at wc_head and the oldest valid record can be found at wc_tail.
* When ((wc_head == wc_tail) && (wc_head == 0)), the journal is empty.
* The condition of ((wc_head == wc_tail) && (wc_head != 0))
* indicates a full journal, although this condition is rare.
*
* The journal header as well as its records are marked by a 32bit
* type tag and length for ease of parsing. Journal records are
* padded so as to fall on journal device block boundaries.
* (XXX i think there is currently a bug wrt WC_BLOCKS not ending
* correctly on a journal device block boundary. this would need
* to be fixed if the journal blocksize does not match filesystem.)
*/
/*
* The following are the 4 record types used by the journal:
* Each tag indicates journal data organized by one of the
* structures used below.
*/
/*
* Only the first tag has an explicit value; the others take the next
* consecutive integers. These values are what wc_type holds in the
* on-disk records, so entries must not be reordered or inserted.
*/
enum {
WAPBL_WC_HEADER = 0x5741424c, /* "WABL", struct wapbl_wc_header */
WAPBL_WC_INODES, /* 0x5741424d, struct wapbl_wc_inodelist */
WAPBL_WC_REVOCATIONS, /* 0x5741424e, struct wapbl_wc_blocklist */
WAPBL_WC_BLOCKS, /* 0x5741424f, struct wapbl_wc_blocklist */
};
/* null entry (on disk) */
/* This structure isn't used directly, but shares its header
* layout with all the other log structures for the purpose
* of reading a log structure and determining its type
*/
struct wapbl_wc_null {
uint32_t wc_type; /* WAPBL_WC_* */
int32_t wc_len; /* record length (presumably in bytes -- confirm) */
/*
* Zero-length array: contributes nothing to sizeof and only marks
* where the type-specific remainder of the record starts.
*/
uint8_t wc_spare[0]; /* actually longer */
};
/* journal header (on-disk)
* This record is found at the start of the
* journal, but not within the circular buffer region. As well as
* describing the journal parameters and matching filesystem, it
* additionally serves as the atomic update record for journal
* updates.
*/
struct wapbl_wc_header {
uint32_t wc_type; /* WAPBL_WC_HEADER log magic number */
int32_t wc_len; /* length of this journal entry */
uint32_t wc_checksum;
uint32_t wc_generation;
int32_t wc_fsid[2];
uint64_t wc_time;
uint32_t wc_timensec;
uint32_t wc_version;
/* NOTE(review): the two bshift fields look like log2 block sizes of the
* log and filesystem devices -- confirm against vfs_wapbl.c. */
uint32_t wc_log_dev_bshift;
uint32_t wc_fs_dev_bshift;
/*
* wc_head, wc_tail, wc_circ_off and wc_circ_size are byte counts
* measured from the start of the journal header.
* head == tail == 0 means the journal is empty;
* head == tail != 0 means it is full.
*/
int64_t wc_head; /* where new records are inserted */
int64_t wc_tail; /* oldest valid record */
int64_t wc_circ_off; /* offset of circular buffer region */
int64_t wc_circ_size; /* size of circular buffer region */
uint8_t wc_spare[0]; /* actually longer */
};
/* list of blocks (on disk)
* This record is used to describe a set of filesystem blocks,
* and is used with two type tags, WAPBL_WC_BLOCKS and
* WAPBL_WC_REVOCATIONS.
*
* For WAPBL_WC_BLOCKS, a copy of each listed block can be found
* starting at the next log device blocksize boundary, i.e. one log
* device block past the start of the record. This contains
* the bulk of the filesystem journal data which is written using
* these records before being written into the filesystem.
*
* The WAPBL_WC_REVOCATIONS record is used to indicate that any
* previously listed blocks should not be written into the filesystem.
* This is important so that deallocated and reallocated data blocks
* do not get overwritten with stale data from the journal. The
* revocation records do not contain a copy of any actual block data.
*/
struct wapbl_wc_blocklist {
uint32_t wc_type; /* WAPBL_WC_{REVOCATIONS,BLOCKS} */
int32_t wc_len;
int32_t wc_blkcount;
int32_t wc_unused;
struct {
int64_t wc_daddr;
int32_t wc_unused;
int32_t wc_dlen;
} wc_blocks[0]; /* actually longer */
};
/*
 * List of inodes (on disk).
 *
 * This record is used to describe the set of inodes which
 * may be allocated but are unlinked.  Inodes end up listed here
 * while they are in the process of being initialized and
 * deinitialized.  Inodes unlinked while in use by a process
 * will be listed here, and the actual deletion must be completed
 * on journal replay.
 */
struct wapbl_wc_inodelist {
uint32_t wc_type; /* WAPBL_WC_INODES */
int32_t wc_len;
int32_t wc_inocnt; /* presumably the number of wc_inodes[] entries -- confirm */
int32_t wc_clear; /* set if previously listed inodes
should be ignored */
struct {
uint32_t wc_inumber;
uint32_t wc_imode;
} wc_inodes[0]; /* actually longer */
};
/****************************************************************/
#include <sys/queue.h>
#include <sys/vnode.h>
#include <sys/buf.h>
/*
 * Callback type taken (twice) by wapbl_start().
 * NOTE(review): the daddr_t */int * arguments look like parallel arrays
 * of block addresses and lengths with the final int as their count --
 * confirm against the implementation.
 */
typedef void (*wapbl_flush_fn_t)(struct mount *, daddr_t *, int *, int);
#ifdef _KERNEL
struct wapbl_entry;
struct wapbl_wc_header;
struct wapbl_replay;
struct wapbl;
/*
 * This structure holds per-transaction log information.
 */
struct wapbl_entry {
struct wapbl *we_wapbl; /* presumably the log this entry belongs to -- confirm */
SIMPLEQ_ENTRY(wapbl_entry) we_entries; /* SIMPLEQ linkage between entries */
size_t we_bufcount; /* Count of unsynced buffers */
size_t we_reclaimable_bytes; /* Number of on-disk bytes for this
transaction */
int we_error;
#ifdef WAPBL_DEBUG_BUFBYTES
size_t we_unsynced_bufbytes; /* Byte count of unsynced buffers */
#endif
};
/* NOTE(review): undocumented here; presumably one-time subsystem setup -- confirm */
void wapbl_init(void);
/* Start using a log */
int wapbl_start(struct wapbl **, struct mount *, struct vnode *, daddr_t,
size_t, size_t, struct wapbl_replay *,
wapbl_flush_fn_t, wapbl_flush_fn_t);
/* Discard the current transaction, potentially dangerous */
void wapbl_discard(struct wapbl *);
/* Stop using a log */
int wapbl_stop(struct wapbl *, int);
/*
 * Begin a new transaction, or increment the transaction recursion
 * level if called while a transaction is already in progress
 * by the current process.
 */
int wapbl_begin(struct wapbl *, const char *, int);
/* End a transaction or decrement the transaction recursion level */
void wapbl_end(struct wapbl *);
/*
 * Add a new buffer to the current transaction.  The buffer's
 * data will be copied to the current transaction log and the
 * buffer will be marked B_LOCKED so that it will not be
 * flushed to disk by the syncer or reallocated.
 */
void wapbl_add_buf(struct wapbl *, struct buf *);
/* Remove a buffer from the current transaction. */
void wapbl_remove_buf(struct wapbl *, struct buf *);
/* NOTE(review): undocumented here; the two longs are presumably old and new sizes -- confirm */
void wapbl_resize_buf(struct wapbl *, struct buf *, long, long);
/*
 * This will flush all completed transactions to disk and
 * start asynchronous writes on the associated buffers.
 */
int wapbl_flush(struct wapbl *, int);
/*
 * Inodes that are allocated but have zero link count
 * must be registered with the current transaction
 * so they may be recorded in the log and cleaned up later.
 * Registration/unregistration of inode numbers already registered is ok.
 */
void wapbl_register_inode(struct wapbl *, ino_t, mode_t);
void wapbl_unregister_inode(struct wapbl *, ino_t, mode_t);
/*
 * Metadata block deallocations must be registered so
 * that revocation records can be written and to prevent
 * the corresponding blocks from being reused as data
 * blocks until the log is on disk.
 */
void wapbl_register_deallocation(struct wapbl *, daddr_t, int);
/*
 * NOTE(review): by their names these assert that the journal lock is
 * held / not held by the caller -- confirm against the implementation.
 */
void wapbl_jlock_assert(struct wapbl *wl);
void wapbl_junlock_assert(struct wapbl *wl);
/* Print the state of a log through the supplied printf-like function pr. */
void wapbl_print(struct wapbl *wl, int full, void (*pr)(const char *, ...));
#if defined(WAPBL_DEBUG) || defined(DDB)
void wapbl_dump(struct wapbl *);
#endif
/* NOTE(review): presumably the buffer i/o completion hook for logged buffers -- confirm */
void wapbl_biodone(struct buf *);
extern struct wapbl_ops wapbl_ops;
/*
 * Map a vnode to its mount point.
 *
 * A block device vnode (VBLK) yields the mount recorded in
 * v_specmountpoint; any other vnode yields v_mount.  A NULL vnode
 * yields NULL.
 */
static __inline struct mount *
wapbl_vptomp(struct vnode *vp)
{

	if (vp == NULL)
		return NULL;

	return (vp->v_type == VBLK) ? vp->v_specmountpoint : vp->v_mount;
}
/*
 * Report whether the mount point behind a vnode has a WAPBL log
 * attached, i.e. whether wapbl_vptomp() finds a mount whose mnt_wapbl
 * is set.  A NULL vnode, or one with no mount, yields false.
 */
static __inline bool
wapbl_vphaswapbl(struct vnode *vp)
{
	struct mount *mnt;

	/* wapbl_vptomp() hands back NULL for a NULL vnode. */
	mnt = wapbl_vptomp(vp);

	return (mnt != NULL && mnt->mnt_wapbl) ? true : false;
}
#endif /* _KERNEL */
/****************************************************************/
/* Replay support */
/*
 * State kept while a journal is open for replay.  An instance counts
 * as "open" while wr_scratch is non-zero (see wapbl_replay_isopen()).
 */
struct wapbl_replay {
struct vnode *wr_logvp; /* presumably the vnode holding the log -- confirm */
struct vnode *wr_devvp; /* presumably the filesystem device vnode -- confirm */
daddr_t wr_logpbn; /* presumably the log's starting physical block -- confirm */
struct wapbl_wc_header wr_wc_header; /* in-memory journal header */
void *wr_scratch; /* non-zero while the replay is open */
LIST_HEAD(wapbl_blk_head, wapbl_blk) *wr_blkhash; /* hash table of wapbl_blk lists */
u_long wr_blkhashmask;
int wr_blkhashcnt;
off_t wr_inodeshead;
off_t wr_inodestail;
int wr_inodescnt; /* presumably the number of wr_inodes[] entries -- confirm */
struct {
uint32_t wr_inumber;
uint32_t wr_imode;
} *wr_inodes; /* same field pair as wapbl_wc_inodelist's wc_inodes[] */
};
/* True while the replay structure's scratch pointer is set. */
#define wapbl_replay_isopen(wr) ((wr)->wr_scratch != 0)
/* NOTE(review): presumably a callable (non-macro) form of wapbl_replay_isopen() -- confirm */
int wapbl_replay_isopen1(struct wapbl_replay *);
/*
 * Replay entry points.
 * NOTE(review): none of these are described in this header; argument
 * meanings and return conventions should be confirmed against the
 * implementation before relying on them.
 */
int wapbl_replay_start(struct wapbl_replay **, struct vnode *,
daddr_t, size_t, size_t);
void wapbl_replay_stop(struct wapbl_replay *);
void wapbl_replay_free(struct wapbl_replay *);
int wapbl_replay_verify(struct wapbl_replay *, struct vnode *);
int wapbl_replay_write(struct wapbl_replay *, struct vnode *);
int wapbl_replay_read(struct wapbl_replay *, void *, daddr_t, long);
/****************************************************************/
/*
 * Supply these to provide i/o support: they are declared here and
 * must be implemented by whoever uses this interface.
 */
int wapbl_write(void *, size_t, struct vnode *, daddr_t);
int wapbl_read(void *, size_t, struct vnode *, daddr_t);
/****************************************************************/
#endif /* !_SYS_WAPBL_H */

View File

@ -1,4 +1,33 @@
/* $NetBSD: ffs_alloc.c,v 1.110 2008/07/11 05:31:44 simonb Exp $ */ /* $NetBSD: ffs_alloc.c,v 1.111 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/* /*
* Copyright (c) 2002 Networks Associates Technology, Inc. * Copyright (c) 2002 Networks Associates Technology, Inc.
@ -41,7 +70,7 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.110 2008/07/11 05:31:44 simonb Exp $"); __KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.111 2008/07/31 05:38:06 simonb Exp $");
#if defined(_KERNEL_OPT) #if defined(_KERNEL_OPT)
#include "opt_ffs.h" #include "opt_ffs.h"
@ -51,13 +80,14 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.110 2008/07/11 05:31:44 simonb Exp $
#include <sys/param.h> #include <sys/param.h>
#include <sys/systm.h> #include <sys/systm.h>
#include <sys/buf.h> #include <sys/buf.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/kauth.h>
#include <sys/fstrans.h> #include <sys/fstrans.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/syslog.h>
#include <sys/vnode.h>
#include <sys/wapbl.h>
#include <miscfs/specfs/specdev.h> #include <miscfs/specfs/specdev.h>
#include <ufs/ufs/quota.h> #include <ufs/ufs/quota.h>
@ -65,21 +95,22 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.110 2008/07/11 05:31:44 simonb Exp $
#include <ufs/ufs/inode.h> #include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h> #include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h> #include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_wapbl.h>
#include <ufs/ffs/fs.h> #include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h> #include <ufs/ffs/ffs_extern.h>
static daddr_t ffs_alloccg(struct inode *, int, daddr_t, int); static daddr_t ffs_alloccg(struct inode *, int, daddr_t, int, int);
static daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t); static daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t, int);
#ifdef XXXUBC #ifdef XXXUBC
static daddr_t ffs_clusteralloc(struct inode *, int, daddr_t, int); static daddr_t ffs_clusteralloc(struct inode *, int, daddr_t, int);
#endif #endif
static ino_t ffs_dirpref(struct inode *); static ino_t ffs_dirpref(struct inode *);
static daddr_t ffs_fragextend(struct inode *, int, daddr_t, int, int); static daddr_t ffs_fragextend(struct inode *, int, daddr_t, int, int);
static void ffs_fserr(struct fs *, u_int, const char *); static void ffs_fserr(struct fs *, u_int, const char *);
static daddr_t ffs_hashalloc(struct inode *, int, daddr_t, int, static daddr_t ffs_hashalloc(struct inode *, int, daddr_t, int, int,
daddr_t (*)(struct inode *, int, daddr_t, int)); daddr_t (*)(struct inode *, int, daddr_t, int, int));
static daddr_t ffs_nodealloccg(struct inode *, int, daddr_t, int); static daddr_t ffs_nodealloccg(struct inode *, int, daddr_t, int, int);
static int32_t ffs_mapsearch(struct fs *, struct cg *, static int32_t ffs_mapsearch(struct fs *, struct cg *,
daddr_t, int); daddr_t, int);
#if defined(DIAGNOSTIC) || defined(DEBUG) #if defined(DIAGNOSTIC) || defined(DEBUG)
@ -118,7 +149,7 @@ extern const u_char * const fragtbl[];
* => releases um_lock before returning * => releases um_lock before returning
*/ */
int int
ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size, ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size, int flags,
kauth_cred_t cred, daddr_t *bnp) kauth_cred_t cred, daddr_t *bnp)
{ {
struct ufsmount *ump; struct ufsmount *ump;
@ -174,13 +205,14 @@ ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size,
return (error); return (error);
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
#endif #endif
if (bpref >= fs->fs_size) if (bpref >= fs->fs_size)
bpref = 0; bpref = 0;
if (bpref == 0) if (bpref == 0)
cg = ino_to_cg(fs, ip->i_number); cg = ino_to_cg(fs, ip->i_number);
else else
cg = dtog(fs, bpref); cg = dtog(fs, bpref);
bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg); bno = ffs_hashalloc(ip, cg, bpref, size, flags, ffs_alloccg);
if (bno > 0) { if (bno > 0) {
DIP_ADD(ip, blocks, btodb(size)); DIP_ADD(ip, blocks, btodb(size));
ip->i_flag |= IN_CHANGE | IN_UPDATE; ip->i_flag |= IN_CHANGE | IN_UPDATE;
@ -193,6 +225,20 @@ ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size,
*/ */
(void) chkdq(ip, -btodb(size), cred, FORCE); (void) chkdq(ip, -btodb(size), cred, FORCE);
#endif #endif
if (flags & B_CONTIG) {
/*
* XXX ump->um_lock handling is "suspect" at best.
* For the case where ffs_hashalloc() fails early
* in the B_CONTIG case we reach here with um_lock
* already unlocked, so we can't release it again
* like in the normal error path. See kern/39206.
*
*
* Fail silently - it's up to our caller to report
* errors.
*/
return (ENOSPC);
}
nospace: nospace:
mutex_exit(&ump->um_lock); mutex_exit(&ump->um_lock);
ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full"); ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
@ -372,14 +418,30 @@ ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize,
panic("ffs_realloccg: bad optim"); panic("ffs_realloccg: bad optim");
/* NOTREACHED */ /* NOTREACHED */
} }
bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg); bno = ffs_hashalloc(ip, cg, bpref, request, 0, ffs_alloccg);
if (bno > 0) { if (bno > 0) {
if (!DOINGSOFTDEP(ITOV(ip))) if (!DOINGSOFTDEP(ITOV(ip))) {
ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize, if ((ip->i_ump->um_mountp->mnt_wapbl) &&
ip->i_number); (ITOV(ip)->v_type != VREG)) {
if (nsize < request) UFS_WAPBL_REGISTER_DEALLOCATION(
ffs_blkfree(fs, ip->i_devvp, bno + numfrags(fs, nsize), ip->i_ump->um_mountp, fsbtodb(fs, bprev),
(long)(request - nsize), ip->i_number); osize);
} else
ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize,
ip->i_number);
}
if (nsize < request) {
if ((ip->i_ump->um_mountp->mnt_wapbl) &&
(ITOV(ip)->v_type != VREG)) {
UFS_WAPBL_REGISTER_DEALLOCATION(
ip->i_ump->um_mountp,
fsbtodb(fs, (bno + numfrags(fs, nsize))),
request - nsize);
} else
ffs_blkfree(fs, ip->i_devvp,
bno + numfrags(fs, nsize),
(long)(request - nsize), ip->i_number);
}
DIP_ADD(ip, blocks, btodb(nsize - osize)); DIP_ADD(ip, blocks, btodb(nsize - osize));
ip->i_flag |= IN_CHANGE | IN_UPDATE; ip->i_flag |= IN_CHANGE | IN_UPDATE;
if (bpp != NULL) { if (bpp != NULL) {
@ -443,7 +505,7 @@ struct ctldebug debug15 = { "prtrealloc", &prtrealloc };
#endif #endif
/* /*
* NOTE: when re-enabling this, it must be updated for UFS2. * NOTE: when re-enabling this, it must be updated for UFS2 and WAPBL.
*/ */
int doasyncfree = 1; int doasyncfree = 1;
@ -548,7 +610,7 @@ ffs_reallocblks(void *v)
* Search the block map looking for an allocation of the desired size. * Search the block map looking for an allocation of the desired size.
*/ */
if ((newblk = (daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref, if ((newblk = (daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
len, ffs_clusteralloc)) == 0) { len, flags, ffs_clusteralloc)) == 0) {
mutex_exit(&ump->um_lock); mutex_exit(&ump->um_lock);
goto fail; goto fail;
} }
@ -696,11 +758,17 @@ ffs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
ino_t ino, ipref; ino_t ino, ipref;
int cg, error; int cg, error;
UFS_WAPBL_JUNLOCK_ASSERT(pvp->v_mount);
*vpp = NULL; *vpp = NULL;
pip = VTOI(pvp); pip = VTOI(pvp);
fs = pip->i_fs; fs = pip->i_fs;
ump = pip->i_ump; ump = pip->i_ump;
error = UFS_WAPBL_BEGIN(pvp->v_mount);
if (error) {
return error;
}
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
if (fs->fs_cstotal.cs_nifree == 0) if (fs->fs_cstotal.cs_nifree == 0)
goto noinodes; goto noinodes;
@ -723,12 +791,18 @@ ffs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
if (fs->fs_contigdirs[cg] > 0) if (fs->fs_contigdirs[cg] > 0)
fs->fs_contigdirs[cg]--; fs->fs_contigdirs[cg]--;
} }
ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, ffs_nodealloccg); ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, 0, ffs_nodealloccg);
if (ino == 0) if (ino == 0)
goto noinodes; goto noinodes;
UFS_WAPBL_END(pvp->v_mount);
error = VFS_VGET(pvp->v_mount, ino, vpp); error = VFS_VGET(pvp->v_mount, ino, vpp);
if (error) { if (error) {
ffs_vfree(pvp, ino, mode); int err;
err = UFS_WAPBL_BEGIN(pvp->v_mount);
if (err == 0)
ffs_vfree(pvp, ino, mode);
if (err == 0)
UFS_WAPBL_END(pvp->v_mount);
return (error); return (error);
} }
KASSERT((*vpp)->v_type == VNON); KASSERT((*vpp)->v_type == VNON);
@ -774,6 +848,7 @@ ffs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
return (0); return (0);
noinodes: noinodes:
mutex_exit(&ump->um_lock); mutex_exit(&ump->um_lock);
UFS_WAPBL_END(pvp->v_mount);
ffs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes"); ffs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
return (ENOSPC); return (ENOSPC);
@ -922,7 +997,7 @@ ffs_dirpref(struct inode *pip)
* => um_lock held on entry and exit * => um_lock held on entry and exit
*/ */
daddr_t daddr_t
ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx, ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx, int flags,
int32_t *bap /* XXX ondisk32 */) int32_t *bap /* XXX ondisk32 */)
{ {
struct fs *fs; struct fs *fs;
@ -932,6 +1007,26 @@ ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx,
KASSERT(mutex_owned(&ip->i_ump->um_lock)); KASSERT(mutex_owned(&ip->i_ump->um_lock));
fs = ip->i_fs; fs = ip->i_fs;
/*
* If allocating a contiguous file with B_CONTIG, use the hints
* in the inode extensions to return the desired block.
*
* For metadata (indirect blocks) return the address of where
* the first indirect block resides - we'll scan for the next
* available slot if we need to allocate more than one indirect
* block. For data, return the address of the actual block
* relative to the address of the first data block.
*/
if (flags & B_CONTIG) {
KASSERT(ip->i_ffs_first_data_blk != 0);
KASSERT(ip->i_ffs_first_indir_blk != 0);
if (flags & B_METAONLY)
return ip->i_ffs_first_indir_blk;
else
return ip->i_ffs_first_data_blk + blkstofrags(fs, lbn);
}
if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
if (lbn < NDADDR + NINDIR(fs)) { if (lbn < NDADDR + NINDIR(fs)) {
cg = ino_to_cg(fs, ip->i_number); cg = ino_to_cg(fs, ip->i_number);
@ -966,7 +1061,8 @@ ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx,
} }
daddr_t daddr_t
ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int64_t *bap) ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int flags,
int64_t *bap)
{ {
struct fs *fs; struct fs *fs;
int cg; int cg;
@ -975,6 +1071,26 @@ ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int64_t *bap)
KASSERT(mutex_owned(&ip->i_ump->um_lock)); KASSERT(mutex_owned(&ip->i_ump->um_lock));
fs = ip->i_fs; fs = ip->i_fs;
/*
* If allocating a contiguous file with B_CONTIG, use the hints
* in the inode extensions to return the desired block.
*
* For metadata (indirect blocks) return the address of where
* the first indirect block resides - we'll scan for the next
* available slot if we need to allocate more than one indirect
* block. For data, return the address of the actual block
* relative to the address of the first data block.
*/
if (flags & B_CONTIG) {
KASSERT(ip->i_ffs_first_data_blk != 0);
KASSERT(ip->i_ffs_first_indir_blk != 0);
if (flags & B_METAONLY)
return ip->i_ffs_first_indir_blk;
else
return ip->i_ffs_first_data_blk + blkstofrags(fs, lbn);
}
if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
if (lbn < NDADDR + NINDIR(fs)) { if (lbn < NDADDR + NINDIR(fs)) {
cg = ino_to_cg(fs, ip->i_number); cg = ino_to_cg(fs, ip->i_number);
@ -1025,7 +1141,7 @@ ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int64_t *bap)
static daddr_t static daddr_t
ffs_hashalloc(struct inode *ip, int cg, daddr_t pref, ffs_hashalloc(struct inode *ip, int cg, daddr_t pref,
int size /* size for data blocks, mode for inodes */, int size /* size for data blocks, mode for inodes */,
daddr_t (*allocator)(struct inode *, int, daddr_t, int)) int flags, daddr_t (*allocator)(struct inode *, int, daddr_t, int, int))
{ {
struct fs *fs; struct fs *fs;
daddr_t result; daddr_t result;
@ -1035,9 +1151,12 @@ ffs_hashalloc(struct inode *ip, int cg, daddr_t pref,
/* /*
* 1: preferred cylinder group * 1: preferred cylinder group
*/ */
result = (*allocator)(ip, cg, pref, size); result = (*allocator)(ip, cg, pref, size, flags);
if (result) if (result)
return (result); return (result);
if (flags & B_CONTIG)
return (result);
/* /*
* 2: quadratic rehash * 2: quadratic rehash
*/ */
@ -1045,7 +1164,7 @@ ffs_hashalloc(struct inode *ip, int cg, daddr_t pref,
cg += i; cg += i;
if (cg >= fs->fs_ncg) if (cg >= fs->fs_ncg)
cg -= fs->fs_ncg; cg -= fs->fs_ncg;
result = (*allocator)(ip, cg, 0, size); result = (*allocator)(ip, cg, 0, size, flags);
if (result) if (result)
return (result); return (result);
} }
@ -1056,7 +1175,7 @@ ffs_hashalloc(struct inode *ip, int cg, daddr_t pref,
*/ */
cg = (icg + 2) % fs->fs_ncg; cg = (icg + 2) % fs->fs_ncg;
for (i = 2; i < fs->fs_ncg; i++) { for (i = 2; i < fs->fs_ncg; i++) {
result = (*allocator)(ip, cg, 0, size); result = (*allocator)(ip, cg, 0, size, flags);
if (result) if (result)
return (result); return (result);
cg++; cg++;
@ -1157,7 +1276,7 @@ ffs_fragextend(struct inode *ip, int cg, daddr_t bprev, int osize, int nsize)
* and if it is, allocate it. * and if it is, allocate it.
*/ */
static daddr_t static daddr_t
ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size) ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size, int flags)
{ {
struct ufsmount *ump; struct ufsmount *ump;
struct fs *fs = ip->i_fs; struct fs *fs = ip->i_fs;
@ -1192,7 +1311,7 @@ ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
cgp->cg_time = ufs_rw64(time_second, needswap); cgp->cg_time = ufs_rw64(time_second, needswap);
if (size == fs->fs_bsize) { if (size == fs->fs_bsize) {
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
blkno = ffs_alloccgblk(ip, bp, bpref); blkno = ffs_alloccgblk(ip, bp, bpref, flags);
ACTIVECG_CLR(fs, cg); ACTIVECG_CLR(fs, cg);
mutex_exit(&ump->um_lock); mutex_exit(&ump->um_lock);
bdwrite(bp); bdwrite(bp);
@ -1216,7 +1335,7 @@ ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
if (cgp->cg_cs.cs_nbfree == 0) if (cgp->cg_cs.cs_nbfree == 0)
goto fail; goto fail;
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
blkno = ffs_alloccgblk(ip, bp, bpref); blkno = ffs_alloccgblk(ip, bp, bpref, flags);
bno = dtogd(fs, blkno); bno = dtogd(fs, blkno);
for (i = frags; i < fs->fs_frag; i++) for (i = frags; i < fs->fs_frag; i++)
setbit(blksfree, bno + i); setbit(blksfree, bno + i);
@ -1276,7 +1395,7 @@ ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
* blocks may be fragmented by the routine that allocates them. * blocks may be fragmented by the routine that allocates them.
*/ */
static daddr_t static daddr_t
ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref) ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref, int flags)
{ {
struct ufsmount *ump; struct ufsmount *ump;
struct fs *fs = ip->i_fs; struct fs *fs = ip->i_fs;
@ -1304,7 +1423,14 @@ ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref)
*/ */
if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno))) if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
goto gotit; goto gotit;
/*
* if the requested data block isn't available and we are
* trying to allocate a contiguous file, return an error.
*/
if ((flags & (B_CONTIG | B_METAONLY)) == B_CONTIG)
return (0);
} }
/* /*
* Take the next available block in this cylinder group. * Take the next available block in this cylinder group.
*/ */
@ -1453,7 +1579,7 @@ ffs_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
len = blkstofrags(fs, len); len = blkstofrags(fs, len);
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
for (i = 0; i < len; i += fs->fs_frag) for (i = 0; i < len; i += fs->fs_frag)
if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i) if ((got = ffs_alloccgblk(ip, bp, bno + i, flags)) != bno + i)
panic("ffs_clusteralloc: lost block"); panic("ffs_clusteralloc: lost block");
ACTIVECG_CLR(fs, cg); ACTIVECG_CLR(fs, cg);
mutex_exit(&ump->um_lock); mutex_exit(&ump->um_lock);
@ -1477,7 +1603,7 @@ fail:
* inode in the specified cylinder group. * inode in the specified cylinder group.
*/ */
static daddr_t static daddr_t
ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode) ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode, int flags)
{ {
struct ufsmount *ump = ip->i_ump; struct ufsmount *ump = ip->i_ump;
struct fs *fs = ip->i_fs; struct fs *fs = ip->i_fs;
@ -1492,6 +1618,7 @@ ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
#endif #endif
KASSERT(mutex_owned(&ump->um_lock)); KASSERT(mutex_owned(&ump->um_lock));
UFS_WAPBL_JLOCK_ASSERT(ip->i_ump->um_mountp);
if (fs->fs_cs(fs, cg).cs_nifree == 0) if (fs->fs_cs(fs, cg).cs_nifree == 0)
return (0); return (0);
@ -1542,6 +1669,8 @@ ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
panic("ffs_nodealloccg: block not in map"); panic("ffs_nodealloccg: block not in map");
/* NOTREACHED */ /* NOTREACHED */
gotit: gotit:
UFS_WAPBL_REGISTER_INODE(ip->i_ump->um_mountp, cg * fs->fs_ipg + ipref,
mode);
/* /*
* Check to see if we need to initialize more inodes. * Check to see if we need to initialize more inodes.
*/ */
@ -1593,6 +1722,122 @@ gotit:
return (0); return (0);
} }
/*
 * Allocate a specific block or fragment.
 *
 * The caller names the exact address (bno) and byte length (size) to
 * claim.  The matching bits are cleared in the cylinder group's free
 * map, which may split a whole free block into fragments, and the
 * free-space summary counters are adjusted to match.
 *
 * This implementation should mirror ffs_blkfree().
 *
 * Returns 0 on success; EINVAL if bno is at or beyond fs_size; the
 * bread() error if the cylinder group cannot be read; EIO if the
 * cylinder group fails its magic check; EBUSY if any part of the
 * requested range is not currently free.  Panics if size is larger
 * than a block, is not a whole number of fragments, or would run past
 * the end of the block containing bno.
 *
 * => um_lock not held on entry or exit
 */
int
ffs_blkalloc(struct inode *ip, daddr_t bno, long size)
{
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct buf *cgbp;
	struct cg *cgp;
	u_int8_t *freemap;
	int32_t blkidx, cgbno;
	int cg, error, fragbase, fragmap, n, nfrags;
	const int needswap = UFS_FSNEEDSWAP(fs);

	/* The request must be whole fragments lying within one block. */
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
	    fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
		printf("dev = 0x%x, bno = %" PRId64 " bsize = %d, "
		    "size = %ld, fs = %s\n",
		    ip->i_dev, bno, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("blkalloc: bad size");
	}

	cg = dtog(fs, bno);
	if (bno >= fs->fs_size) {
		printf("bad block %" PRId64 ", ino %" PRId64 "\n", bno,
		    ip->i_number);
		ffs_fserr(fs, ip->i_uid, "bad block");
		return EINVAL;
	}

	/* Read the cylinder group block that covers bno. */
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, B_MODIFY, &cgbp);
	if (error)
		goto release;
	cgp = (struct cg *)cgbp->b_data;
	if (!cg_chkmagic(cgp, needswap)) {
		error = EIO;
		goto release;
	}

	cgp->cg_old_time = ufs_rw32(time_second, needswap);
	cgp->cg_time = ufs_rw64(time_second, needswap);
	cgbno = dtogd(fs, bno);
	freemap = cg_blksfree(cgp, needswap);

	mutex_enter(&ump->um_lock);
	if (size != fs->fs_bsize) {
		fragbase = cgbno - fragnum(fs, cgbno);
		nfrags = numfrags(fs, size);

		/* Every requested fragment must be free right now. */
		for (n = 0; n < nfrags; n++) {
			if (isclr(freemap, cgbno + n))
				goto busy;
		}

		/*
		 * If a complete block is being split, account for it:
		 * it stops being a free block and becomes fs_frag free
		 * fragments.
		 */
		blkidx = fragstoblks(fs, fragbase);
		if (ffs_isblock(fs, freemap, blkidx)) {
			ufs_add32(cgp->cg_cs.cs_nffree, fs->fs_frag, needswap);
			fs->fs_cstotal.cs_nffree += fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree += fs->fs_frag;
			ffs_clusteracct(fs, cgp, blkidx, -1);
			ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
			fs->fs_cstotal.cs_nbfree--;
			fs->fs_cs(fs, cg).cs_nbfree--;
		}

		/* Decrement the counts associated with the old frags. */
		fragmap = blkmap(fs, freemap, fragbase);
		ffs_fragacct(fs, fragmap, cgp->cg_frsum, -1, needswap);

		/* Allocate the fragments. */
		for (n = 0; n < nfrags; n++) {
			clrbit(freemap, cgbno + n);
		}
		ufs_add32(cgp->cg_cs.cs_nffree, -nfrags, needswap);
		fs->fs_cstotal.cs_nffree -= nfrags;
		fs->fs_cs(fs, cg).cs_nffree -= nfrags;

		/* Add back in the counts associated with the new frags. */
		fragmap = blkmap(fs, freemap, fragbase);
		ffs_fragacct(fs, fragmap, cgp->cg_frsum, 1, needswap);
	} else {
		/* Whole block: it must be entirely free. */
		blkidx = fragstoblks(fs, cgbno);
		if (!ffs_isblock(fs, freemap, blkidx))
			goto busy;
		ffs_clrblock(fs, freemap, blkidx);
		ffs_clusteracct(fs, cgp, blkidx, -1);
		ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
		fs->fs_cstotal.cs_nbfree--;
		fs->fs_cs(fs, cg).cs_nbfree--;
	}
	fs->fs_fmod = 1;
	ACTIVECG_CLR(fs, cg);
	mutex_exit(&ump->um_lock);
	bdwrite(cgbp);
	return 0;

busy:
	/* Part of the requested range is already in use. */
	mutex_exit(&ump->um_lock);
	error = EBUSY;
release:
	brelse(cgbp, 0);
	return error;
}
/* /*
* Free a block or fragment. * Free a block or fragment.
* *
@ -1817,6 +2062,8 @@ ffs_vfree(struct vnode *vp, ino_t ino, int mode)
/* /*
* Do the actual free operation. * Do the actual free operation.
* The specified inode is placed back in the free map. * The specified inode is placed back in the free map.
*
* => um_lock not held on entry or exit
*/ */
int int
ffs_freefile(struct fs *fs, struct vnode *devvp, ino_t ino, int mode) ffs_freefile(struct fs *fs, struct vnode *devvp, ino_t ino, int mode)
@ -1832,6 +2079,8 @@ ffs_freefile(struct fs *fs, struct vnode *devvp, ino_t ino, int mode)
const int needswap = UFS_FSNEEDSWAP(fs); const int needswap = UFS_FSNEEDSWAP(fs);
#endif #endif
UFS_WAPBL_JLOCK_ASSERT(devvp->v_specinfo->si_mountpoint);
cg = ino_to_cg(fs, ino); cg = ino_to_cg(fs, ino);
if (devvp->v_type != VBLK) { if (devvp->v_type != VBLK) {
/* devvp is a snapshot */ /* devvp is a snapshot */
@ -1871,6 +2120,8 @@ ffs_freefile(struct fs *fs, struct vnode *devvp, ino_t ino, int mode)
panic("ifree: freeing free inode"); panic("ifree: freeing free inode");
} }
clrbit(inosused, ino); clrbit(inosused, ino);
UFS_WAPBL_UNREGISTER_INODE(devvp->v_specmountpoint,
ino + cg * fs->fs_ipg, mode);
if (ino < ufs_rw32(cgp->cg_irotor, needswap)) if (ino < ufs_rw32(cgp->cg_irotor, needswap))
cgp->cg_irotor = ufs_rw32(ino, needswap); cgp->cg_irotor = ufs_rw32(ino, needswap);
ufs_add32(cgp->cg_cs.cs_nifree, 1, needswap); ufs_add32(cgp->cg_cs.cs_nifree, 1, needswap);

View File

@ -1,4 +1,4 @@
/* $NetBSD: ffs_balloc.c,v 1.50 2008/06/03 09:47:49 hannken Exp $ */ /* $NetBSD: ffs_balloc.c,v 1.51 2008/07/31 05:38:06 simonb Exp $ */
/* /*
* Copyright (c) 2002 Networks Associates Technology, Inc. * Copyright (c) 2002 Networks Associates Technology, Inc.
@ -41,7 +41,7 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.50 2008/06/03 09:47:49 hannken Exp $"); __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.51 2008/07/31 05:38:06 simonb Exp $");
#if defined(_KERNEL_OPT) #if defined(_KERNEL_OPT)
#include "opt_quota.h" #include "opt_quota.h"
@ -141,7 +141,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
if (osize < fs->fs_bsize && osize > 0) { if (osize < fs->fs_bsize && osize > 0) {
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
error = ffs_realloccg(ip, nb, error = ffs_realloccg(ip, nb,
ffs_blkpref_ufs1(ip, lastlbn, nb, ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
&ip->i_ffs1_db[0]), &ip->i_ffs1_db[0]),
osize, (int)fs->fs_bsize, cred, bpp, &newb); osize, (int)fs->fs_bsize, cred, bpp, &newb);
if (error) if (error)
@ -222,9 +222,9 @@ ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
*/ */
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
error = ffs_realloccg(ip, lbn, error = ffs_realloccg(ip, lbn,
ffs_blkpref_ufs1(ip, lbn, (int)lbn, ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
&ip->i_ffs1_db[0]), osize, nsize, cred, &ip->i_ffs1_db[0]),
bpp, &newb); osize, nsize, cred, bpp, &newb);
if (error) if (error)
return (error); return (error);
if (DOINGSOFTDEP(vp)) if (DOINGSOFTDEP(vp))
@ -245,9 +245,9 @@ ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
nsize = fs->fs_bsize; nsize = fs->fs_bsize;
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
error = ffs_alloc(ip, lbn, error = ffs_alloc(ip, lbn,
ffs_blkpref_ufs1(ip, lbn, (int)lbn, ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
&ip->i_ffs1_db[0]), &ip->i_ffs1_db[0]),
nsize, cred, &newb); nsize, flags, cred, &newb);
if (error) if (error)
return (error); return (error);
if (bpp != NULL) { if (bpp != NULL) {
@ -284,9 +284,9 @@ ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
allocblk = allociblk; allocblk = allociblk;
if (nb == 0) { if (nb == 0) {
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0); pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
&newb); flags | B_METAONLY, cred, &newb);
if (error) if (error)
goto fail; goto fail;
nb = newb; nb = newb;
@ -341,9 +341,10 @@ ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
} }
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
if (pref == 0) if (pref == 0)
pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0); pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, NULL);
&newb); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags | B_METAONLY, cred, &newb);
if (error) { if (error) {
brelse(bp, 0); brelse(bp, 0);
goto fail; goto fail;
@ -404,8 +405,9 @@ ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
goto fail; goto fail;
} }
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, &bap[0]); pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &bap[0]);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
&newb); &newb);
if (error) { if (error) {
brelse(bp, 0); brelse(bp, 0);
@ -619,7 +621,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
error = ffs_realloccg(ip, -1 - nb, error = ffs_realloccg(ip, -1 - nb,
dp->di_extb[nb], dp->di_extb[nb],
ffs_blkpref_ufs2(ip, lastlbn, (int)nb, ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
&dp->di_extb[0]), osize, flags, &dp->di_extb[0]),
osize,
(int)fs->fs_bsize, cred, &bp); (int)fs->fs_bsize, cred, &bp);
if (error) if (error)
return (error); return (error);
@ -679,8 +682,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
error = ffs_realloccg(ip, -1 - lbn, error = ffs_realloccg(ip, -1 - lbn,
dp->di_extb[lbn], dp->di_extb[lbn],
ffs_blkpref_ufs2(ip, lbn, (int)lbn, ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
&dp->di_extb[0]), osize, nsize, cred, &bp); &dp->di_extb[0]),
osize, nsize, cred, &bp);
if (error) if (error)
return (error); return (error);
bp->b_xflags |= BX_ALTDATA; bp->b_xflags |= BX_ALTDATA;
@ -696,8 +700,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
nsize = fs->fs_bsize; nsize = fs->fs_bsize;
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
error = ffs_alloc(ip, lbn, error = ffs_alloc(ip, lbn,
ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]), ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
nsize, cred, &newb); &dp->di_extb[0]),
nsize, flags, cred, &newb);
if (error) if (error)
return (error); return (error);
error = ffs_getblk(vp, -1 - lbn, fsbtodb(fs, newb), error = ffs_getblk(vp, -1 - lbn, fsbtodb(fs, newb),
@ -728,7 +733,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
if (osize < fs->fs_bsize && osize > 0) { if (osize < fs->fs_bsize && osize > 0) {
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
error = ffs_realloccg(ip, nb, error = ffs_realloccg(ip, nb,
ffs_blkpref_ufs2(ip, lastlbn, nb, ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
&ip->i_ffs2_db[0]), &ip->i_ffs2_db[0]),
osize, (int)fs->fs_bsize, cred, bpp, &newb); osize, (int)fs->fs_bsize, cred, bpp, &newb);
if (error) if (error)
@ -809,9 +814,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
*/ */
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
error = ffs_realloccg(ip, lbn, error = ffs_realloccg(ip, lbn,
ffs_blkpref_ufs2(ip, lbn, (int)lbn, ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
&ip->i_ffs2_db[0]), osize, nsize, cred, &ip->i_ffs2_db[0]),
bpp, &newb); osize, nsize, cred, bpp, &newb);
if (error) if (error)
return (error); return (error);
if (DOINGSOFTDEP(vp)) if (DOINGSOFTDEP(vp))
@ -832,8 +837,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
nsize = fs->fs_bsize; nsize = fs->fs_bsize;
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
error = ffs_alloc(ip, lbn, error = ffs_alloc(ip, lbn,
ffs_blkpref_ufs2(ip, lbn, (int)lbn, ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
&ip->i_ffs2_db[0]), nsize, cred, &newb); &ip->i_ffs2_db[0]),
nsize, flags, cred, &newb);
if (error) if (error)
return (error); return (error);
if (bpp != NULL) { if (bpp != NULL) {
@ -870,9 +876,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
allocblk = allociblk; allocblk = allociblk;
if (nb == 0) { if (nb == 0) {
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0); pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
&newb); flags | B_METAONLY, cred, &newb);
if (error) if (error)
goto fail; goto fail;
nb = newb; nb = newb;
@ -927,9 +933,10 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
} }
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
if (pref == 0) if (pref == 0)
pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0); pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, NULL);
&newb); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags | B_METAONLY, cred, &newb);
if (error) { if (error) {
brelse(bp, 0); brelse(bp, 0);
goto fail; goto fail;
@ -990,8 +997,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
goto fail; goto fail;
} }
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, &bap[0]); pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &bap[0]);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
&newb); &newb);
if (error) { if (error) {
brelse(bp, 0); brelse(bp, 0);

View File

@ -1,4 +1,4 @@
/* $NetBSD: ffs_extern.h,v 1.66 2008/06/28 01:34:05 rumble Exp $ */ /* $NetBSD: ffs_extern.h,v 1.67 2008/07/31 05:38:06 simonb Exp $ */
/*- /*-
* Copyright (c) 1991, 1993, 1994 * Copyright (c) 1991, 1993, 1994
@ -84,9 +84,10 @@ __BEGIN_DECLS
#include <sys/param.h> #include <sys/param.h>
#include <sys/mount.h> #include <sys/mount.h>
#include <sys/wapbl.h>
/* ffs_alloc.c */ /* ffs_alloc.c */
int ffs_alloc(struct inode *, daddr_t, daddr_t , int, kauth_cred_t, int ffs_alloc(struct inode *, daddr_t, daddr_t , int, int, kauth_cred_t,
daddr_t *); daddr_t *);
int ffs_realloccg(struct inode *, daddr_t, daddr_t, int, int , int ffs_realloccg(struct inode *, daddr_t, daddr_t, int, int ,
kauth_cred_t, struct buf **, daddr_t *); kauth_cred_t, struct buf **, daddr_t *);
@ -94,8 +95,9 @@ int ffs_realloccg(struct inode *, daddr_t, daddr_t, int, int ,
int ffs_reallocblks(void *); int ffs_reallocblks(void *);
#endif #endif
int ffs_valloc(struct vnode *, int, kauth_cred_t, struct vnode **); int ffs_valloc(struct vnode *, int, kauth_cred_t, struct vnode **);
daddr_t ffs_blkpref_ufs1(struct inode *, daddr_t, int, int32_t *); daddr_t ffs_blkpref_ufs1(struct inode *, daddr_t, int, int, int32_t *);
daddr_t ffs_blkpref_ufs2(struct inode *, daddr_t, int, int64_t *); daddr_t ffs_blkpref_ufs2(struct inode *, daddr_t, int, int, int64_t *);
int ffs_blkalloc(struct inode *, daddr_t, long);
void ffs_blkfree(struct fs *, struct vnode *, daddr_t, long, ino_t); void ffs_blkfree(struct fs *, struct vnode *, daddr_t, long, ino_t);
int ffs_vfree(struct vnode *, ino_t, int); int ffs_vfree(struct vnode *, ino_t, int);
void ffs_clusteracct(struct fs *, struct cg *, int32_t, int); void ffs_clusteracct(struct fs *, struct cg *, int32_t, int);
@ -175,6 +177,17 @@ void softdep_setup_allocindir_page(struct inode *, daddr_t,
void softdep_fsync_mountdev(struct vnode *); void softdep_fsync_mountdev(struct vnode *);
int softdep_sync_metadata(struct vnode *); int softdep_sync_metadata(struct vnode *);
/* Write Ahead Physical Block Logging */
void ffs_wapbl_verify_inodes(struct mount *, const char *);
void ffs_wapbl_replay_finish(struct mount *);
int ffs_wapbl_start(struct mount *);
int ffs_wapbl_stop(struct mount *, int);
int ffs_wapbl_replay_start(struct mount *, struct fs *, struct vnode *);
void ffs_wapbl_blkalloc(struct fs *, struct vnode *, daddr_t, int);
void ffs_wapbl_sync_metadata(struct mount *, daddr_t *, int *, int);
void ffs_wapbl_abort_sync_metadata(struct mount *, daddr_t *, int *, int);
extern int (**ffs_vnodeop_p)(void *); extern int (**ffs_vnodeop_p)(void *);
extern int (**ffs_specop_p)(void *); extern int (**ffs_specop_p)(void *);
extern int (**ffs_fifoop_p)(void *); extern int (**ffs_fifoop_p)(void *);

View File

@ -1,4 +1,33 @@
/* $NetBSD: ffs_inode.c,v 1.97 2008/06/03 09:47:49 hannken Exp $ */ /* $NetBSD: ffs_inode.c,v 1.98 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/* /*
* Copyright (c) 1982, 1986, 1989, 1993 * Copyright (c) 1982, 1986, 1989, 1993
@ -32,7 +61,7 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_inode.c,v 1.97 2008/06/03 09:47:49 hannken Exp $"); __KERNEL_RCSID(0, "$NetBSD: ffs_inode.c,v 1.98 2008/07/31 05:38:06 simonb Exp $");
#if defined(_KERNEL_OPT) #if defined(_KERNEL_OPT)
#include "opt_ffs.h" #include "opt_ffs.h"
@ -41,23 +70,25 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_inode.c,v 1.97 2008/06/03 09:47:49 hannken Exp $
#include <sys/param.h> #include <sys/param.h>
#include <sys/systm.h> #include <sys/systm.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h> #include <sys/buf.h>
#include <sys/vnode.h> #include <sys/file.h>
#include <sys/fstrans.h>
#include <sys/kauth.h>
#include <sys/kernel.h> #include <sys/kernel.h>
#include <sys/malloc.h> #include <sys/malloc.h>
#include <sys/trace.h> #include <sys/mount.h>
#include <sys/proc.h>
#include <sys/resourcevar.h> #include <sys/resourcevar.h>
#include <sys/kauth.h> #include <sys/trace.h>
#include <sys/fstrans.h> #include <sys/vnode.h>
#include <sys/wapbl.h>
#include <ufs/ufs/quota.h> #include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h> #include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h> #include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h> #include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h> #include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_wapbl.h>
#include <ufs/ffs/fs.h> #include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h> #include <ufs/ffs/ffs_extern.h>
@ -128,6 +159,17 @@ ffs_update(struct vnode *vp, const struct timespec *acc,
softdep_update_inodeblock(ip, bp, waitfor); softdep_update_inodeblock(ip, bp, waitfor);
} else if (ip->i_ffs_effnlink != ip->i_nlink) } else if (ip->i_ffs_effnlink != ip->i_nlink)
panic("ffs_update: bad link cnt"); panic("ffs_update: bad link cnt");
/* Keep unlinked inode list up to date */
KDASSERT(DIP(ip, nlink) == ip->i_nlink);
if (ip->i_mode) {
if (ip->i_nlink > 0) {
UFS_WAPBL_UNREGISTER_INODE(ip->i_ump->um_mountp,
ip->i_number, ip->i_mode);
} else {
UFS_WAPBL_REGISTER_INODE(ip->i_ump->um_mountp,
ip->i_number, ip->i_mode);
}
}
if (fs->fs_magic == FS_UFS1_MAGIC) { if (fs->fs_magic == FS_UFS1_MAGIC) {
cp = (char *)bp->b_data + cp = (char *)bp->b_data +
(ino_to_fsbo(fs, ip->i_number) * DINODE1_SIZE); (ino_to_fsbo(fs, ip->i_number) * DINODE1_SIZE);
@ -411,8 +453,13 @@ ffs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred)
blocksreleased += count; blocksreleased += count;
if (lastiblock[level] < 0) { if (lastiblock[level] < 0) {
DIP_ASSIGN(oip, ib[level], 0); DIP_ASSIGN(oip, ib[level], 0);
ffs_blkfree(fs, oip->i_devvp, bn, fs->fs_bsize, if (oip->i_ump->um_mountp->mnt_wapbl) {
oip->i_number); UFS_WAPBL_REGISTER_DEALLOCATION(
oip->i_ump->um_mountp,
fsbtodb(fs, bn), fs->fs_bsize);
} else
ffs_blkfree(fs, oip->i_devvp, bn,
fs->fs_bsize, oip->i_number);
blocksreleased += nblocks; blocksreleased += nblocks;
} }
} }
@ -434,7 +481,12 @@ ffs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred)
continue; continue;
DIP_ASSIGN(oip, db[i], 0); DIP_ASSIGN(oip, db[i], 0);
bsize = blksize(fs, oip, i); bsize = blksize(fs, oip, i);
ffs_blkfree(fs, oip->i_devvp, bn, bsize, oip->i_number); if ((oip->i_ump->um_mountp->mnt_wapbl) &&
(ovp->v_type != VREG)) {
UFS_WAPBL_REGISTER_DEALLOCATION(oip->i_ump->um_mountp,
fsbtodb(fs, bn), bsize);
} else
ffs_blkfree(fs, oip->i_devvp, bn, bsize, oip->i_number);
blocksreleased += btodb(bsize); blocksreleased += btodb(bsize);
} }
if (lastblock < 0) if (lastblock < 0)
@ -468,8 +520,14 @@ ffs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred)
* required for the storage we're keeping. * required for the storage we're keeping.
*/ */
bn += numfrags(fs, newspace); bn += numfrags(fs, newspace);
ffs_blkfree(fs, oip->i_devvp, bn, oldspace - newspace, if ((oip->i_ump->um_mountp->mnt_wapbl) &&
oip->i_number); (ovp->v_type != VREG)) {
UFS_WAPBL_REGISTER_DEALLOCATION(
oip->i_ump->um_mountp, fsbtodb(fs, bn),
oldspace - newspace);
} else
ffs_blkfree(fs, oip->i_devvp, bn,
oldspace - newspace, oip->i_number);
blocksreleased += btodb(oldspace - newspace); blocksreleased += btodb(oldspace - newspace);
} }
} }
@ -494,6 +552,7 @@ done:
DIP_ADD(oip, blocks, -blocksreleased); DIP_ADD(oip, blocks, -blocksreleased);
genfs_node_unlock(ovp); genfs_node_unlock(ovp);
oip->i_flag |= IN_CHANGE; oip->i_flag |= IN_CHANGE;
UFS_WAPBL_UPDATE(ovp, NULL, NULL, 0);
#ifdef QUOTA #ifdef QUOTA
(void) chkdq(oip, -blocksreleased, NOCRED, 0); (void) chkdq(oip, -blocksreleased, NOCRED, 0);
#endif #endif
@ -621,7 +680,13 @@ ffs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, daddr_t lastbn,
allerror = error; allerror = error;
blocksreleased += blkcount; blocksreleased += blkcount;
} }
ffs_blkfree(fs, ip->i_devvp, nb, fs->fs_bsize, ip->i_number); if ((ip->i_ump->um_mountp->mnt_wapbl) &&
((level > SINGLE) || (ITOV(ip)->v_type != VREG))) {
UFS_WAPBL_REGISTER_DEALLOCATION(ip->i_ump->um_mountp,
fsbtodb(fs, nb), fs->fs_bsize);
} else
ffs_blkfree(fs, ip->i_devvp, nb, fs->fs_bsize,
ip->i_number);
blocksreleased += nblocks; blocksreleased += nblocks;
} }

View File

@ -1,4 +1,33 @@
/* $NetBSD: ffs_vfsops.c,v 1.230 2008/06/28 01:34:05 rumble Exp $ */ /* $NetBSD: ffs_vfsops.c,v 1.231 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/* /*
* Copyright (c) 1989, 1991, 1993, 1994 * Copyright (c) 1989, 1991, 1993, 1994
@ -32,12 +61,13 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.230 2008/06/28 01:34:05 rumble Exp $"); __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.231 2008/07/31 05:38:06 simonb Exp $");
#if defined(_KERNEL_OPT) #if defined(_KERNEL_OPT)
#include "opt_ffs.h" #include "opt_ffs.h"
#include "opt_quota.h" #include "opt_quota.h"
#include "opt_softdep.h" #include "opt_softdep.h"
#include "opt_wapbl.h"
#endif #endif
#include <sys/param.h> #include <sys/param.h>
@ -61,6 +91,7 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.230 2008/06/28 01:34:05 rumble Exp
#include <sys/sysctl.h> #include <sys/sysctl.h>
#include <sys/conf.h> #include <sys/conf.h>
#include <sys/kauth.h> #include <sys/kauth.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h> #include <sys/fstrans.h>
#include <sys/module.h> #include <sys/module.h>
@ -73,6 +104,7 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.230 2008/06/28 01:34:05 rumble Exp
#include <ufs/ufs/dir.h> #include <ufs/ufs/dir.h>
#include <ufs/ufs/ufs_extern.h> #include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h> #include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_wapbl.h>
#include <ufs/ffs/fs.h> #include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h> #include <ufs/ffs/ffs_extern.h>
@ -243,11 +275,17 @@ ffs_mountroot(void)
vrele(rootvp); vrele(rootvp);
return (error); return (error);
} }
/*
* We always need to be able to mount the root file system.
*/
mp->mnt_flag |= MNT_FORCE;
if ((error = ffs_mountfs(rootvp, mp, l)) != 0) { if ((error = ffs_mountfs(rootvp, mp, l)) != 0) {
vfs_unbusy(mp, false, NULL); vfs_unbusy(mp, false, NULL);
vfs_destroy(mp); vfs_destroy(mp);
return (error); return (error);
} }
mp->mnt_flag &= ~MNT_FORCE;
mutex_enter(&mountlist_lock); mutex_enter(&mountlist_lock);
CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
mutex_exit(&mountlist_lock); mutex_exit(&mountlist_lock);
@ -261,6 +299,8 @@ ffs_mountroot(void)
return (0); return (0);
} }
static int dolog;
/* /*
* VFS Operations. * VFS Operations.
* *
@ -278,6 +318,9 @@ ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
int error = 0, flags, update; int error = 0, flags, update;
mode_t accessmode; mode_t accessmode;
if (dolog)
mp->mnt_flag |= MNT_LOG;
if (*data_len < sizeof *args) if (*data_len < sizeof *args)
return EINVAL; return EINVAL;
@ -378,13 +421,31 @@ ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
return (error); return (error);
} }
#ifdef WAPBL
/*
* WAPBL can only be enabled on a r/w mount
* that does not use softdep.
*/
if ((mp->mnt_flag & MNT_RDONLY) && !(mp->mnt_iflag & IMNT_WANTRDWR)) {
mp->mnt_flag &= ~MNT_LOG;
}
if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_LOG)) ==
(MNT_SOFTDEP | MNT_LOG)) {
printf("%s fs is journalled, ignoring soft update mode\n",
VFSTOUFS(mp)->um_fs->fs_fsmnt);
mp->mnt_flag &= ~MNT_SOFTDEP;
}
#else /* !WAPBL */
mp->mnt_flag &= ~MNT_LOG;
#endif /* !WAPBL */
if (!update) { if (!update) {
int xflags; int xflags;
if (mp->mnt_flag & MNT_RDONLY) if (mp->mnt_flag & MNT_RDONLY)
xflags = FREAD; xflags = FREAD;
else else
xflags = FREAD|FWRITE; xflags = FREAD | FWRITE;
error = VOP_OPEN(devvp, xflags, FSCRED); error = VOP_OPEN(devvp, xflags, FSCRED);
if (error) if (error)
goto fail; goto fail;
@ -439,6 +500,8 @@ ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
fs->fs_pendingblocks = 0; fs->fs_pendingblocks = 0;
fs->fs_pendinginodes = 0; fs->fs_pendinginodes = 0;
} }
if (error == 0)
error = UFS_WAPBL_BEGIN(mp);
if (error == 0 && if (error == 0 &&
ffs_cgupdate(ump, MNT_WAIT) == 0 && ffs_cgupdate(ump, MNT_WAIT) == 0 &&
fs->fs_clean & FS_WASCLEAN) { fs->fs_clean & FS_WASCLEAN) {
@ -447,8 +510,24 @@ ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
fs->fs_clean = FS_ISCLEAN; fs->fs_clean = FS_ISCLEAN;
(void) ffs_sbupdate(ump, MNT_WAIT); (void) ffs_sbupdate(ump, MNT_WAIT);
} }
if (error == 0)
UFS_WAPBL_END(mp);
if (error) if (error)
return (error); return (error);
}
#ifdef WAPBL
if ((mp->mnt_flag & MNT_LOG) == 0) {
error = ffs_wapbl_stop(mp, mp->mnt_flag & MNT_FORCE);
if (error)
return error;
}
#endif /* WAPBL */
if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
/*
* Finish change from r/w to r/o
*/
fs->fs_ronly = 1; fs->fs_ronly = 1;
fs->fs_fmod = 0; fs->fs_fmod = 0;
} }
@ -508,9 +587,30 @@ ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
if (error) if (error)
return (error); return (error);
} }
#ifdef WAPBL
if (fs->fs_flags & FS_DOWAPBL) {
printf("%s: replaying log to disk\n",
fs->fs_fsmnt);
KDASSERT(mp->mnt_wapbl_replay);
error = wapbl_replay_write(mp->mnt_wapbl_replay,
devvp);
if (error) {
return error;
}
wapbl_replay_stop(mp->mnt_wapbl_replay);
fs->fs_clean = FS_WASCLEAN;
}
#endif /* WAPBL */
if (fs->fs_snapinum[0] != 0) if (fs->fs_snapinum[0] != 0)
ffs_snapshot_mount(mp); ffs_snapshot_mount(mp);
} }
#ifdef WAPBL
error = ffs_wapbl_start(mp);
if (error)
return error;
#endif /* WAPBL */
if (args->fspec == NULL) if (args->fspec == NULL)
return EINVAL; return EINVAL;
if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) == if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
@ -531,17 +631,24 @@ ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
else else
fs->fs_flags &= ~FS_DOSOFTDEP; fs->fs_flags &= ~FS_DOSOFTDEP;
if (fs->fs_fmod != 0) { /* XXX */ if (fs->fs_fmod != 0) { /* XXX */
int err;
fs->fs_fmod = 0; fs->fs_fmod = 0;
if (fs->fs_clean & FS_WASCLEAN) if (fs->fs_clean & FS_WASCLEAN)
fs->fs_time = time_second; fs->fs_time = time_second;
else { else {
printf("%s: file system not clean (fs_clean=%x); please fsck(8)\n", printf("%s: file system not clean (fs_clean=%#x); "
mp->mnt_stat.f_mntfromname, fs->fs_clean); "please fsck(8)\n", mp->mnt_stat.f_mntfromname,
fs->fs_clean);
printf("%s: lost blocks %" PRId64 " files %d\n", printf("%s: lost blocks %" PRId64 " files %d\n",
mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks, mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks,
fs->fs_pendinginodes); fs->fs_pendinginodes);
} }
(void) ffs_cgupdate(ump, MNT_WAIT); err = UFS_WAPBL_BEGIN(mp);
if (err == 0) {
(void) ffs_cgupdate(ump, MNT_WAIT);
UFS_WAPBL_END(mp);
}
} }
return (error); return (error);
@ -659,7 +766,7 @@ ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
return (error); return (error);
} }
error = ffs_appleufs_validate(fs->fs_fsmnt, error = ffs_appleufs_validate(fs->fs_fsmnt,
(struct appleufslabel *)bp->b_data,NULL); (struct appleufslabel *)bp->b_data, NULL);
if (error == 0) if (error == 0)
ump->um_flags |= UFS_ISAPPLEUFS; ump->um_flags |= UFS_ISAPPLEUFS;
brelse(bp, 0); brelse(bp, 0);
@ -686,6 +793,17 @@ ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
ffs_oldfscompat_read(fs, ump, sblockloc); ffs_oldfscompat_read(fs, ump, sblockloc);
mutex_enter(&ump->um_lock); mutex_enter(&ump->um_lock);
ump->um_maxfilesize = fs->fs_maxfilesize; ump->um_maxfilesize = fs->fs_maxfilesize;
if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
mp->mnt_stat.f_mntonname, fs->fs_flags,
(mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
if ((mp->mnt_flag & MNT_FORCE) == 0) {
mutex_exit(&ump->um_lock);
return (EINVAL);
}
}
if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
fs->fs_pendingblocks = 0; fs->fs_pendingblocks = 0;
fs->fs_pendinginodes = 0; fs->fs_pendinginodes = 0;
@ -839,6 +957,17 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
if (error) if (error)
return error; return error;
ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
memset(ump, 0, sizeof *ump);
mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE);
error = ffs_snapshot_init(ump);
if (error)
goto out;
ump->um_ops = &ffs_ufsops;
#ifdef WAPBL
sbagain:
#endif
/* /*
* Try reading the superblock in each of its possible locations. * Try reading the superblock in each of its possible locations.
*/ */
@ -916,15 +1045,7 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
fs = malloc((u_long)sbsize, M_UFSMNT, M_WAITOK); fs = malloc((u_long)sbsize, M_UFSMNT, M_WAITOK);
memcpy(fs, bp->b_data, sbsize); memcpy(fs, bp->b_data, sbsize);
ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
memset(ump, 0, sizeof *ump);
mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE);
error = ffs_snapshot_init(ump);
if (error)
goto out;
ump->um_fs = fs; ump->um_fs = fs;
ump->um_ops = &ffs_ufsops;
#ifdef FFS_EI #ifdef FFS_EI
if (needswap) { if (needswap) {
@ -934,9 +1055,52 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
#endif #endif
fs->fs_flags &= ~FS_SWAPPED; fs->fs_flags &= ~FS_SWAPPED;
#ifdef WAPBL
if ((mp->mnt_wapbl_replay == 0) && (fs->fs_flags & FS_DOWAPBL)) {
error = ffs_wapbl_replay_start(mp, fs, devvp);
if (error)
goto out;
if (!ronly) {
/* XXX fsmnt may be stale. */
printf("%s: replaying log to disk\n", fs->fs_fsmnt);
error = wapbl_replay_write(mp->mnt_wapbl_replay, devvp);
if (error)
goto out;
wapbl_replay_stop(mp->mnt_wapbl_replay);
fs->fs_clean = FS_WASCLEAN;
} else {
/* XXX fsmnt may be stale */
printf("%s: replaying log to memory\n", fs->fs_fsmnt);
}
/* Force a re-read of the superblock */
brelse(bp, BC_INVAL);
bp = NULL;
free(fs, M_UFSMNT);
fs = NULL;
goto sbagain;
}
#else /* !WAPBL */
if ((fs->fs_flags & FS_DOWAPBL) && (mp->mnt_flag & MNT_FORCE) == 0) {
error = EPERM;
goto out;
}
#endif /* !WAPBL */
ffs_oldfscompat_read(fs, ump, sblockloc); ffs_oldfscompat_read(fs, ump, sblockloc);
ump->um_maxfilesize = fs->fs_maxfilesize; ump->um_maxfilesize = fs->fs_maxfilesize;
if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
mp->mnt_stat.f_mntonname, fs->fs_flags,
(mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
if ((mp->mnt_flag & MNT_FORCE) == 0) {
error = EINVAL;
goto out;
}
}
if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
fs->fs_pendingblocks = 0; fs->fs_pendingblocks = 0;
fs->fs_pendinginodes = 0; fs->fs_pendinginodes = 0;
@ -966,7 +1130,7 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
if (error) if (error)
goto out; goto out;
error = ffs_appleufs_validate(fs->fs_fsmnt, error = ffs_appleufs_validate(fs->fs_fsmnt,
(struct appleufslabel *)bp->b_data,NULL); (struct appleufslabel *)bp->b_data, NULL);
if (error == 0) { if (error == 0) {
ump->um_flags |= UFS_ISAPPLEUFS; ump->um_flags |= UFS_ISAPPLEUFS;
} }
@ -980,6 +1144,36 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
} }
#endif #endif
#if 0
/*
* XXX This code changes the behaviour of mounting dirty filesystems, to
* XXX require "mount -f ..." to mount them. This doesn't match what
* XXX mount(8) describes and is disabled for now.
*/
/*
* If the file system is not clean, don't allow it to be mounted
* unless MNT_FORCE is specified. (Note: MNT_FORCE is always set
* for the root file system.)
*/
if (fs->fs_flags & FS_DOWAPBL) {
/*
* wapbl normally expects to be FS_WASCLEAN when the FS_DOWAPBL
* bit is set, although there's a window in unmount where it
* could be FS_ISCLEAN
*/
if ((mp->mnt_flag & MNT_FORCE) == 0 &&
(fs->fs_clean & (FS_WASCLEAN | FS_ISCLEAN)) == 0) {
error = EPERM;
goto out;
}
} else
if ((fs->fs_clean & FS_ISCLEAN) == 0 &&
(mp->mnt_flag & MNT_FORCE) == 0) {
error = EPERM;
goto out;
}
#endif
/* /*
* verify that we can access the last block in the fs * verify that we can access the last block in the fs
* if we're mounting read/write. * if we're mounting read/write.
@ -999,10 +1193,12 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
} }
fs->fs_ronly = ronly; fs->fs_ronly = ronly;
if (ronly == 0) { /* Don't bump fs_clean if we're replaying journal */
fs->fs_clean <<= 1; if (!((fs->fs_flags & FS_DOWAPBL) && (fs->fs_clean & FS_WASCLEAN)))
fs->fs_fmod = 1; if (ronly == 0) {
} fs->fs_clean <<= 1;
fs->fs_fmod = 1;
}
size = fs->fs_cssize; size = fs->fs_cssize;
blks = howmany(size, fs->fs_fsize); blks = howmany(size, fs->fs_fsize);
if (fs->fs_contigsumsize > 0) if (fs->fs_contigsumsize > 0)
@ -1095,6 +1291,24 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
goto out; goto out;
} }
} }
#ifdef WAPBL
if (!ronly) {
KDASSERT(fs->fs_ronly == 0);
/*
* ffs_wapbl_start() needs mp->mnt_stat initialised if it
* needs to create a new log file in-filesystem.
*/
ffs_statvfs(mp, &mp->mnt_stat);
error = ffs_wapbl_start(mp);
if (error) {
free(fs->fs_csp, M_UFSMNT);
goto out;
}
}
#endif /* WAPBL */
if (ronly == 0 && fs->fs_snapinum[0] != 0) if (ronly == 0 && fs->fs_snapinum[0] != 0)
ffs_snapshot_mount(mp); ffs_snapshot_mount(mp);
#ifdef UFS_EXTATTR #ifdef UFS_EXTATTR
@ -1115,6 +1329,15 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
#endif /* UFS_EXTATTR */ #endif /* UFS_EXTATTR */
return (0); return (0);
out: out:
#ifdef WAPBL
if (mp->mnt_wapbl_replay) {
if (wapbl_replay_isopen(mp->mnt_wapbl_replay))
wapbl_replay_stop(mp->mnt_wapbl_replay);
wapbl_replay_free(mp->mnt_wapbl_replay);
mp->mnt_wapbl_replay = 0;
}
#endif
fstrans_unmount(mp); fstrans_unmount(mp);
if (fs) if (fs)
free(fs, M_UFSMNT); free(fs, M_UFSMNT);
@ -1175,7 +1398,7 @@ ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc)
fs->fs_csaddr = fs->fs_old_csaddr; fs->fs_csaddr = fs->fs_old_csaddr;
fs->fs_sblockloc = sblockloc; fs->fs_sblockloc = sblockloc;
fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL); fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL);
if (fs->fs_old_postblformat == FS_42POSTBLFMT) { if (fs->fs_old_postblformat == FS_42POSTBLFMT) {
fs->fs_old_nrpos = 8; fs->fs_old_nrpos = 8;
@ -1256,6 +1479,9 @@ ffs_unmount(struct mount *mp, int mntflags)
struct ufsmount *ump = VFSTOUFS(mp); struct ufsmount *ump = VFSTOUFS(mp);
struct fs *fs = ump->um_fs; struct fs *fs = ump->um_fs;
int error, flags, penderr; int error, flags, penderr;
#ifdef WAPBL
extern int doforce;
#endif
penderr = 0; penderr = 0;
flags = 0; flags = 0;
@ -1284,25 +1510,42 @@ ffs_unmount(struct mount *mp, int mntflags)
penderr = 1; penderr = 1;
} }
mutex_exit(&ump->um_lock); mutex_exit(&ump->um_lock);
if (fs->fs_ronly == 0 && error = UFS_WAPBL_BEGIN(mp);
ffs_cgupdate(ump, MNT_WAIT) == 0 && if (error == 0)
fs->fs_clean & FS_WASCLEAN) { if (fs->fs_ronly == 0 &&
/* ffs_cgupdate(ump, MNT_WAIT) == 0 &&
* XXXX don't mark fs clean in the case of softdep fs->fs_clean & FS_WASCLEAN) {
* pending block errors, until they are fixed. /*
*/ * XXXX don't mark fs clean in the case of softdep
if (penderr == 0) { * pending block errors, until they are fixed.
if (mp->mnt_flag & MNT_SOFTDEP) */
fs->fs_flags &= ~FS_DOSOFTDEP; if (penderr == 0) {
fs->fs_clean = FS_ISCLEAN; if (mp->mnt_flag & MNT_SOFTDEP)
fs->fs_flags &= ~FS_DOSOFTDEP;
fs->fs_clean = FS_ISCLEAN;
}
fs->fs_fmod = 0;
(void) ffs_sbupdate(ump, MNT_WAIT);
} }
fs->fs_fmod = 0; if (error == 0)
(void) ffs_sbupdate(ump, MNT_WAIT); UFS_WAPBL_END(mp);
#ifdef WAPBL
KASSERT(!(mp->mnt_wapbl_replay && mp->mnt_wapbl));
if (mp->mnt_wapbl_replay) {
KDASSERT(fs->fs_ronly);
wapbl_replay_stop(mp->mnt_wapbl_replay);
wapbl_replay_free(mp->mnt_wapbl_replay);
mp->mnt_wapbl_replay = 0;
} }
error = ffs_wapbl_stop(mp, doforce && (mntflags & MNT_FORCE));
if (error) {
return error;
}
#endif /* WAPBL */
if (ump->um_devvp->v_type != VBAD) if (ump->um_devvp->v_type != VBAD)
ump->um_devvp->v_specmountpoint = NULL; ump->um_devvp->v_specmountpoint = NULL;
vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
(void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE, (void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE,
NOCRED); NOCRED);
vput(ump->um_devvp); vput(ump->um_devvp);
free(fs->fs_csp, M_UFSMNT); free(fs->fs_csp, M_UFSMNT);
@ -1335,7 +1578,7 @@ ffs_flushfiles(struct mount *mp, int flags, struct lwp *l)
#ifdef QUOTA #ifdef QUOTA
if (mp->mnt_flag & MNT_QUOTA) { if (mp->mnt_flag & MNT_QUOTA) {
int i; int i;
if ((error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) != 0) if ((error = vflush(mp, NULLVP, SKIPSYSTEM | flags)) != 0)
return (error); return (error);
for (i = 0; i < MAXQUOTAS; i++) { for (i = 0; i < MAXQUOTAS; i++) {
if (ump->um_quotas[i] == NULLVP) if (ump->um_quotas[i] == NULLVP)
@ -1363,6 +1606,19 @@ ffs_flushfiles(struct mount *mp, int flags, struct lwp *l)
vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0); error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0);
VOP_UNLOCK(ump->um_devvp, 0); VOP_UNLOCK(ump->um_devvp, 0);
if (flags & FORCECLOSE) /* XXXDBJ */
error = 0;
#ifdef WAPBL
if (error)
return error;
if (mp->mnt_wapbl) {
error = wapbl_flush(mp->mnt_wapbl, 1);
if (flags & FORCECLOSE)
error = 0;
}
#endif
return (error); return (error);
} }
@ -1447,10 +1703,11 @@ loop:
continue; continue;
mutex_enter(&vp->v_interlock); mutex_enter(&vp->v_interlock);
ip = VTOI(vp); ip = VTOI(vp);
if (ip == NULL || (vp->v_iflag & (VI_XLOCK|VI_CLEAN)) != 0 || /* XXXpooka: why wapbl check? */
if (ip == NULL || (vp->v_iflag & (VI_XLOCK | VI_CLEAN)) != 0 ||
vp->v_type == VNON || ((ip->i_flag & vp->v_type == VNON || ((ip->i_flag &
(IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 && (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 &&
LIST_EMPTY(&vp->v_dirtyblkhd) && (LIST_EMPTY(&vp->v_dirtyblkhd) || (mp->mnt_wapbl)) &&
UVM_OBJ_IS_CLEAN(&vp->v_uobj))) UVM_OBJ_IS_CLEAN(&vp->v_uobj)))
{ {
mutex_exit(&vp->v_interlock); mutex_exit(&vp->v_interlock);
@ -1471,11 +1728,16 @@ loop:
} }
continue; continue;
} }
if (vp->v_type == VREG && waitfor == MNT_LAZY) if (vp->v_type == VREG && waitfor == MNT_LAZY) {
error = ffs_update(vp, NULL, NULL, 0); error = UFS_WAPBL_BEGIN(vp->v_mount);
else if (!error) {
error = VOP_FSYNC(vp, cred, error = ffs_update(vp, NULL, NULL, 0);
waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0); UFS_WAPBL_END(vp->v_mount);
}
} else {
error = VOP_FSYNC(vp, cred, FSYNC_NOLOG |
(waitfor == MNT_WAIT ? FSYNC_WAIT : 0), 0, 0);
}
if (error) if (error)
allerror = error; allerror = error;
vput(vp); vput(vp);
@ -1498,10 +1760,11 @@ loop:
!LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) { !LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) {
vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
if ((error = VOP_FSYNC(ump->um_devvp, cred, if ((error = VOP_FSYNC(ump->um_devvp, cred,
waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0)) != 0) (waitfor == MNT_WAIT ? FSYNC_WAIT : 0) | FSYNC_NOLOG,
0, 0)) != 0)
allerror = error; allerror = error;
VOP_UNLOCK(ump->um_devvp, 0); VOP_UNLOCK(ump->um_devvp, 0);
if (allerror == 0 && waitfor == MNT_WAIT) { if (allerror == 0 && waitfor == MNT_WAIT && !mp->mnt_wapbl) {
mutex_enter(&mntvnode_lock); mutex_enter(&mntvnode_lock);
goto loop; goto loop;
} }
@ -1515,9 +1778,24 @@ loop:
if (fs->fs_fmod != 0) { if (fs->fs_fmod != 0) {
fs->fs_fmod = 0; fs->fs_fmod = 0;
fs->fs_time = time_second; fs->fs_time = time_second;
if ((error = ffs_cgupdate(ump, waitfor))) error = UFS_WAPBL_BEGIN(mp);
if (error)
allerror = error;
else {
if ((error = ffs_cgupdate(ump, waitfor)))
allerror = error;
UFS_WAPBL_END(mp);
}
}
#ifdef WAPBL
if (mp->mnt_wapbl) {
error = wapbl_flush(mp->mnt_wapbl, 0);
if (error)
allerror = error; allerror = error;
} }
#endif
fstrans_done(mp); fstrans_done(mp);
vnfree(mvp); vnfree(mvp);
return (allerror); return (allerror);

View File

@ -1,4 +1,33 @@
/* $NetBSD: ffs_vnops.c,v 1.99 2008/04/29 18:18:09 ad Exp $ */ /* $NetBSD: ffs_vnops.c,v 1.100 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/* /*
* Copyright (c) 1982, 1986, 1989, 1993 * Copyright (c) 1982, 1986, 1989, 1993
@ -32,7 +61,12 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.99 2008/04/29 18:18:09 ad Exp $"); __KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.100 2008/07/31 05:38:06 simonb Exp $");
#if defined(_KERNEL_OPT)
#include "opt_ffs.h"
#include "opt_wapbl.h"
#endif
#include <sys/param.h> #include <sys/param.h>
#include <sys/systm.h> #include <sys/systm.h>
@ -48,6 +82,7 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.99 2008/04/29 18:18:09 ad Exp $");
#include <sys/pool.h> #include <sys/pool.h>
#include <sys/signalvar.h> #include <sys/signalvar.h>
#include <sys/kauth.h> #include <sys/kauth.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h> #include <sys/fstrans.h>
#include <miscfs/fifofs/fifo.h> #include <miscfs/fifofs/fifo.h>
@ -58,6 +93,7 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.99 2008/04/29 18:18:09 ad Exp $");
#include <ufs/ufs/dir.h> #include <ufs/ufs/dir.h>
#include <ufs/ufs/ufs_extern.h> #include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h> #include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_wapbl.h>
#include <ufs/ffs/fs.h> #include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h> #include <ufs/ffs/ffs_extern.h>
@ -246,6 +282,9 @@ ffs_fsync(void *v)
int bsize; int bsize;
daddr_t blk_high; daddr_t blk_high;
struct vnode *vp; struct vnode *vp;
#ifdef WAPBL
struct mount *mp;
#endif
vp = ap->a_vp; vp = ap->a_vp;
@ -255,7 +294,11 @@ ffs_fsync(void *v)
*/ */
if ((ap->a_offlo == 0 && ap->a_offhi == 0) || DOINGSOFTDEP(vp) || if ((ap->a_offlo == 0 && ap->a_offhi == 0) || DOINGSOFTDEP(vp) ||
(vp->v_type != VREG)) { (vp->v_type != VREG)) {
error = ffs_full_fsync(vp, ap->a_flags); int flags = ap->a_flags;
if (vp->v_type == VBLK)
flags |= FSYNC_VFS;
error = ffs_full_fsync(vp, flags);
goto out; goto out;
} }
@ -276,6 +319,36 @@ ffs_fsync(void *v)
goto out; goto out;
} }
#ifdef WAPBL
mp = wapbl_vptomp(vp);
if (mp->mnt_wapbl) {
if (ap->a_flags & FSYNC_DATAONLY) {
fstrans_done(vp->v_mount);
return 0;
}
error = 0;
if (vp->v_tag == VT_UFS && VTOI(vp)->i_flag &
(IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY |
IN_MODIFIED | IN_ACCESSED)) {
error = UFS_WAPBL_BEGIN(mp);
if (error) {
fstrans_done(vp->v_mount);
return error;
}
error = ffs_update(vp, NULL, NULL,
(ap->a_flags & FSYNC_WAIT) ? UPDATE_WAIT : 0);
UFS_WAPBL_END(mp);
}
if (error || (ap->a_flags & FSYNC_NOLOG)) {
fstrans_done(vp->v_mount);
return error;
}
error = wapbl_flush(mp->mnt_wapbl, 0);
fstrans_done(vp->v_mount);
return error;
}
#endif /* WAPBL */
/* /*
* Then, flush indirect blocks. * Then, flush indirect blocks.
*/ */
@ -350,7 +423,7 @@ ffs_full_fsync(struct vnode *vp, int flags)
*/ */
if (vp->v_type == VREG || vp->v_type == VBLK) { if (vp->v_type == VREG || vp->v_type == VBLK) {
if ((flags & FSYNC_VFS) != 0) if ((flags & FSYNC_VFS) != 0 && vp->v_specmountpoint != NULL)
mp = vp->v_specmountpoint; mp = vp->v_specmountpoint;
else else
mp = vp->v_mount; mp = vp->v_mount;
@ -360,8 +433,55 @@ ffs_full_fsync(struct vnode *vp, int flags)
PGO_FREE : 0)); PGO_FREE : 0));
if (error) if (error)
return error; return error;
} else } else {
mp = vp->v_mount;
mutex_exit(&vp->v_interlock); mutex_exit(&vp->v_interlock);
}
#ifdef WAPBL
if (mp && mp->mnt_wapbl) {
error = 0;
if (flags & FSYNC_DATAONLY)
return error;
if (VTOI(vp) && (VTOI(vp)->i_flag &
(IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY |
IN_MODIFIED | IN_ACCESSED))) {
error = UFS_WAPBL_BEGIN(mp);
if (error)
return error;
error = ffs_update(vp, NULL, NULL,
(flags & FSYNC_WAIT) ? UPDATE_WAIT : 0);
UFS_WAPBL_END(mp);
}
if (error || (flags & FSYNC_NOLOG))
return error;
/*
* Don't flush the log if the vnode being flushed
* contains no dirty buffers that could be in the log.
*/
if (!((flags & FSYNC_RECLAIM) &&
LIST_EMPTY(&vp->v_dirtyblkhd))) {
error = wapbl_flush(mp->mnt_wapbl, 0);
if (error)
return error;
}
/*
* XXX temporary workaround for "dirty bufs" panic in
* vinvalbuf. need a full fix for the v_numoutput
* waiters issues.
*/
if (flags & FSYNC_WAIT) {
mutex_enter(&vp->v_interlock);
while (vp->v_numoutput)
cv_wait(&vp->v_cv, &vp->v_interlock);
mutex_exit(&vp->v_interlock);
}
return error;
}
#endif /* WAPBL */
passes = NIADDR + 1; passes = NIADDR + 1;
skipmeta = 0; skipmeta = 0;
@ -453,8 +573,10 @@ loop:
if (error == 0 && flags & FSYNC_CACHE) { if (error == 0 && flags & FSYNC_CACHE) {
int i = 0; int i = 0;
if ((flags & FSYNC_VFS) == 0) if ((flags & FSYNC_VFS) == 0) {
KASSERT(VTOI(vp) != NULL);
vp = VTOI(vp)->i_devvp; vp = VTOI(vp)->i_devvp;
}
VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE, curlwp->l_cred); VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE, curlwp->l_cred);
} }

858
sys/ufs/ffs/ffs_wapbl.c Normal file
View File

@ -0,0 +1,858 @@
/* $NetBSD: ffs_wapbl.c,v 1.2 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 2003,2006,2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_wapbl.c,v 1.2 2008/07/31 05:38:06 simonb Exp $");
#if defined(_KERNEL_OPT)
#include "opt_ffs.h"
#endif
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/file.h>
#include <sys/disk.h>
#include <sys/disklabel.h>
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/kauth.h>
#include <sys/wapbl.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_wapbl.h>
#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
/*
 * Debug tracing for this file.
 *
 * WAPBL_DEBUG is unconditionally undefined on the next line, so by default
 * DPRINTF() expands to an empty statement.  Replace the #undef with a
 * #define to get "function:line" prefixed trace output, which can then be
 * switched off at run time by clearing ffs_wapbl_debug.
 */
#undef WAPBL_DEBUG
#ifdef WAPBL_DEBUG
int ffs_wapbl_debug = 1;
#define DPRINTF(fmt, args...) \
do { \
if (ffs_wapbl_debug) \
printf("%s:%d "fmt, __func__ , __LINE__, ##args); \
} while (/* CONSTCOND */0)
#else
#define DPRINTF(fmt, args...) \
do { \
/* nothing */ \
} while (/* CONSTCOND */0)
#endif
static int wapbl_log_position(struct mount *, struct fs *, struct vnode *,
daddr_t *, size_t *, size_t *, uint64_t *);
static int wapbl_create_infs_log(struct mount *, struct fs *, struct vnode *,
daddr_t *, size_t *, size_t *, uint64_t *);
static void wapbl_find_log_start(struct mount *, struct vnode *, off_t,
daddr_t *, daddr_t *, size_t *);
static int wapbl_remove_log(struct mount *);
static int wapbl_allocate_log_file(struct mount *, struct vnode *);
/*
* This function is invoked after a log is replayed to
* disk to perform logical cleanup actions as described by
* the log
*/
void
ffs_wapbl_replay_finish(struct mount *mp)
{
struct wapbl_replay *wr = mp->mnt_wapbl_replay;
int i;
int error;
if (!wr)
return;
KDASSERT((mp->mnt_flag & MNT_RDONLY) == 0);
for (i = 0; i < wr->wr_inodescnt; i++) {
struct vnode *vp;
struct inode *ip;
error = VFS_VGET(mp, wr->wr_inodes[i].wr_inumber, &vp);
if (error) {
printf("ffs_wapbl_replay_finish: "
"unable to cleanup inode %" PRIu32 "\n",
wr->wr_inodes[i].wr_inumber);
continue;
}
ip = VTOI(vp);
KDASSERT(wr->wr_inodes[i].wr_inumber == ip->i_number);
printf("ffs_wapbl_replay_finish: "
"cleaning inode %" PRIu64 " size=%" PRIu64 " mode=%o nlink=%d\n",
ip->i_number, ip->i_size, ip->i_mode, ip->i_nlink);
KASSERT(ip->i_nlink == 0);
/*
* The journal may have left partially allocated inodes in mode
* zero. This may occur if a crash occurs betweeen the node
* allocation in ffs_nodeallocg and when the node is properly
* initialized in ufs_makeinode. If so, just dallocate them.
*/
if (ip->i_mode == 0) {
UFS_WAPBL_BEGIN(mp);
ffs_vfree(vp, ip->i_number, wr->wr_inodes[i].wr_imode);
UFS_WAPBL_END(mp);
}
vput(vp);
}
mp->mnt_wapbl_replay = 0;
wapbl_replay_free(wr);
}
/*
 * Callback for wapbl.
 *
 * Releases each of the "dealloccnt" extents described by the parallel
 * arrays deallocblks[] (device block addresses) and dealloclens[], then
 * stamps the superblock and pushes it out with ffs_cgupdate().
 */
void
ffs_wapbl_sync_metadata(struct mount *mp, daddr_t *deallocblks,
    int *dealloclens, int dealloccnt)
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs = ump->um_fs;
	int error;
	int idx;

#ifdef WAPBL_DEBUG_INODES
	ufs_wapbl_verify_inodes(mp, "ffs_wapbl_sync_metadata");
#endif

	idx = 0;
	while (idx < dealloccnt) {
		/*
		 * ffs_blkfree() does not report errors; it may silently
		 * fail if the cylinder group block cannot be read.
		 */
		ffs_blkfree(fs, ump->um_devvp,
		    dbtofsb(fs, deallocblks[idx]), dealloclens[idx], -1);
		idx++;
	}

	fs->fs_fmod = 0;
	fs->fs_time = time_second;

	error = ffs_cgupdate(ump, 0);
	KASSERT(error == 0);
}
/*
 * Undo ffs_wapbl_sync_metadata(): try to re-allocate every extent listed
 * in deallocblks[]/dealloclens[].  Errors from ffs_blkalloc() are ignored.
 */
void
ffs_wapbl_abort_sync_metadata(struct mount *mp, daddr_t *deallocblks,
    int *dealloclens, int dealloccnt)
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs = ump->um_fs;
	/*
	 * Rather than hunting for a real in-use inode, hand ffs_blkalloc()
	 * a stand-in with only the handful of fields it touches filled in.
	 */
	struct inode in;
	int idx;

	in.i_fs = fs;
	in.i_devvp = ump->um_devvp;
	in.i_dev = ump->um_dev;
	in.i_number = -1;
	in.i_uid = 0;

	idx = 0;
	while (idx < dealloccnt) {
		/*
		 * The earlier blkfree may itself have failed, in which case
		 * this blkalloc can fail too, so its result is not checked.
		 * If the blkfree did succeed this should not fail, because
		 * the buffer is locked in the current transaction.
		 */
		ffs_blkalloc(&in, dbtofsb(fs, deallocblks[idx]),
		    dealloclens[idx]);
		idx++;
	}
}
/*
 * Forget the journal recorded in the superblock.
 *
 * For an in-filesystem journal the log inode's link count is set back to
 * zero first.  Problems locating or validating that inode are reported
 * with printf() but do not prevent the superblock journal fields from
 * being cleared.  The cleared superblock is written with ffs_sbupdate();
 * its result is deliberately ignored.
 *
 * Returns 0 on success, or EINVAL if the superblock names an unknown
 * journal location type (in which case nothing is cleared).
 */
static int
wapbl_remove_log(struct mount *mp)
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs = ump->um_fs;
	struct vnode *vp;
	struct inode *ip;
	ino_t log_ino;
	int error;

	/* If all the log locators are 0, just clean up */
	if (fs->fs_journallocs[0] == 0 &&
	    fs->fs_journallocs[1] == 0 &&
	    fs->fs_journallocs[2] == 0 &&
	    fs->fs_journallocs[3] == 0) {
		DPRINTF("empty locators, just clear\n");
		goto done;
	}

	switch (fs->fs_journal_location) {
	case UFS_WAPBL_JOURNALLOC_NONE:
		/* nothing! */
		DPRINTF("no log\n");
		break;

	case UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM:
		log_ino = fs->fs_journallocs[UFS_WAPBL_INFS_INO];
		DPRINTF("in-fs log, ino = %" PRId64 "\n",log_ino);

		/* if no existing log inode, just clear all fields and bail */
		if (log_ino == 0)
			goto done;
		error = VFS_VGET(mp, log_ino, &vp);
		if (error != 0) {
			printf("ffs_wapbl: vget failed %d\n",
			    error);
			/* clear out log info on error */
			goto done;
		}
		ip = VTOI(vp);
		KASSERT(log_ino == ip->i_number);
		if ((ip->i_flags & SF_LOG) == 0) {
			printf("ffs_wapbl: try to clear non-log inode "
			    "%" PRId64 "\n", log_ino);
			vput(vp);
			/* clear out log info on error */
			goto done;
		}

		/*
		 * remove the log inode by setting its link count back
		 * to zero and bail.
		 */
		ip->i_ffs_effnlink = 0;
		ip->i_nlink = 0;
		DIP_ASSIGN(ip, nlink, 0);
		if (DOINGSOFTDEP(vp))
			softdep_change_linkcnt(ip);
		vput(vp);
		/*
		 * Stop here explicitly: falling into the next case would
		 * only emit a misleading "end-of-partition" debug message.
		 */
		break;

	case UFS_WAPBL_JOURNALLOC_END_PARTITION:
		DPRINTF("end-of-partition log\n");
		/* no extra work required */
		break;

	default:
		printf("ffs_wapbl: unknown journal type %d\n",
		    fs->fs_journal_location);
		return EINVAL;
	}

done:
	/* Clear out all previous knowledge of journal */
	fs->fs_journal_version = 0;
	fs->fs_journal_location = 0;
	fs->fs_journal_flags = 0;
	fs->fs_journallocs[0] = 0;
	fs->fs_journallocs[1] = 0;
	fs->fs_journallocs[2] = 0;
	fs->fs_journallocs[3] = 0;
	(void) ffs_sbupdate(ump, MNT_WAIT);

	return 0;
}
/*
 * Bring journaling on a mount in line with its mount flags and finish any
 * pending log replay.
 *
 * When no log is attached yet (mp->mnt_wapbl == 0):
 *  - if the superblock carries UFS_WAPBL_FLAGS_CLEAR_LOG, the recorded
 *    journal is removed first;
 *  - if MNT_LOG is set, the log is located (or created) with
 *    wapbl_log_position(), started with wapbl_start(), and FS_DOWAPBL is
 *    recorded in the superblock inside a transaction that is then flushed;
 *  - otherwise a stale FS_DOWAPBL flag is cleared from the in-core
 *    superblock and the superblock is marked modified.
 *
 * Afterwards, if the filesystem is writable and replay state is still
 * attached to the mount, ffs_wapbl_replay_finish() is run.
 *
 * Returns 0 on success or an errno value; EINVAL is returned when the log
 * block size is not DEV_BSIZE.
 */
int
ffs_wapbl_start(struct mount *mp)
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs = ump->um_fs;
	struct vnode *devvp = ump->um_devvp;
	daddr_t off;
	size_t count;
	size_t blksize;
	uint64_t extradata;
	int error;

	if (mp->mnt_wapbl == 0) {
		if (fs->fs_journal_flags & UFS_WAPBL_FLAGS_CLEAR_LOG) {
			/* Clear out any existing journal file */
			error = wapbl_remove_log(mp);
			if (error != 0)
				return error;
		}

		if (mp->mnt_flag & MNT_LOG) {
			KDASSERT(fs->fs_ronly == 0);

			/* extradata is filled in for us but not used here. */
			error = wapbl_log_position(mp, fs, devvp, &off,
			    &count, &blksize, &extradata);
			if (error)
				return error;

			/* XXX any other consistency checks here? */
			if (blksize != DEV_BSIZE) {
				printf("%s: bad blocksize %zd\n", __func__,
				    blksize);
				return EINVAL;
			}

			error = wapbl_start(&mp->mnt_wapbl, mp, devvp, off,
			    count, blksize, mp->mnt_wapbl_replay,
			    ffs_wapbl_sync_metadata,
			    ffs_wapbl_abort_sync_metadata);
			if (error)
				return error;

			mp->mnt_wapbl_op = &wapbl_ops;

#ifdef WAPBL_DEBUG
			printf("%s: enabling logging\n", fs->fs_fsmnt);
#endif

			if ((fs->fs_flags & FS_DOWAPBL) == 0) {
				/*
				 * NOTE(review): the result of
				 * UFS_WAPBL_BEGIN() is not checked here,
				 * unlike the other callers in this file --
				 * confirm whether it can fail at this point.
				 */
				UFS_WAPBL_BEGIN(mp);
				fs->fs_flags |= FS_DOWAPBL;
				error = ffs_sbupdate(ump, MNT_WAIT);
				if (error) {
					/* Close the transaction, then tear the log down. */
					UFS_WAPBL_END(mp);
					ffs_wapbl_stop(mp, MNT_FORCE);
					return error;
				}
				UFS_WAPBL_END(mp);
				/* Push the flag change out to the log right away. */
				error = wapbl_flush(mp->mnt_wapbl, 1);
				if (error) {
					ffs_wapbl_stop(mp, MNT_FORCE);
					return error;
				}
			}
		} else if (fs->fs_flags & FS_DOWAPBL) {
			/* Logging not requested: drop the stale on-disk flag. */
			fs->fs_fmod = 1;
			fs->fs_flags &= ~FS_DOWAPBL;
		}
	}

	/*
	 * It is recommended that you finish replay with logging enabled.
	 * However, even if logging is not enabled, the remaining log
	 * replay should be safely recoverable with an fsck, so perform
	 * it anyway.
	 */
	if ((fs->fs_ronly == 0) && mp->mnt_wapbl_replay) {
		/* Remember only the MNT_RDONLY bit so it can be restored below. */
		int saveflag = mp->mnt_flag & MNT_RDONLY;
		/*
		 * Make sure MNT_RDONLY is not set so that the inode
		 * cleanup in ufs_inactive will actually do its work.
		 */
		mp->mnt_flag &= ~MNT_RDONLY;
		ffs_wapbl_replay_finish(mp);
		mp->mnt_flag |= saveflag;
		KASSERT(fs->fs_ronly == 0);
	}

	return 0;
}
/*
 * Detach the journal from a mount.
 *
 * The log is flushed, FS_DOWAPBL is cleared from the superblock inside a
 * transaction of its own, and the log is then shut down with wapbl_stop().
 * With "force" set, a failing flush or transaction start is not fatal: the
 * superblock update is skipped and the log is stopped anyway.
 *
 * Returns 0 on success (or if no log was attached), otherwise the error
 * from wapbl_flush(), UFS_WAPBL_BEGIN() or wapbl_stop().
 */
int
ffs_wapbl_stop(struct mount *mp, int force)
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs = ump->um_fs;
	int error;

	/* No journal attached: nothing to stop. */
	if (!mp->mnt_wapbl)
		return 0;

	KDASSERT(fs->fs_ronly == 0);

	/*
	 * Flush first so that clearing FS_DOWAPBL is the only change in
	 * the final transaction; otherwise a transaction could reorder
	 * writes.
	 */
	error = wapbl_flush(mp->mnt_wapbl, 1);
	if (error == 0)
		error = UFS_WAPBL_BEGIN(mp);

	if (error != 0) {
		/* Not forced: give up.  Forced: go straight to wapbl_stop(). */
		if (!force)
			return error;
	} else {
		KASSERT(fs->fs_flags & FS_DOWAPBL);
		fs->fs_flags &= ~FS_DOWAPBL;
		error = ffs_sbupdate(ump, MNT_WAIT);
		KASSERT(error == 0);	/* XXX a bit drastic! */
		UFS_WAPBL_END(mp);
	}

	error = wapbl_stop(mp->mnt_wapbl, force);
	if (error) {
		/* The log is still live, so keep the flag set. */
		KASSERT(!force);
		fs->fs_flags |= FS_DOWAPBL;
		return error;
	}

	/* Clear again: the forced path above never touched the flag. */
	fs->fs_flags &= ~FS_DOWAPBL;
	mp->mnt_wapbl = 0;

#ifdef WAPBL_DEBUG
	printf("%s: disabled logging\n", fs->fs_fsmnt);
#endif

	return 0;
}
/*
 * Locate the journal for "fs" on "devvp" and start replaying it, leaving
 * the replay state in mp->mnt_wapbl_replay.  On success the mount's wapbl
 * operations vector is pointed at wapbl_ops.
 *
 * Returns 0 on success, or the error from wapbl_log_position() or
 * wapbl_replay_start().
 */
int
ffs_wapbl_replay_start(struct mount *mp, struct fs *fs, struct vnode *devvp)
{
	daddr_t logstart;
	size_t logcount;
	size_t logblksize;
	uint64_t extra;		/* filled in by wapbl_log_position(), unused */
	int rv;

	rv = wapbl_log_position(mp, fs, devvp, &logstart, &logcount,
	    &logblksize, &extra);
	if (rv == 0) {
		rv = wapbl_replay_start(&mp->mnt_wapbl_replay, devvp,
		    logstart, logcount, logblksize);
	}
	if (rv != 0)
		return rv;

	mp->mnt_wapbl_op = &wapbl_ops;
	return 0;
}
/*
 * If the superblock doesn't already have a recorded journal location
 * then we allocate the journal in one of two positions:
 *
 *  - At the end of the partition after the filesystem if there's
 *    enough space.  "Enough space" is defined as >= 1MB of journal
 *    per 1GB of filesystem or 64MB, whichever is smaller.
 *
 *  - Inside the filesystem.  We try to allocate a contiguous journal
 *    based on the total filesystem size - the target is 1MB of journal
 *    per 1GB of filesystem, up to a maximum journal size of 64MB.  As
 *    a worst case allowing for fragmentation, we'll allocate a journal
 *    1/4 of the desired size but never smaller than 1MB.
 *
 *    XXX In the future if we allow for non-contiguous journal files we
 *    can tighten the above restrictions.
 *
 * On success the log's start block, length in blocks, block size and
 * location-specific extra word are returned through startp, countp,
 * blksizep and extradatap; all four are always filled in.
 *
 * Returns 0 on success, EINVAL for an unknown recorded journal location,
 * or the error from the disk ioctls, ffs_sbupdate() or
 * wapbl_create_infs_log().
 *
 * XXX
 * There seems to be a lot of duplication both here and in some of
 * the userland tools (fsck_ffs, dumpfs, tunefs) with similar
 * "switch (fs_journal_location)" constructs.  Can we centralise
 * this sort of code somehow/somewhere?
 */
static int
wapbl_log_position(struct mount *mp, struct fs *fs, struct vnode *devvp,
    daddr_t *startp, size_t *countp, size_t *blksizep, uint64_t *extradatap)
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct partinfo dpart;
	daddr_t logstart, logend, desired_logsize;
	size_t blksize;
	int error;

	if (fs->fs_journal_version == UFS_WAPBL_VERSION) {
		switch (fs->fs_journal_location) {
		case UFS_WAPBL_JOURNALLOC_END_PARTITION:
			DPRINTF("found existing end-of-partition log\n");
			*startp = fs->fs_journallocs[UFS_WAPBL_EPART_ADDR];
			*countp = fs->fs_journallocs[UFS_WAPBL_EPART_COUNT];
			*blksizep = fs->fs_journallocs[UFS_WAPBL_EPART_BLKSZ];
			/* Don't hand back an uninitialized out-parameter. */
			*extradatap =
			    fs->fs_journallocs[UFS_WAPBL_EPART_UNUSED];
			DPRINTF(" start = %" PRId64 ", size = %zd, "
			    "blksize = %zd\n", *startp, *countp, *blksizep);
			return 0;

		case UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM:
			DPRINTF("found existing in-filesystem log\n");
			*startp = fs->fs_journallocs[UFS_WAPBL_INFS_ADDR];
			*countp = fs->fs_journallocs[UFS_WAPBL_INFS_COUNT];
			*blksizep = fs->fs_journallocs[UFS_WAPBL_INFS_BLKSZ];
			/* Same word wapbl_create_infs_log() reports. */
			*extradatap = fs->fs_journallocs[UFS_WAPBL_INFS_INO];
			DPRINTF(" start = %" PRId64 ", size = %zd, "
			    "blksize = %zd\n", *startp, *countp, *blksizep);
			return 0;

		default:
			printf("ffs_wapbl: unknown journal type %d\n",
			    fs->fs_journal_location);
			return EINVAL;
		}
	}

	/* Desired journal size, in bytes. */
	desired_logsize =
	    lfragtosize(fs, fs->fs_size) / UFS_WAPBL_JOURNAL_SCALE;
	DPRINTF("desired log size = %" PRId64 " kB\n", desired_logsize / 1024);
	desired_logsize = max(desired_logsize, UFS_WAPBL_MIN_JOURNAL_SIZE);
	desired_logsize = min(desired_logsize, UFS_WAPBL_MAX_JOURNAL_SIZE);
	DPRINTF("adjusted desired log size = %" PRId64 " kB\n",
	    desired_logsize / 1024);

	/* Is there space after the filesystem on the partition for a log? */
	logstart = fsbtodb(fs, fs->fs_size);
	error = VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, FSCRED);
	if (!error) {
		logend = dpart.part->p_size;
		blksize = dpart.disklab->d_secsize;
	} else {
		struct dkwedge_info dkw;

		/* Not a disklabel partition; try asking as a wedge. */
		error = VOP_IOCTL(devvp, DIOCGWEDGEINFO, &dkw, FREAD, FSCRED);
		if (error)
			return error;

		blksize = DEV_BSIZE;
		logend = dkw.dkw_size;
	}

	/*
	 * (logend - logstart) counts blocks of "blksize" bytes (it is what
	 * gets returned in *countp alongside *blksizep), while
	 * desired_logsize is in bytes, so convert before comparing.  The
	 * cast keeps the arithmetic signed so that a partition smaller than
	 * the filesystem yields a negative value, not a huge unsigned one.
	 */
	if ((logend - logstart) * (daddr_t)blksize >= desired_logsize) {
		KDASSERT(blksize != 0);
		DPRINTF("enough space, use end-of-partition log\n");

		*startp = logstart;
		*countp = (logend - logstart);
		*blksizep = blksize;
		*extradatap = 0;

		/* update superblock with log location */
		fs->fs_journal_version = UFS_WAPBL_VERSION;
		fs->fs_journal_location = UFS_WAPBL_JOURNALLOC_END_PARTITION;
		fs->fs_journal_flags = 0;
		fs->fs_journallocs[UFS_WAPBL_EPART_ADDR] = *startp;
		fs->fs_journallocs[UFS_WAPBL_EPART_COUNT] = *countp;
		fs->fs_journallocs[UFS_WAPBL_EPART_BLKSZ] = *blksizep;
		fs->fs_journallocs[UFS_WAPBL_EPART_UNUSED] = *extradatap;

		error = ffs_sbupdate(ump, MNT_WAIT);
		return error;
	}
	DPRINTF("end-of-partition has only %" PRId64 " free\n",
	    logend - logstart);

	/* Not enough room after the filesystem: put the log inside it. */
	error = wapbl_create_infs_log(mp, fs, devvp, startp, countp, blksizep,
	    extradatap);

	/* Sync whether or not the log was created; the result is ignored. */
	ffs_sync(mp, 1, FSCRED);

	return error;
}
/*
 * Try to create a journal log inside the filesystem.
 *
 * A fresh inode is allocated, set up as a regular file flagged SF_LOG
 * with a link count of one, and handed to wapbl_allocate_log_file() to
 * get its blocks.  On success the log location recorded in the superblock
 * is copied out through startp, countp, blksizep and extradatap (the
 * latter receiving the log's inode number).  If the space cannot be
 * allocated, the inode's link count is put back to zero and the error is
 * returned.
 */
static int
wapbl_create_infs_log(struct mount *mp, struct fs *fs, struct vnode *devvp,
    daddr_t *startp, size_t *countp, size_t *blksizep, uint64_t *extradatap)
{
	struct vnode *logvp, *rootvp;
	struct inode *logip;
	int error;

	error = VFS_ROOT(mp, &rootvp);
	if (error != 0)
		return error;

	/*
	 * The root vnode is only needed for the UFS_VALLOC() call, so it
	 * is released whether or not the allocation worked.
	 */
	error = UFS_VALLOC(rootvp, 0 | S_IFREG, NOCRED, &logvp);
	vput(rootvp);
	if (error != 0)
		return error;

	/* Turn the new inode into a regular file marked as the log. */
	logvp->v_type = VREG;
	logip = VTOI(logvp);
	logip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	logip->i_mode = 0 | IFREG;
	DIP_ASSIGN(logip, mode, logip->i_mode);
	logip->i_flags = SF_LOG;
	DIP_ASSIGN(logip, flags, logip->i_flags);
	logip->i_ffs_effnlink = 1;
	logip->i_nlink = 1;
	DIP_ASSIGN(logip, nlink, 1);
	if (DOINGSOFTDEP(logvp))
		softdep_change_linkcnt(logip);
	ffs_update(logvp, NULL, NULL, UPDATE_WAIT);

	error = wapbl_allocate_log_file(mp, logvp);
	if (error != 0) {
		/*
		 * No room for the log file: drop the inode again by
		 * zeroing its link count, then report the failure.
		 */
		logip->i_ffs_effnlink = 0;
		logip->i_nlink = 0;
		DIP_ASSIGN(logip, nlink, 0);
		if (DOINGSOFTDEP(logvp))
			softdep_change_linkcnt(logip);
		vput(logvp);

		return error;
	}

	/*
	 * The place-holder inode for the journal now exists on disk;
	 * the vnode itself is never needed again.
	 */
	vput(logvp);

	*startp = fs->fs_journallocs[UFS_WAPBL_INFS_ADDR];
	*countp = fs->fs_journallocs[UFS_WAPBL_INFS_COUNT];
	*blksizep = fs->fs_journallocs[UFS_WAPBL_INFS_BLKSZ];
	*extradatap = fs->fs_journallocs[UFS_WAPBL_INFS_INO];

	return 0;
}
int
wapbl_allocate_log_file(struct mount *mp, struct vnode *vp)
{
struct ufsmount *ump = VFSTOUFS(mp);
struct fs *fs = ump->um_fs;
daddr_t addr, indir_addr;
off_t logsize;
size_t size;
int error;
logsize = 0;
/* check if there's a suggested log size */
if (fs->fs_journal_flags & UFS_WAPBL_FLAGS_CREATE_LOG &&
fs->fs_journal_location == UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM)
logsize = fs->fs_journallocs[UFS_WAPBL_INFS_COUNT];
if (vp->v_size > 0) {
printf("%s: file size (%" PRId64 ") non zero\n", __func__,
vp->v_size);
return EEXIST;
}
wapbl_find_log_start(mp, vp, logsize, &addr, &indir_addr, &size);
if (addr == 0) {
printf("%s: log not allocated, largest extent is "
"%" PRId64 "MB\n", __func__,
lblktosize(fs, size) / (1024 * 1024));
return ENOSPC;
}
logsize = lblktosize(fs, size); /* final log size */
VTOI(vp)->i_ffs_first_data_blk = addr;
VTOI(vp)->i_ffs_first_indir_blk = indir_addr;
error = GOP_ALLOC(vp, 0, logsize, B_CONTIG, FSCRED);
if (error) {
printf("%s: GOP_ALLOC error %d\n", __func__, error);
return error;
}
fs->fs_journal_version = UFS_WAPBL_VERSION;
fs->fs_journal_location = UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM;
fs->fs_journal_flags = 0;
fs->fs_journallocs[UFS_WAPBL_INFS_ADDR] =
lfragtosize(fs, addr) / DEV_BSIZE;
fs->fs_journallocs[UFS_WAPBL_INFS_COUNT] = logsize / DEV_BSIZE;
fs->fs_journallocs[UFS_WAPBL_INFS_BLKSZ] = DEV_BSIZE;
fs->fs_journallocs[UFS_WAPBL_INFS_INO] = VTOI(vp)->i_number;
error = ffs_sbupdate(ump, MNT_WAIT);
return error;
}
/*
 * Find a suitable location for the journal in the filesystem.
 *
 * Our strategy here is to look for a contiguous block of free space
 * at least "logsize" bytes in size (plus room for any indirect blocks).
 * We start at the middle of the filesystem and check each cylinder
 * group working outwards.  If "logsize" bytes are not available as a
 * single contiguous chunk, then return the address and size of the
 * largest chunk found.
 *
 * A "logsize" of 0 means "pick a size from the filesystem size"; in that
 * case an extent as small as a quarter of the desired size is accepted.
 *
 * Results:
 *  *addr        fragment address of the first data block, or 0 on failure
 *  *indir_addr  fragment address of the first indirect block, or 0
 *  *size        number of data blocks available for the log (never
 *               negative/wrapped; 0 if nothing usable was found)
 *
 * XXX
 * At what stage does the search fail?  Is it reasonable to fail if the
 * largest space we could find is less than a quarter of the requested
 * space?  If the search fails entirely, a block address of "0" is
 * returned to indicate this.
 */
static void
wapbl_find_log_start(struct mount *mp, struct vnode *vp, off_t logsize,
    daddr_t *addr, daddr_t *indir_addr, size_t *size)
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs = ump->um_fs;
	struct vnode *devvp = ump->um_devvp;
	struct cg *cgp;
	struct buf *bp;
	uint8_t *blksfree;
	daddr_t blkno, best_addr, start_addr;
	daddr_t desired_blks, min_desired_blks;
	daddr_t freeblks, best_blks, usable_blks;
	int bpcg, cg, error, fixedsize, indir_blks, n, s;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif

	if (logsize == 0) {
		fixedsize = 0;	/* We can adjust the size if tight */
		logsize = lfragtosize(fs, fs->fs_dsize) /
		    UFS_WAPBL_JOURNAL_SCALE;
		DPRINTF("suggested log size = %" PRId64 "\n", logsize);
		logsize = max(logsize, UFS_WAPBL_MIN_JOURNAL_SIZE);
		logsize = min(logsize, UFS_WAPBL_MAX_JOURNAL_SIZE);
		DPRINTF("adjusted log size = %" PRId64 "\n", logsize);
	} else {
		fixedsize = 1;
		DPRINTF("fixed log size = %" PRId64 "\n", logsize);
	}

	desired_blks = logsize / fs->fs_bsize;
	DPRINTF("desired blocks = %" PRId64 "\n", desired_blks);

	/* add in number of indirect blocks needed */
	indir_blks = 0;
	if (desired_blks >= NDADDR) {
		struct indir indirs[NIADDR + 2];
		int num;

		error = ufs_getlbns(vp, desired_blks, indirs, &num);
		if (error) {
			printf("%s: ufs_getlbns failed, error %d!\n",
			    __func__, error);
			goto bad;
		}

		switch (num) {
		case 2:
			indir_blks = 1;		/* 1st level indirect */
			break;
		case 3:
			indir_blks = 1 +	/* 1st level indirect */
			    1 +			/* 2nd level indirect */
			    indirs[1].in_off + 1; /* extra 1st level indirect */
			break;
		default:
			printf("%s: unexpected numlevels %d from ufs_getlbns\n",
			    __func__, num);
			goto bad;
		}
		desired_blks += indir_blks;
	}
	DPRINTF("desired blocks = %" PRId64 " (including indirect)\n",
	    desired_blks);

	/*
	 * If a specific size wasn't requested, allow for a smaller log
	 * if we're really tight for space...
	 */
	min_desired_blks = desired_blks;
	if (!fixedsize)
		min_desired_blks = desired_blks / 4;

	/* Look at number of blocks per CG.  If it's too small, bail early. */
	bpcg = fragstoblks(fs, fs->fs_fpg);
	if (min_desired_blks > bpcg) {
		printf("ffs_wapbl: cylinder group size of %" PRId64 " MB "
		    " is not big enough for journal\n",
		    lblktosize(fs, bpcg) / (1024 * 1024));
		goto bad;
	}

	/*
	 * Start with the middle cylinder group, and search outwards in
	 * both directions until we either find the requested log size
	 * or reach the start/end of the file system.  If we reach the
	 * start/end without finding enough space for the full requested
	 * log size, use the largest extent found if it is large enough
	 * to satisfy our minimum size.
	 *
	 * The step (n * s) alternates in sign and grows by one each time,
	 * visiting mid, mid-1, mid+1, mid-2, ...
	 *
	 * XXX
	 * Can we just use the cluster contigsum stuff (esp on UFS2)
	 * here to simplify this search code?
	 */
	best_addr = 0;
	best_blks = 0;
	for (cg = fs->fs_ncg / 2, s = 0, n = 1;
	    best_blks < desired_blks && cg >= 0 && cg < fs->fs_ncg;
	    s++, n = -n, cg += n * s) {
		DPRINTF("check cg %d of %d\n", cg, fs->fs_ncg);
		error = bread(devvp, fsbtodb(fs, cgtod(fs, cg)),
		    fs->fs_cgsize, FSCRED, 0, &bp);
		cgp = (struct cg *)bp->b_data;
		if (error || !cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
			/* Unreadable or corrupt group: skip it. */
			brelse(bp, 0);
			continue;
		}

		blksfree = cg_blksfree(cgp, needswap);

		for (blkno = 0; blkno < bpcg;) {
			/* look for next free block */
			/* XXX use scanc() and fragtbl[] here? */
			for (; blkno < bpcg - min_desired_blks; blkno++)
				if (ffs_isblock(fs, blksfree, blkno))
					break;

			/* past end of search space in this CG? */
			if (blkno >= bpcg - min_desired_blks)
				break;

			/* count how many free blocks in this extent */
			start_addr = blkno;
			for (freeblks = 0; blkno < bpcg; blkno++, freeblks++)
				if (!ffs_isblock(fs, blksfree, blkno))
					break;

			if (freeblks > best_blks) {
				best_blks = freeblks;
				best_addr = blkstofrags(fs, start_addr) +
				    cgbase(fs, cg);

				if (freeblks >= desired_blks) {
					DPRINTF("found len %" PRId64
					    " at offset %" PRId64 " in gc\n",
					    freeblks, start_addr);
					break;
				}
			}
		}
		brelse(bp, 0);
	}
	DPRINTF("best found len = %" PRId64 ", wanted %" PRId64
	    " at addr %" PRId64 "\n", best_blks, desired_blks, best_addr);

	if (best_blks < min_desired_blks) {
		*addr = 0;
		*indir_addr = 0;
	} else {
		/* put indirect blocks at start, and data blocks after */
		*addr = best_addr + blkstofrags(fs, indir_blks);
		*indir_addr = best_addr;
	}

	/*
	 * Report the number of data blocks, i.e. the extent minus the
	 * indirect blocks.  Clamp at zero: when the best extent is smaller
	 * than the indirect-block overhead (including the "nothing found"
	 * case) a plain subtraction would wrap around in the unsigned
	 * result and make the caller report a bogus extent size.
	 */
	usable_blks = (best_blks < desired_blks) ? best_blks : desired_blks;
	if (usable_blks > indir_blks)
		*size = (size_t)(usable_blks - indir_blks);
	else
		*size = 0;
	return;

bad:
	*addr = 0;
	*indir_addr = 0;
	*size = 0;
	return;
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: fs.h,v 1.49 2007/12/25 18:33:49 perry Exp $ */ /* $NetBSD: fs.h,v 1.50 2008/07/31 05:38:06 simonb Exp $ */
/* /*
* Copyright (c) 1982, 1986, 1993 * Copyright (c) 1982, 1986, 1993
@ -327,7 +327,12 @@ struct fs {
int32_t fs_old_cpc; /* cyl per cycle in postbl */ int32_t fs_old_cpc; /* cyl per cycle in postbl */
/* this area is otherwise allocated unless fs_old_flags & FS_FLAGS_UPDATED */ /* this area is otherwise allocated unless fs_old_flags & FS_FLAGS_UPDATED */
int32_t fs_maxbsize; /* maximum blocking factor permitted */ int32_t fs_maxbsize; /* maximum blocking factor permitted */
int64_t fs_sparecon64[17]; /* old rotation block list head */ uint8_t fs_journal_version; /* journal format version */
uint8_t fs_journal_location; /* journal location type */
uint8_t fs_journal_reserved[2];/* reserved for future use */
uint32_t fs_journal_flags; /* journal flags */
uint64_t fs_journallocs[4]; /* location info for journal */
int64_t fs_sparecon64[12]; /* reserved for future use */
int64_t fs_sblockloc; /* byte offset of standard superblock */ int64_t fs_sblockloc; /* byte offset of standard superblock */
struct csum_total fs_cstotal; /* cylinder summary information */ struct csum_total fs_cstotal; /* cylinder summary information */
int64_t fs_time; /* last time written */ int64_t fs_time; /* last time written */
@ -406,13 +411,17 @@ struct fs {
/* /*
* File system flags * File system flags
*/ */
#define FS_UNCLEAN 0x01 /* file system not clean at mount (unused) */ #define FS_UNCLEAN 0x001 /* file system not clean at mount (unused) */
#define FS_DOSOFTDEP 0x02 /* file system using soft dependencies */ #define FS_DOSOFTDEP 0x002 /* file system using soft dependencies */
#define FS_NEEDSFSCK 0x04 /* needs sync fsck (FreeBSD compat, unused) */ #define FS_NEEDSFSCK 0x004 /* needs sync fsck (FreeBSD compat, unused) */
#define FS_INDEXDIRS 0x08 /* kernel supports indexed directories */ #define FS_INDEXDIRS 0x008 /* kernel supports indexed directories */
#define FS_ACLS 0x10 /* file system has ACLs enabled */ #define FS_ACLS 0x010 /* file system has ACLs enabled */
#define FS_MULTILABEL 0x20 /* file system is MAC multi-label */ #define FS_MULTILABEL 0x020 /* file system is MAC multi-label */
#define FS_FLAGS_UPDATED 0x80 /* flags have been moved to new location */ #define FS_FLAGS_UPDATED 0x80 /* flags have been moved to new location */
#define FS_DOWAPBL 0x100 /* Write ahead physical block logging */
/* File system flags that are ok for NetBSD if set in fs_flags */
#define FS_KNOWN_FLAGS (FS_DOSOFTDEP | FS_DOWAPBL)
/* /*
* File system internal flags, also in fs_flags. * File system internal flags, also in fs_flags.

View File

@ -1,4 +1,4 @@
# $NetBSD: files.ufs,v 1.17 2007/12/12 02:56:03 lukem Exp $ # $NetBSD: files.ufs,v 1.18 2008/07/31 05:38:06 simonb Exp $
deffs fs_ffs.h FFS deffs fs_ffs.h FFS
deffs EXT2FS deffs EXT2FS
@ -34,6 +34,7 @@ file ufs/ffs/ffs_subr.c ffs | mfs | ext2fs
file ufs/ffs/ffs_tables.c ffs | mfs | ext2fs file ufs/ffs/ffs_tables.c ffs | mfs | ext2fs
file ufs/ffs/ffs_vfsops.c ffs | mfs | ext2fs file ufs/ffs/ffs_vfsops.c ffs | mfs | ext2fs
file ufs/ffs/ffs_vnops.c ffs | mfs | ext2fs file ufs/ffs/ffs_vnops.c ffs | mfs | ext2fs
file ufs/ffs/ffs_wapbl.c ffs & wapbl
file ufs/ffs/ffs_appleufs.c ffs & apple_ufs file ufs/ffs/ffs_appleufs.c ffs & apple_ufs
file ufs/lfs/lfs_alloc.c lfs file ufs/lfs/lfs_alloc.c lfs
@ -62,3 +63,4 @@ file ufs/ufs/ufs_lookup.c ffs | lfs | mfs | ext2fs
file ufs/ufs/ufs_quota.c quota & (ffs | lfs | mfs | ext2fs) file ufs/ufs/ufs_quota.c quota & (ffs | lfs | mfs | ext2fs)
file ufs/ufs/ufs_vfsops.c ffs | lfs | mfs | ext2fs file ufs/ufs/ufs_vfsops.c ffs | lfs | mfs | ext2fs
file ufs/ufs/ufs_vnops.c ffs | lfs | mfs | ext2fs file ufs/ufs/ufs_vnops.c ffs | lfs | mfs | ext2fs
file ufs/ufs/ufs_wapbl.c ffs & wapbl

View File

@ -1,8 +1,8 @@
# $NetBSD: Makefile,v 1.5 2005/12/11 12:25:28 christos Exp $ # $NetBSD: Makefile,v 1.6 2008/07/31 05:38:06 simonb Exp $
INCSDIR= /usr/include/ufs/ufs INCSDIR= /usr/include/ufs/ufs
INCS= dinode.h dir.h extattr.h inode.h quota.h ufs_bswap.h ufs_extern.h \ INCS= dinode.h dir.h extattr.h inode.h quota.h ufs_bswap.h ufs_extern.h \
ufsmount.h ufs_wapbl.h ufsmount.h
.include <bsd.kinc.mk> .include <bsd.kinc.mk>

View File

@ -1,4 +1,4 @@
/* $NetBSD: inode.h,v 1.51 2008/01/09 16:15:23 ad Exp $ */ /* $NetBSD: inode.h,v 1.52 2008/07/31 05:38:06 simonb Exp $ */
/* /*
* Copyright (c) 1982, 1989, 1993 * Copyright (c) 1982, 1989, 1993
@ -51,6 +51,9 @@
*/ */
struct ffs_inode_ext { struct ffs_inode_ext {
daddr_t *ffs_snapblklist; /* Collect expunged snapshot blocks. */ daddr_t *ffs_snapblklist; /* Collect expunged snapshot blocks. */
/* The following two fields are used by contiguous allocation code only. */
daddr_t ffs_first_data_blk; /* first data block on disk. */
daddr_t ffs_first_indir_blk; /* first indirect block on disk. */
}; };
struct ext2fs_inode_ext { struct ext2fs_inode_ext {
@ -113,6 +116,8 @@ struct inode {
struct lfs_inode_ext *lfs; struct lfs_inode_ext *lfs;
} inode_ext; } inode_ext;
#define i_snapblklist inode_ext.ffs.ffs_snapblklist #define i_snapblklist inode_ext.ffs.ffs_snapblklist
#define i_ffs_first_data_blk inode_ext.ffs.ffs_first_data_blk
#define i_ffs_first_indir_blk inode_ext.ffs.ffs_first_indir_blk
#define i_e2fs_last_lblk inode_ext.e2fs.ext2fs_last_lblk #define i_e2fs_last_lblk inode_ext.e2fs.ext2fs_last_lblk
#define i_e2fs_last_blk inode_ext.e2fs.ext2fs_last_blk #define i_e2fs_last_blk inode_ext.e2fs.ext2fs_last_blk
/* /*
@ -219,7 +224,7 @@ struct inode {
#define IN_CLEANING 0x0100 /* LFS: file is being cleaned */ #define IN_CLEANING 0x0100 /* LFS: file is being cleaned */
#define IN_ADIROP 0x0200 /* LFS: dirop in progress */ #define IN_ADIROP 0x0200 /* LFS: dirop in progress */
#define IN_SPACECOUNTED 0x0400 /* Blocks to be freed in free count. */ #define IN_SPACECOUNTED 0x0400 /* Blocks to be freed in free count. */
#define IN_PAGING 0x1000 /* LFS: file is on paging queue */ #define IN_PAGING 0x1000 /* LFS: file is on paging queue */
#if defined(_KERNEL) #if defined(_KERNEL)

View File

@ -1,4 +1,4 @@
/* $NetBSD: ufs_inode.c,v 1.75 2008/01/17 10:39:15 ad Exp $ */ /* $NetBSD: ufs_inode.c,v 1.76 2008/07/31 05:38:06 simonb Exp $ */
/* /*
* Copyright (c) 1991, 1993 * Copyright (c) 1991, 1993
@ -37,11 +37,12 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.75 2008/01/17 10:39:15 ad Exp $"); __KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.76 2008/07/31 05:38:06 simonb Exp $");
#if defined(_KERNEL_OPT) #if defined(_KERNEL_OPT)
#include "opt_ffs.h" #include "opt_ffs.h"
#include "opt_quota.h" #include "opt_quota.h"
#include "opt_wapbl.h"
#endif #endif
#include <sys/param.h> #include <sys/param.h>
@ -52,12 +53,14 @@ __KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.75 2008/01/17 10:39:15 ad Exp $");
#include <sys/kernel.h> #include <sys/kernel.h>
#include <sys/namei.h> #include <sys/namei.h>
#include <sys/kauth.h> #include <sys/kauth.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h> #include <sys/fstrans.h>
#include <sys/kmem.h> #include <sys/kmem.h>
#include <ufs/ufs/inode.h> #include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h> #include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h> #include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_wapbl.h>
#ifdef UFS_DIRHASH #ifdef UFS_DIRHASH
#include <ufs/ufs/dirhash.h> #include <ufs/ufs/dirhash.h>
#endif #endif
@ -84,6 +87,9 @@ ufs_inactive(void *v)
struct mount *transmp; struct mount *transmp;
mode_t mode; mode_t mode;
int error = 0; int error = 0;
int logged = 0;
UFS_WAPBL_JUNLOCK_ASSERT(vp->v_mount);
transmp = vp->v_mount; transmp = vp->v_mount;
fstrans_start(transmp, FSTRANS_SHARED); fstrans_start(transmp, FSTRANS_SHARED);
@ -96,6 +102,10 @@ ufs_inactive(void *v)
softdep_releasefile(ip); softdep_releasefile(ip);
if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error)
goto out;
logged = 1;
#ifdef QUOTA #ifdef QUOTA
(void)chkiq(ip, -1, NOCRED, 0); (void)chkiq(ip, -1, NOCRED, 0);
#endif #endif
@ -103,7 +113,35 @@ ufs_inactive(void *v)
ufs_extattr_vnode_inactive(vp, curlwp); ufs_extattr_vnode_inactive(vp, curlwp);
#endif #endif
if (ip->i_size != 0) { if (ip->i_size != 0) {
error = UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED); /*
* When journaling, only truncate one indirect block
* at a time
*/
if (vp->v_mount->mnt_wapbl) {
uint64_t incr = MNINDIR(ip->i_ump) <<
vp->v_mount->mnt_fs_bshift; /* Power of 2 */
uint64_t base = NDADDR <<
vp->v_mount->mnt_fs_bshift;
while (!error && ip->i_size > base + incr) {
/*
* round down to next full indirect
* block boundary.
*/
uint64_t nsize = base +
((ip->i_size - base - 1) &
~(incr - 1));
error = UFS_TRUNCATE(vp, nsize, 0,
NOCRED);
if (error)
break;
UFS_WAPBL_END(vp->v_mount);
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error)
goto out;
}
}
if (!error)
error = UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED);
} }
/* /*
* Setting the mode to zero needs to wait for the inode * Setting the mode to zero needs to wait for the inode
@ -125,8 +163,16 @@ ufs_inactive(void *v)
} }
if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) { if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) {
if (!logged++) {
int err;
err = UFS_WAPBL_BEGIN(vp->v_mount);
if (err)
goto out;
}
UFS_UPDATE(vp, NULL, NULL, 0); UFS_UPDATE(vp, NULL, NULL, 0);
} }
if (logged)
UFS_WAPBL_END(vp->v_mount);
out: out:
/* /*
* If we are done with the inode, reclaim it * If we are done with the inode, reclaim it
@ -149,6 +195,10 @@ ufs_reclaim(struct vnode *vp)
if (prtactive && vp->v_usecount > 1) if (prtactive && vp->v_usecount > 1)
vprint("ufs_reclaim: pushing active", vp); vprint("ufs_reclaim: pushing active", vp);
if (!UFS_WAPBL_BEGIN(vp->v_mount)) {
UFS_UPDATE(vp, NULL, NULL, UPDATE_CLOSE);
UFS_WAPBL_END(vp->v_mount);
}
UFS_UPDATE(vp, NULL, NULL, UPDATE_CLOSE); UFS_UPDATE(vp, NULL, NULL, UPDATE_CLOSE);
/* /*

View File

@ -1,4 +1,4 @@
/* $NetBSD: ufs_lookup.c,v 1.98 2008/06/05 09:32:29 hannken Exp $ */ /* $NetBSD: ufs_lookup.c,v 1.99 2008/07/31 05:38:06 simonb Exp $ */
/* /*
* Copyright (c) 1989, 1993 * Copyright (c) 1989, 1993
@ -37,7 +37,7 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ufs_lookup.c,v 1.98 2008/06/05 09:32:29 hannken Exp $"); __KERNEL_RCSID(0, "$NetBSD: ufs_lookup.c,v 1.99 2008/07/31 05:38:06 simonb Exp $");
#ifdef _KERNEL_OPT #ifdef _KERNEL_OPT
#include "opt_ffs.h" #include "opt_ffs.h"
@ -53,6 +53,7 @@ __KERNEL_RCSID(0, "$NetBSD: ufs_lookup.c,v 1.98 2008/06/05 09:32:29 hannken Exp
#include <sys/vnode.h> #include <sys/vnode.h>
#include <sys/kernel.h> #include <sys/kernel.h>
#include <sys/kauth.h> #include <sys/kauth.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h> #include <sys/fstrans.h>
#include <sys/proc.h> #include <sys/proc.h>
#include <sys/kmem.h> #include <sys/kmem.h>
@ -65,6 +66,7 @@ __KERNEL_RCSID(0, "$NetBSD: ufs_lookup.c,v 1.98 2008/06/05 09:32:29 hannken Exp
#include <ufs/ufs/ufsmount.h> #include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h> #include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h> #include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_wapbl.h>
#include "fs_ffs.h" #include "fs_ffs.h"
@ -158,7 +160,7 @@ ufs_lookup(void *v)
return (error); return (error);
if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) && if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) &&
(cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) (nameiop == DELETE || nameiop == RENAME))
return (EROFS); return (EROFS);
/* /*
@ -495,6 +497,7 @@ found:
dp->i_size = dp->i_offset + DIRSIZ(FSFMT(vdp), ep, needswap); dp->i_size = dp->i_offset + DIRSIZ(FSFMT(vdp), ep, needswap);
DIP_ASSIGN(dp, size, dp->i_size); DIP_ASSIGN(dp, size, dp->i_size);
dp->i_flag |= IN_CHANGE | IN_UPDATE; dp->i_flag |= IN_CHANGE | IN_UPDATE;
UFS_WAPBL_UPDATE(vdp, NULL, NULL, UPDATE_DIROP);
} }
brelse(bp, 0); brelse(bp, 0);
@ -690,11 +693,12 @@ ufs_dirbadentry(struct vnode *dp, struct direct *ep, int entryoffsetinblock)
DIRSIZ(FSFMT(dp), ep, needswap) || DIRSIZ(FSFMT(dp), ep, needswap) ||
namlen > FFS_MAXNAMLEN) { namlen > FFS_MAXNAMLEN) {
/*return (1); */ /*return (1); */
printf("First bad, reclen=%x, DIRSIZ=%lu, namlen=%d, flags=%x " printf("First bad, reclen=%#x, DIRSIZ=%lu, namlen=%d, "
"entryoffsetinblock=%d, dirblksiz = %d\n", "flags=%#x, entryoffsetinblock=%d, dirblksiz = %d\n",
ufs_rw16(ep->d_reclen, needswap), ufs_rw16(ep->d_reclen, needswap),
(u_long)DIRSIZ(FSFMT(dp), ep, needswap), (u_long)DIRSIZ(FSFMT(dp), ep, needswap),
namlen, dp->v_mount->mnt_flag, entryoffsetinblock,dirblksiz); namlen, dp->v_mount->mnt_flag, entryoffsetinblock,
dirblksiz);
goto bad; goto bad;
} }
if (ep->d_ino == 0) if (ep->d_ino == 0)
@ -762,6 +766,8 @@ ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp,
const int needswap = UFS_MPNEEDSWAP(ump); const int needswap = UFS_MPNEEDSWAP(ump);
int dirblksiz = ump->um_dirblksiz; int dirblksiz = ump->um_dirblksiz;
UFS_WAPBL_JLOCK_ASSERT(dvp->v_mount);
error = 0; error = 0;
cr = cnp->cn_cred; cr = cnp->cn_cred;
l = curlwp; l = curlwp;
@ -882,6 +888,7 @@ ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp,
dp->i_size = dp->i_offset + dp->i_count; dp->i_size = dp->i_offset + dp->i_count;
DIP_ASSIGN(dp, size, dp->i_size); DIP_ASSIGN(dp, size, dp->i_size);
dp->i_flag |= IN_CHANGE | IN_UPDATE; dp->i_flag |= IN_CHANGE | IN_UPDATE;
UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP);
} }
/* /*
* Get the block containing the space for the new directory entry. * Get the block containing the space for the new directory entry.
@ -1014,6 +1021,7 @@ ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp,
if (DOINGSOFTDEP(dvp) && (tvp != NULL)) if (DOINGSOFTDEP(dvp) && (tvp != NULL))
vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
} }
UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP);
return (error); return (error);
} }
@ -1040,6 +1048,8 @@ ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir)
const int needswap = UFS_MPNEEDSWAP(dp->i_ump); const int needswap = UFS_MPNEEDSWAP(dp->i_ump);
#endif #endif
UFS_WAPBL_JLOCK_ASSERT(dvp->v_mount);
if (flags & DOWHITEOUT) { if (flags & DOWHITEOUT) {
/* /*
* Whiteout entry: set d_ino to WINO. * Whiteout entry: set d_ino to WINO.
@ -1105,6 +1115,7 @@ out:
ip->i_nlink--; ip->i_nlink--;
DIP_ASSIGN(ip, nlink, ip->i_nlink); DIP_ASSIGN(ip, nlink, ip->i_nlink);
ip->i_flag |= IN_CHANGE; ip->i_flag |= IN_CHANGE;
UFS_WAPBL_UPDATE(ITOV(ip), NULL, NULL, 0);
} }
error = VOP_BWRITE(bp); error = VOP_BWRITE(bp);
} }
@ -1118,6 +1129,7 @@ out:
if (ip != 0 && (ip->i_flags & SF_SNAPSHOT) != 0 && if (ip != 0 && (ip->i_flags & SF_SNAPSHOT) != 0 &&
ip->i_ffs_effnlink == 0) ip->i_ffs_effnlink == 0)
ffs_snapgone(ip); ffs_snapgone(ip);
UFS_WAPBL_UPDATE(dvp, NULL, NULL, 0);
#endif #endif
return (error); return (error);
} }
@ -1151,6 +1163,7 @@ ufs_dirrewrite(struct inode *dp, struct inode *oip, ino_t newinum, int newtype,
oip->i_nlink--; oip->i_nlink--;
DIP_ASSIGN(oip, nlink, oip->i_nlink); DIP_ASSIGN(oip, nlink, oip->i_nlink);
oip->i_flag |= IN_CHANGE; oip->i_flag |= IN_CHANGE;
UFS_WAPBL_UPDATE(ITOV(oip), NULL, NULL, UPDATE_DIROP);
error = VOP_BWRITE(bp); error = VOP_BWRITE(bp);
} }
dp->i_flag |= iflags; dp->i_flag |= iflags;
@ -1162,6 +1175,7 @@ ufs_dirrewrite(struct inode *dp, struct inode *oip, ino_t newinum, int newtype,
*/ */
if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_ffs_effnlink == 0) if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_ffs_effnlink == 0)
ffs_snapgone(oip); ffs_snapgone(oip);
UFS_WAPBL_UPDATE(vdp, NULL, NULL, UPDATE_DIROP);
#endif #endif
return (error); return (error);
} }
@ -1333,8 +1347,8 @@ ufs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp,
const int bsize = 1 << bshift; const int bsize = 1 << bshift;
off_t eof; off_t eof;
blks = kmem_alloc((1+dirrablks) * sizeof(daddr_t), KM_SLEEP); blks = kmem_alloc((1 + dirrablks) * sizeof(daddr_t), KM_SLEEP);
blksizes = kmem_alloc((1+dirrablks) * sizeof(int), KM_SLEEP); blksizes = kmem_alloc((1 + dirrablks) * sizeof(int), KM_SLEEP);
ip = VTOI(vp); ip = VTOI(vp);
KASSERT(vp->v_size == ip->i_size); KASSERT(vp->v_size == ip->i_size);
GOP_SIZE(vp, vp->v_size, &eof, 0); GOP_SIZE(vp, vp->v_size, &eof, 0);
@ -1370,7 +1384,7 @@ ufs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp,
*bpp = bp; *bpp = bp;
out: out:
kmem_free(blks, (1+dirrablks) * sizeof(daddr_t)); kmem_free(blks, (1 + dirrablks) * sizeof(daddr_t));
kmem_free(blksizes, (1+dirrablks) * sizeof(int)); kmem_free(blksizes, (1 + dirrablks) * sizeof(int));
return error; return error;
} }

View File

@ -1,4 +1,4 @@
/* $NetBSD: ufs_readwrite.c,v 1.88 2008/05/16 09:22:01 hannken Exp $ */ /* $NetBSD: ufs_readwrite.c,v 1.89 2008/07/31 05:38:06 simonb Exp $ */
/*- /*-
* Copyright (c) 1993 * Copyright (c) 1993
@ -32,7 +32,7 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.88 2008/05/16 09:22:01 hannken Exp $"); __KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.89 2008/07/31 05:38:06 simonb Exp $");
#ifdef LFS_READWRITE #ifdef LFS_READWRITE
#define FS struct lfs #define FS struct lfs
@ -43,6 +43,9 @@ __KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.88 2008/05/16 09:22:01 hannken E
#define WRITE_S "lfs_write" #define WRITE_S "lfs_write"
#define fs_bsize lfs_bsize #define fs_bsize lfs_bsize
#define fs_bmask lfs_bmask #define fs_bmask lfs_bmask
/*
 * Under LFS_READWRITE the WAPBL journaling hooks are stubbed out:
 * UFS_WAPBL_BEGIN() evaluates to 0 (so callers' error checks always
 * see success) and UFS_WAPBL_END()/UFS_WAPBL_UPDATE() expand to empty
 * statements.
 */
#define UFS_WAPBL_BEGIN(mp) 0
#define UFS_WAPBL_END(mp) do { } while (0)
#define UFS_WAPBL_UPDATE(vp, access, modify, flags) do { } while (0)
#else #else
#define FS struct fs #define FS struct fs
#define I_FS i_fs #define I_FS i_fs
@ -177,8 +180,15 @@ READ(void *v)
out: out:
if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) { if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
ip->i_flag |= IN_ACCESS; ip->i_flag |= IN_ACCESS;
if ((ap->a_ioflag & IO_SYNC) == IO_SYNC) if ((ap->a_ioflag & IO_SYNC) == IO_SYNC) {
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error) {
fstrans_done(vp->v_mount);
return error;
}
error = UFS_UPDATE(vp, NULL, NULL, UPDATE_WAIT); error = UFS_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
UFS_WAPBL_END(vp->v_mount);
}
} }
fstrans_done(vp->v_mount); fstrans_done(vp->v_mount);
@ -283,6 +293,15 @@ WRITE(void *v)
error = 0; error = 0;
usepc = vp->v_type == VREG; usepc = vp->v_type == VREG;
if ((ioflag & IO_JOURNALLOCKED) == 0) {
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error) {
fstrans_done(vp->v_mount);
return error;
}
}
#ifdef LFS_READWRITE #ifdef LFS_READWRITE
async = true; async = true;
lfs_check(vp, LFS_UNUSED_LBN, 0); lfs_check(vp, LFS_UNUSED_LBN, 0);
@ -511,8 +530,11 @@ out:
uio->uio_resid = resid; uio->uio_resid = resid;
} else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC) } else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC)
error = UFS_UPDATE(vp, NULL, NULL, UPDATE_WAIT); error = UFS_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
else
UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
KASSERT(vp->v_size == ip->i_size); KASSERT(vp->v_size == ip->i_size);
if ((ioflag & IO_JOURNALLOCKED) == 0)
UFS_WAPBL_END(vp->v_mount);
fstrans_done(vp->v_mount); fstrans_done(vp->v_mount);
return (error); return (error);

View File

@ -1,4 +1,33 @@
/* $NetBSD: ufs_vnops.c,v 1.166 2008/06/02 16:00:33 ad Exp $ */ /* $NetBSD: ufs_vnops.c,v 1.167 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/* /*
* Copyright (c) 1982, 1986, 1989, 1993, 1995 * Copyright (c) 1982, 1986, 1989, 1993, 1995
@ -37,7 +66,7 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.166 2008/06/02 16:00:33 ad Exp $"); __KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.167 2008/07/31 05:38:06 simonb Exp $");
#if defined(_KERNEL_OPT) #if defined(_KERNEL_OPT)
#include "opt_ffs.h" #include "opt_ffs.h"
@ -60,6 +89,7 @@ __KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.166 2008/06/02 16:00:33 ad Exp $");
#include <sys/dirent.h> #include <sys/dirent.h>
#include <sys/lockf.h> #include <sys/lockf.h>
#include <sys/kauth.h> #include <sys/kauth.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h> #include <sys/fstrans.h>
#include <miscfs/specfs/specdev.h> #include <miscfs/specfs/specdev.h>
@ -70,6 +100,7 @@ __KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.166 2008/06/02 16:00:33 ad Exp $");
#include <ufs/ufs/ufsmount.h> #include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_bswap.h> #include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_extern.h> #include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_wapbl.h>
#ifdef UFS_DIRHASH #ifdef UFS_DIRHASH
#include <ufs/ufs/dirhash.h> #include <ufs/ufs/dirhash.h>
#endif #endif
@ -105,13 +136,20 @@ ufs_create(void *v)
} */ *ap = v; } */ *ap = v;
int error; int error;
/*
* UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful
* ufs_makeinode
*/
fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED); fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED);
error = error =
ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
ap->a_dvp, ap->a_vpp, ap->a_cnp); ap->a_dvp, ap->a_vpp, ap->a_cnp);
fstrans_done(ap->a_dvp->v_mount); if (error) {
if (error) fstrans_done(ap->a_dvp->v_mount);
return (error); return (error);
}
UFS_WAPBL_END1(ap->a_dvp->v_mount, ap->a_dvp);
fstrans_done(ap->a_dvp->v_mount);
VN_KNOTE(ap->a_dvp, NOTE_WRITE); VN_KNOTE(ap->a_dvp, NOTE_WRITE);
return (0); return (0);
} }
@ -138,6 +176,11 @@ ufs_mknod(void *v)
vap = ap->a_vap; vap = ap->a_vap;
vpp = ap->a_vpp; vpp = ap->a_vpp;
/*
* UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful
* ufs_makeinode
*/
fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED); fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED);
if ((error = if ((error =
ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
@ -161,6 +204,8 @@ ufs_mknod(void *v)
ip->i_ffs2_rdev = ufs_rw64(vap->va_rdev, ip->i_ffs2_rdev = ufs_rw64(vap->va_rdev,
UFS_MPNEEDSWAP(ump)); UFS_MPNEEDSWAP(ump));
} }
UFS_WAPBL_UPDATE(*vpp, NULL, NULL, 0);
UFS_WAPBL_END1(ap->a_dvp->v_mount, ap->a_dvp);
/* /*
* Remove inode so that it will be reloaded by VFS_VGET and * Remove inode so that it will be reloaded by VFS_VGET and
* checked to see if it is an alias of an existing entry in * checked to see if it is an alias of an existing entry in
@ -394,8 +439,8 @@ ufs_setattr(void *v)
goto out; goto out;
} }
if (kauth_cred_geteuid(cred) != ip->i_uid && if (kauth_cred_geteuid(cred) != ip->i_uid &&
(error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, (error = kauth_authorize_generic(cred,
NULL))) KAUTH_GENERIC_ISSUSER, NULL)))
goto out; goto out;
if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER,
NULL) == 0) { NULL) == 0) {
@ -411,6 +456,9 @@ ufs_setattr(void *v)
error = EPERM; error = EPERM;
goto out; goto out;
} }
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error)
goto out;
ip->i_flags = vap->va_flags; ip->i_flags = vap->va_flags;
DIP_ASSIGN(ip, flags, ip->i_flags); DIP_ASSIGN(ip, flags, ip->i_flags);
} else { } else {
@ -424,11 +472,16 @@ ufs_setattr(void *v)
error = EPERM; error = EPERM;
goto out; goto out;
} }
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error)
goto out;
ip->i_flags &= SF_SETTABLE; ip->i_flags &= SF_SETTABLE;
ip->i_flags |= (vap->va_flags & UF_SETTABLE); ip->i_flags |= (vap->va_flags & UF_SETTABLE);
DIP_ASSIGN(ip, flags, ip->i_flags); DIP_ASSIGN(ip, flags, ip->i_flags);
} }
ip->i_flag |= IN_CHANGE; ip->i_flag |= IN_CHANGE;
UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
UFS_WAPBL_END(vp->v_mount);
if (vap->va_flags & (IMMUTABLE | APPEND)) { if (vap->va_flags & (IMMUTABLE | APPEND)) {
error = 0; error = 0;
goto out; goto out;
@ -446,7 +499,11 @@ ufs_setattr(void *v)
error = EROFS; error = EROFS;
goto out; goto out;
} }
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error)
goto out;
error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, l); error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, l);
UFS_WAPBL_END(vp->v_mount);
if (error) if (error)
goto out; goto out;
} }
@ -466,14 +523,46 @@ ufs_setattr(void *v)
break; break;
case VREG: case VREG:
if (vp->v_mount->mnt_flag & MNT_RDONLY) { if (vp->v_mount->mnt_flag & MNT_RDONLY) {
error = EROFS; error = EROFS;
goto out; goto out;
} }
if ((ip->i_flags & SF_SNAPSHOT) != 0) { if ((ip->i_flags & SF_SNAPSHOT) != 0) {
error = EPERM; error = EPERM;
goto out; goto out;
} }
error = UFS_TRUNCATE(vp, vap->va_size, 0, cred); error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error)
goto out;
/*
* When journaling, only truncate one indirect block
* at a time.
*/
if (vp->v_mount->mnt_wapbl) {
uint64_t incr = MNINDIR(ip->i_ump) <<
vp->v_mount->mnt_fs_bshift; /* Power of 2 */
uint64_t base = NDADDR <<
vp->v_mount->mnt_fs_bshift;
while (!error && ip->i_size > base + incr &&
ip->i_size > vap->va_size + incr) {
/*
* round down to next full indirect
* block boundary.
*/
uint64_t nsize = base +
((ip->i_size - base - 1) &
~(incr - 1));
error = UFS_TRUNCATE(vp, nsize, 0,
cred);
if (error == 0) {
UFS_WAPBL_END(vp->v_mount);
error =
UFS_WAPBL_BEGIN(vp->v_mount);
}
}
}
if (!error)
error = UFS_TRUNCATE(vp, vap->va_size, 0, cred);
UFS_WAPBL_END(vp->v_mount);
if (error) if (error)
goto out; goto out;
break; break;
@ -494,11 +583,14 @@ ufs_setattr(void *v)
goto out; goto out;
} }
if (kauth_cred_geteuid(cred) != ip->i_uid && if (kauth_cred_geteuid(cred) != ip->i_uid &&
(error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, (error = kauth_authorize_generic(cred,
NULL)) && KAUTH_GENERIC_ISSUSER, NULL)) &&
((vap->va_vaflags & VA_UTIMES_NULL) == 0 || ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
(error = VOP_ACCESS(vp, VWRITE, cred)))) (error = VOP_ACCESS(vp, VWRITE, cred))))
goto out; goto out;
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error)
goto out;
if (vap->va_atime.tv_sec != VNOVAL) if (vap->va_atime.tv_sec != VNOVAL)
if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) if (!(vp->v_mount->mnt_flag & MNT_NOATIME))
ip->i_flag |= IN_ACCESS; ip->i_flag |= IN_ACCESS;
@ -510,6 +602,7 @@ ufs_setattr(void *v)
ip->i_ffs2_birthnsec = vap->va_birthtime.tv_nsec; ip->i_ffs2_birthnsec = vap->va_birthtime.tv_nsec;
} }
error = UFS_UPDATE(vp, &vap->va_atime, &vap->va_mtime, 0); error = UFS_UPDATE(vp, &vap->va_atime, &vap->va_mtime, 0);
UFS_WAPBL_END(vp->v_mount);
if (error) if (error)
goto out; goto out;
} }
@ -525,7 +618,11 @@ ufs_setattr(void *v)
error = EPERM; error = EPERM;
goto out; goto out;
} }
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error)
goto out;
error = ufs_chmod(vp, (int)vap->va_mode, cred, l); error = ufs_chmod(vp, (int)vap->va_mode, cred, l);
UFS_WAPBL_END(vp->v_mount);
} }
VN_KNOTE(vp, NOTE_ATTRIB); VN_KNOTE(vp, NOTE_ATTRIB);
out: out:
@ -543,6 +640,8 @@ ufs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct lwp *l)
struct inode *ip; struct inode *ip;
int error, ismember = 0; int error, ismember = 0;
UFS_WAPBL_JLOCK_ASSERT(vp->v_mount);
ip = VTOI(vp); ip = VTOI(vp);
if (kauth_cred_geteuid(cred) != ip->i_uid && if (kauth_cred_geteuid(cred) != ip->i_uid &&
(error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL))) (error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL)))
@ -558,6 +657,7 @@ ufs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct lwp *l)
ip->i_mode |= (mode & ALLPERMS); ip->i_mode |= (mode & ALLPERMS);
ip->i_flag |= IN_CHANGE; ip->i_flag |= IN_CHANGE;
DIP_ASSIGN(ip, mode, ip->i_mode); DIP_ASSIGN(ip, mode, ip->i_mode);
UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
return (0); return (0);
} }
@ -626,6 +726,7 @@ ufs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred,
good: good:
#endif /* QUOTA */ #endif /* QUOTA */
ip->i_flag |= IN_CHANGE; ip->i_flag |= IN_CHANGE;
UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
return (0); return (0);
} }
@ -649,8 +750,13 @@ ufs_remove(void *v)
if (vp->v_type == VDIR || (ip->i_flags & (IMMUTABLE | APPEND)) || if (vp->v_type == VDIR || (ip->i_flags & (IMMUTABLE | APPEND)) ||
(VTOI(dvp)->i_flags & APPEND)) (VTOI(dvp)->i_flags & APPEND))
error = EPERM; error = EPERM;
else else {
error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); error = UFS_WAPBL_BEGIN(dvp->v_mount);
if (error == 0) {
error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0);
UFS_WAPBL_END(dvp->v_mount);
}
}
VN_KNOTE(vp, NOTE_DELETE); VN_KNOTE(vp, NOTE_DELETE);
VN_KNOTE(dvp, NOTE_WRITE); VN_KNOTE(dvp, NOTE_WRITE);
if (dvp == vp) if (dvp == vp)
@ -720,6 +826,11 @@ ufs_link(void *v)
error = EPERM; error = EPERM;
goto out1; goto out1;
} }
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error) {
VOP_ABORTOP(dvp, cnp);
goto out1;
}
ip->i_ffs_effnlink++; ip->i_ffs_effnlink++;
ip->i_nlink++; ip->i_nlink++;
DIP_ASSIGN(ip, nlink, ip->i_nlink); DIP_ASSIGN(ip, nlink, ip->i_nlink);
@ -738,10 +849,12 @@ ufs_link(void *v)
ip->i_nlink--; ip->i_nlink--;
DIP_ASSIGN(ip, nlink, ip->i_nlink); DIP_ASSIGN(ip, nlink, ip->i_nlink);
ip->i_flag |= IN_CHANGE; ip->i_flag |= IN_CHANGE;
UFS_WAPBL_UPDATE(vp, NULL, NULL, UPDATE_DIROP);
if (DOINGSOFTDEP(vp)) if (DOINGSOFTDEP(vp))
softdep_change_linkcnt(ip); softdep_change_linkcnt(ip);
} }
PNBUF_PUT(cnp->cn_pnbuf); PNBUF_PUT(cnp->cn_pnbuf);
UFS_WAPBL_END(vp->v_mount);
out1: out1:
if (dvp != vp) if (dvp != vp)
VOP_UNLOCK(vp, 0); VOP_UNLOCK(vp, 0);
@ -865,6 +978,11 @@ ufs_rename(void *v)
struct direct *newdir; struct direct *newdir;
int doingdirectory, oldparent, newparent, error; int doingdirectory, oldparent, newparent, error;
#ifdef WAPBL
if (ap->a_tdvp->v_mount->mnt_wapbl)
return wapbl_ufs_rename(v);
#endif
tvp = ap->a_tvp; tvp = ap->a_tvp;
tdvp = ap->a_tdvp; tdvp = ap->a_tdvp;
fvp = ap->a_fvp; fvp = ap->a_fvp;
@ -1297,6 +1415,9 @@ ufs_mkdir(void *v)
*/ */
if ((error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, ap->a_vpp)) != 0) if ((error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, ap->a_vpp)) != 0)
goto out; goto out;
error = UFS_WAPBL_BEGIN(ap->a_dvp->v_mount);
if (error)
goto out;
tvp = *ap->a_vpp; tvp = *ap->a_vpp;
ip = VTOI(tvp); ip = VTOI(tvp);
ip->i_uid = kauth_cred_geteuid(cnp->cn_cred); ip->i_uid = kauth_cred_geteuid(cnp->cn_cred);
@ -1307,6 +1428,7 @@ ufs_mkdir(void *v)
if ((error = chkiq(ip, 1, cnp->cn_cred, 0))) { if ((error = chkiq(ip, 1, cnp->cn_cred, 0))) {
PNBUF_PUT(cnp->cn_pnbuf); PNBUF_PUT(cnp->cn_pnbuf);
UFS_VFREE(tvp, ip->i_number, dmode); UFS_VFREE(tvp, ip->i_number, dmode);
UFS_WAPBL_END(dvp->v_mount);
fstrans_done(dvp->v_mount); fstrans_done(dvp->v_mount);
vput(tvp); vput(tvp);
vput(dvp); vput(dvp);
@ -1412,11 +1534,13 @@ ufs_mkdir(void *v)
bad: bad:
if (error == 0) { if (error == 0) {
VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
UFS_WAPBL_END(dvp->v_mount);
} else { } else {
dp->i_ffs_effnlink--; dp->i_ffs_effnlink--;
dp->i_nlink--; dp->i_nlink--;
DIP_ASSIGN(dp, nlink, dp->i_nlink); DIP_ASSIGN(dp, nlink, dp->i_nlink);
dp->i_flag |= IN_CHANGE; dp->i_flag |= IN_CHANGE;
UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP);
if (DOINGSOFTDEP(dvp)) if (DOINGSOFTDEP(dvp))
softdep_change_linkcnt(dp); softdep_change_linkcnt(dp);
/* /*
@ -1431,8 +1555,10 @@ ufs_mkdir(void *v)
/* If IN_ADIROP, account for it */ /* If IN_ADIROP, account for it */
lfs_unmark_vnode(tvp); lfs_unmark_vnode(tvp);
#endif #endif
UFS_WAPBL_UPDATE(tvp, NULL, NULL, UPDATE_DIROP);
if (DOINGSOFTDEP(tvp)) if (DOINGSOFTDEP(tvp))
softdep_change_linkcnt(ip); softdep_change_linkcnt(ip);
UFS_WAPBL_END(dvp->v_mount);
vput(tvp); vput(tvp);
} }
out: out:
@ -1496,6 +1622,9 @@ ufs_rmdir(void *v)
error = EPERM; error = EPERM;
goto out; goto out;
} }
error = UFS_WAPBL_BEGIN(dvp->v_mount);
if (error)
goto out;
/* /*
* Delete reference to directory before purging * Delete reference to directory before purging
* inode. If we crash in between, the directory * inode. If we crash in between, the directory
@ -1515,6 +1644,7 @@ ufs_rmdir(void *v)
softdep_change_linkcnt(dp); softdep_change_linkcnt(dp);
softdep_change_linkcnt(ip); softdep_change_linkcnt(ip);
} }
UFS_WAPBL_END(dvp->v_mount);
goto out; goto out;
} }
VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
@ -1531,6 +1661,7 @@ ufs_rmdir(void *v)
dp->i_ffs_effnlink--; dp->i_ffs_effnlink--;
DIP_ASSIGN(dp, nlink, dp->i_nlink); DIP_ASSIGN(dp, nlink, dp->i_nlink);
dp->i_flag |= IN_CHANGE; dp->i_flag |= IN_CHANGE;
UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP);
ip->i_nlink--; ip->i_nlink--;
ip->i_ffs_effnlink--; ip->i_ffs_effnlink--;
DIP_ASSIGN(ip, nlink, ip->i_nlink); DIP_ASSIGN(ip, nlink, ip->i_nlink);
@ -1538,6 +1669,11 @@ ufs_rmdir(void *v)
error = UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred); error = UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred);
} }
cache_purge(vp); cache_purge(vp);
/*
* Unlock the log while we still have reference to unlinked
* directory vp so that it will not get locked for recycling
*/
UFS_WAPBL_END(dvp->v_mount);
#ifdef UFS_DIRHASH #ifdef UFS_DIRHASH
if (ip->i_dirhash != NULL) if (ip->i_dirhash != NULL)
ufsdirhash_free(ip); ufsdirhash_free(ip);
@ -1576,6 +1712,10 @@ ufs_symlink(void *v)
int len, error; int len, error;
vpp = ap->a_vpp; vpp = ap->a_vpp;
/*
* UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful
* ufs_makeinode
*/
fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED); fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED);
error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
vpp, ap->a_cnp); vpp, ap->a_cnp);
@ -1591,10 +1731,12 @@ ufs_symlink(void *v)
DIP_ASSIGN(ip, size, len); DIP_ASSIGN(ip, size, len);
uvm_vnp_setsize(vp, ip->i_size); uvm_vnp_setsize(vp, ip->i_size);
ip->i_flag |= IN_CHANGE | IN_UPDATE; ip->i_flag |= IN_CHANGE | IN_UPDATE;
UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
} else } else
error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, NULL, UIO_SYSSPACE, IO_NODELOCKED | IO_JOURNALLOCKED,
NULL); ap->a_cnp->cn_cred, NULL, NULL);
UFS_WAPBL_END1(ap->a_dvp->v_mount, ap->a_dvp);
if (error) if (error)
vput(vp); vput(vp);
out: out:
@ -2096,6 +2238,8 @@ ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
struct vnode *tvp; struct vnode *tvp;
int error, ismember = 0; int error, ismember = 0;
UFS_WAPBL_JUNLOCK_ASSERT(dvp->v_mount);
pdir = VTOI(dvp); pdir = VTOI(dvp);
#ifdef DIAGNOSTIC #ifdef DIAGNOSTIC
if ((cnp->cn_flags & HASBUF) == 0) if ((cnp->cn_flags & HASBUF) == 0)
@ -2115,9 +2259,22 @@ ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
DIP_ASSIGN(ip, gid, ip->i_gid); DIP_ASSIGN(ip, gid, ip->i_gid);
ip->i_uid = kauth_cred_geteuid(cnp->cn_cred); ip->i_uid = kauth_cred_geteuid(cnp->cn_cred);
DIP_ASSIGN(ip, uid, ip->i_uid); DIP_ASSIGN(ip, uid, ip->i_uid);
error = UFS_WAPBL_BEGIN1(dvp->v_mount, dvp);
if (error) {
/*
* Note, we can't VOP_VFREE(tvp) here like we should
* because we can't write to the disk. Instead, we leave
* the vnode dangling from the journal.
*/
vput(tvp);
PNBUF_PUT(cnp->cn_pnbuf);
vput(dvp);
return (error);
}
#ifdef QUOTA #ifdef QUOTA
if ((error = chkiq(ip, 1, cnp->cn_cred, 0))) { if ((error = chkiq(ip, 1, cnp->cn_cred, 0))) {
UFS_VFREE(tvp, ip->i_number, mode); UFS_VFREE(tvp, ip->i_number, mode);
UFS_WAPBL_END1(dvp->v_mount, dvp);
vput(tvp); vput(tvp);
PNBUF_PUT(cnp->cn_pnbuf); PNBUF_PUT(cnp->cn_pnbuf);
vput(dvp); vput(dvp);
@ -2175,9 +2332,11 @@ ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
/* If IN_ADIROP, account for it */ /* If IN_ADIROP, account for it */
lfs_unmark_vnode(tvp); lfs_unmark_vnode(tvp);
#endif #endif
UFS_WAPBL_UPDATE(tvp, NULL, NULL, 0);
if (DOINGSOFTDEP(tvp)) if (DOINGSOFTDEP(tvp))
softdep_change_linkcnt(ip); softdep_change_linkcnt(ip);
tvp->v_type = VNON; /* explodes later if VBLK */ tvp->v_type = VNON; /* explodes later if VBLK */
UFS_WAPBL_END1(dvp->v_mount, dvp);
vput(tvp); vput(tvp);
PNBUF_PUT(cnp->cn_pnbuf); PNBUF_PUT(cnp->cn_pnbuf);
vput(dvp); vput(dvp);
@ -2228,7 +2387,8 @@ ufs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags,
} }
out: out:
return error; UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
return error;
} }
void void

805
sys/ufs/ufs/ufs_wapbl.c Normal file
View File

@ -0,0 +1,805 @@
/* $NetBSD: ufs_wapbl.c,v 1.2 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 2003,2006,2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 1982, 1986, 1989, 1993, 1995
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)ufs_vnops.c 8.28 (Berkeley) 7/31/95
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ufs_wapbl.c,v 1.2 2008/07/31 05:38:06 simonb Exp $");
#if defined(_KERNEL_OPT)
#include "opt_quota.h"
#include "fs_lfs.h"
#endif
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/dirent.h>
#include <sys/lockf.h>
#include <sys/kauth.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h>
#include <miscfs/specfs/specdev.h>
#include <miscfs/fifofs/fifo.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_wapbl.h>
#include <ufs/ext2fs/ext2fs_extern.h>
#include <ufs/lfs/lfs_extern.h>
#include <uvm/uvm.h>
/* XXX following lifted from ufs_lookup.c */
/*
 * FSFMT(vp) is true when the IMNT_DTYPE bit is clear in the mnt_iflag of
 * the mount that vp belongs to. In this file it decides whether a
 * directory entry's name length is read from d_type or from d_namlen.
 */
#define FSFMT(vp) (((vp)->v_mount->mnt_iflag & IMNT_DTYPE) == 0)
/*
 * A virgin directory (no blushing please).
 *
 * Template holding the "." and ".." entries, in that order. Within this
 * file only mastertemplate.dot_reclen is consulted: wapbl_ufs_rename()
 * uses it as the offset of the ".." entry when re-pointing a moved
 * directory at its new parent.
 * NOTE(review): the positional values are presumably
 * { ino, reclen, type, namlen, name } for each entry; confirm against
 * struct dirtemplate in <ufs/ufs/dir.h>.
 */
static const struct dirtemplate mastertemplate = {
0, 12, DT_DIR, 1, ".",
0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
};
/*
* Rename vnode operation
* rename("foo", "bar");
* is essentially
* unlink("bar");
* link("foo", "bar");
* unlink("foo");
* but ``atomically''. Can't do full commit without saving state in the
* inode on disk which isn't feasible at this time. Best we can do is
* always guarantee the target exists.
*
* Basic algorithm is:
*
* 1) Bump link count on source while we're linking it to the
* target. This also ensures the inode won't be deleted out
* from underneath us while we work (it may be truncated by
* a concurrent `trunc' or `open' for creation).
* 2) Link source to destination. If destination already exists,
* delete it first.
* 3) Unlink source reference to inode if still around. If a
* directory was moved and the parent of the destination
* is different from the source, patch the ".." entry in the
* directory.
*
* WAPBL NOTE: wapbl_ufs_rename derived from ufs_rename in ufs_vnops.c
* ufs_vnops.c netbsd cvs revision 1.108
* which has the berkeley copyright above
* changes introduced to ufs_rename since netbsd cvs revision 1.164
* will need to be ported into wapbl_ufs_rename
*/
/*
 * wapbl_ufs_rename: rename vnode operation used when the mount has a
 * journal. v points to a struct vop_rename_args (layout shown below).
 * Returns 0 on success or an errno value.
 *
 * The on-disk modifications (steps 1-3 of the algorithm described in the
 * comment preceding this function) are bracketed by
 * UFS_WAPBL_BEGIN()/UFS_WAPBL_END() on fdvp's mount. The source lookup is
 * repeated before the journal is entered.
 *
 * Exit labels:
 *   abortit - failures before fstrans_start(); releases all four vnodes.
 *   out     - drops the fvp/fdvp references, then falls into out2.
 *   bad     - undoes the step-1 link count bump, then falls into done.
 *   done    - ends the journal transaction and vput()s fdvp/fvp.
 *   out2    - clears IN_RENAME, releases the target side, fstrans_done().
 */
int
wapbl_ufs_rename(void *v)
{
struct vop_rename_args /* {
struct vnode *a_fdvp;
struct vnode *a_fvp;
struct componentname *a_fcnp;
struct vnode *a_tdvp;
struct vnode *a_tvp;
struct componentname *a_tcnp;
} */ *ap = v;
struct vnode *tvp, *tdvp, *fvp, *fdvp;
struct componentname *tcnp, *fcnp;
struct inode *ip, *txp, *fxp, *tdp, *fdp;
struct mount *mp;
struct direct *newdir;
int doingdirectory, oldparent, newparent, error;
/* lookup results cached in the source directory inode (fdp) */
int32_t saved_f_count;
doff_t saved_f_diroff;
doff_t saved_f_offset;
u_int32_t saved_f_reclen;
/* lookup results cached in the target directory inode (tdp) */
int32_t saved_t_count;
doff_t saved_t_endoff;
doff_t saved_t_diroff;
doff_t saved_t_offset;
u_int32_t saved_t_reclen;
tvp = ap->a_tvp;
tdvp = ap->a_tdvp;
fvp = ap->a_fvp;
fdvp = ap->a_fdvp;
tcnp = ap->a_tcnp;
fcnp = ap->a_fcnp;
doingdirectory = oldparent = newparent = error = 0;
#ifdef DIAGNOSTIC
if ((tcnp->cn_flags & HASBUF) == 0 ||
(fcnp->cn_flags & HASBUF) == 0)
panic("ufs_rename: no name");
#endif
/*
 * Check for cross-device rename.
 */
if ((fvp->v_mount != tdvp->v_mount) ||
(tvp && (fvp->v_mount != tvp->v_mount))) {
error = EXDEV;
/*
 * Common early-failure exit, entered with error already set.
 * tdvp and tvp are released with vput() (vrele() for tdvp when it
 * is the same vnode as tvp); fdvp and fvp are released with vrele().
 */
abortit:
VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */
if (tdvp == tvp)
vrele(tdvp);
else
vput(tdvp);
if (tvp)
vput(tvp);
VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */
vrele(fdvp);
vrele(fvp);
return (error);
}
/*
 * Check if just deleting a link name.
 */
if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) ||
(VTOI(tdvp)->i_flags & APPEND))) {
error = EPERM;
goto abortit;
}
/*
 * Source and target are the same vnode. A directory is rejected with
 * EINVAL; otherwise the operation reduces to removing the source name.
 */
if (fvp == tvp) {
if (fvp->v_type == VDIR) {
error = EINVAL;
goto abortit;
}
/* Release destination completely. */
VOP_ABORTOP(tdvp, tcnp);
vput(tdvp);
vput(tvp);
/* Delete source. */
vrele(fvp);
fcnp->cn_flags &= ~(MODMASK | SAVESTART);
fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
fcnp->cn_nameiop = DELETE;
vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
if ((error = relookup(fdvp, &fvp, fcnp))) {
vput(fdvp);
return (error);
}
return (VOP_REMOVE(fdvp, fvp, fcnp));
}
/* Lock the source while its link count and flags are examined. */
if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
goto abortit;
fdp = VTOI(fdvp);
ip = VTOI(fvp);
if ((nlink_t) ip->i_nlink >= LINK_MAX) {
VOP_UNLOCK(fvp, 0);
error = EMLINK;
goto abortit;
}
if ((ip->i_flags & (IMMUTABLE | APPEND)) ||
(fdp->i_flags & APPEND)) {
VOP_UNLOCK(fvp, 0);
error = EPERM;
goto abortit;
}
if ((ip->i_mode & IFMT) == IFDIR) {
/*
 * Avoid ".", "..", and aliases of "." for obvious reasons.
 */
if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
fdp == ip ||
(fcnp->cn_flags & ISDOTDOT) ||
(tcnp->cn_flags & ISDOTDOT) ||
(ip->i_flag & IN_RENAME)) {
VOP_UNLOCK(fvp, 0);
error = EINVAL;
goto abortit;
}
/* Mark the directory as being renamed; cleared again on every exit. */
ip->i_flag |= IN_RENAME;
doingdirectory = 1;
}
oldparent = fdp->i_number;
VN_KNOTE(fdvp, NOTE_WRITE); /* XXXLUKEM/XXX: right place? */
/*
 * When the target exists, both the directory
 * and target vnodes are returned locked.
 */
tdp = VTOI(tdvp);
txp = NULL;
if (tvp)
txp = VTOI(tvp);
/*
 * From here on every exit path ends at out2, which is where the
 * matching fstrans_done(mp) is issued.
 */
mp = fdvp->v_mount;
fstrans_start(mp, FSTRANS_SHARED);
/*
 * If ".." must be changed (ie the directory gets a new
 * parent) then the source directory must not be in the
 * directory hierarchy above the target, as this would
 * orphan everything below the source directory. Also
 * the user must have write permission in the source so
 * as to be able to change "..". We must repeat the call
 * to namei, as the parent directory is unlocked by the
 * call to checkpath().
 */
error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred);
VOP_UNLOCK(fvp, 0);
/* newparent stays 0 when source and target share a parent directory. */
if (oldparent != tdp->i_number)
newparent = tdp->i_number;
if (doingdirectory && newparent) {
if (error) /* write access check above */
goto out;
if (txp != NULL)
vput(tvp);
txp = NULL;
vref(tdvp); /* compensate for the ref checkpath loses */
if ((error = ufs_checkpath(ip, tdp, tcnp->cn_cred)) != 0) {
vrele(tdvp);
tdp = NULL;
goto out;
}
tcnp->cn_flags &= ~SAVESTART;
/* tdp is NULL while the target lookup is redone so out2 skips it. */
tdp = NULL;
vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
error = relookup(tdvp, &tvp, tcnp);
if (error != 0) {
vput(tdvp);
goto out;
}
tdp = VTOI(tdvp);
if (tvp)
txp = VTOI(tvp);
}
/*
 * XXX handle case where fdvp is parent of tdvp,
 * by unlocking tdvp and regrabbing it with vget after?
 */
/* save directory lookup information in case tdvp == fdvp */
saved_t_count = tdp->i_count;
saved_t_endoff = tdp->i_endoff;
saved_t_diroff = tdp->i_diroff;
saved_t_offset = tdp->i_offset;
saved_t_reclen = tdp->i_reclen;
/*
 * This was moved up to before the journal lock to
 * avoid potential deadlock
 */
fcnp->cn_flags &= ~(MODMASK | SAVESTART);
fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
if (newparent) {
vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
if ((error = relookup(fdvp, &fvp, fcnp))) {
vput(fdvp);
vrele(ap->a_fvp);
goto out2;
}
} else {
error = VOP_LOOKUP(fdvp, &fvp, fcnp);
if (error && (error != EJUSTRETURN)) {
vrele(ap->a_fvp);
goto out2;
}
error = 0;
}
if (fvp != NULL) {
fxp = VTOI(fvp);
fdp = VTOI(fdvp);
} else {
/*
 * From name has disappeared.
 */
if (doingdirectory)
panic("rename: lost dir entry");
vrele(ap->a_fvp);
error = ENOENT; /* XXX ufs_rename sets "0" here */
goto out2;
}
/* Drop the caller's original reference on the source vnode. */
vrele(ap->a_fvp);
/* save directory lookup information in case tdvp == fdvp */
saved_f_count = fdp->i_count;
saved_f_diroff = fdp->i_diroff;
saved_f_offset = fdp->i_offset;
saved_f_reclen = fdp->i_reclen;
/* restore directory lookup information in case tdvp == fdvp */
tdp->i_offset = saved_t_offset;
tdp->i_reclen = saved_t_reclen;
tdp->i_count = saved_t_count;
tdp->i_endoff = saved_t_endoff;
tdp->i_diroff = saved_t_diroff;
/*
 * Enter the journal transaction; it is closed at the done: label.
 * NOTE(review): on failure this jumps to out2, which does not
 * vput() fdvp/fvp the way done: does -- confirm whether those
 * vnodes are still held (and locked) on this path.
 */
error = UFS_WAPBL_BEGIN(fdvp->v_mount);
if (error)
goto out2;
/*
 * 1) Bump link count while we're moving stuff
 * around. If we crash somewhere before
 * completing our work, the link count
 * may be wrong, but correctable.
 */
ip->i_ffs_effnlink++;
ip->i_nlink++;
DIP_ASSIGN(ip, nlink, ip->i_nlink);
ip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(fvp))
softdep_change_linkcnt(ip);
if ((error = UFS_UPDATE(fvp, NULL, NULL, UPDATE_DIROP)) != 0) {
goto bad;
}
/*
 * 2) If target doesn't exist, link the target
 * to the source and unlink the source.
 * Otherwise, rewrite the target directory
 * entry to reference the source inode and
 * expunge the original entry's existence.
 */
if (txp == NULL) {
if (tdp->i_dev != ip->i_dev)
panic("rename: EXDEV");
/*
 * Account for ".." in new directory.
 * When source and destination have the same
 * parent we don't fool with the link count.
 */
if (doingdirectory && newparent) {
if ((nlink_t)tdp->i_nlink >= LINK_MAX) {
error = EMLINK;
goto bad;
}
tdp->i_ffs_effnlink++;
tdp->i_nlink++;
DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
tdp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tdvp))
softdep_change_linkcnt(tdp);
if ((error = UFS_UPDATE(tdvp, NULL, NULL,
UPDATE_DIROP)) != 0) {
/* update failed: take the new parent's extra link back */
tdp->i_ffs_effnlink--;
tdp->i_nlink--;
DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
tdp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tdvp))
softdep_change_linkcnt(tdp);
goto bad;
}
}
/* Build the new directory entry and insert it into tdvp. */
newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
ufs_makedirentry(ip, tcnp, newdir);
error = ufs_direnter(tdvp, NULL, newdir, tcnp, NULL);
pool_cache_put(ufs_direct_cache, newdir);
if (error != 0) {
if (doingdirectory && newparent) {
/* entry was not added: undo the ".." link accounted above */
tdp->i_ffs_effnlink--;
tdp->i_nlink--;
DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
tdp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tdvp))
softdep_change_linkcnt(tdp);
(void)UFS_UPDATE(tdvp, NULL, NULL,
UPDATE_WAIT | UPDATE_DIROP);
}
goto bad;
}
VN_KNOTE(tdvp, NOTE_WRITE);
} else {
if (txp->i_dev != tdp->i_dev || txp->i_dev != ip->i_dev)
panic("rename: EXDEV");
/*
 * Short circuit rename(foo, foo).
 */
if (txp->i_number == ip->i_number)
panic("rename: same file");
/*
 * If the parent directory is "sticky", then the user must
 * own the parent directory, or the destination of the rename,
 * otherwise the destination may not be changed (except by
 * root). This implements append-only directories.
 */
if ((tdp->i_mode & S_ISTXT) &&
kauth_authorize_generic(tcnp->cn_cred,
KAUTH_GENERIC_ISSUSER, NULL) != 0 &&
kauth_cred_geteuid(tcnp->cn_cred) != tdp->i_uid &&
txp->i_uid != kauth_cred_geteuid(tcnp->cn_cred)) {
error = EPERM;
goto bad;
}
/*
 * Target must be empty if a directory and have no links
 * to it. Also, ensure source and target are compatible
 * (both directories, or both not directories).
 */
if ((txp->i_mode & IFMT) == IFDIR) {
if (txp->i_ffs_effnlink > 2 ||
!ufs_dirempty(txp, tdp->i_number, tcnp->cn_cred)) {
error = ENOTEMPTY;
goto bad;
}
if (!doingdirectory) {
error = ENOTDIR;
goto bad;
}
cache_purge(tdvp);
} else if (doingdirectory) {
error = EISDIR;
goto bad;
}
/* Point the existing target entry at the source inode. */
if ((error = ufs_dirrewrite(tdp, txp, ip->i_number,
IFTODT(ip->i_mode), doingdirectory && newparent ?
newparent : doingdirectory, IN_CHANGE | IN_UPDATE)) != 0)
goto bad;
if (doingdirectory) {
if (!newparent) {
tdp->i_ffs_effnlink--;
if (DOINGSOFTDEP(tdvp))
softdep_change_linkcnt(tdp);
}
txp->i_ffs_effnlink--;
if (DOINGSOFTDEP(tvp))
softdep_change_linkcnt(txp);
}
if (doingdirectory && !DOINGSOFTDEP(tvp)) {
/*
 * Truncate inode. The only stuff left in the directory
 * is "." and "..". The "." reference is inconsequential
 * since we are quashing it. We have removed the "."
 * reference and the reference in the parent directory,
 * but there may be other hard links. The soft
 * dependency code will arrange to do these operations
 * after the parent directory entry has been deleted on
 * disk, so when running with that code we avoid doing
 * them now.
 */
if (!newparent) {
tdp->i_nlink--;
DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
tdp->i_flag |= IN_CHANGE;
UFS_WAPBL_UPDATE(tdvp, NULL, NULL, 0);
}
txp->i_nlink--;
DIP_ASSIGN(txp, nlink, txp->i_nlink);
txp->i_flag |= IN_CHANGE;
if ((error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC,
tcnp->cn_cred)))
goto bad;
}
VN_KNOTE(tdvp, NOTE_WRITE);
VN_KNOTE(tvp, NOTE_DELETE);
}
/* restore directory lookup information in case tdvp == fdvp */
fdp->i_offset = saved_f_offset;
fdp->i_reclen = saved_f_reclen;
fdp->i_count = saved_f_count;
fdp->i_diroff = saved_f_diroff;
/*
 * Handle case where the directory we need to remove may have
 * been moved when the directory insertion above performed compaction.
 * or when i_count may be wrong due to insertion before this entry.
 */
if ((tdp->i_number == fdp->i_number) &&
(((saved_f_offset >= saved_t_offset) &&
(saved_f_offset < saved_t_offset + saved_t_count)) ||
((saved_f_offset - saved_f_count >= saved_t_offset) &&
(saved_f_offset - saved_f_count <
saved_t_offset + saved_t_count)))) {
struct buf *bp;
struct direct *ep;
struct ufsmount *ump = fdp->i_ump;
doff_t endsearch; /* offset to end directory search */
int dirblksiz = ump->um_dirblksiz;
const int needswap = UFS_MPNEEDSWAP(ump);
u_long bmask;
int namlen, entryoffsetinblock;
char *dirbuf;
bmask = fdvp->v_mount->mnt_stat.f_iosize - 1;
/*
 * the fcnp entry will be somewhere between the start of
 * compaction and the original location.
 */
fdp->i_offset = saved_t_offset;
error = ufs_blkatoff(fdvp, (off_t)fdp->i_offset, &dirbuf, &bp,
false);
if (error)
goto bad;
/*
 * keep existing fdp->i_count in case
 * compaction started at the same location as the fcnp entry.
 */
endsearch = saved_f_offset + saved_f_reclen;
entryoffsetinblock = 0;
/*
 * Walk the entries from saved_t_offset until the one whose name
 * matches fcnp is found, leaving fdp->i_offset/i_reclen/i_count
 * describing it.
 */
while (fdp->i_offset < endsearch) {
int reclen;
/*
 * If necessary, get the next directory block.
 */
if ((fdp->i_offset & bmask) == 0) {
if (bp != NULL)
brelse(bp, 0);
error = ufs_blkatoff(fdvp, (off_t)fdp->i_offset,
&dirbuf, &bp, false);
if (error)
goto bad;
entryoffsetinblock = 0;
}
KASSERT(bp != NULL);
ep = (struct direct *)(dirbuf + entryoffsetinblock);
reclen = ufs_rw16(ep->d_reclen, needswap);
/* Choose d_type or d_namlen as the name length (see FSFMT). */
#if (BYTE_ORDER == LITTLE_ENDIAN)
if (FSFMT(fdvp) && needswap == 0)
namlen = ep->d_type;
else
namlen = ep->d_namlen;
#else
if (FSFMT(fdvp) && needswap != 0)
namlen = ep->d_type;
else
namlen = ep->d_namlen;
#endif
if ((ep->d_ino != 0) &&
(ufs_rw32(ep->d_ino, needswap) != WINO) &&
(namlen == fcnp->cn_namelen) &&
memcmp(ep->d_name, fcnp->cn_nameptr, namlen) == 0) {
fdp->i_reclen = reclen;
break;
}
/* not the entry: step over it, remembering its length in i_count */
fdp->i_offset += reclen;
fdp->i_count = reclen;
entryoffsetinblock += reclen;
}
KASSERT(fdp->i_offset <= endsearch);
/*
 * If fdp->i_offset points to start of a directory block,
 * set fdp->i_count so ufs_dirremove() doesn't compact over
 * a directory block boundary.
 */
if ((fdp->i_offset & (dirblksiz - 1)) == 0)
fdp->i_count = 0;
brelse(bp, 0);
}
/*
 * 3) Unlink the source.
 */
/*
 * Ensure that the directory entry still exists and has not
 * changed while the new name has been entered. If the source is
 * a file then the entry may have been unlinked or renamed. In
 * either case there is no further work to be done. If the source
 * is a directory then it cannot have been rmdir'ed; The IN_RENAME
 * flag ensures that it cannot be moved by another rename or removed
 * by a rmdir.
 */
if (fxp != ip) {
if (doingdirectory)
panic("rename: lost dir entry");
} else {
/*
 * If the source is a directory with a
 * new parent, the link count of the old
 * parent directory must be decremented
 * and ".." set to point to the new parent.
 */
if (doingdirectory && newparent) {
KASSERT(fdp != NULL);
/*
 * Offset of the entry to rewrite inside the moved directory:
 * the length of the template's "." record, i.e. presumably the
 * position of "..".
 * NOTE(review): the return value of ufs_dirrewrite() is ignored
 * here, unlike the call in step 2 -- confirm this is intended.
 */
fxp->i_offset = mastertemplate.dot_reclen;
ufs_dirrewrite(fxp, fdp, newparent, DT_DIR, 0, IN_CHANGE);
cache_purge(fdvp);
}
error = ufs_dirremove(fdvp, fxp, fcnp->cn_flags, 0);
fxp->i_flag &= ~IN_RENAME;
}
VN_KNOTE(fvp, NOTE_RENAME);
goto done;
/* Exit used before the source is looked up again: references only. */
out:
vrele(fvp);
vrele(fdvp);
goto out2;
/* exit routines from steps 1 & 2 */
/*
 * Undo the link count bump from step 1 and push the inode update.
 * NOTE(review): IN_RENAME is cleared twice below; the conditional
 * clear is redundant with the unconditional one.
 */
bad:
if (doingdirectory)
ip->i_flag &= ~IN_RENAME;
ip->i_ffs_effnlink--;
ip->i_nlink--;
DIP_ASSIGN(ip, nlink, ip->i_nlink);
ip->i_flag |= IN_CHANGE;
ip->i_flag &= ~IN_RENAME;
UFS_WAPBL_UPDATE(fvp, NULL, NULL, 0);
if (DOINGSOFTDEP(fvp))
softdep_change_linkcnt(ip);
/* Close the journal transaction opened by UFS_WAPBL_BEGIN above. */
done:
UFS_WAPBL_END(fdvp->v_mount);
vput(fdvp);
vput(fvp);
out2:
/*
 * clear IN_RENAME - some exit paths happen too early to go
 * through the cleanup done in the "bad" case above, so we
 * always do this mini-cleanup here.
 */
ip->i_flag &= ~IN_RENAME;
if (txp)
vput(ITOV(txp));
/* tdp was set to NULL on the paths that already released tdvp. */
if (tdp) {
if (newparent)
vput(ITOV(tdp));
else
vrele(ITOV(tdp));
}
fstrans_done(mp);
return (error);
}
#ifdef WAPBL_DEBUG_INODES
/*
 * Check every non-busy buffer on vp's dirty list and panic unless it is
 * both B_DELWRI and B_LOCKED. "what" is spliced into the panic text so
 * the caller can tell per-vnode checks ("") from the device vnode check
 * ("devvp "). Called with vp->v_interlock held; runs at splbio().
 */
static void
ufs_wapbl_verify_dirtybufs(struct vnode *vp, const char *what)
{
	struct buf *bp, *nbp;
	int s;

	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if ((bp->b_flags & B_BUSY)) {
			/* busy buffers are skipped, not checked */
			simple_unlock(&bp->b_interlock);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("wapbl_verify: %snot dirty, bp %p", what, bp);
		if ((bp->b_flags & B_LOCKED) == 0)
			panic("wapbl_verify: %snot locked, bp %p", what, bp);
		simple_unlock(&bp->b_interlock);
	}
	splx(s);
}

/*
 * Debug-only consistency check (WAPBL_DEBUG_INODES).
 *
 * Walks mp's vnode list and panics if any vnode of a type other than
 * VNON has IN_CHANGE or IN_UPDATE still set, i.e. update was not called
 * on its inode. It then verifies the dirty buffers of each such vnode
 * and finally those of the mount's device vnode (um_devvp).
 *
 * mp  - mount whose vnodes are verified.
 * str - caller tag ("wapbl_begin"/"wapbl_end" at the call sites in
 *       ufs_wapbl.h); currently unused by this function.
 */
void
ufs_wapbl_verify_inodes(struct mount *mp, const char *str)
{
	struct vnode *vp;
	struct inode *ip;

	simple_lock(&mntvnode_slock);
loop:
	TAILQ_FOREACH_REVERSE(vp, &mp->mnt_vnodelist, vnodelst, v_mntvnodes) {
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;
		simple_lock(&vp->v_interlock);
		ip = VTOI(vp);
		if (vp->v_type == VNON) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/* verify that update has been called on all inodes */
		if (ip->i_flag & (IN_CHANGE | IN_UPDATE)) {
			panic("wapbl_verify: mp %p: dirty vnode %p (inode %p): 0x%x\n",
			    mp, vp, ip, ip->i_flag);
		}
		KDASSERT(ip->i_nlink == ip->i_ffs_effnlink);

		/* the list lock is dropped while the buffers are examined */
		simple_unlock(&mntvnode_slock);
		ufs_wapbl_verify_dirtybufs(vp, "");
		simple_unlock(&vp->v_interlock);
		simple_lock(&mntvnode_slock);
	}
	simple_unlock(&mntvnode_slock);

	vp = VFSTOUFS(mp)->um_devvp;
	simple_lock(&vp->v_interlock);
	ufs_wapbl_verify_dirtybufs(vp, "devvp ");
	simple_unlock(&vp->v_interlock);
}
#endif /* WAPBL_DEBUG_INODES */

176
sys/ufs/ufs/ufs_wapbl.h Normal file
View File

@ -0,0 +1,176 @@
/* $NetBSD: ufs_wapbl.h,v 1.2 2008/07/31 05:38:07 simonb Exp $ */
/*-
* Copyright (c) 2003,2006,2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _UFS_UFS_UFS_WAPBL_H_
#define _UFS_UFS_UFS_WAPBL_H_
#if defined(_KERNEL_OPT)
#include "opt_wapbl.h"
#endif
/*
* Information for the journal location stored in the superblock.
* We store the journal version, some flags, the journal location
* type, and some location specific "locators" that identify where
* the log itself is located.
*/
/* fs->fs_journal_version */
#define UFS_WAPBL_VERSION 1
/* fs->fs_journal_location */
/*
 * Each location type below is followed by the names of its four
 * "locator slots" (values 0-3).
 * NOTE(review): the slot numbers are presumably indexes into
 * fs->fs_journallocs[]; confirm against the code that reads them.
 */
#define UFS_WAPBL_JOURNALLOC_NONE 0
#define UFS_WAPBL_JOURNALLOC_END_PARTITION 1
#define UFS_WAPBL_EPART_ADDR 0 /* locator slots */
#define UFS_WAPBL_EPART_COUNT 1
#define UFS_WAPBL_EPART_BLKSZ 2
#define UFS_WAPBL_EPART_UNUSED 3
#define UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM 2
#define UFS_WAPBL_INFS_ADDR 0 /* locator slots */
#define UFS_WAPBL_INFS_COUNT 1
#define UFS_WAPBL_INFS_BLKSZ 2
#define UFS_WAPBL_INFS_INO 3
/* fs->fs_journal_flags */
/* bit flags: the two values are distinct bits and may be OR-ed */
#define UFS_WAPBL_FLAGS_CREATE_LOG 0x1
#define UFS_WAPBL_FLAGS_CLEAR_LOG 0x2
/*
* The journal size is limited to between 1MB and 64MB.
* The default journal size is the filesystem size divided by
* the scale factor - this is 1M of journal per 1GB of filesystem
* space.
*
* XXX: Is 64MB too limiting? If user explicitly asks for more, allow it?
*/
/* default journal size = filesystem size / UFS_WAPBL_JOURNAL_SCALE */
#define UFS_WAPBL_JOURNAL_SCALE 1024
/* lower and upper bounds on the journal size, in bytes (1MB and 64MB) */
#define UFS_WAPBL_MIN_JOURNAL_SIZE (1024 * 1024)
#define UFS_WAPBL_MAX_JOURNAL_SIZE (64 * 1024 * 1024)
#if defined(WAPBL)
#if defined(WAPBL_DEBUG)
#define WAPBL_DEBUG_INODES
#endif
int wapbl_ufs_rename(void *v);
#ifdef WAPBL_DEBUG_INODES
void ufs_wapbl_verify_inodes(struct mount *, const char *);
#endif
/*
 * Begin a journal transaction on mp, if mp has a journal.
 *
 * mp       - mount being modified; a no-op returning 0 when
 *            mp->mnt_wapbl is not set.
 * vp1, vp2 - optional vnodes (may be NULL) on which an extra reference
 *            is held for the duration of the transaction; the matching
 *            ufs_wapbl_end2() call releases them.
 * file, line - call site, passed through to wapbl_begin().
 *
 * Returns 0 on success or the error from wapbl_begin(). On error no
 * transaction is open and the extra vnode references have already been
 * dropped, so the caller must NOT call ufs_wapbl_end2().
 */
static __inline int
ufs_wapbl_begin2(struct mount *mp, struct vnode *vp1, struct vnode *vp2,
		 const char *file, int line)
{
	if (mp->mnt_wapbl) {
		int error;

		if (vp1)
			vref(vp1);
		if (vp2)
			vref(vp2);
		error = wapbl_begin(mp->mnt_wapbl, file, line);
		if (error) {
			/*
			 * Callers do not pair a failed begin with an end,
			 * so give back the references taken above (same
			 * order as ufs_wapbl_end2) instead of leaking them.
			 */
			if (vp2)
				vrele(vp2);
			if (vp1)
				vrele(vp1);
			return error;
		}
#ifdef WAPBL_DEBUG_INODES
		/* only verify on the outermost acquisition of the log lock */
		if (mp->mnt_wapbl->wl_lock.lk_exclusivecount == 1)
			ufs_wapbl_verify_inodes(mp, "wapbl_begin");
#endif
	}
	return 0;
}
/*
 * End the journal transaction started by ufs_wapbl_begin2().
 *
 * Does nothing when mp has no journal. Otherwise it ends the
 * transaction and then releases the references on vp2 and vp1 (in that
 * order); either may be NULL.
 */
static __inline void
ufs_wapbl_end2(struct mount *mp, struct vnode *vp1, struct vnode *vp2)
{
	if (!mp->mnt_wapbl)
		return;

#ifdef WAPBL_DEBUG_INODES
	/* check inode state just before the outermost lock is released */
	if (mp->mnt_wapbl->wl_lock.lk_exclusivecount == 1)
		ufs_wapbl_verify_inodes(mp, "wapbl_end");
#endif
	wapbl_end(mp->mnt_wapbl);

	if (vp2)
		vrele(vp2);
	if (vp1)
		vrele(vp1);
}
/*
 * Transaction bracket wrappers around ufs_wapbl_begin2()/ufs_wapbl_end2().
 * The BEGIN forms evaluate to the int returned by ufs_wapbl_begin2() and
 * record the calling function and line. The "1" variants additionally
 * pass one vnode (v1) to the helper.
 */
#define UFS_WAPBL_BEGIN(mp)						\
	ufs_wapbl_begin2((mp), NULL, NULL, __FUNCTION__, __LINE__)
#define UFS_WAPBL_BEGIN1(mp, v1)					\
	ufs_wapbl_begin2((mp), (v1), NULL, __FUNCTION__, __LINE__)
#define UFS_WAPBL_END(mp)						\
	ufs_wapbl_end2((mp), NULL, NULL)
#define UFS_WAPBL_END1(mp, v1)						\
	ufs_wapbl_end2((mp), (v1), NULL)
/*
 * Call UFS_UPDATE(vp, access, modify, flags) only when vp's mount has a
 * journal (mnt_wapbl set); otherwise do nothing. The result of
 * UFS_UPDATE is discarded.
 *
 * Wrapped in do { } while (0) so the macro is a single statement and can
 * be used as the body of an if/else without braces.
 */
#define UFS_WAPBL_UPDATE(vp, access, modify, flags)			\
	do {								\
		if ((vp)->v_mount->mnt_wapbl) {				\
			UFS_UPDATE(vp, access, modify, flags);		\
		}							\
	} while (/* CONSTCOND */ 0)
/*
 * Journal lock assertions. With UFS_WAPBL_DEBUG_JLOCK defined they call
 * wapbl_jlock_assert()/wapbl_junlock_assert() on the mount's journal
 * when one is present; otherwise they expand to nothing.
 *
 * The enabled forms are single statements (do { } while (0)) with the
 * argument parenthesized, so they are safe inside if/else and with
 * arbitrary expressions for mp.
 */
#ifdef UFS_WAPBL_DEBUG_JLOCK
#define UFS_WAPBL_JLOCK_ASSERT(mp)					\
	do {								\
		if ((mp)->mnt_wapbl)					\
			wapbl_jlock_assert((mp)->mnt_wapbl);		\
	} while (/* CONSTCOND */ 0)
#define UFS_WAPBL_JUNLOCK_ASSERT(mp)					\
	do {								\
		if ((mp)->mnt_wapbl)					\
			wapbl_junlock_assert((mp)->mnt_wapbl);		\
	} while (/* CONSTCOND */ 0)
#else
#define UFS_WAPBL_JLOCK_ASSERT(mp)
#define UFS_WAPBL_JUNLOCK_ASSERT(mp)
#endif
/*
 * Forward inode registration / unregistration and block deallocation
 * records to the journal, but only when the mount has one (mnt_wapbl
 * set). Without a journal they do nothing.
 *
 * Each macro is a single statement (do { } while (0)) and parenthesizes
 * mp, so it behaves correctly as an unbraced if/else body and when mp
 * is an expression such as &mount.
 */
#define UFS_WAPBL_REGISTER_INODE(mp, ino, mode)				\
	do {								\
		if ((mp)->mnt_wapbl)					\
			wapbl_register_inode((mp)->mnt_wapbl, ino, mode); \
	} while (/* CONSTCOND */ 0)
#define UFS_WAPBL_UNREGISTER_INODE(mp, ino, mode)			\
	do {								\
		if ((mp)->mnt_wapbl)					\
			wapbl_unregister_inode((mp)->mnt_wapbl, ino, mode); \
	} while (/* CONSTCOND */ 0)
#define UFS_WAPBL_REGISTER_DEALLOCATION(mp, blk, len)			\
	do {								\
		if ((mp)->mnt_wapbl)					\
			wapbl_register_deallocation((mp)->mnt_wapbl, blk, len); \
	} while (/* CONSTCOND */ 0)
#else /* ! WAPBL */
/*
 * Journaling is compiled out.  UFS_WAPBL_BEGIN/BEGIN1 evaluate to 0 and
 * every other macro expands to an empty do { } while (0) statement, so a
 * call site needs the same trailing ';' whether or not WAPBL is built in.
 * None of the arguments is evaluated.
 */
#define UFS_WAPBL_BEGIN(mp) 0
#define UFS_WAPBL_BEGIN1(mp, v1) 0
#define UFS_WAPBL_END(mp) do { } while (0)
#define UFS_WAPBL_END1(mp, v1) do { } while (0)
#define UFS_WAPBL_UPDATE(vp, access, modify, flags) do { } while (0)
#define UFS_WAPBL_JLOCK_ASSERT(mp) do { } while (0)
#define UFS_WAPBL_JUNLOCK_ASSERT(mp) do { } while (0)
#define UFS_WAPBL_REGISTER_INODE(mp, ino, mode) do { } while (0)
#define UFS_WAPBL_UNREGISTER_INODE(mp, ino, mode) do { } while (0)
#define UFS_WAPBL_REGISTER_DEALLOCATION(mp, blk, len) do { } while (0)
#endif /* WAPBL */
#endif /* !_UFS_UFS_UFS_WAPBL_H_ */

View File

@ -1,4 +1,4 @@
/* $NetBSD: dumpfs.c,v 1.49 2008/07/21 13:36:58 lukem Exp $ */ /* $NetBSD: dumpfs.c,v 1.50 2008/07/31 05:38:07 simonb Exp $ */
/* /*
* Copyright (c) 1983, 1992, 1993 * Copyright (c) 1983, 1992, 1993
@ -39,7 +39,7 @@ __COPYRIGHT("@(#) Copyright (c) 1983, 1992, 1993\
#if 0 #if 0
static char sccsid[] = "@(#)dumpfs.c 8.5 (Berkeley) 4/29/95"; static char sccsid[] = "@(#)dumpfs.c 8.5 (Berkeley) 4/29/95";
#else #else
__RCSID("$NetBSD: dumpfs.c,v 1.49 2008/07/21 13:36:58 lukem Exp $"); __RCSID("$NetBSD: dumpfs.c,v 1.50 2008/07/31 05:38:07 simonb Exp $");
#endif #endif
#endif /* not lint */ #endif /* not lint */
@ -379,6 +379,13 @@ print_superblock(struct fs *fs, uint16_t *opostbl,
fs->fs_old_csshift, fs->fs_old_csmask); fs->fs_old_csshift, fs->fs_old_csmask);
printf("\ncgrotor\t%d\tfmod\t%d\tronly\t%d\tclean\t0x%02x\n", printf("\ncgrotor\t%d\tfmod\t%d\tronly\t%d\tclean\t0x%02x\n",
fs->fs_cgrotor, fs->fs_fmod, fs->fs_ronly, fs->fs_clean); fs->fs_cgrotor, fs->fs_fmod, fs->fs_ronly, fs->fs_clean);
printf("wapbl version 0x%x\tlocation %u\tflags 0x%x\n",
fs->fs_journal_version, fs->fs_journal_location,
fs->fs_journal_flags);
/* Four journal location words; label each one with its own index. */
printf("wapbl loc0 %" PRIu64 "\tloc1 %" PRIu64,
    fs->fs_journallocs[0], fs->fs_journallocs[1]);
printf("\tloc2 %" PRIu64 "\tloc3 %" PRIu64 "\n",
    fs->fs_journallocs[2], fs->fs_journallocs[3]);
printf("flags\t"); printf("flags\t");
if (fs->fs_flags == 0) if (fs->fs_flags == 0)
printf("none"); printf("none");
@ -396,8 +403,11 @@ print_superblock(struct fs *fs, uint16_t *opostbl,
printf("multilabel "); printf("multilabel ");
if (fs->fs_flags & FS_FLAGS_UPDATED) if (fs->fs_flags & FS_FLAGS_UPDATED)
printf("fs_flags expanded "); printf("fs_flags expanded ");
fsflags = fs->fs_flags & ~(FS_UNCLEAN | FS_DOSOFTDEP | FS_NEEDSFSCK | FS_INDEXDIRS | if (fs->fs_flags & FS_DOWAPBL)
FS_ACLS | FS_MULTILABEL | FS_FLAGS_UPDATED); printf("wapbl ");
fsflags = fs->fs_flags & ~(FS_UNCLEAN | FS_DOSOFTDEP | FS_NEEDSFSCK |
FS_INDEXDIRS | FS_ACLS | FS_MULTILABEL |
FS_FLAGS_UPDATED | FS_DOWAPBL);
if (fsflags != 0) if (fsflags != 0)
printf("unknown flags (%#x)", fsflags); printf("unknown flags (%#x)", fsflags);
printf("\nfsmnt\t%s\n", fs->fs_fsmnt); printf("\nfsmnt\t%s\n", fs->fs_fsmnt);