Merge the simonb-wapbl branch. From the original branch commit:

Add Wasabi Systems' WAPBL (Write Ahead Physical Block Logging)
   journaling code.  Originally written by Darrin B. Jewell while
   at Wasabi and updated to -current by Antti Kantee, Andy Doran,
   Greg Oster and Simon Burge.

OK'd by core@, releng@.
This commit is contained in:
simonb 2008-07-31 05:38:04 +00:00
parent d1dfd4fec7
commit 36d65f1138
53 changed files with 6837 additions and 296 deletions

View File

@ -1,4 +1,4 @@
# $NetBSD: mi,v 1.1170 2008/07/29 13:17:40 pooka Exp $
# $NetBSD: mi,v 1.1171 2008/07/31 05:38:04 simonb Exp $
#
# Note: don't delete entries from here - mark them as "obsolete" instead.
#
@ -2069,6 +2069,7 @@
./usr/include/sys/vnode_if.h comp-c-include
./usr/include/sys/vsio.h comp-obsolete obsolete
./usr/include/sys/wait.h comp-c-include
./usr/include/sys/wapbl.h comp-c-include
./usr/include/sys/wdog.h comp-c-include
./usr/include/sysexits.h comp-c-include
./usr/include/syslog.h comp-c-include
@ -2100,6 +2101,7 @@
./usr/include/ufs/ufs/quota.h comp-c-include
./usr/include/ufs/ufs/ufs_bswap.h comp-c-include
./usr/include/ufs/ufs/ufs_extern.h comp-c-include
./usr/include/ufs/ufs/ufs_wapbl.h comp-c-include
./usr/include/ufs/ufs/ufsmount.h comp-c-include
./usr/include/ulimit.h comp-c-include
./usr/include/unctrl.h comp-c-include

View File

@ -1,4 +1,4 @@
/* $NetBSD: mntopts.h,v 1.10 2006/10/31 08:12:46 mjf Exp $ */
/* $NetBSD: mntopts.h,v 1.11 2008/07/31 05:38:04 simonb Exp $ */
/*-
* Copyright (c) 1994
@ -55,6 +55,7 @@ struct mntopt {
#define MOPT_NOATIME { "atime", 1, MNT_NOATIME, 0 }
#define MOPT_SYMPERM { "symperm", 0, MNT_SYMPERM, 0 }
#define MOPT_SOFTDEP { "softdep", 0, MNT_SOFTDEP, 0 }
#define MOPT_LOG { "log", 0, MNT_LOG, 0 }
#define MOPT_IGNORE { "hidden", 0, MNT_IGNORE, 0 }
/* Control flags. */

View File

@ -1,4 +1,4 @@
# $NetBSD: Makefile,v 1.35 2008/05/04 15:37:19 tsutsui Exp $
# $NetBSD: Makefile,v 1.36 2008/07/31 05:38:04 simonb Exp $
# @(#)Makefile 8.2 (Berkeley) 4/27/95
.include <bsd.own.mk>
@ -19,6 +19,10 @@ SRCS+= progress.c
.PATH: ${NETBSDSRCDIR}/sys/ufs/ffs ${FSCK}
SRCS+= vfs_wapbl.c wapbl.c
.PATH: ${NETBSDSRCDIR}/sys/kern
CPPFLAGS+=-DWAPBL_DEBUG_PRINT=0
LDADD+=-lutil
DPADD+=${LIBUTIL}

View File

@ -1,4 +1,4 @@
/* $NetBSD: extern.h,v 1.22 2005/06/27 01:25:35 christos Exp $ */
/* $NetBSD: extern.h,v 1.23 2008/07/31 05:38:04 simonb Exp $ */
/*
* Copyright (c) 1994 James A. Jegers
@ -27,7 +27,7 @@
void adjust(struct inodesc *, int);
daddr_t allocblk(long);
ino_t allocdir(ino_t, ino_t, int);
ino_t allocino(ino_t request, int type);
ino_t allocino(ino_t, int);
void blkerror(ino_t, const char *, daddr_t);
int bread(int, char *, daddr_t, long);
void bufinit(void);
@ -82,7 +82,12 @@ void setinodebuf(ino_t);
int setup(const char *);
void voidquit(int);
void swap_cg(struct cg *, struct cg *);
void copyback_cg(struct bufarea *);
void sb_oldfscompat_write(struct fs *, struct fs *);
void sb_oldfscompat_read(struct fs *, struct fs **);
void replay_wapbl(void);
void cleanup_wapbl(void);
int read_wapbl(char *, long, daddr_t);
int is_journal_inode(ino_t);
void swap_cg(struct cg *, struct cg *);
void copyback_cg(struct bufarea *);
void sb_oldfscompat_write(struct fs *, struct fs *);
void sb_oldfscompat_read(struct fs *, struct fs **);

View File

@ -1,4 +1,4 @@
.\" $NetBSD: fsck_ffs.8,v 1.40 2005/01/19 16:41:04 wiz Exp $
.\" $NetBSD: fsck_ffs.8,v 1.41 2008/07/31 05:38:04 simonb Exp $
.\"
.\" Copyright (c) 1980, 1989, 1991, 1993
.\" The Regents of the University of California. All rights reserved.
@ -198,7 +198,7 @@ possible without user interaction.
Conversion in preen mode is best used when all the file systems
are being converted at once.
The format of a file system can be determined from the
second line of output from
third line of output from
.Xr dumpfs 8 .
.It Fl d
Print debugging output.

View File

@ -1,4 +1,4 @@
/* $NetBSD: pass4.c,v 1.24 2008/02/23 21:41:48 christos Exp $ */
/* $NetBSD: pass4.c,v 1.25 2008/07/31 05:38:04 simonb Exp $ */
/*
* Copyright (c) 1980, 1986, 1993
@ -34,7 +34,7 @@
#if 0
static char sccsid[] = "@(#)pass4.c 8.4 (Berkeley) 4/28/95";
#else
__RCSID("$NetBSD: pass4.c,v 1.24 2008/02/23 21:41:48 christos Exp $");
__RCSID("$NetBSD: pass4.c,v 1.25 2008/07/31 05:38:04 simonb Exp $");
#endif
#endif /* not lint */
@ -89,7 +89,14 @@ pass4(void)
case DFOUND:
n = info->ino_linkcnt;
if (n) {
adjust(&idesc, (short)n);
if (is_journal_inode(inumber)) {
if (debug)
printf(
"skipping unreferenced journal inode %" PRId64 "\n", inumber);
break;
} else {
adjust(&idesc, (short)n);
}
break;
}
for (zlnp = zlnhead; zlnp; zlnp = zlnp->next)

View File

@ -1,4 +1,4 @@
/* $NetBSD: setup.c,v 1.82 2008/02/23 21:41:48 christos Exp $ */
/* $NetBSD: setup.c,v 1.83 2008/07/31 05:38:04 simonb Exp $ */
/*
* Copyright (c) 1980, 1986, 1993
@ -34,7 +34,7 @@
#if 0
static char sccsid[] = "@(#)setup.c 8.10 (Berkeley) 5/9/95";
#else
__RCSID("$NetBSD: setup.c,v 1.82 2008/02/23 21:41:48 christos Exp $");
__RCSID("$NetBSD: setup.c,v 1.83 2008/07/31 05:38:04 simonb Exp $");
#endif
#endif /* not lint */
@ -159,6 +159,25 @@ setup(const char *dev)
doskipclean = 0;
pwarn("USING ALTERNATE SUPERBLOCK AT %d\n", bflag);
}
if (sblock->fs_flags & FS_DOWAPBL) {
if (preen) {
if (!quiet)
pwarn("file system is journaled; not checking\n");
return (-1);
}
if (!quiet)
pwarn("** File system is journaled; replaying journal\n");
replay_wapbl();
doskipclean = 0;
sblock->fs_flags &= ~FS_DOWAPBL;
sbdirty();
/* Although we may have updated the superblock from the
* journal, we are still going to do a full check, so we
* don't bother to re-read the superblock from the journal.
* XXX, instead we could re-read the superblock and then not
* force doskipclean = 0
*/
}
if (debug)
printf("clean = %d\n", sblock->fs_clean);
if (doswap)
@ -218,6 +237,13 @@ setup(const char *dev)
/*
* Check and potentially fix certain fields in the super block.
*/
if (sblock->fs_flags & ~(FS_KNOWN_FLAGS)) {
pfatal("UNKNOWN FLAGS=0x%08x IN SUPERBLOCK", sblock->fs_flags);
if (reply("CLEAR") == 1) {
sblock->fs_flags &= FS_KNOWN_FLAGS;
sbdirty();
}
}
if (sblock->fs_optim != FS_OPTTIME && sblock->fs_optim != FS_OPTSPACE) {
pfatal("UNDEFINED OPTIMIZATION IN SUPERBLOCK");
if (reply("SET TO DEFAULT") == 1) {

View File

@ -1,4 +1,4 @@
/* $NetBSD: utilities.c,v 1.55 2008/02/23 21:41:48 christos Exp $ */
/* $NetBSD: utilities.c,v 1.56 2008/07/31 05:38:04 simonb Exp $ */
/*
* Copyright (c) 1980, 1986, 1993
@ -34,7 +34,7 @@
#if 0
static char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95";
#else
__RCSID("$NetBSD: utilities.c,v 1.55 2008/02/23 21:41:48 christos Exp $");
__RCSID("$NetBSD: utilities.c,v 1.56 2008/07/31 05:38:04 simonb Exp $");
#endif
#endif /* not lint */
@ -322,6 +322,7 @@ ckfini(void)
if (debug)
printf("cache missed %ld of %ld (%d%%)\n", diskreads,
totalreads, (int)(diskreads * 100 / totalreads));
cleanup_wapbl();
(void)close(fsreadfd);
(void)close(fswritefd);
}
@ -335,7 +336,8 @@ bread(int fd, char *buf, daddr_t blk, long size)
offset = blk;
offset *= dev_bsize;
if (pread(fd, buf, (int)size, offset) == size)
if ((pread(fd, buf, (int)size, offset) == size) &&
read_wapbl(buf, size, blk) == 0)
return (0);
rwerror("READ", blk);
errs = 0;

202
sbin/fsck_ffs/wapbl.c Normal file
View File

@ -0,0 +1,202 @@
/* $NetBSD: wapbl.c,v 1.2 2008/07/31 05:38:04 simonb Exp $ */
/*-
* Copyright (c) 2005,2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/* This file contains fsck support for wapbl
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: wapbl.c,v 1.2 2008/07/31 05:38:04 simonb Exp $");
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <ufs/ufs/dinode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_wapbl.h>
#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
#include <sys/wapbl.h>
#include "fsck.h"
#include "fsutil.h"
#include "extern.h"
#include "exitvalues.h"
/*
 * Block-write hook for the WAPBL code when built into fsck.
 *
 * Writes `len' bytes from `data' at block `pbn' using fsck's bwrite() on
 * the global write descriptor fswritefd.  `devvp' is not used here.
 * Always returns 0.
 */
int
wapbl_write(void *data, size_t len, struct vnode *devvp, daddr_t pbn)
{

	/* len is a size_t, so it is printed with %zu rather than %zd. */
	WAPBL_PRINTF(WAPBL_PRINT_IO,
	    ("wapbl_write: %zu bytes at block %"PRId64" on fd 0x%x\n",
	    len, pbn, fswritefd));
	bwrite(fswritefd, data, pbn, len);
	return 0;
}
/*
 * Block-read hook for the WAPBL code when built into fsck.
 *
 * Reads `len' bytes into `data' from block `pbn' using fsck's bread() on
 * the global read descriptor fsreadfd.  `devvp' is not used here.
 * Always returns 0: the value returned by bread() is discarded.
 * NOTE(review): a failed read is therefore not reported to the caller;
 * confirm that this is intended.
 */
int
wapbl_read(void *data, size_t len, struct vnode *devvp, daddr_t pbn)
{

	/* len is a size_t, so it is printed with %zu rather than %zd. */
	WAPBL_PRINTF(WAPBL_PRINT_IO,
	    ("wapbl_read: %zu bytes at block %"PRId64" on fd 0x%x\n",
	    len, pbn, fsreadfd));
	bread(fsreadfd, data, pbn, len);
	return 0;
}
/*
 * Journal replay state shared by the functions in this file.  Filled in by
 * wapbl_replay_start() from replay_wapbl() and reset to 0 by cleanup_wapbl().
 */
struct wapbl_replay *wapbl_replay;
/*
 * Find the journal described by the superblock and replay it.
 *
 * The journal version and location are validated first.  On any problem
 * the operator is asked whether to CONTINUE: answering no exits with
 * FSCK_EXIT_CHECK_FAILED, answering yes returns without replaying.
 * Unless nflag is set, the journal blocks are written back and the replay
 * is stopped on success.  Finally the inodes recorded in the replay state
 * are listed through the WAPBL debug printf.
 */
void
replay_wapbl(void)
{
	uint64_t log_start, log_blocks, log_blksize;
	int rv, idx;

	if (debug)
		wapbl_debug_print = WAPBL_PRINT_ERROR | WAPBL_PRINT_REPLAY;
	if (debug > 1)
		wapbl_debug_print |= WAPBL_PRINT_IO;

	if (sblock->fs_journal_version != UFS_WAPBL_VERSION) {
		pfatal("INVALID JOURNAL VERSION %d",
		    sblock->fs_journal_version);
		if (reply("CONTINUE") == 0)
			exit(FSCK_EXIT_CHECK_FAILED);
		return;
	}

	/* Pick the journal locators that match where the log lives. */
	if (sblock->fs_journal_location ==
	    UFS_WAPBL_JOURNALLOC_END_PARTITION) {
		log_start = sblock->fs_journallocs[UFS_WAPBL_EPART_ADDR];
		log_blocks = sblock->fs_journallocs[UFS_WAPBL_EPART_COUNT];
		log_blksize = sblock->fs_journallocs[UFS_WAPBL_EPART_BLKSZ];
	} else if (sblock->fs_journal_location ==
	    UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM) {
		log_start = sblock->fs_journallocs[UFS_WAPBL_INFS_ADDR];
		log_blocks = sblock->fs_journallocs[UFS_WAPBL_INFS_COUNT];
		log_blksize = sblock->fs_journallocs[UFS_WAPBL_INFS_BLKSZ];
	} else if (sblock->fs_journal_location ==
	    UFS_WAPBL_JOURNALLOC_NONE) {
		pfatal("INVALID JOURNAL LOCATION 'NONE'");
		if (reply("CONTINUE") == 0)
			exit(FSCK_EXIT_CHECK_FAILED);
		return;
	} else {
		pfatal("INVALID JOURNAL LOCATION %d",
		    sblock->fs_journal_location);
		if (reply("CONTINUE") == 0)
			exit(FSCK_EXIT_CHECK_FAILED);
		return;
	}

	rv = wapbl_replay_start(&wapbl_replay, 0, log_start, log_blocks,
	    log_blksize);
	if (rv != 0) {
		pfatal("UNABLE TO READ JOURNAL FOR REPLAY");
		if (reply("CONTINUE") == 0)
			exit(FSCK_EXIT_CHECK_FAILED);
		return;
	}

	/* With nflag set nothing is written and the replay is left open. */
	if (!nflag) {
		rv = wapbl_replay_write(wapbl_replay, 0);
		if (rv == 0) {
			wapbl_replay_stop(wapbl_replay);
		} else {
			pfatal("UNABLE TO REPLAY JOURNAL BLOCKS");
			if (reply("CONTINUE") == 0)
				exit(FSCK_EXIT_CHECK_FAILED);
		}
	}

	for (idx = 0; idx < wapbl_replay->wr_inodescnt; idx++) {
		WAPBL_PRINTF(WAPBL_PRINT_REPLAY,("wapbl_replay: "
		    "not cleaning inode %"PRIu32" mode %"PRIo32"\n",
		    wapbl_replay->wr_inodes[idx].wr_inumber,
		    wapbl_replay->wr_inodes[idx].wr_imode));
	}
}
/*
 * Release the journal replay state, if there is any: stop the replay when
 * it is still open, free it, and clear the global pointer.
 */
void
cleanup_wapbl(void)
{

	if (wapbl_replay == NULL)
		return;

	if (wapbl_replay_isopen(wapbl_replay))
		wapbl_replay_stop(wapbl_replay);
	wapbl_replay_free(wapbl_replay);
	wapbl_replay = NULL;
}
/*
 * Pass a block that was just read to the open journal replay.
 *
 * While a replay exists and is open, hands `buf' (`size' bytes read from
 * block `blk') to wapbl_replay_read() and returns its result.
 * Returns 0 without touching `buf' when no replay is open.
 */
int
read_wapbl(char *buf, long size, daddr_t blk)
{

	if (wapbl_replay != NULL && wapbl_replay_isopen(wapbl_replay))
		return wapbl_replay_read(wapbl_replay, buf, blk, size);

	return 0;
}
/*
 * Tell whether `ino' is the in-filesystem journal inode.
 *
 * Returns 1 only when the inode has SF_LOG set in its flags and the
 * superblock has the supported journal version, an in-filesystem journal
 * location, and names `ino' as the journal inode.  Returns 0 otherwise.
 */
int
is_journal_inode(ino_t ino)
{
	union dinode *inop;

	inop = ginode(ino);

	if ((iswap32(DIP(inop, flags)) & SF_LOG) == 0)
		return 0;
	if (sblock->fs_journal_version != UFS_WAPBL_VERSION)
		return 0;
	if (sblock->fs_journal_location != UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM)
		return 0;
	if (sblock->fs_journallocs[UFS_WAPBL_INFS_INO] != ino)
		return 0;

	return 1;
}

View File

@ -1,4 +1,4 @@
# $NetBSD: Makefile,v 1.22 2008/05/04 15:37:19 tsutsui Exp $
# $NetBSD: Makefile,v 1.23 2008/07/31 05:38:04 simonb Exp $
# @(#)Makefile 8.1 (Berkeley) 6/5/93
.include <bsd.own.mk>
@ -16,6 +16,10 @@ FSCK_FFS=${NETBSDSRCDIR}/sbin/fsck_ffs
CPPFLAGS+= -I${FSCK} -I${FSCK_FFS}
.PATH: ${FSCK} ${FSCK_FFS} ${NETBSDSRCDIR}/sys/ufs/ffs
SRCS+= vfs_wapbl.c wapbl.c
.PATH: ${NETBSDSRCDIR}/sys/kern
CPPFLAGS+=-DWAPBL_DEBUG_PRINT=0
LDADD+= -lutil -ledit -ltermcap
.ifndef HOSTPROG
DPADD+= ${LIBUTIL} ${LIBEDIT} ${LIBTERMCAP}

View File

@ -1,4 +1,4 @@
/* $NetBSD: mount_ffs.c,v 1.23 2008/07/20 01:20:22 lukem Exp $ */
/* $NetBSD: mount_ffs.c,v 1.24 2008/07/31 05:38:04 simonb Exp $ */
/*-
* Copyright (c) 1993, 1994
@ -39,7 +39,7 @@ __COPYRIGHT("@(#) Copyright (c) 1993, 1994\
#if 0
static char sccsid[] = "@(#)mount_ufs.c 8.4 (Berkeley) 4/26/95";
#else
__RCSID("$NetBSD: mount_ffs.c,v 1.23 2008/07/20 01:20:22 lukem Exp $");
__RCSID("$NetBSD: mount_ffs.c,v 1.24 2008/07/31 05:38:04 simonb Exp $");
#endif
#endif /* not lint */
@ -70,6 +70,7 @@ static const struct mntopt mopts[] = {
MOPT_NODEVMTIME,
MOPT_FORCE,
MOPT_SOFTDEP,
MOPT_LOG,
MOPT_GETARGS,
MOPT_NULL,
};

View File

@ -1,4 +1,4 @@
.\" $NetBSD: tunefs.8,v 1.36 2004/12/20 10:28:47 hubertf Exp $
.\" $NetBSD: tunefs.8,v 1.37 2008/07/31 05:38:04 simonb Exp $
.\"
.\" Copyright (c) 1983, 1991, 1993
.\" The Regents of the University of California. All rights reserved.
@ -41,6 +41,7 @@
.Op Fl e Ar maxbpg
.Op Fl g Ar avgfilesize
.Op Fl h Ar avgfpdir
.Op Fl l Ar logsize
.Op Fl m Ar minfree
.Bk -words
.\" .Op Fl n Ar soft_dependency_enabling
@ -97,6 +98,13 @@ this parameter should be set higher.
This specifies the expected average file size.
.It Fl h Ar avgfpdir
This specifies the expected number of files per directory.
.It Fl l Ar logsize
This value specifies the size of the in-filesystem journaling log file.
The default journaling log file size is described in
.Xr wapbl 4 .
Specifying a size of zero will cause the in-filesystem journaling log file
to be removed the next time the filesystem is mounted.
The size of an existing in-filesystem journaling log file cannot be changed.
.It Fl m Ar minfree
This value specifies the percentage of space held back
from normal users; the minimum free space threshold.
@ -145,6 +153,7 @@ or
.Li time .
.El
.Sh SEE ALSO
.Xr wapbl 4 ,
.Xr fs 5 ,
.Xr dumpfs 8 ,
.Xr fsck_ffs 8 ,

View File

@ -1,4 +1,4 @@
/* $NetBSD: tunefs.c,v 1.34 2008/07/20 01:20:23 lukem Exp $ */
/* $NetBSD: tunefs.c,v 1.35 2008/07/31 05:38:04 simonb Exp $ */
/*
* Copyright (c) 1983, 1993
@ -39,7 +39,7 @@ __COPYRIGHT("@(#) Copyright (c) 1983, 1993\
#if 0
static char sccsid[] = "@(#)tunefs.c 8.3 (Berkeley) 5/3/95";
#else
__RCSID("$NetBSD: tunefs.c,v 1.34 2008/07/20 01:20:23 lukem Exp $");
__RCSID("$NetBSD: tunefs.c,v 1.35 2008/07/31 05:38:04 simonb Exp $");
#endif
#endif /* not lint */
@ -48,9 +48,9 @@ __RCSID("$NetBSD: tunefs.c,v 1.34 2008/07/20 01:20:23 lukem Exp $");
*/
#include <sys/param.h>
#include <ufs/ufs/dinode.h>
#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
#include <ufs/ufs/ufs_wapbl.h>
#include <machine/bswap.h>
@ -85,15 +85,16 @@ static off_t sblock_try[] = SBLOCKSEARCH;
static void bwrite(daddr_t, char *, int, const char *);
static void bread(daddr_t, char *, int, const char *);
static int getnum(const char *, const char *, int, int);
static void change_log_info(long long);
static void getsb(struct fs *, const char *);
static int openpartition(const char *, int, char *, size_t);
static void show_log_info(void);
static void usage(void);
int
main(int argc, char *argv[])
{
#define OPTSTRINGBASE "AFNe:g:h:m:o:"
#define OPTSTRINGBASE "AFNe:g:h:l:m:o:"
#ifdef TUNEFS_SOFTDEP
int softdep;
#define OPTSTRING OPTSTRINGBASE ## "n:"
@ -105,10 +106,12 @@ main(int argc, char *argv[])
char device[MAXPATHLEN];
int maxbpg, minfree, optim;
int avgfilesize, avgfpdir;
long long logfilesize;
Aflag = Fflag = Nflag = 0;
maxbpg = minfree = optim = -1;
avgfilesize = avgfpdir = -1;
logfilesize = -1;
#ifdef TUNEFS_SOFTDEP
softdep = -1;
#endif
@ -131,25 +134,30 @@ main(int argc, char *argv[])
break;
case 'e':
maxbpg = getnum(optarg,
maxbpg = strsuftoll(
"maximum blocks per file in a cylinder group",
1, INT_MAX);
optarg, 1, INT_MAX);
break;
case 'g':
avgfilesize = getnum(optarg,
"average file size", 1, INT_MAX);
break;
case 'h':
avgfpdir = getnum(optarg,
"expected number of files per directory",
avgfilesize = strsuftoll("average file size", optarg,
1, INT_MAX);
break;
case 'h':
avgfpdir = strsuftoll(
"expected number of files per directory",
optarg, 1, INT_MAX);
break;
case 'l':
logfilesize = strsuftoll("journal log file size",
optarg, 0, INT_MAX);
break;
case 'm':
minfree = getnum(optarg,
"minimum percentage of free space", 0, 99);
minfree = strsuftoll("minimum percentage of free space",
optarg, 0, 99);
break;
#ifdef TUNEFS_SOFTDEP
@ -254,6 +262,9 @@ main(int argc, char *argv[])
CHANGEVAL(sblock.fs_avgfpdir, avgfpdir,
"expected number of files per directory", "");
if (logfilesize >= 0)
change_log_info(logfilesize);
if (Nflag) {
fprintf(stdout, "tunefs: current settings of %s\n", special);
fprintf(stdout, "\tmaximum contiguous block count %d\n",
@ -274,6 +285,7 @@ main(int argc, char *argv[])
fprintf(stdout,
"\texpected number of files per directory: %d\n",
sblock.fs_avgfpdir);
show_log_info();
fprintf(stdout, "tunefs: no changes made\n");
exit(0);
}
@ -290,20 +302,123 @@ main(int argc, char *argv[])
exit(0);
}
static int
getnum(const char *num, const char *desc, int min, int max)
static void
show_log_info(void)
{
long n;
char *ep;
const char *loc;
uint64_t size, blksize;
int print;
n = strtol(num, &ep, 10);
if (ep[0] != '\0')
errx(1, "Invalid number `%s' for %s", num, desc);
if ((int) n < min)
errx(1, "%s `%s' too small (minimum is %d)", desc, num, min);
if ((int) n > max)
errx(1, "%s `%s' too large (maximum is %d)", desc, num, max);
return ((int)n);
switch (sblock.fs_journal_location) {
case UFS_WAPBL_JOURNALLOC_NONE:
print = blksize = 0;
/* nothing */
break;
case UFS_WAPBL_JOURNALLOC_END_PARTITION:
loc = "end of partition";
size = sblock.fs_journallocs[UFS_WAPBL_EPART_COUNT];
blksize = sblock.fs_journallocs[UFS_WAPBL_EPART_BLKSZ];
print = 1;
break;
case UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM:
loc = "in filesystem";
size = sblock.fs_journallocs[UFS_WAPBL_INFS_COUNT];
blksize = sblock.fs_journallocs[UFS_WAPBL_INFS_BLKSZ];
print = 1;
break;
default:
loc = "unknown";
size = blksize = 0;
print = 1;
break;
}
if (print) {
fprintf(stdout, "\tjournal log file location: %s\n", loc);
fprintf(stdout, "\tjournal log file size: %" PRIu64 "\n",
size * blksize);
fprintf(stdout, "\tjournal log flags:");
if (sblock.fs_journal_flags & UFS_WAPBL_FLAGS_CREATE_LOG)
fprintf(stdout, " clear-log");
if (sblock.fs_journal_flags & UFS_WAPBL_FLAGS_CLEAR_LOG)
fprintf(stdout, " clear-log");
fprintf(stdout, "\n");
}
}
/*
 * Apply a "-l logfilesize" request to the in-core superblock.
 *
 * logfilesize is the requested size of the in-filesystem journal log;
 * 0 asks for the log to be removed.  The caller only invokes this with a
 * non-negative value.  Exits through errx(1, ...) when the request cannot
 * be honoured.
 */
static void
change_log_info(long long logfilesize)
{
	/*
	 * NOTES:
	 *  - only operate on in-filesystem log sizes
	 *  - can't change size of existing log
	 *  - if current is same, no action
	 *  - if current is zero and new is non-zero, set flag to create log
	 *    on next mount
	 *  - if current is non-zero and new is zero, set flag to clear log
	 *    on next mount
	 */
	int in_fs_log;
	uint64_t old_size;

	old_size = 0;
	switch (sblock.fs_journal_location) {
	case UFS_WAPBL_JOURNALLOC_END_PARTITION:
		in_fs_log = 0;
		old_size = sblock.fs_journallocs[UFS_WAPBL_EPART_COUNT] *
		    sblock.fs_journallocs[UFS_WAPBL_EPART_BLKSZ];
		break;

	case UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM:
		in_fs_log = 1;
		old_size = sblock.fs_journallocs[UFS_WAPBL_INFS_COUNT] *
		    sblock.fs_journallocs[UFS_WAPBL_INFS_BLKSZ];
		break;

	case UFS_WAPBL_JOURNALLOC_NONE:
	default:
		in_fs_log = 0;
		old_size = 0;
		break;
	}

	/*
	 * Refuse only when a log exists somewhere other than in the file
	 * system.  A file system with no log at all (old_size == 0) must be
	 * allowed through, otherwise the "create log on next mount" case
	 * below could never be reached for it.
	 */
	if (!in_fs_log && old_size > 0)
		errx(1, "Can't change size of non-in-filesystem log");

	if (old_size == (uint64_t)logfilesize && logfilesize > 0) {
		/* no action */
		warnx("log file size remains unchanged at %lld", logfilesize);
		return;
	}

	if (logfilesize == 0) {
		/*
		 * Don't clear out the locators - the kernel might need
		 * these to find the log!  Just set the "clear the log"
		 * flag and let the kernel do the rest.
		 */
		sblock.fs_journal_flags |= UFS_WAPBL_FLAGS_CLEAR_LOG;
		sblock.fs_journal_flags &= ~UFS_WAPBL_FLAGS_CREATE_LOG;
		warnx("log file size cleared from %" PRIu64 "", old_size);
		return;
	}

	if (old_size == 0) {
		/* create new log of desired size next mount */
		sblock.fs_journal_location = UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM;
		sblock.fs_journallocs[UFS_WAPBL_INFS_ADDR] = 0;
		sblock.fs_journallocs[UFS_WAPBL_INFS_COUNT] = logfilesize;
		sblock.fs_journallocs[UFS_WAPBL_INFS_BLKSZ] = 0;
		sblock.fs_journallocs[UFS_WAPBL_INFS_INO] = 0;
		sblock.fs_journal_flags |= UFS_WAPBL_FLAGS_CREATE_LOG;
		sblock.fs_journal_flags &= ~UFS_WAPBL_FLAGS_CLEAR_LOG;
		warnx("log file size set to %lld", logfilesize);
	} else {
		errx(1,
		    "Can't change existing log size from %" PRIu64 " to %lld",
		    old_size, logfilesize);
	}
}
static void
@ -315,6 +430,7 @@ usage(void)
fprintf(stderr, "\t-e maximum blocks per file in a cylinder group\n");
fprintf(stderr, "\t-g average file size\n");
fprintf(stderr, "\t-h expected number of files per directory\n");
fprintf(stderr, "\t-l journal log file size (`0' to clear journal)\n");
fprintf(stderr, "\t-m minimum percentage of free space\n");
#ifdef TUNEFS_SOFTDEP
fprintf(stderr, "\t-n soft dependencies (`enable' or `disable')\n");

View File

@ -1,4 +1,4 @@
# $NetBSD: files,v 1.910 2008/07/16 20:06:19 pooka Exp $
# $NetBSD: files,v 1.911 2008/07/31 05:38:04 simonb Exp $
# @(#)files.newconf 7.5 (Berkeley) 5/10/93
@ -110,6 +110,10 @@ defflag opt_fileassoc.h FILEASSOC
defflag opt_gre.h GRE_DEBUG
# Write Ahead Physical Block Logging
defflag opt_wapbl.h WAPBL WAPBL_DEBUG
defparam opt_wapbl.h WAPBL_DEBUG_PRINT
# compatibility options
#
defflag opt_compat_netbsd.h COMPAT_40
@ -1475,6 +1479,7 @@ file kern/vfs_subr.c
file kern/vfs_syscalls.c
file kern/vfs_trans.c
file kern/vfs_vnops.c
file kern/vfs_wapbl.c wapbl
file kern/vfs_xattr.c
file kern/vnode_if.c
file miscfs/deadfs/dead_vnops.c

View File

@ -1,4 +1,4 @@
/* $NetBSD: init_main.c,v 1.360 2008/06/18 09:06:27 yamt Exp $ */
/* $NetBSD: init_main.c,v 1.361 2008/07/31 05:38:05 simonb Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
@ -97,7 +97,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.360 2008/06/18 09:06:27 yamt Exp $");
__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.361 2008/07/31 05:38:05 simonb Exp $");
#include "opt_ipsec.h"
#include "opt_ntp.h"
@ -108,6 +108,7 @@ __KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.360 2008/06/18 09:06:27 yamt Exp $")
#include "opt_fileassoc.h"
#include "opt_ktrace.h"
#include "opt_pax.h"
#include "opt_wapbl.h"
#include "rnd.h"
#include "sysmon_envsys.h"
@ -192,6 +193,9 @@ __KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.360 2008/06/18 09:06:27 yamt Exp $")
#include <sys/ktrace.h>
#endif
#include <sys/kauth.h>
#ifdef WAPBL
#include <sys/wapbl.h>
#endif
#include <net80211/ieee80211_netbsd.h>
#include <sys/syscall.h>
@ -570,6 +574,11 @@ main(void)
/* Initialize the UUID system calls. */
uuid_init();
#ifdef WAPBL
/* Initialize write-ahead physical block logging. */
wapbl_init();
#endif
/*
* Create process 1 (init(8)). We do this now, as Unix has
* historically had init be process 1, and changing this would

View File

@ -1,4 +1,4 @@
/* $NetBSD: vfs_bio.c,v 1.207 2008/07/14 16:22:42 hannken Exp $ */
/* $NetBSD: vfs_bio.c,v 1.208 2008/07/31 05:38:05 simonb Exp $ */
/*-
* Copyright (c) 2007, 2008 The NetBSD Foundation, Inc.
@ -6,6 +6,8 @@
*
* This code is derived from software contributed to The NetBSD Foundation
* by Andrew Doran.
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -107,7 +109,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.207 2008/07/14 16:22:42 hannken Exp $");
__KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.208 2008/07/31 05:38:05 simonb Exp $");
#include "fs_ffs.h"
#include "opt_bufcache.h"
@ -126,6 +128,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.207 2008/07/14 16:22:42 hannken Exp $"
#include <sys/fstrans.h>
#include <sys/intr.h>
#include <sys/cpu.h>
#include <sys/wapbl.h>
#include <uvm/uvm.h>
@ -714,8 +717,23 @@ bread(struct vnode *vp, daddr_t blkno, int size, kauth_cred_t cred,
/* Wait for the read to complete, and return result. */
error = biowait(bp);
if (error == 0 && (flags & B_MODIFY) != 0)
if (error == 0 && (flags & B_MODIFY) != 0) /* XXXX before the next code block or after? */
error = fscow_run(bp, true);
if (!error) {
struct mount *mp = wapbl_vptomp(vp);
if (mp && mp->mnt_wapbl_replay &&
WAPBL_REPLAY_ISOPEN(mp)) {
error = WAPBL_REPLAY_READ(mp, bp->b_data, bp->b_blkno,
bp->b_bcount);
if (error) {
mutex_enter(&bufcache_lock);
SET(bp->b_cflags, BC_INVAL);
mutex_exit(&bufcache_lock);
}
}
}
return error;
}
@ -793,6 +811,13 @@ bwrite(buf_t *bp)
mp = NULL;
}
if (mp && mp->mnt_wapbl) {
if (bp->b_iodone != mp->mnt_wapbl_op->wo_wapbl_biodone) {
bdwrite(bp);
return 0;
}
}
/*
* Remember buffer type, to switch on it later. If the write was
* synchronous, but the file system was mounted with MNT_ASYNC,
@ -897,6 +922,14 @@ bdwrite(buf_t *bp)
return;
}
if (wapbl_vphaswapbl(bp->b_vp)) {
struct mount *mp = wapbl_vptomp(bp->b_vp);
if (bp->b_iodone != mp->mnt_wapbl_op->wo_wapbl_biodone) {
WAPBL_ADD_BUF(mp, bp);
}
}
/*
* If the block hasn't been seen before:
* (1) Mark it as having been seen,
@ -1028,6 +1061,16 @@ brelsel(buf_t *bp, int set)
if (bioopsp != NULL)
(*bioopsp->io_deallocate)(bp);
if (ISSET(bp->b_flags, B_LOCKED)) {
if (wapbl_vphaswapbl(vp = bp->b_vp)) {
struct mount *mp = wapbl_vptomp(vp);
KASSERT(bp->b_iodone
!= mp->mnt_wapbl_op->wo_wapbl_biodone);
WAPBL_REMOVE_BUF(mp, bp);
}
}
mutex_enter(bp->b_objlock);
CLR(bp->b_oflags, BO_DONE|BO_DELWRI);
if ((vp = bp->b_vp) != NULL) {
@ -1224,19 +1267,22 @@ geteblk(int size)
int
allocbuf(buf_t *bp, int size, int preserve)
{
vsize_t oldsize, desired_size;
void *addr;
vsize_t oldsize, desired_size;
int oldcount;
int delta;
desired_size = buf_roundsize(size);
if (desired_size > MAXBSIZE)
printf("allocbuf: buffer larger than MAXBSIZE requested");
oldcount = bp->b_bcount;
bp->b_bcount = size;
oldsize = bp->b_bufsize;
if (oldsize == desired_size)
return 0;
goto out;
/*
* If we want a buffer of a different size, re-allocate the
@ -1274,6 +1320,11 @@ allocbuf(buf_t *bp, int size, int preserve)
}
}
mutex_exit(&bufcache_lock);
out:
if (wapbl_vphaswapbl(bp->b_vp))
WAPBL_RESIZE_BUF(wapbl_vptomp(bp->b_vp), bp, oldsize, oldcount);
return 0;
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: vfs_lookup.c,v 1.108 2008/05/06 18:43:44 ad Exp $ */
/* $NetBSD: vfs_lookup.c,v 1.109 2008/07/31 05:38:05 simonb Exp $ */
/*
* Copyright (c) 1982, 1986, 1989, 1993
@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.108 2008/05/06 18:43:44 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.109 2008/07/31 05:38:05 simonb Exp $");
#include "opt_magiclinks.h"
@ -956,8 +956,10 @@ relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
if (cnp->cn_nameptr[0] == '\0')
panic("relookup: null name");
#ifdef ohcrap
if (cnp->cn_flags & ISDOTDOT)
panic("relookup: lookup on dot-dot");
#endif
/*
* We now have a segment name to search for, and a directory to search.

View File

@ -1,4 +1,4 @@
/* $NetBSD: vfs_subr.c,v 1.354 2008/07/27 15:08:37 pooka Exp $ */
/* $NetBSD: vfs_subr.c,v 1.355 2008/07/31 05:38:05 simonb Exp $ */
/*-
* Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc.
@ -81,7 +81,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.354 2008/07/27 15:08:37 pooka Exp $");
__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.355 2008/07/31 05:38:05 simonb Exp $");
#include "opt_ddb.h"
#include "opt_compat_netbsd.h"
@ -106,6 +106,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.354 2008/07/27 15:08:37 pooka Exp $")
#include <sys/kauth.h>
#include <sys/atomic.h>
#include <sys/kthread.h>
#include <sys/wapbl.h>
#include <miscfs/specfs/specdev.h>
#include <miscfs/syncfs/syncfs.h>
@ -1804,8 +1805,13 @@ vclean(vnode_t *vp, int flags)
*/
if (flags & DOCLOSE) {
error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
if (error != 0)
if (error != 0) {
/* XXX, fix vn_start_write's grab of mp and use that. */
if (wapbl_vphaswapbl(vp))
WAPBL_DISCARD(wapbl_vptomp(vp));
error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
}
KASSERT(error == 0);
KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {

View File

@ -1,4 +1,4 @@
/* $NetBSD: vfs_syscalls.c,v 1.369 2008/06/24 11:21:46 ad Exp $ */
/* $NetBSD: vfs_syscalls.c,v 1.370 2008/07/31 05:38:05 simonb Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
@ -63,7 +63,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.369 2008/06/24 11:21:46 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.370 2008/07/31 05:38:05 simonb Exp $");
#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"
@ -208,12 +208,13 @@ mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
mp->mnt_flag &=
~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP);
MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
MNT_LOG);
mp->mnt_flag |= flags &
(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
MNT_IGNORE);
MNT_LOG | MNT_IGNORE);
error = VFS_MOUNT(mp, path, data, data_len);
@ -367,7 +368,7 @@ mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops,
(MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
MNT_IGNORE | MNT_RDONLY);
MNT_LOG | MNT_IGNORE | MNT_RDONLY);
error = VFS_MOUNT(mp, path, data, data_len);
mp->mnt_flag &= ~MNT_OP_FLAGS;

View File

@ -1,4 +1,4 @@
/* $NetBSD: vfs_vnops.c,v 1.158 2008/06/02 16:08:41 ad Exp $ */
/* $NetBSD: vfs_vnops.c,v 1.159 2008/07/31 05:38:05 simonb Exp $ */
/*
* Copyright (c) 1982, 1986, 1989, 1993
@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.158 2008/06/02 16:08:41 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.159 2008/07/31 05:38:05 simonb Exp $");
#include "fs_union.h"
#include "veriexec.h"
@ -61,6 +61,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.158 2008/06/02 16:08:41 ad Exp $");
#include <sys/fstrans.h>
#include <sys/atomic.h>
#include <sys/filedesc.h>
#include <sys/wapbl.h>
#include <miscfs/specfs/specdev.h>
@ -692,6 +693,11 @@ vn_lock(struct vnode *vp, int flags)
LK_CANRECURSE))
== 0);
#ifdef DIAGNOSTIC
if (wapbl_vphaswapbl(vp))
WAPBL_JUNLOCK_ASSERT(wapbl_vptomp(vp));
#endif
do {
/*
* XXX PR 37706 forced unmount of file systems is unsafe.

2783
sys/kern/vfs_wapbl.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
/* $NetBSD: vnode_if.c,v 1.76 2008/01/25 14:32:46 ad Exp $ */
/* $NetBSD: vnode_if.c,v 1.77 2008/07/31 05:38:05 simonb Exp $ */
/*
* Warning: DO NOT EDIT! This file is automatically generated!
@ -40,7 +40,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vnode_if.c,v 1.76 2008/01/25 14:32:46 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: vnode_if.c,v 1.77 2008/07/31 05:38:05 simonb Exp $");
#include "opt_vnode_lockdebug.h"
@ -802,6 +802,7 @@ VOP_FSYNC(struct vnode *vp,
mpsafe = (vp->v_vflag & VV_MPSAFE);
if (!mpsafe) { KERNEL_LOCK(1, curlwp); }
error = (VCALL(vp, VOFFSET(vop_fsync), &a));
if (!mpsafe) { KERNEL_UNLOCK_ONE(curlwp); }
return error;
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: genfs_io.c,v 1.8 2008/06/04 12:41:40 ad Exp $ */
/* $NetBSD: genfs_io.c,v 1.9 2008/07/31 05:38:05 simonb Exp $ */
/*
* Copyright (c) 1982, 1986, 1989, 1993
@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.8 2008/06/04 12:41:40 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.9 2008/07/31 05:38:05 simonb Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -589,8 +589,22 @@ loopdone:
*/
if (!error && sawhole && blockalloc) {
error = GOP_ALLOC(vp, startoffset, npages << PAGE_SHIFT, 0,
cred);
/*
* XXX: This assumes that we come here only via
* the mmio path
*/
if (vp->v_mount->mnt_wapbl && write) {
error = WAPBL_BEGIN(vp->v_mount);
}
if (!error) {
error = GOP_ALLOC(vp, startoffset,
npages << PAGE_SHIFT, 0, cred);
if (vp->v_mount->mnt_wapbl && write) {
WAPBL_END(vp->v_mount);
}
}
UVMHIST_LOG(ubchist, "gop_alloc off 0x%x/0x%x -> %d",
startoffset, npages << PAGE_SHIFT, error,0);
if (!error) {

View File

@ -1,4 +1,4 @@
# $NetBSD: Makefile,v 1.4 2008/07/29 13:17:42 pooka Exp $
# $NetBSD: Makefile,v 1.5 2008/07/31 05:38:05 simonb Exp $
#
.PATH: ${.CURDIR}/../../../../ufs/ffs
@ -7,9 +7,9 @@ LIB= rumpfs_ffs
SRCS= ffs_alloc.c ffs_balloc.c ffs_bswap.c ffs_inode.c \
ffs_softdep.stub.c ffs_subr.c ffs_tables.c ffs_vfsops.c \
ffs_vnops.c ffs_snapshot.c
ffs_vnops.c ffs_snapshot.c ffs_wapbl.c
CPPFLAGS+= -DFFS_NO_SNAPSHOT -DFFS_EI
CPPFLAGS+= -DFFS_NO_SNAPSHOT -DFFS_EI -DWAPBL
CFLAGS+= -Wno-pointer-sign
.include <bsd.lib.mk>

View File

@ -1,4 +1,4 @@
# $NetBSD: Makefile,v 1.5 2008/07/29 13:17:47 pooka Exp $
# $NetBSD: Makefile,v 1.6 2008/07/31 05:38:05 simonb Exp $
#
.PATH: ${.CURDIR}/../../../../ufs/ufs
@ -6,9 +6,9 @@
LIB= rumpfs_ufs
SRCS= ufs_bmap.c ufs_dirhash.c ufs_ihash.c ufs_inode.c ufs_lookup.c \
ufs_vfsops.c ufs_vnops.c
ufs_vfsops.c ufs_vnops.c ufs_wapbl.c
CPPFLAGS+= -DUFS_DIRHASH -DFFS_EI
CPPFLAGS+= -DUFS_DIRHASH -DFFS_EI -DWAPBL
.include <bsd.lib.mk>
.include <bsd.klinks.mk>

View File

@ -1,4 +1,4 @@
# $NetBSD: Makefile.rumpkern,v 1.2 2008/07/30 01:32:47 oster Exp $
# $NetBSD: Makefile.rumpkern,v 1.3 2008/07/31 05:38:05 simonb Exp $
#
.include "${RUMPTOP}/Makefile.rump"
@ -26,7 +26,7 @@ SRCS+= clock_subr.c kern_descrip.c kern_stub.c param.c \
subr_bufq.c subr_hash.c subr_prf2.c subr_specificdata.c \
subr_time.c subr_workqueue.c sys_descrip.c sys_generic.c vfs_bio.c \
vfs_cache.c vfs_getcwd.c vfs_hooks.c vfs_init.c vfs_lookup.c \
vfs_subr.c vfs_vnops.c vfs_syscalls.c vnode_if.c \
vfs_subr.c vfs_vnops.c vfs_syscalls.c vfs_wapbl.c vnode_if.c \
subr_kobj.c kern_module.c
# sys/miscfs

View File

@ -1,4 +1,4 @@
/* $NetBSD: rump.c,v 1.48 2008/07/29 13:17:47 pooka Exp $ */
/* $NetBSD: rump.c,v 1.49 2008/07/31 05:38:05 simonb Exp $ */
/*
* Copyright (c) 2007 Antti Kantee. All Rights Reserved.
@ -32,6 +32,7 @@
#include <sys/filedesc.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/queue.h>
@ -39,7 +40,7 @@
#include <sys/select.h>
#include <sys/vnode.h>
#include <sys/vfs_syscalls.h>
#include <sys/module.h>
#include <sys/wapbl.h>
#include <miscfs/specfs/specdev.h>
@ -135,6 +136,7 @@ rump_init()
module_init();
vfsinit();
bufinit();
wapbl_init();
rumpvfs_init();

View File

@ -1,4 +1,4 @@
# $NetBSD: Makefile,v 1.109 2008/06/04 14:31:15 ad Exp $
# $NetBSD: Makefile,v 1.110 2008/07/31 05:38:05 simonb Exp $
.include <bsd.sys.mk>
@ -19,12 +19,13 @@ INCS= acct.h agpio.h aio.h ansi.h ataio.h atomic.h audioio.h \
joystick.h \
kcore.h kgdb.h kmem.h ksem.h ksyms.h ktrace.h \
lkm.h localedef.h lock.h lockf.h lwp.h lwpctl.h \
malloc.h mallocvar.h mbuf.h md4.h \
md5.h midiio.h mman.h module.h mount.h mqueue.h msg.h msgbuf.h mtio.h mutex.h \
malloc.h mallocvar.h mbuf.h md4.h md5.h midiio.h \
mman.h module.h mount.h mqueue.h msg.h msgbuf.h mtio.h mutex.h \
namei.h null.h \
param.h pipe.h pmc.h poll.h pool.h power.h proc.h \
protosw.h pset.h ptrace.h queue.h \
ras.h rb.h reboot.h radioio.h resource.h resourcevar.h rmd160.h rnd.h rwlock.h \
ras.h rb.h reboot.h radioio.h resource.h resourcevar.h rmd160.h \
rnd.h rwlock.h \
scanio.h sched.h scsiio.h select.h selinfo.h sem.h sha1.h sha2.h \
shm.h siginfo.h signal.h signalvar.h sigtypes.h simplelock.h \
sleepq.h socket.h \
@ -36,7 +37,7 @@ INCS= acct.h agpio.h aio.h ansi.h ataio.h atomic.h audioio.h \
ttydefaults.h ttydev.h types.h \
ucontext.h ucred.h uio.h un.h unistd.h unpcb.h user.h utsname.h uuid.h \
vadvise.h verified_exec.h vmmeter.h vnode.h vnode_if.h \
wait.h wdog.h
wait.h wapbl.h wdog.h
INCSYMLINKS=\
sys/exec_elf.h /usr/include/elf.h \

View File

@ -1,4 +1,4 @@
/* $NetBSD: buf.h,v 1.109 2008/06/09 15:42:01 ad Exp $ */
/* $NetBSD: buf.h,v 1.110 2008/07/31 05:38:05 simonb Exp $ */
/*-
* Copyright (c) 1999, 2000, 2007 The NetBSD Foundation, Inc.
@ -162,6 +162,7 @@ struct buf {
LIST_ENTRY(buf) b_hash; /* c: hash chain */
LIST_ENTRY(buf) b_vnbufs; /* c: associated vnode */
TAILQ_ENTRY(buf) b_freelist; /* c: position if not active */
LIST_ENTRY(buf) b_wapbllist; /* c: transaction buffer list */
daddr_t b_lblkno; /* c: logical block number */
int b_freelistindex;/* c: free list index (BQ_) */
u_int b_cflags; /* c: BC_* flags */
@ -244,6 +245,7 @@ do { \
#define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */
#define B_SYNC 0x02 /* Do all allocations synchronously. */
#define B_METAONLY 0x04 /* Return indirect block buffer. */
#define B_CONTIG 0x08 /* Allocate file contiguously. */
/* Flags to bread(), breadn() and breada(). */
#define B_MODIFY 0x01 /* Hint: caller might modify buffer */

View File

@ -1,4 +1,4 @@
/* $NetBSD: fstypes.h,v 1.23 2008/05/06 18:43:45 ad Exp $ */
/* $NetBSD: fstypes.h,v 1.24 2008/07/31 05:38:05 simonb Exp $ */
/*
* Copyright (c) 1989, 1991, 1993
@ -87,7 +87,6 @@ typedef struct fhandle fhandle_t;
#define __MNT_UNUSED2 0x00200000
#define __MNT_UNUSED3 0x00800000
#define __MNT_UNUSED4 0x01000000
#define __MNT_UNUSED5 0x02000000
#define MNT_RDONLY 0x00000001 /* read only filesystem */
#define MNT_SYNCHRONOUS 0x00000002 /* file system written synchronously */
@ -98,6 +97,7 @@ typedef struct fhandle fhandle_t;
#define MNT_ASYNC 0x00000040 /* file system written asynchronously */
#define MNT_NOCOREDUMP 0x00008000 /* don't write core dumps to this FS */
#define MNT_IGNORE 0x00100000 /* don't show entry in df */
#define MNT_LOG 0x02000000 /* Use logging */
#define MNT_NOATIME 0x04000000 /* Never update access times in fs */
#define MNT_SYMPERM 0x20000000 /* recognize symlink permission */
#define MNT_NODEVMTIME 0x40000000 /* Never update mod times for devs */
@ -116,7 +116,8 @@ typedef struct fhandle fhandle_t;
{ MNT_NOATIME, 0, "noatime" }, \
{ MNT_SYMPERM, 0, "symperm" }, \
{ MNT_NODEVMTIME, 0, "nodevmtime" }, \
{ MNT_SOFTDEP, 0, "soft dependencies" },
{ MNT_SOFTDEP, 0, "soft dependencies" }, \
{ MNT_LOG, 0, "log" },
/*
* exported mount flags.
@ -176,7 +177,8 @@ typedef struct fhandle fhandle_t;
MNT_EXPUBLIC | \
MNT_LOCAL | \
MNT_QUOTA | \
MNT_ROOTFS)
MNT_ROOTFS | \
MNT_LOG)
/*
* External filesystem control flags.
@ -223,7 +225,7 @@ typedef struct fhandle fhandle_t;
"\35MNT_EXPUBLIC" \
"\34MNT_EXNORESPORT" \
"\33MNT_NOATIME" \
"\32MNT_UNUSED" \
"\32MNT_LOG" \
"\31MNT_UNUSED" \
"\30MNT_UNUSED" \
"\27MNT_GETARGS" \

View File

@ -1,4 +1,4 @@
/* $NetBSD: mount.h,v 1.180 2008/07/30 18:10:38 pooka Exp $ */
/* $NetBSD: mount.h,v 1.181 2008/07/31 05:38:05 simonb Exp $ */
/*
* Copyright (c) 1989, 1991, 1993
@ -121,6 +121,11 @@ struct mount {
specificdata_reference
mnt_specdataref; /* subsystem specific data */
kmutex_t mnt_updating; /* to serialize updates */
struct wapbl_ops
*mnt_wapbl_op; /* logging ops */
struct wapbl *mnt_wapbl; /* log info */
struct wapbl_replay
*mnt_wapbl_replay; /* replay support XXX: what? */
};
/*
@ -278,6 +283,45 @@ int fsname##_extattrctl(struct mount *, int, struct vnode *, int, \
const char *); \
int fsname##_suspendctl(struct mount *, int)
/*
* This operations vector is so wapbl can be wrapped into a filesystem lkm.
* XXX Eventually, we want to move this functionality
* down into the filesystems themselves so that this isn't needed.
*
* Each hook has the same prototype as a wapbl entry point declared in
* <sys/wapbl.h> (wapbl_discard(), wapbl_begin(), ...).  The WAPBL_*()
* macros call through this vector and supply the mount's mnt_wapbl or
* mnt_wapbl_replay pointer as the first argument.
*/
struct wapbl_ops {
/* Discard the current transaction. */
void (*wo_wapbl_discard)(struct wapbl *);
/* Replay queries; these take the replay state, not the live log. */
int (*wo_wapbl_replay_isopen)(struct wapbl_replay *);
int (*wo_wapbl_replay_read)(struct wapbl_replay *, void *, daddr_t, long);
/* Add a buffer to / remove a buffer from the current transaction. */
void (*wo_wapbl_add_buf)(struct wapbl *, struct buf *);
void (*wo_wapbl_remove_buf)(struct wapbl *, struct buf *);
/* Same prototype as wapbl_resize_buf(). */
void (*wo_wapbl_resize_buf)(struct wapbl *, struct buf *, long, long);
/* Begin a transaction; the trailing arguments are caller file and line. */
int (*wo_wapbl_begin)(struct wapbl *, const char *, int);
/* End a transaction. */
void (*wo_wapbl_end)(struct wapbl *);
/* Same prototype as wapbl_junlock_assert(). */
void (*wo_wapbl_junlock_assert)(struct wapbl *);
/* Same prototype as wapbl_biodone(); takes only the buffer. */
void (*wo_wapbl_biodone)(struct buf *);
};
/*
* Dispatch wrappers around the mount's wapbl_ops vector.
*
* Each macro calls the matching hook in (MP)->mnt_wapbl_op, passing
* (MP)->mnt_wapbl (or (MP)->mnt_wapbl_replay for the WAPBL_REPLAY_*
* forms) as the first argument.  WAPBL_BEGIN additionally passes the
* __FILE__ and __LINE__ of the call site.
*
* MP is evaluated more than once, so it must not have side effects.
* The macros dereference mnt_wapbl_op unconditionally.
* NOTE(review): callers are presumably expected to check that logging
* is set up on the mount before using these -- confirm.
*/
#define WAPBL_DISCARD(MP) \
(*(MP)->mnt_wapbl_op->wo_wapbl_discard)((MP)->mnt_wapbl)
#define WAPBL_REPLAY_ISOPEN(MP) \
(*(MP)->mnt_wapbl_op->wo_wapbl_replay_isopen)((MP)->mnt_wapbl_replay)
#define WAPBL_REPLAY_READ(MP, DATA, BLK, LEN) \
(*(MP)->mnt_wapbl_op->wo_wapbl_replay_read)((MP)->mnt_wapbl_replay, \
(DATA), (BLK), (LEN))
#define WAPBL_ADD_BUF(MP, BP) \
(*(MP)->mnt_wapbl_op->wo_wapbl_add_buf)((MP)->mnt_wapbl, (BP))
#define WAPBL_REMOVE_BUF(MP, BP) \
(*(MP)->mnt_wapbl_op->wo_wapbl_remove_buf)((MP)->mnt_wapbl, (BP))
#define WAPBL_RESIZE_BUF(MP, BP, OLDSZ, OLDCNT) \
(*(MP)->mnt_wapbl_op->wo_wapbl_resize_buf)((MP)->mnt_wapbl, (BP), \
(OLDSZ), (OLDCNT))
#define WAPBL_BEGIN(MP) \
(*(MP)->mnt_wapbl_op->wo_wapbl_begin)((MP)->mnt_wapbl, \
__FILE__, __LINE__)
#define WAPBL_END(MP) \
(*(MP)->mnt_wapbl_op->wo_wapbl_end)((MP)->mnt_wapbl)
#define WAPBL_JUNLOCK_ASSERT(MP) \
(*(MP)->mnt_wapbl_op->wo_wapbl_junlock_assert)((MP)->mnt_wapbl)
struct vfs_hooks {
void (*vh_unmount)(struct mount *);
LIST_ENTRY(vfs_hooks) vfs_hooks_list;

View File

@ -1,4 +1,4 @@
/* $NetBSD: stat.h,v 1.56 2007/10/19 15:58:52 christos Exp $ */
/* $NetBSD: stat.h,v 1.57 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 1982, 1986, 1989, 1993
@ -214,6 +214,7 @@ struct stat {
#define SF_APPEND 0x00040000 /* writes to file may only append */
/* SF_NOUNLINK 0x00100000 [NOT IMPLEMENTED] */
#define SF_SNAPSHOT 0x00200000 /* snapshot inode */
#define SF_LOG 0x00400000 /* WAPBL log file inode */
#ifdef _KERNEL
/*

View File

@ -1,4 +1,4 @@
/* $NetBSD: statvfs.h,v 1.14 2008/04/28 20:24:11 martin Exp $ */
/* $NetBSD: statvfs.h,v 1.15 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 2004 The NetBSD Foundation, Inc.
@ -118,6 +118,7 @@ struct statvfs {
#define ST_SYMPERM MNT_SYMPERM
#define ST_NODEVMTIME MNT_NODEVMTIME
#define ST_SOFTDEP MNT_SOFTDEP
#define ST_LOG MNT_LOG
#define ST_EXRDONLY MNT_EXRDONLY
#define ST_EXPORTED MNT_EXPORTED

View File

@ -1,4 +1,4 @@
/* $NetBSD: vnode.h,v 1.196 2008/06/24 11:21:46 ad Exp $ */
/* $NetBSD: vnode.h,v 1.197 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
@ -296,6 +296,7 @@ struct vattr {
#define IO_NORMAL 0x00800 /* operate on regular data */
#define IO_EXT 0x01000 /* operate on extended attributes */
#define IO_DIRECT 0x02000 /* direct I/O hint */
#define IO_JOURNALLOCKED 0x04000 /* journal is already locked */
#define IO_ADV_MASK 0x00003 /* access pattern hint */
#define IO_ADV_SHIFT 0
@ -342,6 +343,7 @@ extern const int vttoif_tab[];
#define FSYNC_DATAONLY 0x0002 /* fsync: hint: sync file data only */
#define FSYNC_RECLAIM 0x0004 /* fsync: hint: vnode is being reclaimed */
#define FSYNC_LAZY 0x0008 /* fsync: lazy sync (trickle) */
#define FSYNC_NOLOG 0x0010 /* fsync: do not flush the log */
#define FSYNC_CACHE 0x0100 /* fsync: flush disk caches too */
#define FSYNC_VFS 0x0200 /* fsync: via FSYNC_VFS() */

381
sys/sys/wapbl.h Normal file
View File

@ -0,0 +1,381 @@
/* $NetBSD: wapbl.h,v 1.2 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 2003,2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _SYS_WAPBL_H
#define _SYS_WAPBL_H
#include <sys/mutex.h>
#include <miscfs/specfs/specdev.h>
/* This header file describes the api and data structures for
* write ahead physical block logging (WAPBL) support.
*/
#if defined(_KERNEL_OPT)
#include "opt_wapbl.h"
#endif
#ifdef WAPBL_DEBUG
#ifndef WAPBL_DEBUG_PRINT
#define WAPBL_DEBUG_PRINT (WAPBL_PRINT_REPLAY | WAPBL_PRINT_OPEN)
#endif
#if 0
#define WAPBL_DEBUG_BUFBYTES
#define WAPBL_DEBUG_SERIALIZE
#endif
#endif
#ifdef WAPBL_DEBUG_PRINT
/*
 * Debug message classes.  Each value is a single bit; the set of
 * enabled classes is the bitwise OR held in wapbl_debug_print.
 */
enum {
	WAPBL_PRINT_OPEN = 0x1,
	WAPBL_PRINT_FLUSH = 0x2,
	WAPBL_PRINT_TRUNCATE = 0x4,
	WAPBL_PRINT_TRANSACTION = 0x8,
	WAPBL_PRINT_BUFFER = 0x10,
	WAPBL_PRINT_BUFFER2 = 0x20,
	WAPBL_PRINT_ALLOC = 0x40,
	WAPBL_PRINT_INODE = 0x80,
	WAPBL_PRINT_WRITE = 0x100,
	WAPBL_PRINT_IO = 0x200,
	WAPBL_PRINT_REPLAY = 0x400,
	WAPBL_PRINT_ERROR = 0x800,
	WAPBL_PRINT_DISCARD = 0x1000,
	WAPBL_PRINT_BIODONE = 0x2000,
};

/*
 * WAPBL_PRINTF(mask, a): print a debug message if any class in `mask'
 * is enabled in wapbl_debug_print.  `a' is a complete, parenthesized
 * printf argument list, e.g.
 *
 *	WAPBL_PRINTF(WAPBL_PRINT_OPEN, ("opened %d\n", n));
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a
 * single statement: a bare `if' here would capture the `else' of an
 * enclosing if/else at the call site.
 */
#define WAPBL_PRINTF(mask, a)					\
	do {							\
		if (wapbl_debug_print & (mask))			\
			printf a;				\
	} while (/*CONSTCOND*/0)
extern int wapbl_debug_print;
#else
/* Debug printing compiled out; still expands to exactly one statement. */
#define WAPBL_PRINTF(mask, a)	do { } while (/*CONSTCOND*/0)
#endif
/****************************************************************/
/* The WAPBL journal layout.
*
* The journal consists of a header followed by a circular buffer
* region. The circular data area is described by the header
* wc_circ_off, wc_circ_size, wc_head and wc_tail fields as bytes
* from the start of the journal header. New records are inserted
* at wc_head and the oldest valid record can be found at wc_tail.
* When ((wc_head == wc_tail) && (wc_head == 0)), the journal is empty.
* The condition of ((wc_head == wc_tail) && (wc_head != 0))
* indicates a full journal, although this condition is rare.
*
* The journal header as well as its records are marked by a 32bit
* type tag and length for ease of parsing. Journal records are
* padded so as to fall on journal device block boundaries.
* (XXX i think there is currently a bug wrt WC_BLOCKS not ending
* correctly on a journal device block boundary. this would need
* to be fixed if the journal blocksize does not match filesystem.)
*/
/*
* The following are the 4 record types used by the journal:
* Each tag indicates journal data organized by one of the
* structures used below.
*
* Only the first tag has an explicit value; the rest follow it
* consecutively (0x5741424d, 0x5741424e, 0x5741424f).  These values
* are stored in the on-disk wc_type field.
*/
enum {
WAPBL_WC_HEADER = 0x5741424c, /* "WABL", struct wapbl_wc_header */
WAPBL_WC_INODES, /* struct wapbl_wc_inodelist */
WAPBL_WC_REVOCATIONS, /* struct wapbl_wc_blocklist */
WAPBL_WC_BLOCKS, /* struct wapbl_wc_blocklist */
};
/* null entry (on disk) */
/* This structure isn't used directly, but shares its header
* layout with all the other log structures for the purpose
* of reading a log structure and determining its type
*/
struct wapbl_wc_null {
uint32_t wc_type; /* WAPBL_WC_* */
int32_t wc_len; /* record length; same slot as wc_len in the other records */
uint8_t wc_spare[0]; /* actually longer */
};
/* journal header (on-disk)
* This record is found at the start of the
* journal, but not within the circular buffer region. As well as
* describing the journal parameters and matching filesystem, it
* additionally serves as the atomic update record for journal
* updates.
*
* wc_head, wc_tail, wc_circ_off and wc_circ_size are expressed as
* bytes from the start of the journal header.
*/
struct wapbl_wc_header {
uint32_t wc_type; /* WAPBL_WC_HEADER log magic number */
int32_t wc_len; /* length of this journal entry */
uint32_t wc_checksum;
uint32_t wc_generation;
int32_t wc_fsid[2];
uint64_t wc_time;
uint32_t wc_timensec;
uint32_t wc_version;
/* NOTE(review): presumably log2 of the log / fs device block sizes -- confirm */
uint32_t wc_log_dev_bshift;
uint32_t wc_fs_dev_bshift;
int64_t wc_head; /* new records are inserted here */
int64_t wc_tail; /* oldest valid record is found here */
int64_t wc_circ_off; /* offset of circ buffer region */
int64_t wc_circ_size; /* size of circular buffer region */
uint8_t wc_spare[0]; /* actually longer */
};
/* list of blocks (on disk)
* This record is used to describe a set of filesystem blocks,
* and is used with two type tags, WAPBL_WC_BLOCKS and
* WAPBL_WC_REVOCATIONS.
*
* For WAPBL_WC_BLOCKS, a copy of each listed block can be found
* starting at the next log device blocksize boundary, i.e. one
* log device block after the start of the record. This contains
* the bulk of the filesystem journal data which is written using
* these records before being written into the filesystem.
*
* The WAPBL_WC_REVOCATIONS record is used to indicate that any
* previously listed blocks should not be written into the filesystem.
* This is important so that deallocated and reallocated data blocks
* do not get overwritten with stale data from the journal. The
* revocation records do not contain a copy of any actual block data.
*/
struct wapbl_wc_blocklist {
uint32_t wc_type; /* WAPBL_WC_{REVOCATIONS,BLOCKS} */
int32_t wc_len;
int32_t wc_blkcount; /* presumably the number of wc_blocks[] entries -- TODO confirm */
int32_t wc_unused;
struct {
int64_t wc_daddr;
int32_t wc_unused;
int32_t wc_dlen;
} wc_blocks[0]; /* actually longer */
};
/* list of inodes (on disk)
* This record is used to describe the set of inodes which
* may be allocated but are unlinked. Inodes end up listed here
* while they are in the process of being initialized and
* deinitialized. Inodes unlinked while in use by a process
* will be listed here and the actual deletion must be completed
* on journal replay.
*/
struct wapbl_wc_inodelist {
uint32_t wc_type; /* WAPBL_WC_INODES */
int32_t wc_len;
int32_t wc_inocnt; /* presumably the number of wc_inodes[] entries -- TODO confirm */
int32_t wc_clear; /* set if previously listed inodes
should be ignored */
struct {
uint32_t wc_inumber;
uint32_t wc_imode;
} wc_inodes[0]; /* actually longer */
};
/****************************************************************/
#include <sys/queue.h>
#include <sys/vnode.h>
#include <sys/buf.h>
/*
* Callback type; wapbl_start() takes two callbacks of this type.
* NOTE(review): the meaning of the daddr_t *, int * and int arguments is
* not documented in this header -- confirm against vfs_wapbl.c.
*/
typedef void (*wapbl_flush_fn_t)(struct mount *, daddr_t *, int *, int);
#ifdef _KERNEL
struct wapbl_entry;
struct wapbl_wc_header;
struct wapbl_replay;
struct wapbl;
/*
* This structure holds per transaction log information
*/
struct wapbl_entry {
struct wapbl *we_wapbl; /* presumably the log this entry belongs to -- confirm */
SIMPLEQ_ENTRY(wapbl_entry) we_entries; /* queue linkage */
size_t we_bufcount; /* Count of unsynced buffers */
size_t we_reclaimable_bytes; /* Number of on-disk bytes for this
transaction */
int we_error;
#ifdef WAPBL_DEBUG_BUFBYTES
/* Only present when WAPBL_DEBUG_BUFBYTES is defined. */
size_t we_unsynced_bufbytes; /* Byte count of unsynced buffers */
#endif
};
void wapbl_init(void);
/* Start using a log */
int wapbl_start(struct wapbl **, struct mount *, struct vnode *, daddr_t,
size_t, size_t, struct wapbl_replay *,
wapbl_flush_fn_t, wapbl_flush_fn_t);
/* Discard the current transaction, potentially dangerous */
void wapbl_discard(struct wapbl *);
/* stop using a log */
int wapbl_stop(struct wapbl *, int);
/*
* Begin a new transaction or increment transaction recursion
* level if called while a transaction is already in progress
* by the current process.
*/
int wapbl_begin(struct wapbl *, const char *, int);
/* End a transaction or decrement the transaction recursion level */
void wapbl_end(struct wapbl *);
/*
* Add a new buffer to the current transaction. The buffers
* data will be copied to the current transaction log and the
* buffer will be marked B_LOCKED so that it will not be
* flushed to disk by the syncer or reallocated.
*/
void wapbl_add_buf(struct wapbl *, struct buf *);
/* Remove a buffer from the current transaction. */
void wapbl_remove_buf(struct wapbl *, struct buf *);
void wapbl_resize_buf(struct wapbl *, struct buf *, long, long);
/*
* This will flush all completed transactions to disk and
* start asynchronous writes on the associated buffers
*/
int wapbl_flush(struct wapbl *, int);
/*
* Inodes that are allocated but have zero link count
* must be registered with the current transaction
* so they may be recorded in the log and cleaned up later.
* registration/unregistration of ino numbers already registered is ok.
*/
void wapbl_register_inode(struct wapbl *, ino_t, mode_t);
void wapbl_unregister_inode(struct wapbl *, ino_t, mode_t);
/*
* Metadata block deallocations must be registered so
* that revocations records can be written and to prevent
* the corresponding blocks from being reused as data
* blocks until the log is on disk.
*/
void wapbl_register_deallocation(struct wapbl *, daddr_t, int);
void wapbl_jlock_assert(struct wapbl *wl);
void wapbl_junlock_assert(struct wapbl *wl);
void wapbl_print(struct wapbl *wl, int full, void (*pr)(const char *, ...));
#if defined(WAPBL_DEBUG) || defined(DDB)
void wapbl_dump(struct wapbl *);
#endif
void wapbl_biodone(struct buf *);
extern struct wapbl_ops wapbl_ops;
/*
 * Return the mount point associated with a vnode.
 *
 * Block device vnodes (VBLK) resolve to v_specmountpoint; every other
 * vnode type resolves to v_mount.  A NULL vnode yields NULL.
 */
static __inline struct mount *
wapbl_vptomp(struct vnode *vp)
{

	if (vp == NULL)
		return NULL;

	return (vp->v_type == VBLK) ? vp->v_specmountpoint : vp->v_mount;
}
/*
 * Report whether the mount associated with a vnode (as resolved by
 * wapbl_vptomp()) has its mnt_wapbl pointer set.  A NULL vnode, or a
 * vnode with no associated mount, yields false.
 */
static __inline bool
wapbl_vphaswapbl(struct vnode *vp)
{
	struct mount *mnt;

	/* Do not call the resolver at all for a NULL vnode. */
	mnt = (vp != NULL) ? wapbl_vptomp(vp) : NULL;

	return (mnt != NULL && mnt->mnt_wapbl != NULL);
}
#endif /* _KERNEL */
/****************************************************************/
/* Replay support */
/*
* State used while replaying a journal.
* NOTE(review): the per-field notes marked "presumably" are inferred
* from names only -- confirm against vfs_wapbl.c.
*/
struct wapbl_replay {
struct vnode *wr_logvp; /* presumably the vnode the log is read from */
struct vnode *wr_devvp; /* presumably the filesystem device vnode */
daddr_t wr_logpbn;
struct wapbl_wc_header wr_wc_header; /* journal header, in on-disk layout */
void *wr_scratch; /* non-NULL while open; tested by wapbl_replay_isopen() */
LIST_HEAD(wapbl_blk_head, wapbl_blk) *wr_blkhash; /* pointer to list head(s) of wapbl_blk */
u_long wr_blkhashmask;
int wr_blkhashcnt;
off_t wr_inodeshead;
off_t wr_inodestail;
int wr_inodescnt;
/* Same two fields as the wc_inodes[] entries of struct wapbl_wc_inodelist. */
struct {
uint32_t wr_inumber;
uint32_t wr_imode;
} *wr_inodes;
};
/*
* wapbl_replay_isopen(wr) is true when wr->wr_scratch is non-null.
* wr is dereferenced unconditionally, so it must not be NULL.
* NOTE(review): wapbl_replay_isopen1() is presumably the out-of-line
* equivalent for places that need a function pointer (e.g. the
* wo_wapbl_replay_isopen hook) -- confirm in vfs_wapbl.c.
*/
#define wapbl_replay_isopen(wr) ((wr)->wr_scratch != 0)
int wapbl_replay_isopen1(struct wapbl_replay *);
int wapbl_replay_start(struct wapbl_replay **, struct vnode *,
daddr_t, size_t, size_t);
void wapbl_replay_stop(struct wapbl_replay *);
void wapbl_replay_free(struct wapbl_replay *);
int wapbl_replay_verify(struct wapbl_replay *, struct vnode *);
int wapbl_replay_write(struct wapbl_replay *, struct vnode *);
int wapbl_replay_read(struct wapbl_replay *, void *, daddr_t, long);
/****************************************************************/
/* Supply this to provide i/o support */
int wapbl_write(void *, size_t, struct vnode *, daddr_t);
int wapbl_read(void *, size_t, struct vnode *, daddr_t);
/****************************************************************/
#endif /* !_SYS_WAPBL_H */

View File

@ -1,4 +1,33 @@
/* $NetBSD: ffs_alloc.c,v 1.110 2008/07/11 05:31:44 simonb Exp $ */
/* $NetBSD: ffs_alloc.c,v 1.111 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 2002 Networks Associates Technology, Inc.
@ -41,7 +70,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.110 2008/07/11 05:31:44 simonb Exp $");
__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.111 2008/07/31 05:38:06 simonb Exp $");
#if defined(_KERNEL_OPT)
#include "opt_ffs.h"
@ -51,13 +80,14 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.110 2008/07/11 05:31:44 simonb Exp $
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/kauth.h>
#include <sys/fstrans.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/syslog.h>
#include <sys/vnode.h>
#include <sys/wapbl.h>
#include <miscfs/specfs/specdev.h>
#include <ufs/ufs/quota.h>
@ -65,21 +95,22 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.110 2008/07/11 05:31:44 simonb Exp $
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_wapbl.h>
#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
static daddr_t ffs_alloccg(struct inode *, int, daddr_t, int);
static daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t);
static daddr_t ffs_alloccg(struct inode *, int, daddr_t, int, int);
static daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t, int);
#ifdef XXXUBC
static daddr_t ffs_clusteralloc(struct inode *, int, daddr_t, int);
#endif
static ino_t ffs_dirpref(struct inode *);
static daddr_t ffs_fragextend(struct inode *, int, daddr_t, int, int);
static void ffs_fserr(struct fs *, u_int, const char *);
static daddr_t ffs_hashalloc(struct inode *, int, daddr_t, int,
daddr_t (*)(struct inode *, int, daddr_t, int));
static daddr_t ffs_nodealloccg(struct inode *, int, daddr_t, int);
static daddr_t ffs_hashalloc(struct inode *, int, daddr_t, int, int,
daddr_t (*)(struct inode *, int, daddr_t, int, int));
static daddr_t ffs_nodealloccg(struct inode *, int, daddr_t, int, int);
static int32_t ffs_mapsearch(struct fs *, struct cg *,
daddr_t, int);
#if defined(DIAGNOSTIC) || defined(DEBUG)
@ -118,7 +149,7 @@ extern const u_char * const fragtbl[];
* => releases um_lock before returning
*/
int
ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size,
ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size, int flags,
kauth_cred_t cred, daddr_t *bnp)
{
struct ufsmount *ump;
@ -174,13 +205,14 @@ ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size,
return (error);
mutex_enter(&ump->um_lock);
#endif
if (bpref >= fs->fs_size)
bpref = 0;
if (bpref == 0)
cg = ino_to_cg(fs, ip->i_number);
else
cg = dtog(fs, bpref);
bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg);
bno = ffs_hashalloc(ip, cg, bpref, size, flags, ffs_alloccg);
if (bno > 0) {
DIP_ADD(ip, blocks, btodb(size));
ip->i_flag |= IN_CHANGE | IN_UPDATE;
@ -193,6 +225,20 @@ ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size,
*/
(void) chkdq(ip, -btodb(size), cred, FORCE);
#endif
if (flags & B_CONTIG) {
/*
* XXX ump->um_lock handling is "suspect" at best.
* For the case where ffs_hashalloc() fails early
* in the B_CONTIG case we reach here with um_lock
* already unlocked, so we can't release it again
* like in the normal error path. See kern/39206.
*
*
* Fail silently - it's up to our caller to report
* errors.
*/
return (ENOSPC);
}
nospace:
mutex_exit(&ump->um_lock);
ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
@ -372,14 +418,30 @@ ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize,
panic("ffs_realloccg: bad optim");
/* NOTREACHED */
}
bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg);
bno = ffs_hashalloc(ip, cg, bpref, request, 0, ffs_alloccg);
if (bno > 0) {
if (!DOINGSOFTDEP(ITOV(ip)))
ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize,
ip->i_number);
if (nsize < request)
ffs_blkfree(fs, ip->i_devvp, bno + numfrags(fs, nsize),
(long)(request - nsize), ip->i_number);
if (!DOINGSOFTDEP(ITOV(ip))) {
if ((ip->i_ump->um_mountp->mnt_wapbl) &&
(ITOV(ip)->v_type != VREG)) {
UFS_WAPBL_REGISTER_DEALLOCATION(
ip->i_ump->um_mountp, fsbtodb(fs, bprev),
osize);
} else
ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize,
ip->i_number);
}
if (nsize < request) {
if ((ip->i_ump->um_mountp->mnt_wapbl) &&
(ITOV(ip)->v_type != VREG)) {
UFS_WAPBL_REGISTER_DEALLOCATION(
ip->i_ump->um_mountp,
fsbtodb(fs, (bno + numfrags(fs, nsize))),
request - nsize);
} else
ffs_blkfree(fs, ip->i_devvp,
bno + numfrags(fs, nsize),
(long)(request - nsize), ip->i_number);
}
DIP_ADD(ip, blocks, btodb(nsize - osize));
ip->i_flag |= IN_CHANGE | IN_UPDATE;
if (bpp != NULL) {
@ -443,7 +505,7 @@ struct ctldebug debug15 = { "prtrealloc", &prtrealloc };
#endif
/*
* NOTE: when re-enabling this, it must be updated for UFS2.
* NOTE: when re-enabling this, it must be updated for UFS2 and WAPBL.
*/
int doasyncfree = 1;
@ -548,7 +610,7 @@ ffs_reallocblks(void *v)
* Search the block map looking for an allocation of the desired size.
*/
if ((newblk = (daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
len, ffs_clusteralloc)) == 0) {
len, flags, ffs_clusteralloc)) == 0) {
mutex_exit(&ump->um_lock);
goto fail;
}
@ -696,11 +758,17 @@ ffs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
ino_t ino, ipref;
int cg, error;
UFS_WAPBL_JUNLOCK_ASSERT(pvp->v_mount);
*vpp = NULL;
pip = VTOI(pvp);
fs = pip->i_fs;
ump = pip->i_ump;
error = UFS_WAPBL_BEGIN(pvp->v_mount);
if (error) {
return error;
}
mutex_enter(&ump->um_lock);
if (fs->fs_cstotal.cs_nifree == 0)
goto noinodes;
@ -723,12 +791,18 @@ ffs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
if (fs->fs_contigdirs[cg] > 0)
fs->fs_contigdirs[cg]--;
}
ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, ffs_nodealloccg);
ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, 0, ffs_nodealloccg);
if (ino == 0)
goto noinodes;
UFS_WAPBL_END(pvp->v_mount);
error = VFS_VGET(pvp->v_mount, ino, vpp);
if (error) {
ffs_vfree(pvp, ino, mode);
int err;
err = UFS_WAPBL_BEGIN(pvp->v_mount);
if (err == 0)
ffs_vfree(pvp, ino, mode);
if (err == 0)
UFS_WAPBL_END(pvp->v_mount);
return (error);
}
KASSERT((*vpp)->v_type == VNON);
@ -774,6 +848,7 @@ ffs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
return (0);
noinodes:
mutex_exit(&ump->um_lock);
UFS_WAPBL_END(pvp->v_mount);
ffs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
return (ENOSPC);
@ -922,7 +997,7 @@ ffs_dirpref(struct inode *pip)
* => um_lock held on entry and exit
*/
daddr_t
ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx,
ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx, int flags,
int32_t *bap /* XXX ondisk32 */)
{
struct fs *fs;
@ -932,6 +1007,26 @@ ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx,
KASSERT(mutex_owned(&ip->i_ump->um_lock));
fs = ip->i_fs;
/*
* If allocating a contiguous file with B_CONTIG, use the hints
* in the inode extensions to return the desired block.
*
* For metadata (indirect blocks) return the address of where
* the first indirect block resides - we'll scan for the next
* available slot if we need to allocate more than one indirect
* block. For data, return the address of the actual block
* relative to the address of the first data block.
*/
if (flags & B_CONTIG) {
KASSERT(ip->i_ffs_first_data_blk != 0);
KASSERT(ip->i_ffs_first_indir_blk != 0);
if (flags & B_METAONLY)
return ip->i_ffs_first_indir_blk;
else
return ip->i_ffs_first_data_blk + blkstofrags(fs, lbn);
}
if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
if (lbn < NDADDR + NINDIR(fs)) {
cg = ino_to_cg(fs, ip->i_number);
@ -966,7 +1061,8 @@ ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx,
}
daddr_t
ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int64_t *bap)
ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int flags,
int64_t *bap)
{
struct fs *fs;
int cg;
@ -975,6 +1071,26 @@ ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int64_t *bap)
KASSERT(mutex_owned(&ip->i_ump->um_lock));
fs = ip->i_fs;
/*
* If allocating a contiguous file with B_CONTIG, use the hints
* in the inode extensions to return the desired block.
*
* For metadata (indirect blocks) return the address of where
* the first indirect block resides - we'll scan for the next
* available slot if we need to allocate more than one indirect
* block. For data, return the address of the actual block
* relative to the address of the first data block.
*/
if (flags & B_CONTIG) {
KASSERT(ip->i_ffs_first_data_blk != 0);
KASSERT(ip->i_ffs_first_indir_blk != 0);
if (flags & B_METAONLY)
return ip->i_ffs_first_indir_blk;
else
return ip->i_ffs_first_data_blk + blkstofrags(fs, lbn);
}
if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
if (lbn < NDADDR + NINDIR(fs)) {
cg = ino_to_cg(fs, ip->i_number);
@ -1025,7 +1141,7 @@ ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int64_t *bap)
static daddr_t
ffs_hashalloc(struct inode *ip, int cg, daddr_t pref,
int size /* size for data blocks, mode for inodes */,
daddr_t (*allocator)(struct inode *, int, daddr_t, int))
int flags, daddr_t (*allocator)(struct inode *, int, daddr_t, int, int))
{
struct fs *fs;
daddr_t result;
@ -1035,9 +1151,12 @@ ffs_hashalloc(struct inode *ip, int cg, daddr_t pref,
/*
* 1: preferred cylinder group
*/
result = (*allocator)(ip, cg, pref, size);
result = (*allocator)(ip, cg, pref, size, flags);
if (result)
return (result);
if (flags & B_CONTIG)
return (result);
/*
* 2: quadratic rehash
*/
@ -1045,7 +1164,7 @@ ffs_hashalloc(struct inode *ip, int cg, daddr_t pref,
cg += i;
if (cg >= fs->fs_ncg)
cg -= fs->fs_ncg;
result = (*allocator)(ip, cg, 0, size);
result = (*allocator)(ip, cg, 0, size, flags);
if (result)
return (result);
}
@ -1056,7 +1175,7 @@ ffs_hashalloc(struct inode *ip, int cg, daddr_t pref,
*/
cg = (icg + 2) % fs->fs_ncg;
for (i = 2; i < fs->fs_ncg; i++) {
result = (*allocator)(ip, cg, 0, size);
result = (*allocator)(ip, cg, 0, size, flags);
if (result)
return (result);
cg++;
@ -1157,7 +1276,7 @@ ffs_fragextend(struct inode *ip, int cg, daddr_t bprev, int osize, int nsize)
* and if it is, allocate it.
*/
static daddr_t
ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size, int flags)
{
struct ufsmount *ump;
struct fs *fs = ip->i_fs;
@ -1192,7 +1311,7 @@ ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
cgp->cg_time = ufs_rw64(time_second, needswap);
if (size == fs->fs_bsize) {
mutex_enter(&ump->um_lock);
blkno = ffs_alloccgblk(ip, bp, bpref);
blkno = ffs_alloccgblk(ip, bp, bpref, flags);
ACTIVECG_CLR(fs, cg);
mutex_exit(&ump->um_lock);
bdwrite(bp);
@ -1216,7 +1335,7 @@ ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
if (cgp->cg_cs.cs_nbfree == 0)
goto fail;
mutex_enter(&ump->um_lock);
blkno = ffs_alloccgblk(ip, bp, bpref);
blkno = ffs_alloccgblk(ip, bp, bpref, flags);
bno = dtogd(fs, blkno);
for (i = frags; i < fs->fs_frag; i++)
setbit(blksfree, bno + i);
@ -1276,7 +1395,7 @@ ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
* blocks may be fragmented by the routine that allocates them.
*/
static daddr_t
ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref)
ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref, int flags)
{
struct ufsmount *ump;
struct fs *fs = ip->i_fs;
@ -1304,7 +1423,14 @@ ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref)
*/
if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
goto gotit;
/*
* if the requested data block isn't available and we are
* trying to allocate a contiguous file, return an error.
*/
if ((flags & (B_CONTIG | B_METAONLY)) == B_CONTIG)
return (0);
}
/*
* Take the next available block in this cylinder group.
*/
@ -1453,7 +1579,7 @@ ffs_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
len = blkstofrags(fs, len);
mutex_enter(&ump->um_lock);
for (i = 0; i < len; i += fs->fs_frag)
if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i)
if ((got = ffs_alloccgblk(ip, bp, bno + i, flags)) != bno + i)
panic("ffs_clusteralloc: lost block");
ACTIVECG_CLR(fs, cg);
mutex_exit(&ump->um_lock);
@ -1477,7 +1603,7 @@ fail:
* inode in the specified cylinder group.
*/
static daddr_t
ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode, int flags)
{
struct ufsmount *ump = ip->i_ump;
struct fs *fs = ip->i_fs;
@ -1492,6 +1618,7 @@ ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
#endif
KASSERT(mutex_owned(&ump->um_lock));
UFS_WAPBL_JLOCK_ASSERT(ip->i_ump->um_mountp);
if (fs->fs_cs(fs, cg).cs_nifree == 0)
return (0);
@ -1542,6 +1669,8 @@ ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
panic("ffs_nodealloccg: block not in map");
/* NOTREACHED */
gotit:
UFS_WAPBL_REGISTER_INODE(ip->i_ump->um_mountp, cg * fs->fs_ipg + ipref,
mode);
/*
* Check to see if we need to initialize more inodes.
*/
@ -1593,6 +1722,122 @@ gotit:
return (0);
}
/*
* Allocate a specific block or fragment.
*
* The block or run of fragments starting at bno is removed from the
* free map of its cylinder group, possibly fragmenting a block in the
* process.  The free counts in the cylinder group, in fs_cstotal and
* in fs_cs() are adjusted to match.
*
* This implementation should mirror ffs_blkfree.
*
* ip:   inode supplying the file system (i_fs), the mount (i_ump) and
*       the device vnode (i_devvp); i_dev, i_number and i_uid are
*       only passed to printf()/ffs_fserr() when reporting errors.
* bno:  file system address of the first fragment to allocate.
* size: number of bytes to allocate; it must not exceed fs_bsize, must
*       have fragoff() == 0, and must not extend past the end of the
*       block containing bno, otherwise the function panics.
*
* Returns 0 on success, EINVAL if bno is not below fs_size, the bread()
* error if the cylinder group cannot be read, EIO if the cylinder
* group fails cg_chkmagic(), or EBUSY if any part of the requested
* range is not currently marked free.
*
* => um_lock not held on entry or exit
*/
int
ffs_blkalloc(struct inode *ip, daddr_t bno, long size)
{
struct ufsmount *ump = ip->i_ump;
struct fs *fs = ip->i_fs;
struct cg *cgp;
struct buf *bp;
int32_t fragno, cgbno;
int i, error, cg, blk, frags, bbase;
u_int8_t *blksfree;
const int needswap = UFS_FSNEEDSWAP(fs);
/*
* Reject requests larger than a block, not fragment aligned, or
* crossing a block boundary.  These are treated as fatal.
*/
if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
printf("dev = 0x%x, bno = %" PRId64 " bsize = %d, "
"size = %ld, fs = %s\n",
ip->i_dev, bno, fs->fs_bsize, size, fs->fs_fsmnt);
panic("blkalloc: bad size");
}
cg = dtog(fs, bno);
/*
* An address beyond the end of the file system is reported and
* refused rather than treated as fatal.
*/
if (bno >= fs->fs_size) {
printf("bad block %" PRId64 ", ino %" PRId64 "\n", bno,
ip->i_number);
ffs_fserr(fs, ip->i_uid, "bad block");
return EINVAL;
}
/*
* Read the cylinder group that contains bno.
*/
error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
(int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp);
if (error) {
brelse(bp, 0);
return error;
}
cgp = (struct cg *)bp->b_data;
if (!cg_chkmagic(cgp, needswap)) {
brelse(bp, 0);
return EIO;
}
/*
* The timestamps are updated in the buffer before the free map is
* checked; on the EBUSY paths below the buffer is released with
* brelse() instead of being handed to bdwrite().
*/
cgp->cg_old_time = ufs_rw32(time_second, needswap);
cgp->cg_time = ufs_rw64(time_second, needswap);
/*
* cgbno is bno relative to the start of its cylinder group.
*/
cgbno = dtogd(fs, bno);
blksfree = cg_blksfree(cgp, needswap);
mutex_enter(&ump->um_lock);
if (size == fs->fs_bsize) {
/*
* Whole block: it must be entirely free.  Clear it in the map
* and take one block off each free-block count.
*/
fragno = fragstoblks(fs, cgbno);
if (!ffs_isblock(fs, blksfree, fragno)) {
mutex_exit(&ump->um_lock);
brelse(bp, 0);
return EBUSY;
}
ffs_clrblock(fs, blksfree, fragno);
ffs_clusteracct(fs, cgp, fragno, -1);
ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
fs->fs_cstotal.cs_nbfree--;
fs->fs_cs(fs, cg).cs_nbfree--;
} else {
/*
* Fragment run: bbase is the first fragment of the block that
* contains cgbno.  Every requested fragment must be free
* before anything is modified.
*/
bbase = cgbno - fragnum(fs, cgbno);
frags = numfrags(fs, size);
for (i = 0; i < frags; i++) {
if (isclr(blksfree, cgbno + i)) {
mutex_exit(&ump->um_lock);
brelse(bp, 0);
return EBUSY;
}
}
/*
* if a complete block is being split, account for it:
* it stops counting as a free block and all of its fragments
* are counted as free fragments (the ones allocated here are
* subtracted again below).
*/
fragno = fragstoblks(fs, bbase);
if (ffs_isblock(fs, blksfree, fragno)) {
ufs_add32(cgp->cg_cs.cs_nffree, fs->fs_frag, needswap);
fs->fs_cstotal.cs_nffree += fs->fs_frag;
fs->fs_cs(fs, cg).cs_nffree += fs->fs_frag;
ffs_clusteracct(fs, cgp, fragno, -1);
ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
fs->fs_cstotal.cs_nbfree--;
fs->fs_cs(fs, cg).cs_nbfree--;
}
/*
* decrement the counts associated with the old frags
*/
blk = blkmap(fs, blksfree, bbase);
ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap);
/*
* allocate the fragment
*/
for (i = 0; i < frags; i++) {
clrbit(blksfree, cgbno + i);
}
/*
* The loop above leaves i == frags, so these subtract the
* number of fragments just allocated.
*/
ufs_add32(cgp->cg_cs.cs_nffree, -i, needswap);
fs->fs_cstotal.cs_nffree -= i;
fs->fs_cs(fs, cg).cs_nffree -= i;
/*
* add back in counts associated with the new frags
*/
blk = blkmap(fs, blksfree, bbase);
ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap);
}
/*
* Flag the superblock as modified and pass the updated cylinder
* group buffer to bdwrite() after dropping um_lock.
*/
fs->fs_fmod = 1;
ACTIVECG_CLR(fs, cg);
mutex_exit(&ump->um_lock);
bdwrite(bp);
return 0;
}
/*
* Free a block or fragment.
*
@ -1817,6 +2062,8 @@ ffs_vfree(struct vnode *vp, ino_t ino, int mode)
/*
* Do the actual free operation.
* The specified inode is placed back in the free map.
*
* => um_lock not held on entry or exit
*/
int
ffs_freefile(struct fs *fs, struct vnode *devvp, ino_t ino, int mode)
@ -1832,6 +2079,8 @@ ffs_freefile(struct fs *fs, struct vnode *devvp, ino_t ino, int mode)
const int needswap = UFS_FSNEEDSWAP(fs);
#endif
UFS_WAPBL_JLOCK_ASSERT(devvp->v_specinfo->si_mountpoint);
cg = ino_to_cg(fs, ino);
if (devvp->v_type != VBLK) {
/* devvp is a snapshot */
@ -1871,6 +2120,8 @@ ffs_freefile(struct fs *fs, struct vnode *devvp, ino_t ino, int mode)
panic("ifree: freeing free inode");
}
clrbit(inosused, ino);
UFS_WAPBL_UNREGISTER_INODE(devvp->v_specmountpoint,
ino + cg * fs->fs_ipg, mode);
if (ino < ufs_rw32(cgp->cg_irotor, needswap))
cgp->cg_irotor = ufs_rw32(ino, needswap);
ufs_add32(cgp->cg_cs.cs_nifree, 1, needswap);

View File

@ -1,4 +1,4 @@
/* $NetBSD: ffs_balloc.c,v 1.50 2008/06/03 09:47:49 hannken Exp $ */
/* $NetBSD: ffs_balloc.c,v 1.51 2008/07/31 05:38:06 simonb Exp $ */
/*
* Copyright (c) 2002 Networks Associates Technology, Inc.
@ -41,7 +41,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.50 2008/06/03 09:47:49 hannken Exp $");
__KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.51 2008/07/31 05:38:06 simonb Exp $");
#if defined(_KERNEL_OPT)
#include "opt_quota.h"
@ -141,7 +141,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
if (osize < fs->fs_bsize && osize > 0) {
mutex_enter(&ump->um_lock);
error = ffs_realloccg(ip, nb,
ffs_blkpref_ufs1(ip, lastlbn, nb,
ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
&ip->i_ffs1_db[0]),
osize, (int)fs->fs_bsize, cred, bpp, &newb);
if (error)
@ -222,9 +222,9 @@ ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
*/
mutex_enter(&ump->um_lock);
error = ffs_realloccg(ip, lbn,
ffs_blkpref_ufs1(ip, lbn, (int)lbn,
&ip->i_ffs1_db[0]), osize, nsize, cred,
bpp, &newb);
ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
&ip->i_ffs1_db[0]),
osize, nsize, cred, bpp, &newb);
if (error)
return (error);
if (DOINGSOFTDEP(vp))
@ -245,9 +245,9 @@ ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
nsize = fs->fs_bsize;
mutex_enter(&ump->um_lock);
error = ffs_alloc(ip, lbn,
ffs_blkpref_ufs1(ip, lbn, (int)lbn,
ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
&ip->i_ffs1_db[0]),
nsize, cred, &newb);
nsize, flags, cred, &newb);
if (error)
return (error);
if (bpp != NULL) {
@ -284,9 +284,9 @@ ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
allocblk = allociblk;
if (nb == 0) {
mutex_enter(&ump->um_lock);
pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
&newb);
pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags | B_METAONLY, cred, &newb);
if (error)
goto fail;
nb = newb;
@ -341,9 +341,10 @@ ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
}
mutex_enter(&ump->um_lock);
if (pref == 0)
pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
&newb);
pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
NULL);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags | B_METAONLY, cred, &newb);
if (error) {
brelse(bp, 0);
goto fail;
@ -404,8 +405,9 @@ ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
goto fail;
}
mutex_enter(&ump->um_lock);
pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, &bap[0]);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
&bap[0]);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
&newb);
if (error) {
brelse(bp, 0);
@ -619,7 +621,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
error = ffs_realloccg(ip, -1 - nb,
dp->di_extb[nb],
ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
&dp->di_extb[0]), osize,
flags, &dp->di_extb[0]),
osize,
(int)fs->fs_bsize, cred, &bp);
if (error)
return (error);
@ -679,8 +682,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
mutex_enter(&ump->um_lock);
error = ffs_realloccg(ip, -1 - lbn,
dp->di_extb[lbn],
ffs_blkpref_ufs2(ip, lbn, (int)lbn,
&dp->di_extb[0]), osize, nsize, cred, &bp);
ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
&dp->di_extb[0]),
osize, nsize, cred, &bp);
if (error)
return (error);
bp->b_xflags |= BX_ALTDATA;
@ -696,8 +700,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
nsize = fs->fs_bsize;
mutex_enter(&ump->um_lock);
error = ffs_alloc(ip, lbn,
ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
nsize, cred, &newb);
ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
&dp->di_extb[0]),
nsize, flags, cred, &newb);
if (error)
return (error);
error = ffs_getblk(vp, -1 - lbn, fsbtodb(fs, newb),
@ -728,7 +733,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
if (osize < fs->fs_bsize && osize > 0) {
mutex_enter(&ump->um_lock);
error = ffs_realloccg(ip, nb,
ffs_blkpref_ufs2(ip, lastlbn, nb,
ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
&ip->i_ffs2_db[0]),
osize, (int)fs->fs_bsize, cred, bpp, &newb);
if (error)
@ -809,9 +814,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
*/
mutex_enter(&ump->um_lock);
error = ffs_realloccg(ip, lbn,
ffs_blkpref_ufs2(ip, lbn, (int)lbn,
&ip->i_ffs2_db[0]), osize, nsize, cred,
bpp, &newb);
ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
&ip->i_ffs2_db[0]),
osize, nsize, cred, bpp, &newb);
if (error)
return (error);
if (DOINGSOFTDEP(vp))
@ -832,8 +837,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
nsize = fs->fs_bsize;
mutex_enter(&ump->um_lock);
error = ffs_alloc(ip, lbn,
ffs_blkpref_ufs2(ip, lbn, (int)lbn,
&ip->i_ffs2_db[0]), nsize, cred, &newb);
ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
&ip->i_ffs2_db[0]),
nsize, flags, cred, &newb);
if (error)
return (error);
if (bpp != NULL) {
@ -870,9 +876,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
allocblk = allociblk;
if (nb == 0) {
mutex_enter(&ump->um_lock);
pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
&newb);
pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags | B_METAONLY, cred, &newb);
if (error)
goto fail;
nb = newb;
@ -927,9 +933,10 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
}
mutex_enter(&ump->um_lock);
if (pref == 0)
pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
&newb);
pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
NULL);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags | B_METAONLY, cred, &newb);
if (error) {
brelse(bp, 0);
goto fail;
@ -990,8 +997,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
goto fail;
}
mutex_enter(&ump->um_lock);
pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, &bap[0]);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
&bap[0]);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
&newb);
if (error) {
brelse(bp, 0);

View File

@ -1,4 +1,4 @@
/* $NetBSD: ffs_extern.h,v 1.66 2008/06/28 01:34:05 rumble Exp $ */
/* $NetBSD: ffs_extern.h,v 1.67 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 1991, 1993, 1994
@ -84,9 +84,10 @@ __BEGIN_DECLS
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/wapbl.h>
/* ffs_alloc.c */
int ffs_alloc(struct inode *, daddr_t, daddr_t , int, kauth_cred_t,
int ffs_alloc(struct inode *, daddr_t, daddr_t , int, int, kauth_cred_t,
daddr_t *);
int ffs_realloccg(struct inode *, daddr_t, daddr_t, int, int ,
kauth_cred_t, struct buf **, daddr_t *);
@ -94,8 +95,9 @@ int ffs_realloccg(struct inode *, daddr_t, daddr_t, int, int ,
int ffs_reallocblks(void *);
#endif
int ffs_valloc(struct vnode *, int, kauth_cred_t, struct vnode **);
daddr_t ffs_blkpref_ufs1(struct inode *, daddr_t, int, int32_t *);
daddr_t ffs_blkpref_ufs2(struct inode *, daddr_t, int, int64_t *);
daddr_t ffs_blkpref_ufs1(struct inode *, daddr_t, int, int, int32_t *);
daddr_t ffs_blkpref_ufs2(struct inode *, daddr_t, int, int, int64_t *);
int ffs_blkalloc(struct inode *, daddr_t, long);
void ffs_blkfree(struct fs *, struct vnode *, daddr_t, long, ino_t);
int ffs_vfree(struct vnode *, ino_t, int);
void ffs_clusteracct(struct fs *, struct cg *, int32_t, int);
@ -175,6 +177,17 @@ void softdep_setup_allocindir_page(struct inode *, daddr_t,
void softdep_fsync_mountdev(struct vnode *);
int softdep_sync_metadata(struct vnode *);
/* Write Ahead Physical Block Logging */
void ffs_wapbl_verify_inodes(struct mount *, const char *);
void ffs_wapbl_replay_finish(struct mount *);
int ffs_wapbl_start(struct mount *);
int ffs_wapbl_stop(struct mount *, int);
int ffs_wapbl_replay_start(struct mount *, struct fs *, struct vnode *);
void ffs_wapbl_blkalloc(struct fs *, struct vnode *, daddr_t, int);
void ffs_wapbl_sync_metadata(struct mount *, daddr_t *, int *, int);
void ffs_wapbl_abort_sync_metadata(struct mount *, daddr_t *, int *, int);
extern int (**ffs_vnodeop_p)(void *);
extern int (**ffs_specop_p)(void *);
extern int (**ffs_fifoop_p)(void *);

View File

@ -1,4 +1,33 @@
/* $NetBSD: ffs_inode.c,v 1.97 2008/06/03 09:47:49 hannken Exp $ */
/* $NetBSD: ffs_inode.c,v 1.98 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 1982, 1986, 1989, 1993
@ -32,7 +61,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_inode.c,v 1.97 2008/06/03 09:47:49 hannken Exp $");
__KERNEL_RCSID(0, "$NetBSD: ffs_inode.c,v 1.98 2008/07/31 05:38:06 simonb Exp $");
#if defined(_KERNEL_OPT)
#include "opt_ffs.h"
@ -41,23 +70,25 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_inode.c,v 1.97 2008/06/03 09:47:49 hannken Exp $
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/fstrans.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/trace.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/kauth.h>
#include <sys/fstrans.h>
#include <sys/trace.h>
#include <sys/vnode.h>
#include <sys/wapbl.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_wapbl.h>
#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
@ -128,6 +159,17 @@ ffs_update(struct vnode *vp, const struct timespec *acc,
softdep_update_inodeblock(ip, bp, waitfor);
} else if (ip->i_ffs_effnlink != ip->i_nlink)
panic("ffs_update: bad link cnt");
/* Keep unlinked inode list up to date */
KDASSERT(DIP(ip, nlink) == ip->i_nlink);
if (ip->i_mode) {
if (ip->i_nlink > 0) {
UFS_WAPBL_UNREGISTER_INODE(ip->i_ump->um_mountp,
ip->i_number, ip->i_mode);
} else {
UFS_WAPBL_REGISTER_INODE(ip->i_ump->um_mountp,
ip->i_number, ip->i_mode);
}
}
if (fs->fs_magic == FS_UFS1_MAGIC) {
cp = (char *)bp->b_data +
(ino_to_fsbo(fs, ip->i_number) * DINODE1_SIZE);
@ -411,8 +453,13 @@ ffs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred)
blocksreleased += count;
if (lastiblock[level] < 0) {
DIP_ASSIGN(oip, ib[level], 0);
ffs_blkfree(fs, oip->i_devvp, bn, fs->fs_bsize,
oip->i_number);
if (oip->i_ump->um_mountp->mnt_wapbl) {
UFS_WAPBL_REGISTER_DEALLOCATION(
oip->i_ump->um_mountp,
fsbtodb(fs, bn), fs->fs_bsize);
} else
ffs_blkfree(fs, oip->i_devvp, bn,
fs->fs_bsize, oip->i_number);
blocksreleased += nblocks;
}
}
@ -434,7 +481,12 @@ ffs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred)
continue;
DIP_ASSIGN(oip, db[i], 0);
bsize = blksize(fs, oip, i);
ffs_blkfree(fs, oip->i_devvp, bn, bsize, oip->i_number);
if ((oip->i_ump->um_mountp->mnt_wapbl) &&
(ovp->v_type != VREG)) {
UFS_WAPBL_REGISTER_DEALLOCATION(oip->i_ump->um_mountp,
fsbtodb(fs, bn), bsize);
} else
ffs_blkfree(fs, oip->i_devvp, bn, bsize, oip->i_number);
blocksreleased += btodb(bsize);
}
if (lastblock < 0)
@ -468,8 +520,14 @@ ffs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred)
* required for the storage we're keeping.
*/
bn += numfrags(fs, newspace);
ffs_blkfree(fs, oip->i_devvp, bn, oldspace - newspace,
oip->i_number);
if ((oip->i_ump->um_mountp->mnt_wapbl) &&
(ovp->v_type != VREG)) {
UFS_WAPBL_REGISTER_DEALLOCATION(
oip->i_ump->um_mountp, fsbtodb(fs, bn),
oldspace - newspace);
} else
ffs_blkfree(fs, oip->i_devvp, bn,
oldspace - newspace, oip->i_number);
blocksreleased += btodb(oldspace - newspace);
}
}
@ -494,6 +552,7 @@ done:
DIP_ADD(oip, blocks, -blocksreleased);
genfs_node_unlock(ovp);
oip->i_flag |= IN_CHANGE;
UFS_WAPBL_UPDATE(ovp, NULL, NULL, 0);
#ifdef QUOTA
(void) chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
@ -621,7 +680,13 @@ ffs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, daddr_t lastbn,
allerror = error;
blocksreleased += blkcount;
}
ffs_blkfree(fs, ip->i_devvp, nb, fs->fs_bsize, ip->i_number);
if ((ip->i_ump->um_mountp->mnt_wapbl) &&
((level > SINGLE) || (ITOV(ip)->v_type != VREG))) {
UFS_WAPBL_REGISTER_DEALLOCATION(ip->i_ump->um_mountp,
fsbtodb(fs, nb), fs->fs_bsize);
} else
ffs_blkfree(fs, ip->i_devvp, nb, fs->fs_bsize,
ip->i_number);
blocksreleased += nblocks;
}

View File

@ -1,4 +1,33 @@
/* $NetBSD: ffs_vfsops.c,v 1.230 2008/06/28 01:34:05 rumble Exp $ */
/* $NetBSD: ffs_vfsops.c,v 1.231 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 1989, 1991, 1993, 1994
@ -32,12 +61,13 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.230 2008/06/28 01:34:05 rumble Exp $");
__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.231 2008/07/31 05:38:06 simonb Exp $");
#if defined(_KERNEL_OPT)
#include "opt_ffs.h"
#include "opt_quota.h"
#include "opt_softdep.h"
#include "opt_wapbl.h"
#endif
#include <sys/param.h>
@ -61,6 +91,7 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.230 2008/06/28 01:34:05 rumble Exp
#include <sys/sysctl.h>
#include <sys/conf.h>
#include <sys/kauth.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h>
#include <sys/module.h>
@ -73,6 +104,7 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.230 2008/06/28 01:34:05 rumble Exp
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_wapbl.h>
#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
@ -243,11 +275,17 @@ ffs_mountroot(void)
vrele(rootvp);
return (error);
}
/*
* We always need to be able to mount the root file system.
*/
mp->mnt_flag |= MNT_FORCE;
if ((error = ffs_mountfs(rootvp, mp, l)) != 0) {
vfs_unbusy(mp, false, NULL);
vfs_destroy(mp);
return (error);
}
mp->mnt_flag &= ~MNT_FORCE;
mutex_enter(&mountlist_lock);
CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
mutex_exit(&mountlist_lock);
@ -261,6 +299,8 @@ ffs_mountroot(void)
return (0);
}
/*
* When non-zero, ffs_mount() sets MNT_LOG in mp->mnt_flag on entry.
* NOTE(review): nothing in view assigns this variable, so it is
* presumably a debugging knob set by hand -- confirm.
*/
static int dolog;
/*
* VFS Operations.
*
@ -278,6 +318,9 @@ ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
int error = 0, flags, update;
mode_t accessmode;
if (dolog)
mp->mnt_flag |= MNT_LOG;
if (*data_len < sizeof *args)
return EINVAL;
@ -378,13 +421,31 @@ ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
return (error);
}
#ifdef WAPBL
/*
* WAPBL can only be enabled on a r/w mount
* that does not use softdep.
*/
if ((mp->mnt_flag & MNT_RDONLY) && !(mp->mnt_iflag & IMNT_WANTRDWR)) {
mp->mnt_flag &= ~MNT_LOG;
}
if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_LOG)) ==
(MNT_SOFTDEP | MNT_LOG)) {
printf("%s fs is journalled, ignoring soft update mode\n",
VFSTOUFS(mp)->um_fs->fs_fsmnt);
mp->mnt_flag &= ~MNT_SOFTDEP;
}
#else /* !WAPBL */
mp->mnt_flag &= ~MNT_LOG;
#endif /* !WAPBL */
if (!update) {
int xflags;
if (mp->mnt_flag & MNT_RDONLY)
xflags = FREAD;
else
xflags = FREAD|FWRITE;
xflags = FREAD | FWRITE;
error = VOP_OPEN(devvp, xflags, FSCRED);
if (error)
goto fail;
@ -439,6 +500,8 @@ ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
fs->fs_pendingblocks = 0;
fs->fs_pendinginodes = 0;
}
if (error == 0)
error = UFS_WAPBL_BEGIN(mp);
if (error == 0 &&
ffs_cgupdate(ump, MNT_WAIT) == 0 &&
fs->fs_clean & FS_WASCLEAN) {
@ -447,8 +510,24 @@ ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
fs->fs_clean = FS_ISCLEAN;
(void) ffs_sbupdate(ump, MNT_WAIT);
}
if (error == 0)
UFS_WAPBL_END(mp);
if (error)
return (error);
}
#ifdef WAPBL
if ((mp->mnt_flag & MNT_LOG) == 0) {
error = ffs_wapbl_stop(mp, mp->mnt_flag & MNT_FORCE);
if (error)
return error;
}
#endif /* WAPBL */
if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
/*
* Finish change from r/w to r/o
*/
fs->fs_ronly = 1;
fs->fs_fmod = 0;
}
@ -508,9 +587,30 @@ ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
if (error)
return (error);
}
#ifdef WAPBL
if (fs->fs_flags & FS_DOWAPBL) {
printf("%s: replaying log to disk\n",
fs->fs_fsmnt);
KDASSERT(mp->mnt_wapbl_replay);
error = wapbl_replay_write(mp->mnt_wapbl_replay,
devvp);
if (error) {
return error;
}
wapbl_replay_stop(mp->mnt_wapbl_replay);
fs->fs_clean = FS_WASCLEAN;
}
#endif /* WAPBL */
if (fs->fs_snapinum[0] != 0)
ffs_snapshot_mount(mp);
}
#ifdef WAPBL
error = ffs_wapbl_start(mp);
if (error)
return error;
#endif /* WAPBL */
if (args->fspec == NULL)
return EINVAL;
if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
@ -531,17 +631,24 @@ ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
else
fs->fs_flags &= ~FS_DOSOFTDEP;
if (fs->fs_fmod != 0) { /* XXX */
int err;
fs->fs_fmod = 0;
if (fs->fs_clean & FS_WASCLEAN)
fs->fs_time = time_second;
else {
printf("%s: file system not clean (fs_clean=%x); please fsck(8)\n",
mp->mnt_stat.f_mntfromname, fs->fs_clean);
printf("%s: file system not clean (fs_clean=%#x); "
"please fsck(8)\n", mp->mnt_stat.f_mntfromname,
fs->fs_clean);
printf("%s: lost blocks %" PRId64 " files %d\n",
mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks,
fs->fs_pendinginodes);
}
(void) ffs_cgupdate(ump, MNT_WAIT);
err = UFS_WAPBL_BEGIN(mp);
if (err == 0) {
(void) ffs_cgupdate(ump, MNT_WAIT);
UFS_WAPBL_END(mp);
}
}
return (error);
@ -659,7 +766,7 @@ ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
return (error);
}
error = ffs_appleufs_validate(fs->fs_fsmnt,
(struct appleufslabel *)bp->b_data,NULL);
(struct appleufslabel *)bp->b_data, NULL);
if (error == 0)
ump->um_flags |= UFS_ISAPPLEUFS;
brelse(bp, 0);
@ -686,6 +793,17 @@ ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
ffs_oldfscompat_read(fs, ump, sblockloc);
mutex_enter(&ump->um_lock);
ump->um_maxfilesize = fs->fs_maxfilesize;
if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
mp->mnt_stat.f_mntonname, fs->fs_flags,
(mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
if ((mp->mnt_flag & MNT_FORCE) == 0) {
mutex_exit(&ump->um_lock);
return (EINVAL);
}
}
if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
fs->fs_pendingblocks = 0;
fs->fs_pendinginodes = 0;
@ -839,6 +957,17 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
if (error)
return error;
ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
memset(ump, 0, sizeof *ump);
mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE);
error = ffs_snapshot_init(ump);
if (error)
goto out;
ump->um_ops = &ffs_ufsops;
#ifdef WAPBL
sbagain:
#endif
/*
* Try reading the superblock in each of its possible locations.
*/
@ -916,15 +1045,7 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
fs = malloc((u_long)sbsize, M_UFSMNT, M_WAITOK);
memcpy(fs, bp->b_data, sbsize);
ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
memset(ump, 0, sizeof *ump);
mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE);
error = ffs_snapshot_init(ump);
if (error)
goto out;
ump->um_fs = fs;
ump->um_ops = &ffs_ufsops;
#ifdef FFS_EI
if (needswap) {
@ -934,9 +1055,52 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
#endif
fs->fs_flags &= ~FS_SWAPPED;
#ifdef WAPBL
if ((mp->mnt_wapbl_replay == 0) && (fs->fs_flags & FS_DOWAPBL)) {
error = ffs_wapbl_replay_start(mp, fs, devvp);
if (error)
goto out;
if (!ronly) {
/* XXX fsmnt may be stale. */
printf("%s: replaying log to disk\n", fs->fs_fsmnt);
error = wapbl_replay_write(mp->mnt_wapbl_replay, devvp);
if (error)
goto out;
wapbl_replay_stop(mp->mnt_wapbl_replay);
fs->fs_clean = FS_WASCLEAN;
} else {
/* XXX fsmnt may be stale */
printf("%s: replaying log to memory\n", fs->fs_fsmnt);
}
/* Force a re-read of the superblock */
brelse(bp, BC_INVAL);
bp = NULL;
free(fs, M_UFSMNT);
fs = NULL;
goto sbagain;
}
#else /* !WAPBL */
if ((fs->fs_flags & FS_DOWAPBL) && (mp->mnt_flag & MNT_FORCE) == 0) {
error = EPERM;
goto out;
}
#endif /* !WAPBL */
ffs_oldfscompat_read(fs, ump, sblockloc);
ump->um_maxfilesize = fs->fs_maxfilesize;
if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
mp->mnt_stat.f_mntonname, fs->fs_flags,
(mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
if ((mp->mnt_flag & MNT_FORCE) == 0) {
error = EINVAL;
goto out;
}
}
if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
fs->fs_pendingblocks = 0;
fs->fs_pendinginodes = 0;
@ -966,7 +1130,7 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
if (error)
goto out;
error = ffs_appleufs_validate(fs->fs_fsmnt,
(struct appleufslabel *)bp->b_data,NULL);
(struct appleufslabel *)bp->b_data, NULL);
if (error == 0) {
ump->um_flags |= UFS_ISAPPLEUFS;
}
@ -980,6 +1144,36 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
}
#endif
#if 0
/*
* XXX This code changes the behaviour of mounting dirty filesystems, to
* XXX require "mount -f ..." to mount them. This doesn't match what
* XXX mount(8) describes and is disabled for now.
*/
/*
* If the file system is not clean, don't allow it to be mounted
* unless MNT_FORCE is specified. (Note: MNT_FORCE is always set
* for the root file system.)
*/
if (fs->fs_flags & FS_DOWAPBL) {
/*
* wapbl normally expects to be FS_WASCLEAN when the FS_DOWAPBL
* bit is set, although there's a window in unmount where it
* could be FS_ISCLEAN
*/
if ((mp->mnt_flag & MNT_FORCE) == 0 &&
(fs->fs_clean & (FS_WASCLEAN | FS_ISCLEAN)) == 0) {
error = EPERM;
goto out;
}
} else
if ((fs->fs_clean & FS_ISCLEAN) == 0 &&
(mp->mnt_flag & MNT_FORCE) == 0) {
error = EPERM;
goto out;
}
#endif
/*
* verify that we can access the last block in the fs
* if we're mounting read/write.
@ -999,10 +1193,12 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
}
fs->fs_ronly = ronly;
if (ronly == 0) {
fs->fs_clean <<= 1;
fs->fs_fmod = 1;
}
/* Don't bump fs_clean if we're replaying journal */
if (!((fs->fs_flags & FS_DOWAPBL) && (fs->fs_clean & FS_WASCLEAN)))
if (ronly == 0) {
fs->fs_clean <<= 1;
fs->fs_fmod = 1;
}
size = fs->fs_cssize;
blks = howmany(size, fs->fs_fsize);
if (fs->fs_contigsumsize > 0)
@ -1095,6 +1291,24 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
goto out;
}
}
#ifdef WAPBL
if (!ronly) {
KDASSERT(fs->fs_ronly == 0);
/*
* ffs_wapbl_start() needs mp->mnt_stat initialised if it
* needs to create a new log file in-filesystem.
*/
ffs_statvfs(mp, &mp->mnt_stat);
error = ffs_wapbl_start(mp);
if (error) {
free(fs->fs_csp, M_UFSMNT);
goto out;
}
}
#endif /* WAPBL */
if (ronly == 0 && fs->fs_snapinum[0] != 0)
ffs_snapshot_mount(mp);
#ifdef UFS_EXTATTR
@ -1115,6 +1329,15 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
#endif /* UFS_EXTATTR */
return (0);
out:
#ifdef WAPBL
if (mp->mnt_wapbl_replay) {
if (wapbl_replay_isopen(mp->mnt_wapbl_replay))
wapbl_replay_stop(mp->mnt_wapbl_replay);
wapbl_replay_free(mp->mnt_wapbl_replay);
mp->mnt_wapbl_replay = 0;
}
#endif
fstrans_unmount(mp);
if (fs)
free(fs, M_UFSMNT);
@ -1175,7 +1398,7 @@ ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc)
fs->fs_csaddr = fs->fs_old_csaddr;
fs->fs_sblockloc = sblockloc;
fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL);
fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL);
if (fs->fs_old_postblformat == FS_42POSTBLFMT) {
fs->fs_old_nrpos = 8;
@ -1256,6 +1479,9 @@ ffs_unmount(struct mount *mp, int mntflags)
struct ufsmount *ump = VFSTOUFS(mp);
struct fs *fs = ump->um_fs;
int error, flags, penderr;
#ifdef WAPBL
extern int doforce;
#endif
penderr = 0;
flags = 0;
@ -1284,25 +1510,42 @@ ffs_unmount(struct mount *mp, int mntflags)
penderr = 1;
}
mutex_exit(&ump->um_lock);
if (fs->fs_ronly == 0 &&
ffs_cgupdate(ump, MNT_WAIT) == 0 &&
fs->fs_clean & FS_WASCLEAN) {
/*
* XXXX don't mark fs clean in the case of softdep
* pending block errors, until they are fixed.
*/
if (penderr == 0) {
if (mp->mnt_flag & MNT_SOFTDEP)
fs->fs_flags &= ~FS_DOSOFTDEP;
fs->fs_clean = FS_ISCLEAN;
error = UFS_WAPBL_BEGIN(mp);
if (error == 0)
if (fs->fs_ronly == 0 &&
ffs_cgupdate(ump, MNT_WAIT) == 0 &&
fs->fs_clean & FS_WASCLEAN) {
/*
* XXXX don't mark fs clean in the case of softdep
* pending block errors, until they are fixed.
*/
if (penderr == 0) {
if (mp->mnt_flag & MNT_SOFTDEP)
fs->fs_flags &= ~FS_DOSOFTDEP;
fs->fs_clean = FS_ISCLEAN;
}
fs->fs_fmod = 0;
(void) ffs_sbupdate(ump, MNT_WAIT);
}
fs->fs_fmod = 0;
(void) ffs_sbupdate(ump, MNT_WAIT);
if (error == 0)
UFS_WAPBL_END(mp);
#ifdef WAPBL
KASSERT(!(mp->mnt_wapbl_replay && mp->mnt_wapbl));
if (mp->mnt_wapbl_replay) {
KDASSERT(fs->fs_ronly);
wapbl_replay_stop(mp->mnt_wapbl_replay);
wapbl_replay_free(mp->mnt_wapbl_replay);
mp->mnt_wapbl_replay = 0;
}
error = ffs_wapbl_stop(mp, doforce && (mntflags & MNT_FORCE));
if (error) {
return error;
}
#endif /* WAPBL */
if (ump->um_devvp->v_type != VBAD)
ump->um_devvp->v_specmountpoint = NULL;
vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
(void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
(void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE,
NOCRED);
vput(ump->um_devvp);
free(fs->fs_csp, M_UFSMNT);
@ -1335,7 +1578,7 @@ ffs_flushfiles(struct mount *mp, int flags, struct lwp *l)
#ifdef QUOTA
if (mp->mnt_flag & MNT_QUOTA) {
int i;
if ((error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) != 0)
if ((error = vflush(mp, NULLVP, SKIPSYSTEM | flags)) != 0)
return (error);
for (i = 0; i < MAXQUOTAS; i++) {
if (ump->um_quotas[i] == NULLVP)
@ -1363,6 +1606,19 @@ ffs_flushfiles(struct mount *mp, int flags, struct lwp *l)
vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0);
VOP_UNLOCK(ump->um_devvp, 0);
if (flags & FORCECLOSE) /* XXXDBJ */
error = 0;
#ifdef WAPBL
if (error)
return error;
if (mp->mnt_wapbl) {
error = wapbl_flush(mp->mnt_wapbl, 1);
if (flags & FORCECLOSE)
error = 0;
}
#endif
return (error);
}
@ -1447,10 +1703,11 @@ loop:
continue;
mutex_enter(&vp->v_interlock);
ip = VTOI(vp);
if (ip == NULL || (vp->v_iflag & (VI_XLOCK|VI_CLEAN)) != 0 ||
/* XXXpooka: why wapbl check? */
if (ip == NULL || (vp->v_iflag & (VI_XLOCK | VI_CLEAN)) != 0 ||
vp->v_type == VNON || ((ip->i_flag &
(IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 &&
LIST_EMPTY(&vp->v_dirtyblkhd) &&
(LIST_EMPTY(&vp->v_dirtyblkhd) || (mp->mnt_wapbl)) &&
UVM_OBJ_IS_CLEAN(&vp->v_uobj)))
{
mutex_exit(&vp->v_interlock);
@ -1471,11 +1728,16 @@ loop:
}
continue;
}
if (vp->v_type == VREG && waitfor == MNT_LAZY)
error = ffs_update(vp, NULL, NULL, 0);
else
error = VOP_FSYNC(vp, cred,
waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0);
if (vp->v_type == VREG && waitfor == MNT_LAZY) {
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (!error) {
error = ffs_update(vp, NULL, NULL, 0);
UFS_WAPBL_END(vp->v_mount);
}
} else {
error = VOP_FSYNC(vp, cred, FSYNC_NOLOG |
(waitfor == MNT_WAIT ? FSYNC_WAIT : 0), 0, 0);
}
if (error)
allerror = error;
vput(vp);
@ -1498,10 +1760,11 @@ loop:
!LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) {
vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
if ((error = VOP_FSYNC(ump->um_devvp, cred,
waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0)) != 0)
(waitfor == MNT_WAIT ? FSYNC_WAIT : 0) | FSYNC_NOLOG,
0, 0)) != 0)
allerror = error;
VOP_UNLOCK(ump->um_devvp, 0);
if (allerror == 0 && waitfor == MNT_WAIT) {
if (allerror == 0 && waitfor == MNT_WAIT && !mp->mnt_wapbl) {
mutex_enter(&mntvnode_lock);
goto loop;
}
@ -1515,9 +1778,24 @@ loop:
if (fs->fs_fmod != 0) {
fs->fs_fmod = 0;
fs->fs_time = time_second;
if ((error = ffs_cgupdate(ump, waitfor)))
error = UFS_WAPBL_BEGIN(mp);
if (error)
allerror = error;
else {
if ((error = ffs_cgupdate(ump, waitfor)))
allerror = error;
UFS_WAPBL_END(mp);
}
}
#ifdef WAPBL
if (mp->mnt_wapbl) {
error = wapbl_flush(mp->mnt_wapbl, 0);
if (error)
allerror = error;
}
#endif
fstrans_done(mp);
vnfree(mvp);
return (allerror);

View File

@ -1,4 +1,33 @@
/* $NetBSD: ffs_vnops.c,v 1.99 2008/04/29 18:18:09 ad Exp $ */
/* $NetBSD: ffs_vnops.c,v 1.100 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 1982, 1986, 1989, 1993
@ -32,7 +61,12 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.99 2008/04/29 18:18:09 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.100 2008/07/31 05:38:06 simonb Exp $");
#if defined(_KERNEL_OPT)
#include "opt_ffs.h"
#include "opt_wapbl.h"
#endif
#include <sys/param.h>
#include <sys/systm.h>
@ -48,6 +82,7 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.99 2008/04/29 18:18:09 ad Exp $");
#include <sys/pool.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h>
#include <miscfs/fifofs/fifo.h>
@ -58,6 +93,7 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.99 2008/04/29 18:18:09 ad Exp $");
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_wapbl.h>
#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
@ -246,6 +282,9 @@ ffs_fsync(void *v)
int bsize;
daddr_t blk_high;
struct vnode *vp;
#ifdef WAPBL
struct mount *mp;
#endif
vp = ap->a_vp;
@ -255,7 +294,11 @@ ffs_fsync(void *v)
*/
if ((ap->a_offlo == 0 && ap->a_offhi == 0) || DOINGSOFTDEP(vp) ||
(vp->v_type != VREG)) {
error = ffs_full_fsync(vp, ap->a_flags);
int flags = ap->a_flags;
if (vp->v_type == VBLK)
flags |= FSYNC_VFS;
error = ffs_full_fsync(vp, flags);
goto out;
}
@ -276,6 +319,36 @@ ffs_fsync(void *v)
goto out;
}
#ifdef WAPBL
mp = wapbl_vptomp(vp);
if (mp->mnt_wapbl) {
if (ap->a_flags & FSYNC_DATAONLY) {
fstrans_done(vp->v_mount);
return 0;
}
error = 0;
if (vp->v_tag == VT_UFS && VTOI(vp)->i_flag &
(IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY |
IN_MODIFIED | IN_ACCESSED)) {
error = UFS_WAPBL_BEGIN(mp);
if (error) {
fstrans_done(vp->v_mount);
return error;
}
error = ffs_update(vp, NULL, NULL,
(ap->a_flags & FSYNC_WAIT) ? UPDATE_WAIT : 0);
UFS_WAPBL_END(mp);
}
if (error || (ap->a_flags & FSYNC_NOLOG)) {
fstrans_done(vp->v_mount);
return error;
}
error = wapbl_flush(mp->mnt_wapbl, 0);
fstrans_done(vp->v_mount);
return error;
}
#endif /* WAPBL */
/*
* Then, flush indirect blocks.
*/
@ -350,7 +423,7 @@ ffs_full_fsync(struct vnode *vp, int flags)
*/
if (vp->v_type == VREG || vp->v_type == VBLK) {
if ((flags & FSYNC_VFS) != 0)
if ((flags & FSYNC_VFS) != 0 && vp->v_specmountpoint != NULL)
mp = vp->v_specmountpoint;
else
mp = vp->v_mount;
@ -360,8 +433,55 @@ ffs_full_fsync(struct vnode *vp, int flags)
PGO_FREE : 0));
if (error)
return error;
} else
} else {
mp = vp->v_mount;
mutex_exit(&vp->v_interlock);
}
#ifdef WAPBL
if (mp && mp->mnt_wapbl) {
error = 0;
if (flags & FSYNC_DATAONLY)
return error;
if (VTOI(vp) && (VTOI(vp)->i_flag &
(IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY |
IN_MODIFIED | IN_ACCESSED))) {
error = UFS_WAPBL_BEGIN(mp);
if (error)
return error;
error = ffs_update(vp, NULL, NULL,
(flags & FSYNC_WAIT) ? UPDATE_WAIT : 0);
UFS_WAPBL_END(mp);
}
if (error || (flags & FSYNC_NOLOG))
return error;
/*
* Don't flush the log if the vnode being flushed
* contains no dirty buffers that could be in the log.
*/
if (!((flags & FSYNC_RECLAIM) &&
LIST_EMPTY(&vp->v_dirtyblkhd))) {
error = wapbl_flush(mp->mnt_wapbl, 0);
if (error)
return error;
}
/*
* XXX temporary workaround for "dirty bufs" panic in
* vinvalbuf. need a full fix for the v_numoutput
* waiters issues.
*/
if (flags & FSYNC_WAIT) {
mutex_enter(&vp->v_interlock);
while (vp->v_numoutput)
cv_wait(&vp->v_cv, &vp->v_interlock);
mutex_exit(&vp->v_interlock);
}
return error;
}
#endif /* WAPBL */
passes = NIADDR + 1;
skipmeta = 0;
@ -453,8 +573,10 @@ loop:
if (error == 0 && flags & FSYNC_CACHE) {
int i = 0;
if ((flags & FSYNC_VFS) == 0)
if ((flags & FSYNC_VFS) == 0) {
KASSERT(VTOI(vp) != NULL);
vp = VTOI(vp)->i_devvp;
}
VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE, curlwp->l_cred);
}

858
sys/ufs/ffs/ffs_wapbl.c Normal file
View File

@ -0,0 +1,858 @@
/* $NetBSD: ffs_wapbl.c,v 1.2 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 2003,2006,2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_wapbl.c,v 1.2 2008/07/31 05:38:06 simonb Exp $");
#if defined(_KERNEL_OPT)
#include "opt_ffs.h"
#endif
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/file.h>
#include <sys/disk.h>
#include <sys/disklabel.h>
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/kauth.h>
#include <sys/wapbl.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_wapbl.h>
#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
/*
 * Debug tracing for this file.  WAPBL_DEBUG is unconditionally undefined
 * on the next line, so DPRINTF() expands to an empty statement unless
 * that #undef is removed.
 */
#undef WAPBL_DEBUG
#ifdef WAPBL_DEBUG
/* Run-time switch tested by DPRINTF(); non-zero enables the output. */
int ffs_wapbl_debug = 1;
/* Each message is prefixed with the calling function name and line. */
#define DPRINTF(fmt, args...) \
do { \
if (ffs_wapbl_debug) \
printf("%s:%d "fmt, __func__ , __LINE__, ##args); \
} while (/* CONSTCOND */0)
#else
/* Tracing compiled out: the arguments are discarded unevaluated. */
#define DPRINTF(fmt, args...) \
do { \
/* nothing */ \
} while (/* CONSTCOND */0)
#endif
static int wapbl_log_position(struct mount *, struct fs *, struct vnode *,
daddr_t *, size_t *, size_t *, uint64_t *);
static int wapbl_create_infs_log(struct mount *, struct fs *, struct vnode *,
daddr_t *, size_t *, size_t *, uint64_t *);
static void wapbl_find_log_start(struct mount *, struct vnode *, off_t,
daddr_t *, daddr_t *, size_t *);
static int wapbl_remove_log(struct mount *);
static int wapbl_allocate_log_file(struct mount *, struct vnode *);
/*
* This function is invoked after a log is replayed to
* disk to perform logical cleanup actions as described by
* the log
*/
void
ffs_wapbl_replay_finish(struct mount *mp)
{
struct wapbl_replay *wr = mp->mnt_wapbl_replay;
int i;
int error;
if (!wr)
return;
KDASSERT((mp->mnt_flag & MNT_RDONLY) == 0);
for (i = 0; i < wr->wr_inodescnt; i++) {
struct vnode *vp;
struct inode *ip;
error = VFS_VGET(mp, wr->wr_inodes[i].wr_inumber, &vp);
if (error) {
printf("ffs_wapbl_replay_finish: "
"unable to cleanup inode %" PRIu32 "\n",
wr->wr_inodes[i].wr_inumber);
continue;
}
ip = VTOI(vp);
KDASSERT(wr->wr_inodes[i].wr_inumber == ip->i_number);
printf("ffs_wapbl_replay_finish: "
"cleaning inode %" PRIu64 " size=%" PRIu64 " mode=%o nlink=%d\n",
ip->i_number, ip->i_size, ip->i_mode, ip->i_nlink);
KASSERT(ip->i_nlink == 0);
/*
* The journal may have left partially allocated inodes in mode
* zero. This may occur if a crash occurs betweeen the node
* allocation in ffs_nodeallocg and when the node is properly
* initialized in ufs_makeinode. If so, just dallocate them.
*/
if (ip->i_mode == 0) {
UFS_WAPBL_BEGIN(mp);
ffs_vfree(vp, ip->i_number, wr->wr_inodes[i].wr_imode);
UFS_WAPBL_END(mp);
}
vput(vp);
}
mp->mnt_wapbl_replay = 0;
wapbl_replay_free(wr);
}
/*
 * wapbl callback (handed to wapbl_start() by ffs_wapbl_start()).
 *
 * Releases each of the dealloccnt block runs described by the parallel
 * arrays deallocblks[] (device block address) and dealloclens[] (length),
 * then stamps the superblock time and pushes the cylinder group summary
 * out with ffs_cgupdate().
 */
void
ffs_wapbl_sync_metadata(struct mount *mp, daddr_t *deallocblks,
    int *dealloclens, int dealloccnt)
{
	struct ufsmount *const ump = VFSTOUFS(mp);
	struct fs *const fs = ump->um_fs;
	int n, rv;

#ifdef WAPBL_DEBUG_INODES
	ufs_wapbl_verify_inodes(mp, "ffs_wapbl_sync_metadata");
#endif

	n = 0;
	while (n < dealloccnt) {
		/*
		 * ffs_blkfree() does not report errors; it may fail
		 * silently if the cylinder group block cannot be read.
		 */
		ffs_blkfree(fs, ump->um_devvp,
		    dbtofsb(fs, deallocblks[n]), dealloclens[n], -1);
		n++;
	}

	fs->fs_fmod = 0;
	fs->fs_time = time_second;

	rv = ffs_cgupdate(ump, 0);
	KASSERT(rv == 0);
}
/*
 * wapbl callback (handed to wapbl_start() by ffs_wapbl_start()) that
 * undoes ffs_wapbl_sync_metadata(): every block run in deallocblks[] /
 * dealloclens[] is handed back to ffs_blkalloc().
 */
void
ffs_wapbl_abort_sync_metadata(struct mount *mp, daddr_t *deallocblks,
    int *dealloclens, int dealloccnt)
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs = ump->um_fs;
	/*
	 * We could dig around for an in-use inode, but ffs_blkalloc does
	 * not really use it, so a stack inode with just the couple of
	 * fields it touches filled in is passed instead.  All other
	 * fields are deliberately left untouched.
	 */
	struct inode fake;
	int n;

	fake.i_uid = 0;
	fake.i_number = -1;
	fake.i_dev = ump->um_dev;
	fake.i_devvp = ump->um_devvp;
	fake.i_fs = fs;

	n = 0;
	while (n < dealloccnt) {
		/*
		 * The earlier blkfree may have failed, in which case this
		 * blkalloc may fail too, so its result is not checked.
		 * If the blkfree did succeed this should not fail, since
		 * the buffer will be locked in the current transaction.
		 */
		ffs_blkalloc(&fake, dbtofsb(fs, deallocblks[n]),
		    dealloclens[n]);
		n++;
	}
}
static int
wapbl_remove_log(struct mount *mp)
{
struct ufsmount *ump = VFSTOUFS(mp);
struct fs *fs = ump->um_fs;
struct vnode *vp;
struct inode *ip;
ino_t log_ino;
int error;
/* If all the log locators are 0, just clean up */
if (fs->fs_journallocs[0] == 0 &&
fs->fs_journallocs[1] == 0 &&
fs->fs_journallocs[2] == 0 &&
fs->fs_journallocs[3] == 0) {
DPRINTF("empty locators, just clear\n");
goto done;
}
switch (fs->fs_journal_location) {
case UFS_WAPBL_JOURNALLOC_NONE:
/* nothing! */
DPRINTF("no log\n");
break;
case UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM:
log_ino = fs->fs_journallocs[UFS_WAPBL_INFS_INO];
DPRINTF("in-fs log, ino = %" PRId64 "\n",log_ino);
/* if no existing log inode, just clear all fields and bail */
if (log_ino == 0)
goto done;
error = VFS_VGET(mp, log_ino, &vp);
if (error != 0) {
printf("ffs_wapbl: vget failed %d\n",
error);
/* clear out log info on error */
goto done;
}
ip = VTOI(vp);
KASSERT(log_ino == ip->i_number);
if ((ip->i_flags & SF_LOG) == 0) {
printf("ffs_wapbl: try to clear non-log inode "
"%" PRId64 "\n", log_ino);
vput(vp);
/* clear out log info on error */
goto done;
}
/*
* remove the log inode by setting its link count back
* to zero and bail.
*/
ip->i_ffs_effnlink = 0;
ip->i_nlink = 0;
DIP_ASSIGN(ip, nlink, 0);
if (DOINGSOFTDEP(vp))
softdep_change_linkcnt(ip);
vput(vp);
case UFS_WAPBL_JOURNALLOC_END_PARTITION:
DPRINTF("end-of-partition log\n");
/* no extra work required */
break;
default:
printf("ffs_wapbl: unknown journal type %d\n",
fs->fs_journal_location);
return EINVAL;
}
done:
/* Clear out all previous knowledge of journal */
fs->fs_journal_version = 0;
fs->fs_journal_location = 0;
fs->fs_journal_flags = 0;
fs->fs_journallocs[0] = 0;
fs->fs_journallocs[1] = 0;
fs->fs_journallocs[2] = 0;
fs->fs_journallocs[3] = 0;
(void) ffs_sbupdate(ump, MNT_WAIT);
return 0;
}
int
ffs_wapbl_start(struct mount *mp)
{
struct ufsmount *ump = VFSTOUFS(mp);
struct fs *fs = ump->um_fs;
struct vnode *devvp = ump->um_devvp;
daddr_t off;
size_t count;
size_t blksize;
uint64_t extradata;
int error;
if (mp->mnt_wapbl == 0) {
if (fs->fs_journal_flags & UFS_WAPBL_FLAGS_CLEAR_LOG) {
/* Clear out any existing journal file */
error = wapbl_remove_log(mp);
if (error != 0)
return error;
}
if (mp->mnt_flag & MNT_LOG) {
KDASSERT(fs->fs_ronly == 0);
error = wapbl_log_position(mp, fs, devvp, &off,
&count, &blksize, &extradata);
if (error)
return error;
/* XXX any other consistancy checks here? */
if (blksize != DEV_BSIZE) {
printf("%s: bad blocksize %zd\n", __func__,
blksize);
return EINVAL;
}
error = wapbl_start(&mp->mnt_wapbl, mp, devvp, off,
count, blksize, mp->mnt_wapbl_replay,
ffs_wapbl_sync_metadata,
ffs_wapbl_abort_sync_metadata);
if (error)
return error;
mp->mnt_wapbl_op = &wapbl_ops;
#ifdef WAPBL_DEBUG
printf("%s: enabling logging\n", fs->fs_fsmnt);
#endif
if ((fs->fs_flags & FS_DOWAPBL) == 0) {
UFS_WAPBL_BEGIN(mp);
fs->fs_flags |= FS_DOWAPBL;
error = ffs_sbupdate(ump, MNT_WAIT);
if (error) {
UFS_WAPBL_END(mp);
ffs_wapbl_stop(mp, MNT_FORCE);
return error;
}
UFS_WAPBL_END(mp);
error = wapbl_flush(mp->mnt_wapbl, 1);
if (error) {
ffs_wapbl_stop(mp, MNT_FORCE);
return error;
}
}
} else if (fs->fs_flags & FS_DOWAPBL) {
fs->fs_fmod = 1;
fs->fs_flags &= ~FS_DOWAPBL;
}
}
/*
* It is recommended that you finish replay with logging enabled.
* However, even if logging is not enabled, the remaining log
* replay should be safely recoverable with an fsck, so perform
* it anyway.
*/
if ((fs->fs_ronly == 0) && mp->mnt_wapbl_replay) {
int saveflag = mp->mnt_flag & MNT_RDONLY;
/*
* Make sure MNT_RDONLY is not set so that the inode
* cleanup in ufs_inactive will actually do its work.
*/
mp->mnt_flag &= ~MNT_RDONLY;
ffs_wapbl_replay_finish(mp);
mp->mnt_flag |= saveflag;
KASSERT(fs->fs_ronly == 0);
}
return 0;
}
/*
 * Stop journaling on a mount.
 *
 * Returns 0 when no journal is attached or once it has been shut down.
 * With force == 0, an error from the flush, from opening the final
 * transaction or from wapbl_stop() is returned and the journal stays
 * attached.  With force != 0, flush/transaction errors are skipped over
 * and wapbl_stop() is called regardless.
 */
int
ffs_wapbl_stop(struct mount *mp, int force)
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs = ump->um_fs;
	int error;

	if (!mp->mnt_wapbl)
		return 0;

	KDASSERT(fs->fs_ronly == 0);

	/*
	 * Flush first, so that clearing FS_DOWAPBL is the only change in
	 * the final flush; otherwise a transaction may reorder writes.
	 */
	error = wapbl_flush(mp->mnt_wapbl, 1);
	if (error == 0)
		error = UFS_WAPBL_BEGIN(mp);

	if (error == 0) {
		KASSERT(fs->fs_flags & FS_DOWAPBL);
		fs->fs_flags &= ~FS_DOWAPBL;
		error = ffs_sbupdate(ump, MNT_WAIT);
		KASSERT(error == 0);	/* XXX a bit drastic! */
		UFS_WAPBL_END(mp);
	} else if (!force) {
		return error;
	}
	/* else: forced, go straight to tearing the journal down */

	error = wapbl_stop(mp->mnt_wapbl, force);
	if (error) {
		KASSERT(!force);
		fs->fs_flags |= FS_DOWAPBL;
		return error;
	}

	fs->fs_flags &= ~FS_DOWAPBL; /* Repeat in case of forced error */
	mp->mnt_wapbl = 0;

#ifdef WAPBL_DEBUG
	printf("%s: disabled logging\n", fs->fs_fsmnt);
#endif

	return 0;
}
/*
 * Locate the journal for fs on devvp and open it for replay, storing the
 * replay state in mp->mnt_wapbl_replay.  On success mp->mnt_wapbl_op is
 * pointed at wapbl_ops and 0 is returned; otherwise the error from
 * wapbl_log_position() or wapbl_replay_start() is returned.
 */
int
ffs_wapbl_replay_start(struct mount *mp, struct fs *fs, struct vnode *devvp)
{
	daddr_t logoff;
	size_t logcount, logblksize;
	uint64_t extra;
	int rv;

	rv = wapbl_log_position(mp, fs, devvp, &logoff, &logcount,
	    &logblksize, &extra);
	if (rv == 0)
		rv = wapbl_replay_start(&mp->mnt_wapbl_replay, devvp, logoff,
		    logcount, logblksize);
	if (rv == 0)
		mp->mnt_wapbl_op = &wapbl_ops;

	return rv;
}
/*
 * Report where the journal lives, choosing and recording a location if
 * the superblock does not have one yet.
 *
 * If the superblock already records a journal of the current version,
 * its start, count, block size and extra datum are copied out and 0 is
 * returned (EINVAL for an unknown location type).
 *
 * Otherwise we allocate the journal in one of two positions:
 *
 *  - At the end of the partition after the filesystem if there's
 *    enough space.  "Enough space" is defined as >= 1MB of journal
 *    per 1GB of filesystem or 64MB, whichever is smaller.
 *
 *  - Inside the filesystem.  We try to allocate a contiguous journal
 *    based on the total filesystem size - the target is 1MB of journal
 *    per 1GB of filesystem, up to a maximum journal size of 64MB.  As
 *    a worst case allowing for fragmentation, we'll allocate a journal
 *    1/4 of the desired size but never smaller than 1MB.
 *
 * XXX In the future if we allow for non-contiguous journal files we
 * can tighten the above restrictions.
 *
 * XXX
 * This seems like a lot of duplication both here and in some of
 * the userland tools (fsck_ffs, dumpfs, tunefs) with similar
 * "switch (fs_journal_location)" constructs.  Can we centralise
 * this sort of code somehow/somewhere?
 *
 * Returns 0 or an errno value; the out parameters are only meaningful
 * when 0 is returned.
 */
static int
wapbl_log_position(struct mount *mp, struct fs *fs, struct vnode *devvp,
    daddr_t *startp, size_t *countp, size_t *blksizep, uint64_t *extradatap)
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct partinfo dpart;
	daddr_t logstart, logend, desired_logsize;
	size_t blksize;
	int error;

	if (fs->fs_journal_version == UFS_WAPBL_VERSION) {
		switch (fs->fs_journal_location) {
		case UFS_WAPBL_JOURNALLOC_END_PARTITION:
			DPRINTF("found existing end-of-partition log\n");
			*startp = fs->fs_journallocs[UFS_WAPBL_EPART_ADDR];
			*countp = fs->fs_journallocs[UFS_WAPBL_EPART_COUNT];
			*blksizep = fs->fs_journallocs[UFS_WAPBL_EPART_BLKSZ];
			/* Never leave an out parameter unset on success. */
			*extradatap =
			    fs->fs_journallocs[UFS_WAPBL_EPART_UNUSED];
			DPRINTF(" start = %" PRId64 ", size = %zu, "
			    "blksize = %zu\n", *startp, *countp, *blksizep);
			return 0;

		case UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM:
			DPRINTF("found existing in-filesystem log\n");
			*startp = fs->fs_journallocs[UFS_WAPBL_INFS_ADDR];
			*countp = fs->fs_journallocs[UFS_WAPBL_INFS_COUNT];
			*blksizep = fs->fs_journallocs[UFS_WAPBL_INFS_BLKSZ];
			/* Never leave an out parameter unset on success. */
			*extradatap = fs->fs_journallocs[UFS_WAPBL_INFS_INO];
			DPRINTF(" start = %" PRId64 ", size = %zu, "
			    "blksize = %zu\n", *startp, *countp, *blksizep);
			return 0;

		default:
			printf("ffs_wapbl: unknown journal type %d\n",
			    fs->fs_journal_location);
			return EINVAL;
		}
	}

	/* Desired journal size, in bytes, clamped to [MIN, MAX]. */
	desired_logsize =
	    lfragtosize(fs, fs->fs_size) / UFS_WAPBL_JOURNAL_SCALE;
	DPRINTF("desired log size = %" PRId64 " kB\n", desired_logsize / 1024);
	desired_logsize = max(desired_logsize, UFS_WAPBL_MIN_JOURNAL_SIZE);
	desired_logsize = min(desired_logsize, UFS_WAPBL_MAX_JOURNAL_SIZE);
	DPRINTF("adjusted desired log size = %" PRId64 " kB\n",
	    desired_logsize / 1024);

	/* Is there space after the filesystem on the partition for a log? */
	logstart = fsbtodb(fs, fs->fs_size);
	error = VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, FSCRED);
	if (!error) {
		logend = dpart.part->p_size;
		blksize = dpart.disklab->d_secsize;
	} else {
		struct dkwedge_info dkw;

		/* Not a disklabel partition; try it as a wedge. */
		error = VOP_IOCTL(devvp, DIOCGWEDGEINFO, &dkw, FREAD, FSCRED);
		if (error)
			return error;

		blksize = DEV_BSIZE;
		logend = dkw.dkw_size;
	}

	/*
	 * logstart and logend count device blocks, whereas desired_logsize
	 * is in bytes, so scale the free space to bytes before comparing.
	 * A non-positive difference yields a non-positive product and is
	 * rejected, as before.
	 */
	if ((logend - logstart) * DEV_BSIZE >= desired_logsize) {
		KDASSERT(blksize != 0);
		DPRINTF("enough space, use end-of-partition log\n");

		*startp = logstart;
		*countp = (logend - logstart);
		*blksizep = blksize;
		*extradatap = 0;

		/* update superblock with log location */
		fs->fs_journal_version = UFS_WAPBL_VERSION;
		fs->fs_journal_location = UFS_WAPBL_JOURNALLOC_END_PARTITION;
		fs->fs_journal_flags = 0;
		fs->fs_journallocs[UFS_WAPBL_EPART_ADDR] = *startp;
		fs->fs_journallocs[UFS_WAPBL_EPART_COUNT] = *countp;
		fs->fs_journallocs[UFS_WAPBL_EPART_BLKSZ] = *blksizep;
		fs->fs_journallocs[UFS_WAPBL_EPART_UNUSED] = *extradatap;

		error = ffs_sbupdate(ump, MNT_WAIT);
		return error;
	}
	DPRINTF("end-of-partition has only %" PRId64 " free\n",
	    logend - logstart);

	/* Not enough room after the filesystem: put the log inside it. */
	error = wapbl_create_infs_log(mp, fs, devvp, startp, countp, blksizep,
	    extradatap);

	ffs_sync(mp, 1, FSCRED);

	return error;
}
/*
 * Try to create a journal log inside the filesystem.
 *
 * A fresh regular-file inode flagged SF_LOG with link count 1 is
 * allocated, and wapbl_allocate_log_file() is asked to give it space.
 * On success the journal location recorded in the superblock is copied
 * to the out parameters and 0 is returned.  On failure the inode's link
 * count is put back to zero and the error is returned.
 */
static int
wapbl_create_infs_log(struct mount *mp, struct fs *fs, struct vnode *devvp,
    daddr_t *startp, size_t *countp, size_t *blksizep, uint64_t *extradatap)
{
	struct vnode *logvp, *rootvp;
	struct inode *logip;
	int rv;

	rv = VFS_ROOT(mp, &rootvp);
	if (rv != 0)
		return rv;

	/* The root vnode is only needed for the allocation call itself. */
	rv = UFS_VALLOC(rootvp, 0 | S_IFREG, NOCRED, &logvp);
	vput(rootvp);
	if (rv != 0)
		return rv;

	logvp->v_type = VREG;
	logip = VTOI(logvp);
	logip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	logip->i_mode = 0 | IFREG;
	DIP_ASSIGN(logip, mode, logip->i_mode);
	logip->i_flags = SF_LOG;
	DIP_ASSIGN(logip, flags, logip->i_flags);
	logip->i_ffs_effnlink = 1;
	logip->i_nlink = 1;
	DIP_ASSIGN(logip, nlink, 1);
	if (DOINGSOFTDEP(logvp))
		softdep_change_linkcnt(logip);
	ffs_update(logvp, NULL, NULL, UPDATE_WAIT);

	rv = wapbl_allocate_log_file(mp, logvp);
	if (rv != 0) {
		/*
		 * The space for the log file could not be allocated:
		 * get rid of the inode again by dropping its link count
		 * back to zero.
		 */
		logip->i_ffs_effnlink = 0;
		logip->i_nlink = 0;
		DIP_ASSIGN(logip, nlink, 0);
		if (DOINGSOFTDEP(logvp))
			softdep_change_linkcnt(logip);
	}

	/*
	 * Either way the vnode is finished with here; once the
	 * place-holder inode for the journal exists it is never needed
	 * again.
	 */
	vput(logvp);
	if (rv != 0)
		return rv;

	*startp = fs->fs_journallocs[UFS_WAPBL_INFS_ADDR];
	*countp = fs->fs_journallocs[UFS_WAPBL_INFS_COUNT];
	*blksizep = fs->fs_journallocs[UFS_WAPBL_INFS_BLKSZ];
	*extradatap = fs->fs_journallocs[UFS_WAPBL_INFS_INO];

	return 0;
}
/*
 * Allocate the on-disk space for an in-filesystem journal file.
 *
 * vp must be an empty file (EEXIST otherwise).  A location is chosen
 * with wapbl_find_log_start(), honouring a size suggested in the
 * superblock when UFS_WAPBL_FLAGS_CREATE_LOG is set for an
 * in-filesystem journal.  The blocks are then allocated with
 * GOP_ALLOC(B_CONTIG) and the journal location is recorded in the
 * superblock, which is written with ffs_sbupdate().
 *
 * Returns 0, ENOSPC if no location was found, or the error from
 * GOP_ALLOC() / ffs_sbupdate().
 *
 * Defined static to agree with the file-scope prototype.
 */
static int
wapbl_allocate_log_file(struct mount *mp, struct vnode *vp)
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs = ump->um_fs;
	/*
	 * Start the results at zero: wapbl_find_log_start() has at least
	 * one failure path that jumps to its exit without storing *size,
	 * and the results are read unconditionally below.
	 * NOTE(review): the tail of wapbl_find_log_start() was not
	 * inspected; if it always stores all three, this is merely
	 * defensive.
	 */
	daddr_t addr = 0, indir_addr = 0;
	off_t logsize;
	size_t size = 0;
	int error;

	logsize = 0;
	/* check if there's a suggested log size */
	if ((fs->fs_journal_flags & UFS_WAPBL_FLAGS_CREATE_LOG) &&
	    fs->fs_journal_location == UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM)
		logsize = fs->fs_journallocs[UFS_WAPBL_INFS_COUNT];

	if (vp->v_size > 0) {
		printf("%s: file size (%" PRId64 ") non zero\n", __func__,
		    vp->v_size);
		return EEXIST;
	}
	wapbl_find_log_start(mp, vp, logsize, &addr, &indir_addr, &size);
	if (addr == 0) {
		printf("%s: log not allocated, largest extent is "
		    "%" PRId64 "MB\n", __func__,
		    lblktosize(fs, size) / (1024 * 1024));
		return ENOSPC;
	}

	logsize = lblktosize(fs, size);	/* final log size */

	/* Tell the allocator where the data and indirect blocks should go. */
	VTOI(vp)->i_ffs_first_data_blk = addr;
	VTOI(vp)->i_ffs_first_indir_blk = indir_addr;

	error = GOP_ALLOC(vp, 0, logsize, B_CONTIG, FSCRED);
	if (error) {
		printf("%s: GOP_ALLOC error %d\n", __func__, error);
		return error;
	}

	/* Record the new journal in the superblock, in DEV_BSIZE units. */
	fs->fs_journal_version = UFS_WAPBL_VERSION;
	fs->fs_journal_location = UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM;
	fs->fs_journal_flags = 0;
	fs->fs_journallocs[UFS_WAPBL_INFS_ADDR] =
	    lfragtosize(fs, addr) / DEV_BSIZE;
	fs->fs_journallocs[UFS_WAPBL_INFS_COUNT] = logsize / DEV_BSIZE;
	fs->fs_journallocs[UFS_WAPBL_INFS_BLKSZ] = DEV_BSIZE;
	fs->fs_journallocs[UFS_WAPBL_INFS_INO] = VTOI(vp)->i_number;

	error = ffs_sbupdate(ump, MNT_WAIT);
	return error;
}
/*
* Find a suitable location for the journal in the filesystem.
*
* Our strategy here is to look for a contiguous block of free space
* at least "logsize" bytes in size (plus room for any indirect blocks).
* We start at the middle of the filesystem and check each cylinder
* group working outwards. If "logsize" bytes are not available as a
* single contiguous chunk, then return the address and size of the
* largest chunk found.
*
* XXX
* At what stage does the search fail? Is it reasonable to fail if the
* largest space we could find is less than a quarter of the requested
* space? If the search fails entirely, return a block address of "0"
* to indicate this.
*/
static void
wapbl_find_log_start(struct mount *mp, struct vnode *vp, off_t logsize,
daddr_t *addr, daddr_t *indir_addr, size_t *size)
{
struct ufsmount *ump = VFSTOUFS(mp);
struct fs *fs = ump->um_fs;
struct vnode *devvp = ump->um_devvp;
struct cg *cgp;
struct buf *bp;
uint8_t *blksfree;
daddr_t blkno, best_addr, start_addr;
daddr_t desired_blks, min_desired_blks;
daddr_t freeblks, best_blks;
int bpcg, cg, error, fixedsize, indir_blks, n, s;
#ifdef FFS_EI
const int needswap = UFS_FSNEEDSWAP(fs);
#endif
if (logsize == 0) {
fixedsize = 0; /* We can adjust the size if tight */
logsize = lfragtosize(fs, fs->fs_dsize) /
UFS_WAPBL_JOURNAL_SCALE;
DPRINTF("suggested log size = %" PRId64 "\n", logsize);
logsize = max(logsize, UFS_WAPBL_MIN_JOURNAL_SIZE);
logsize = min(logsize, UFS_WAPBL_MAX_JOURNAL_SIZE);
DPRINTF("adjusted log size = %" PRId64 "\n", logsize);
} else {
fixedsize = 1;
DPRINTF("fixed log size = %" PRId64 "\n", logsize);
}
desired_blks = logsize / fs->fs_bsize;
DPRINTF("desired blocks = %" PRId64 "\n", desired_blks);
/* add in number of indirect blocks needed */
indir_blks = 0;
if (desired_blks >= NDADDR) {
struct indir indirs[NIADDR + 2];
int num;
error = ufs_getlbns(vp, desired_blks, indirs, &num);
if (error) {
printf("%s: ufs_getlbns failed, error %d!\n",
__func__, error);
goto bad;
}
switch (num) {
case 2:
indir_blks = 1; /* 1st level indirect */
break;
case 3:
indir_blks = 1 + /* 1st level indirect */
1 + /* 2nd level indirect */
indirs[1].in_off + 1; /* extra 1st level indirect */
break;
default:
printf("%s: unexpected numlevels %d from ufs_getlbns\n",
__func__, num);
*size = 0;
goto bad;
}
desired_blks += indir_blks;
}
DPRINTF("desired blocks = %" PRId64 " (including indirect)\n",
desired_blks);
/*
* If a specific size wasn't requested, allow for a smaller log
* if we're really tight for space...
*/
min_desired_blks = desired_blks;
if (!fixedsize)
min_desired_blks = desired_blks / 4;
/* Look at number of blocks per CG. If it's too small, bail early. */
bpcg = fragstoblks(fs, fs->fs_fpg);
if (min_desired_blks > bpcg) {
printf("ffs_wapbl: cylinder group size of %" PRId64 " MB "
" is not big enough for journal\n",
lblktosize(fs, bpcg) / (1024 * 1024));
goto bad;
}
/*
* Start with the middle cylinder group, and search outwards in
* both directions until we either find the requested log size
* or reach the start/end of the file system. If we reach the
* start/end without finding enough space for the full requested
* log size, use the largest extent found if it is large enough
* to satisfy the our minimum size.
*
* XXX
* Can we just use the cluster contigsum stuff (esp on UFS2)
* here to simplify this search code?
*/
best_addr = 0;
best_blks = 0;
for (cg = fs->fs_ncg / 2, s = 0, n = 1;
best_blks < desired_blks && cg >= 0 && cg < fs->fs_ncg;
s++, n = -n, cg += n * s) {
DPRINTF("check cg %d of %d\n", cg, fs->fs_ncg);
error = bread(devvp, fsbtodb(fs, cgtod(fs, cg)),
fs->fs_cgsize, FSCRED, 0, &bp);
cgp = (struct cg *)bp->b_data;
if (error || !cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
brelse(bp, 0);
continue;
}
blksfree = cg_blksfree(cgp, needswap);
for (blkno = 0; blkno < bpcg;) {
/* look for next free block */
/* XXX use scanc() and fragtbl[] here? */
for (; blkno < bpcg - min_desired_blks; blkno++)
if (ffs_isblock(fs, blksfree, blkno))
break;
/* past end of search space in this CG? */
if (blkno >= bpcg - min_desired_blks)
break;
/* count how many free blocks in this extent */
start_addr = blkno;
for (freeblks = 0; blkno < bpcg; blkno++, freeblks++)
if (!ffs_isblock(fs, blksfree, blkno))
break;
if (freeblks > best_blks) {
best_blks = freeblks;
best_addr = blkstofrags(fs, start_addr) +
cgbase(fs, cg);
if (freeblks >= desired_blks) {
DPRINTF("found len %" PRId64
" at offset %" PRId64 " in gc\n",
freeblks, start_addr);
break;
}
}
}
brelse(bp, 0);
}
DPRINTF("best found len = %" PRId64 ", wanted %" PRId64
" at addr %" PRId64 "\n", best_blks, desired_blks, best_addr);
if (best_blks < min_desired_blks) {
*addr = 0;
*indir_addr = 0;
} else {
/* put indirect blocks at start, and data blocks after */
*addr = best_addr + blkstofrags(fs, indir_blks);
*indir_addr = best_addr;
}
*size = min(desired_blks, best_blks) - indir_blks;
return;
bad:
*addr = 0;
*indir_addr = 0;
*size = 0;
return;
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: fs.h,v 1.49 2007/12/25 18:33:49 perry Exp $ */
/* $NetBSD: fs.h,v 1.50 2008/07/31 05:38:06 simonb Exp $ */
/*
* Copyright (c) 1982, 1986, 1993
@ -327,7 +327,12 @@ struct fs {
int32_t fs_old_cpc; /* cyl per cycle in postbl */
/* this area is otherwise allocated unless fs_old_flags & FS_FLAGS_UPDATED */
int32_t fs_maxbsize; /* maximum blocking factor permitted */
int64_t fs_sparecon64[17]; /* old rotation block list head */
uint8_t fs_journal_version; /* journal format version */
uint8_t fs_journal_location; /* journal location type */
uint8_t fs_journal_reserved[2];/* reserved for future use */
uint32_t fs_journal_flags; /* journal flags */
uint64_t fs_journallocs[4]; /* location info for journal */
int64_t fs_sparecon64[12]; /* reserved for future use */
int64_t fs_sblockloc; /* byte offset of standard superblock */
struct csum_total fs_cstotal; /* cylinder summary information */
int64_t fs_time; /* last time written */
@ -406,13 +411,17 @@ struct fs {
/*
* File system flags
*/
#define FS_UNCLEAN 0x01 /* file system not clean at mount (unused) */
#define FS_DOSOFTDEP 0x02 /* file system using soft dependencies */
#define FS_NEEDSFSCK 0x04 /* needs sync fsck (FreeBSD compat, unused) */
#define FS_INDEXDIRS 0x08 /* kernel supports indexed directories */
#define FS_ACLS 0x10 /* file system has ACLs enabled */
#define FS_MULTILABEL 0x20 /* file system is MAC multi-label */
#define FS_UNCLEAN 0x001 /* file system not clean at mount (unused) */
#define FS_DOSOFTDEP 0x002 /* file system using soft dependencies */
#define FS_NEEDSFSCK 0x004 /* needs sync fsck (FreeBSD compat, unused) */
#define FS_INDEXDIRS 0x008 /* kernel supports indexed directories */
#define FS_ACLS 0x010 /* file system has ACLs enabled */
#define FS_MULTILABEL 0x020 /* file system is MAC multi-label */
#define FS_FLAGS_UPDATED 0x80 /* flags have been moved to new location */
#define FS_DOWAPBL 0x100 /* Write ahead physical block logging */
/* File system flags that are ok for NetBSD if set in fs_flags */
#define FS_KNOWN_FLAGS (FS_DOSOFTDEP | FS_DOWAPBL)
/*
* File system internal flags, also in fs_flags.

View File

@ -1,4 +1,4 @@
# $NetBSD: files.ufs,v 1.17 2007/12/12 02:56:03 lukem Exp $
# $NetBSD: files.ufs,v 1.18 2008/07/31 05:38:06 simonb Exp $
deffs fs_ffs.h FFS
deffs EXT2FS
@ -34,6 +34,7 @@ file ufs/ffs/ffs_subr.c ffs | mfs | ext2fs
file ufs/ffs/ffs_tables.c ffs | mfs | ext2fs
file ufs/ffs/ffs_vfsops.c ffs | mfs | ext2fs
file ufs/ffs/ffs_vnops.c ffs | mfs | ext2fs
file ufs/ffs/ffs_wapbl.c ffs & wapbl
file ufs/ffs/ffs_appleufs.c ffs & apple_ufs
file ufs/lfs/lfs_alloc.c lfs
@ -62,3 +63,4 @@ file ufs/ufs/ufs_lookup.c ffs | lfs | mfs | ext2fs
file ufs/ufs/ufs_quota.c quota & (ffs | lfs | mfs | ext2fs)
file ufs/ufs/ufs_vfsops.c ffs | lfs | mfs | ext2fs
file ufs/ufs/ufs_vnops.c ffs | lfs | mfs | ext2fs
file ufs/ufs/ufs_wapbl.c ffs & wapbl

View File

@ -1,8 +1,8 @@
# $NetBSD: Makefile,v 1.5 2005/12/11 12:25:28 christos Exp $
# $NetBSD: Makefile,v 1.6 2008/07/31 05:38:06 simonb Exp $
INCSDIR= /usr/include/ufs/ufs
INCS= dinode.h dir.h extattr.h inode.h quota.h ufs_bswap.h ufs_extern.h \
ufsmount.h
ufs_wapbl.h ufsmount.h
.include <bsd.kinc.mk>

View File

@ -1,4 +1,4 @@
/* $NetBSD: inode.h,v 1.51 2008/01/09 16:15:23 ad Exp $ */
/* $NetBSD: inode.h,v 1.52 2008/07/31 05:38:06 simonb Exp $ */
/*
* Copyright (c) 1982, 1989, 1993
@ -51,6 +51,9 @@
*/
struct ffs_inode_ext {
daddr_t *ffs_snapblklist; /* Collect expunged snapshot blocks. */
/* The following two fields are used by the contiguous allocation code only. */
daddr_t ffs_first_data_blk; /* first data block on disk. */
daddr_t ffs_first_indir_blk; /* first indirect block on disk. */
};
struct ext2fs_inode_ext {
@ -113,6 +116,8 @@ struct inode {
struct lfs_inode_ext *lfs;
} inode_ext;
#define i_snapblklist inode_ext.ffs.ffs_snapblklist
#define i_ffs_first_data_blk inode_ext.ffs.ffs_first_data_blk
#define i_ffs_first_indir_blk inode_ext.ffs.ffs_first_indir_blk
#define i_e2fs_last_lblk inode_ext.e2fs.ext2fs_last_lblk
#define i_e2fs_last_blk inode_ext.e2fs.ext2fs_last_blk
/*
@ -219,7 +224,7 @@ struct inode {
#define IN_CLEANING 0x0100 /* LFS: file is being cleaned */
#define IN_ADIROP 0x0200 /* LFS: dirop in progress */
#define IN_SPACECOUNTED 0x0400 /* Blocks to be freed in free count. */
#define IN_PAGING 0x1000 /* LFS: file is on paging queue */
#define IN_PAGING 0x1000 /* LFS: file is on paging queue */
#if defined(_KERNEL)

View File

@ -1,4 +1,4 @@
/* $NetBSD: ufs_inode.c,v 1.75 2008/01/17 10:39:15 ad Exp $ */
/* $NetBSD: ufs_inode.c,v 1.76 2008/07/31 05:38:06 simonb Exp $ */
/*
* Copyright (c) 1991, 1993
@ -37,11 +37,12 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.75 2008/01/17 10:39:15 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.76 2008/07/31 05:38:06 simonb Exp $");
#if defined(_KERNEL_OPT)
#include "opt_ffs.h"
#include "opt_quota.h"
#include "opt_wapbl.h"
#endif
#include <sys/param.h>
@ -52,12 +53,14 @@ __KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.75 2008/01/17 10:39:15 ad Exp $");
#include <sys/kernel.h>
#include <sys/namei.h>
#include <sys/kauth.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h>
#include <sys/kmem.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_wapbl.h>
#ifdef UFS_DIRHASH
#include <ufs/ufs/dirhash.h>
#endif
@ -84,6 +87,9 @@ ufs_inactive(void *v)
struct mount *transmp;
mode_t mode;
int error = 0;
int logged = 0;
UFS_WAPBL_JUNLOCK_ASSERT(vp->v_mount);
transmp = vp->v_mount;
fstrans_start(transmp, FSTRANS_SHARED);
@ -96,6 +102,10 @@ ufs_inactive(void *v)
softdep_releasefile(ip);
if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error)
goto out;
logged = 1;
#ifdef QUOTA
(void)chkiq(ip, -1, NOCRED, 0);
#endif
@ -103,7 +113,35 @@ ufs_inactive(void *v)
ufs_extattr_vnode_inactive(vp, curlwp);
#endif
if (ip->i_size != 0) {
error = UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED);
/*
* When journaling, only truncate one indirect block
* at a time
*/
if (vp->v_mount->mnt_wapbl) {
uint64_t incr = MNINDIR(ip->i_ump) <<
vp->v_mount->mnt_fs_bshift; /* Power of 2 */
uint64_t base = NDADDR <<
vp->v_mount->mnt_fs_bshift;
while (!error && ip->i_size > base + incr) {
/*
* round down to next full indirect
* block boundary.
*/
uint64_t nsize = base +
((ip->i_size - base - 1) &
~(incr - 1));
error = UFS_TRUNCATE(vp, nsize, 0,
NOCRED);
if (error)
break;
UFS_WAPBL_END(vp->v_mount);
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error)
goto out;
}
}
if (!error)
error = UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED);
}
/*
* Setting the mode to zero needs to wait for the inode
@ -125,8 +163,16 @@ ufs_inactive(void *v)
}
if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) {
if (!logged++) {
int err;
err = UFS_WAPBL_BEGIN(vp->v_mount);
if (err)
goto out;
}
UFS_UPDATE(vp, NULL, NULL, 0);
}
if (logged)
UFS_WAPBL_END(vp->v_mount);
out:
/*
* If we are done with the inode, reclaim it
@ -149,6 +195,10 @@ ufs_reclaim(struct vnode *vp)
if (prtactive && vp->v_usecount > 1)
vprint("ufs_reclaim: pushing active", vp);
if (!UFS_WAPBL_BEGIN(vp->v_mount)) {
UFS_UPDATE(vp, NULL, NULL, UPDATE_CLOSE);
UFS_WAPBL_END(vp->v_mount);
}
UFS_UPDATE(vp, NULL, NULL, UPDATE_CLOSE);
/*

View File

@ -1,4 +1,4 @@
/* $NetBSD: ufs_lookup.c,v 1.98 2008/06/05 09:32:29 hannken Exp $ */
/* $NetBSD: ufs_lookup.c,v 1.99 2008/07/31 05:38:06 simonb Exp $ */
/*
* Copyright (c) 1989, 1993
@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ufs_lookup.c,v 1.98 2008/06/05 09:32:29 hannken Exp $");
__KERNEL_RCSID(0, "$NetBSD: ufs_lookup.c,v 1.99 2008/07/31 05:38:06 simonb Exp $");
#ifdef _KERNEL_OPT
#include "opt_ffs.h"
@ -53,6 +53,7 @@ __KERNEL_RCSID(0, "$NetBSD: ufs_lookup.c,v 1.98 2008/06/05 09:32:29 hannken Exp
#include <sys/vnode.h>
#include <sys/kernel.h>
#include <sys/kauth.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h>
#include <sys/proc.h>
#include <sys/kmem.h>
@ -65,6 +66,7 @@ __KERNEL_RCSID(0, "$NetBSD: ufs_lookup.c,v 1.98 2008/06/05 09:32:29 hannken Exp
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_wapbl.h>
#include "fs_ffs.h"
@ -158,7 +160,7 @@ ufs_lookup(void *v)
return (error);
if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) &&
(cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
(nameiop == DELETE || nameiop == RENAME))
return (EROFS);
/*
@ -495,6 +497,7 @@ found:
dp->i_size = dp->i_offset + DIRSIZ(FSFMT(vdp), ep, needswap);
DIP_ASSIGN(dp, size, dp->i_size);
dp->i_flag |= IN_CHANGE | IN_UPDATE;
UFS_WAPBL_UPDATE(vdp, NULL, NULL, UPDATE_DIROP);
}
brelse(bp, 0);
@ -690,11 +693,12 @@ ufs_dirbadentry(struct vnode *dp, struct direct *ep, int entryoffsetinblock)
DIRSIZ(FSFMT(dp), ep, needswap) ||
namlen > FFS_MAXNAMLEN) {
/*return (1); */
printf("First bad, reclen=%x, DIRSIZ=%lu, namlen=%d, flags=%x "
"entryoffsetinblock=%d, dirblksiz = %d\n",
printf("First bad, reclen=%#x, DIRSIZ=%lu, namlen=%d, "
"flags=%#x, entryoffsetinblock=%d, dirblksiz = %d\n",
ufs_rw16(ep->d_reclen, needswap),
(u_long)DIRSIZ(FSFMT(dp), ep, needswap),
namlen, dp->v_mount->mnt_flag, entryoffsetinblock,dirblksiz);
namlen, dp->v_mount->mnt_flag, entryoffsetinblock,
dirblksiz);
goto bad;
}
if (ep->d_ino == 0)
@ -762,6 +766,8 @@ ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp,
const int needswap = UFS_MPNEEDSWAP(ump);
int dirblksiz = ump->um_dirblksiz;
UFS_WAPBL_JLOCK_ASSERT(dvp->v_mount);
error = 0;
cr = cnp->cn_cred;
l = curlwp;
@ -882,6 +888,7 @@ ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp,
dp->i_size = dp->i_offset + dp->i_count;
DIP_ASSIGN(dp, size, dp->i_size);
dp->i_flag |= IN_CHANGE | IN_UPDATE;
UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP);
}
/*
* Get the block containing the space for the new directory entry.
@ -1014,6 +1021,7 @@ ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp,
if (DOINGSOFTDEP(dvp) && (tvp != NULL))
vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
}
UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP);
return (error);
}
@ -1040,6 +1048,8 @@ ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir)
const int needswap = UFS_MPNEEDSWAP(dp->i_ump);
#endif
UFS_WAPBL_JLOCK_ASSERT(dvp->v_mount);
if (flags & DOWHITEOUT) {
/*
* Whiteout entry: set d_ino to WINO.
@ -1105,6 +1115,7 @@ out:
ip->i_nlink--;
DIP_ASSIGN(ip, nlink, ip->i_nlink);
ip->i_flag |= IN_CHANGE;
UFS_WAPBL_UPDATE(ITOV(ip), NULL, NULL, 0);
}
error = VOP_BWRITE(bp);
}
@ -1118,6 +1129,7 @@ out:
if (ip != 0 && (ip->i_flags & SF_SNAPSHOT) != 0 &&
ip->i_ffs_effnlink == 0)
ffs_snapgone(ip);
UFS_WAPBL_UPDATE(dvp, NULL, NULL, 0);
#endif
return (error);
}
@ -1151,6 +1163,7 @@ ufs_dirrewrite(struct inode *dp, struct inode *oip, ino_t newinum, int newtype,
oip->i_nlink--;
DIP_ASSIGN(oip, nlink, oip->i_nlink);
oip->i_flag |= IN_CHANGE;
UFS_WAPBL_UPDATE(ITOV(oip), NULL, NULL, UPDATE_DIROP);
error = VOP_BWRITE(bp);
}
dp->i_flag |= iflags;
@ -1162,6 +1175,7 @@ ufs_dirrewrite(struct inode *dp, struct inode *oip, ino_t newinum, int newtype,
*/
if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_ffs_effnlink == 0)
ffs_snapgone(oip);
UFS_WAPBL_UPDATE(vdp, NULL, NULL, UPDATE_DIROP);
#endif
return (error);
}
@ -1333,8 +1347,8 @@ ufs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp,
const int bsize = 1 << bshift;
off_t eof;
blks = kmem_alloc((1+dirrablks) * sizeof(daddr_t), KM_SLEEP);
blksizes = kmem_alloc((1+dirrablks) * sizeof(int), KM_SLEEP);
blks = kmem_alloc((1 + dirrablks) * sizeof(daddr_t), KM_SLEEP);
blksizes = kmem_alloc((1 + dirrablks) * sizeof(int), KM_SLEEP);
ip = VTOI(vp);
KASSERT(vp->v_size == ip->i_size);
GOP_SIZE(vp, vp->v_size, &eof, 0);
@ -1370,7 +1384,7 @@ ufs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp,
*bpp = bp;
out:
kmem_free(blks, (1+dirrablks) * sizeof(daddr_t));
kmem_free(blksizes, (1+dirrablks) * sizeof(int));
kmem_free(blks, (1 + dirrablks) * sizeof(daddr_t));
kmem_free(blksizes, (1 + dirrablks) * sizeof(int));
return error;
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: ufs_readwrite.c,v 1.88 2008/05/16 09:22:01 hannken Exp $ */
/* $NetBSD: ufs_readwrite.c,v 1.89 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 1993
@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.88 2008/05/16 09:22:01 hannken Exp $");
__KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.89 2008/07/31 05:38:06 simonb Exp $");
#ifdef LFS_READWRITE
#define FS struct lfs
@ -43,6 +43,9 @@ __KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.88 2008/05/16 09:22:01 hannken E
#define WRITE_S "lfs_write"
#define fs_bsize lfs_bsize
#define fs_bmask lfs_bmask
#define UFS_WAPBL_BEGIN(mp) 0
#define UFS_WAPBL_END(mp) do { } while (0)
#define UFS_WAPBL_UPDATE(vp, access, modify, flags) do { } while (0)
#else
#define FS struct fs
#define I_FS i_fs
@ -177,8 +180,15 @@ READ(void *v)
out:
if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
ip->i_flag |= IN_ACCESS;
if ((ap->a_ioflag & IO_SYNC) == IO_SYNC)
if ((ap->a_ioflag & IO_SYNC) == IO_SYNC) {
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error) {
fstrans_done(vp->v_mount);
return error;
}
error = UFS_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
UFS_WAPBL_END(vp->v_mount);
}
}
fstrans_done(vp->v_mount);
@ -283,6 +293,15 @@ WRITE(void *v)
error = 0;
usepc = vp->v_type == VREG;
if ((ioflag & IO_JOURNALLOCKED) == 0) {
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error) {
fstrans_done(vp->v_mount);
return error;
}
}
#ifdef LFS_READWRITE
async = true;
lfs_check(vp, LFS_UNUSED_LBN, 0);
@ -511,8 +530,11 @@ out:
uio->uio_resid = resid;
} else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC)
error = UFS_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
else
UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
KASSERT(vp->v_size == ip->i_size);
if ((ioflag & IO_JOURNALLOCKED) == 0)
UFS_WAPBL_END(vp->v_mount);
fstrans_done(vp->v_mount);
return (error);

View File

@ -1,4 +1,33 @@
/* $NetBSD: ufs_vnops.c,v 1.166 2008/06/02 16:00:33 ad Exp $ */
/* $NetBSD: ufs_vnops.c,v 1.167 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 1982, 1986, 1989, 1993, 1995
@ -37,7 +66,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.166 2008/06/02 16:00:33 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.167 2008/07/31 05:38:06 simonb Exp $");
#if defined(_KERNEL_OPT)
#include "opt_ffs.h"
@ -60,6 +89,7 @@ __KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.166 2008/06/02 16:00:33 ad Exp $");
#include <sys/dirent.h>
#include <sys/lockf.h>
#include <sys/kauth.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h>
#include <miscfs/specfs/specdev.h>
@ -70,6 +100,7 @@ __KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.166 2008/06/02 16:00:33 ad Exp $");
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_wapbl.h>
#ifdef UFS_DIRHASH
#include <ufs/ufs/dirhash.h>
#endif
@ -105,13 +136,20 @@ ufs_create(void *v)
} */ *ap = v;
int error;
/*
* UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful
* ufs_makeinode
*/
fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED);
error =
ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
ap->a_dvp, ap->a_vpp, ap->a_cnp);
fstrans_done(ap->a_dvp->v_mount);
if (error)
if (error) {
fstrans_done(ap->a_dvp->v_mount);
return (error);
}
UFS_WAPBL_END1(ap->a_dvp->v_mount, ap->a_dvp);
fstrans_done(ap->a_dvp->v_mount);
VN_KNOTE(ap->a_dvp, NOTE_WRITE);
return (0);
}
@ -138,6 +176,11 @@ ufs_mknod(void *v)
vap = ap->a_vap;
vpp = ap->a_vpp;
/*
* UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful
* ufs_makeinode
*/
fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED);
if ((error =
ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
@ -161,6 +204,8 @@ ufs_mknod(void *v)
ip->i_ffs2_rdev = ufs_rw64(vap->va_rdev,
UFS_MPNEEDSWAP(ump));
}
UFS_WAPBL_UPDATE(*vpp, NULL, NULL, 0);
UFS_WAPBL_END1(ap->a_dvp->v_mount, ap->a_dvp);
/*
* Remove inode so that it will be reloaded by VFS_VGET and
* checked to see if it is an alias of an existing entry in
@ -394,8 +439,8 @@ ufs_setattr(void *v)
goto out;
}
if (kauth_cred_geteuid(cred) != ip->i_uid &&
(error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER,
NULL)))
(error = kauth_authorize_generic(cred,
KAUTH_GENERIC_ISSUSER, NULL)))
goto out;
if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER,
NULL) == 0) {
@ -411,6 +456,9 @@ ufs_setattr(void *v)
error = EPERM;
goto out;
}
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error)
goto out;
ip->i_flags = vap->va_flags;
DIP_ASSIGN(ip, flags, ip->i_flags);
} else {
@ -424,11 +472,16 @@ ufs_setattr(void *v)
error = EPERM;
goto out;
}
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error)
goto out;
ip->i_flags &= SF_SETTABLE;
ip->i_flags |= (vap->va_flags & UF_SETTABLE);
DIP_ASSIGN(ip, flags, ip->i_flags);
}
ip->i_flag |= IN_CHANGE;
UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
UFS_WAPBL_END(vp->v_mount);
if (vap->va_flags & (IMMUTABLE | APPEND)) {
error = 0;
goto out;
@ -446,7 +499,11 @@ ufs_setattr(void *v)
error = EROFS;
goto out;
}
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error)
goto out;
error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, l);
UFS_WAPBL_END(vp->v_mount);
if (error)
goto out;
}
@ -466,14 +523,46 @@ ufs_setattr(void *v)
break;
case VREG:
if (vp->v_mount->mnt_flag & MNT_RDONLY) {
error = EROFS;
goto out;
error = EROFS;
goto out;
}
if ((ip->i_flags & SF_SNAPSHOT) != 0) {
error = EPERM;
goto out;
}
error = UFS_TRUNCATE(vp, vap->va_size, 0, cred);
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error)
goto out;
/*
* When journaling, only truncate one indirect block
* at a time.
*/
if (vp->v_mount->mnt_wapbl) {
uint64_t incr = MNINDIR(ip->i_ump) <<
vp->v_mount->mnt_fs_bshift; /* Power of 2 */
uint64_t base = NDADDR <<
vp->v_mount->mnt_fs_bshift;
while (!error && ip->i_size > base + incr &&
ip->i_size > vap->va_size + incr) {
/*
* round down to next full indirect
* block boundary.
*/
uint64_t nsize = base +
((ip->i_size - base - 1) &
~(incr - 1));
error = UFS_TRUNCATE(vp, nsize, 0,
cred);
if (error == 0) {
UFS_WAPBL_END(vp->v_mount);
error =
UFS_WAPBL_BEGIN(vp->v_mount);
}
}
}
if (!error)
error = UFS_TRUNCATE(vp, vap->va_size, 0, cred);
UFS_WAPBL_END(vp->v_mount);
if (error)
goto out;
break;
@ -494,11 +583,14 @@ ufs_setattr(void *v)
goto out;
}
if (kauth_cred_geteuid(cred) != ip->i_uid &&
(error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER,
NULL)) &&
(error = kauth_authorize_generic(cred,
KAUTH_GENERIC_ISSUSER, NULL)) &&
((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
(error = VOP_ACCESS(vp, VWRITE, cred))))
goto out;
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error)
goto out;
if (vap->va_atime.tv_sec != VNOVAL)
if (!(vp->v_mount->mnt_flag & MNT_NOATIME))
ip->i_flag |= IN_ACCESS;
@ -510,6 +602,7 @@ ufs_setattr(void *v)
ip->i_ffs2_birthnsec = vap->va_birthtime.tv_nsec;
}
error = UFS_UPDATE(vp, &vap->va_atime, &vap->va_mtime, 0);
UFS_WAPBL_END(vp->v_mount);
if (error)
goto out;
}
@ -525,7 +618,11 @@ ufs_setattr(void *v)
error = EPERM;
goto out;
}
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error)
goto out;
error = ufs_chmod(vp, (int)vap->va_mode, cred, l);
UFS_WAPBL_END(vp->v_mount);
}
VN_KNOTE(vp, NOTE_ATTRIB);
out:
@ -543,6 +640,8 @@ ufs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct lwp *l)
struct inode *ip;
int error, ismember = 0;
UFS_WAPBL_JLOCK_ASSERT(vp->v_mount);
ip = VTOI(vp);
if (kauth_cred_geteuid(cred) != ip->i_uid &&
(error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL)))
@ -558,6 +657,7 @@ ufs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct lwp *l)
ip->i_mode |= (mode & ALLPERMS);
ip->i_flag |= IN_CHANGE;
DIP_ASSIGN(ip, mode, ip->i_mode);
UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
return (0);
}
@ -626,6 +726,7 @@ ufs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred,
good:
#endif /* QUOTA */
ip->i_flag |= IN_CHANGE;
UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
return (0);
}
@ -649,8 +750,13 @@ ufs_remove(void *v)
if (vp->v_type == VDIR || (ip->i_flags & (IMMUTABLE | APPEND)) ||
(VTOI(dvp)->i_flags & APPEND))
error = EPERM;
else
error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0);
else {
error = UFS_WAPBL_BEGIN(dvp->v_mount);
if (error == 0) {
error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0);
UFS_WAPBL_END(dvp->v_mount);
}
}
VN_KNOTE(vp, NOTE_DELETE);
VN_KNOTE(dvp, NOTE_WRITE);
if (dvp == vp)
@ -720,6 +826,11 @@ ufs_link(void *v)
error = EPERM;
goto out1;
}
error = UFS_WAPBL_BEGIN(vp->v_mount);
if (error) {
VOP_ABORTOP(dvp, cnp);
goto out1;
}
ip->i_ffs_effnlink++;
ip->i_nlink++;
DIP_ASSIGN(ip, nlink, ip->i_nlink);
@ -738,10 +849,12 @@ ufs_link(void *v)
ip->i_nlink--;
DIP_ASSIGN(ip, nlink, ip->i_nlink);
ip->i_flag |= IN_CHANGE;
UFS_WAPBL_UPDATE(vp, NULL, NULL, UPDATE_DIROP);
if (DOINGSOFTDEP(vp))
softdep_change_linkcnt(ip);
}
PNBUF_PUT(cnp->cn_pnbuf);
UFS_WAPBL_END(vp->v_mount);
out1:
if (dvp != vp)
VOP_UNLOCK(vp, 0);
@ -865,6 +978,11 @@ ufs_rename(void *v)
struct direct *newdir;
int doingdirectory, oldparent, newparent, error;
#ifdef WAPBL
if (ap->a_tdvp->v_mount->mnt_wapbl)
return wapbl_ufs_rename(v);
#endif
tvp = ap->a_tvp;
tdvp = ap->a_tdvp;
fvp = ap->a_fvp;
@ -1297,6 +1415,9 @@ ufs_mkdir(void *v)
*/
if ((error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, ap->a_vpp)) != 0)
goto out;
error = UFS_WAPBL_BEGIN(ap->a_dvp->v_mount);
if (error)
goto out;
tvp = *ap->a_vpp;
ip = VTOI(tvp);
ip->i_uid = kauth_cred_geteuid(cnp->cn_cred);
@ -1307,6 +1428,7 @@ ufs_mkdir(void *v)
if ((error = chkiq(ip, 1, cnp->cn_cred, 0))) {
PNBUF_PUT(cnp->cn_pnbuf);
UFS_VFREE(tvp, ip->i_number, dmode);
UFS_WAPBL_END(dvp->v_mount);
fstrans_done(dvp->v_mount);
vput(tvp);
vput(dvp);
@ -1412,11 +1534,13 @@ ufs_mkdir(void *v)
bad:
if (error == 0) {
VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
UFS_WAPBL_END(dvp->v_mount);
} else {
dp->i_ffs_effnlink--;
dp->i_nlink--;
DIP_ASSIGN(dp, nlink, dp->i_nlink);
dp->i_flag |= IN_CHANGE;
UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP);
if (DOINGSOFTDEP(dvp))
softdep_change_linkcnt(dp);
/*
@ -1431,8 +1555,10 @@ ufs_mkdir(void *v)
/* If IN_ADIROP, account for it */
lfs_unmark_vnode(tvp);
#endif
UFS_WAPBL_UPDATE(tvp, NULL, NULL, UPDATE_DIROP);
if (DOINGSOFTDEP(tvp))
softdep_change_linkcnt(ip);
UFS_WAPBL_END(dvp->v_mount);
vput(tvp);
}
out:
@ -1496,6 +1622,9 @@ ufs_rmdir(void *v)
error = EPERM;
goto out;
}
error = UFS_WAPBL_BEGIN(dvp->v_mount);
if (error)
goto out;
/*
* Delete reference to directory before purging
* inode. If we crash in between, the directory
@ -1515,6 +1644,7 @@ ufs_rmdir(void *v)
softdep_change_linkcnt(dp);
softdep_change_linkcnt(ip);
}
UFS_WAPBL_END(dvp->v_mount);
goto out;
}
VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
@ -1531,6 +1661,7 @@ ufs_rmdir(void *v)
dp->i_ffs_effnlink--;
DIP_ASSIGN(dp, nlink, dp->i_nlink);
dp->i_flag |= IN_CHANGE;
UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP);
ip->i_nlink--;
ip->i_ffs_effnlink--;
DIP_ASSIGN(ip, nlink, ip->i_nlink);
@ -1538,6 +1669,11 @@ ufs_rmdir(void *v)
error = UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred);
}
cache_purge(vp);
/*
* Unlock the log while we still have reference to unlinked
* directory vp so that it will not get locked for recycling
*/
UFS_WAPBL_END(dvp->v_mount);
#ifdef UFS_DIRHASH
if (ip->i_dirhash != NULL)
ufsdirhash_free(ip);
@ -1576,6 +1712,10 @@ ufs_symlink(void *v)
int len, error;
vpp = ap->a_vpp;
/*
* UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful
* ufs_makeinode
*/
fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED);
error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
vpp, ap->a_cnp);
@ -1591,10 +1731,12 @@ ufs_symlink(void *v)
DIP_ASSIGN(ip, size, len);
uvm_vnp_setsize(vp, ip->i_size);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
} else
error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, NULL,
NULL);
UIO_SYSSPACE, IO_NODELOCKED | IO_JOURNALLOCKED,
ap->a_cnp->cn_cred, NULL, NULL);
UFS_WAPBL_END1(ap->a_dvp->v_mount, ap->a_dvp);
if (error)
vput(vp);
out:
@ -2096,6 +2238,8 @@ ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
struct vnode *tvp;
int error, ismember = 0;
UFS_WAPBL_JUNLOCK_ASSERT(dvp->v_mount);
pdir = VTOI(dvp);
#ifdef DIAGNOSTIC
if ((cnp->cn_flags & HASBUF) == 0)
@ -2115,9 +2259,22 @@ ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
DIP_ASSIGN(ip, gid, ip->i_gid);
ip->i_uid = kauth_cred_geteuid(cnp->cn_cred);
DIP_ASSIGN(ip, uid, ip->i_uid);
error = UFS_WAPBL_BEGIN1(dvp->v_mount, dvp);
if (error) {
/*
* Note, we can't VOP_VFREE(tvp) here like we should
* because we can't write to the disk. Instead, we leave
* the vnode dangling from the journal.
*/
vput(tvp);
PNBUF_PUT(cnp->cn_pnbuf);
vput(dvp);
return (error);
}
#ifdef QUOTA
if ((error = chkiq(ip, 1, cnp->cn_cred, 0))) {
UFS_VFREE(tvp, ip->i_number, mode);
UFS_WAPBL_END1(dvp->v_mount, dvp);
vput(tvp);
PNBUF_PUT(cnp->cn_pnbuf);
vput(dvp);
@ -2175,9 +2332,11 @@ ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
/* If IN_ADIROP, account for it */
lfs_unmark_vnode(tvp);
#endif
UFS_WAPBL_UPDATE(tvp, NULL, NULL, 0);
if (DOINGSOFTDEP(tvp))
softdep_change_linkcnt(ip);
tvp->v_type = VNON; /* explodes later if VBLK */
UFS_WAPBL_END1(dvp->v_mount, dvp);
vput(tvp);
PNBUF_PUT(cnp->cn_pnbuf);
vput(dvp);
@ -2228,7 +2387,8 @@ ufs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags,
}
out:
return error;
UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
return error;
}
void

805
sys/ufs/ufs/ufs_wapbl.c Normal file
View File

@ -0,0 +1,805 @@
/* $NetBSD: ufs_wapbl.c,v 1.2 2008/07/31 05:38:06 simonb Exp $ */
/*-
* Copyright (c) 2003,2006,2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 1982, 1986, 1989, 1993, 1995
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)ufs_vnops.c 8.28 (Berkeley) 7/31/95
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ufs_wapbl.c,v 1.2 2008/07/31 05:38:06 simonb Exp $");
#if defined(_KERNEL_OPT)
#include "opt_quota.h"
#include "fs_lfs.h"
#endif
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/dirent.h>
#include <sys/lockf.h>
#include <sys/kauth.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h>
#include <miscfs/specfs/specdev.h>
#include <miscfs/fifofs/fifo.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_wapbl.h>
#include <ufs/ext2fs/ext2fs_extern.h>
#include <ufs/lfs/lfs_extern.h>
#include <uvm/uvm.h>
/* XXX following lifted from ufs_lookup.c */
#define FSFMT(vp) (((vp)->v_mount->mnt_iflag & IMNT_DTYPE) == 0)
/*
 * A virgin directory (no blushing please).
 *
 * Positional initializer for a struct dirtemplate holding the "." and ".."
 * entries of a fresh directory.  Within this file the only field read is
 * mastertemplate.dot_reclen, which wapbl_ufs_rename() uses as the offset of
 * the ".." entry when it rewrites ".." for a directory that changed parents.
 * NOTE(review): each row presumably lists ino, reclen, type, namlen, name in
 * that order -- confirm against the struct dirtemplate declaration.
 */
static const struct dirtemplate mastertemplate = {
0, 12, DT_DIR, 1, ".",
0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
};
/*
* Rename vnode operation
* rename("foo", "bar");
* is essentially
* unlink("bar");
* link("foo", "bar");
* unlink("foo");
* but ``atomically''. Can't do full commit without saving state in the
* inode on disk which isn't feasible at this time. Best we can do is
* always guarantee the target exists.
*
* Basic algorithm is:
*
* 1) Bump link count on source while we're linking it to the
* target. This also ensures the inode won't be deleted out
* from underneath us while we work (it may be truncated by
* a concurrent `trunc' or `open' for creation).
* 2) Link source to destination. If destination already exists,
* delete it first.
* 3) Unlink source reference to inode if still around. If a
* directory was moved and the parent of the destination
* is different from the source, patch the ".." entry in the
* directory.
*
* WAPBL NOTE: wapbl_ufs_rename derived from ufs_rename in ufs_vnops.c
* ufs_vnops.c netbsd cvs revision 1.108
* which has the berkeley copyright above
* changes introduced to ufs_rename since netbsd cvs revision 1.164
* will need to be ported into wapbl_ufs_rename
*/
/*
 * WAPBL flavour of the UFS rename vnode operation.
 *
 * v points to a struct vop_rename_args carrying the source directory/vnode/
 * name (a_fdvp, a_fvp, a_fcnp) and the target directory/vnode/name (a_tdvp,
 * a_tvp, a_tcnp); a_tvp is NULL when the target name does not exist.
 *
 * Returns 0 on success or an errno value (EXDEV, EPERM, EINVAL, EMLINK,
 * ENOENT, ENOTEMPTY, ENOTDIR, EISDIR, or whatever a callee returned).
 *
 * Overall shape:
 *   - early sanity checks, which bail out through "abortit";
 *   - fstrans_start(), then optional ufs_checkpath() and re-lookup of the
 *     target, then re-lookup of the source, all before the journal is taken;
 *   - UFS_WAPBL_BEGIN() ... UFS_WAPBL_END() around steps 1-3 of the algorithm
 *     described in the comment above this function;
 *   - common cleanup at "done"/"out2", which ends with fstrans_done().
 */
int
wapbl_ufs_rename(void *v)
{
struct vop_rename_args /* {
struct vnode *a_fdvp;
struct vnode *a_fvp;
struct componentname *a_fcnp;
struct vnode *a_tdvp;
struct vnode *a_tvp;
struct componentname *a_tcnp;
} */ *ap = v;
struct vnode *tvp, *tdvp, *fvp, *fdvp;
struct componentname *tcnp, *fcnp;
/*
 * ip is the source inode as passed in; fxp is the source inode as found
 * by the second lookup; txp/tdp/fdp are target, target dir and source dir.
 */
struct inode *ip, *txp, *fxp, *tdp, *fdp;
struct mount *mp;
struct direct *newdir;
int doingdirectory, oldparent, newparent, error;
/*
 * Directory lookup state (i_count/i_diroff/i_offset/i_reclen/i_endoff)
 * is kept in the directory inode.  When tdvp == fdvp the two lookups
 * overwrite each other's state, so both sets are saved and restored.
 */
int32_t saved_f_count;
doff_t saved_f_diroff;
doff_t saved_f_offset;
u_int32_t saved_f_reclen;
int32_t saved_t_count;
doff_t saved_t_endoff;
doff_t saved_t_diroff;
doff_t saved_t_offset;
u_int32_t saved_t_reclen;
tvp = ap->a_tvp;
tdvp = ap->a_tdvp;
fvp = ap->a_fvp;
fdvp = ap->a_fdvp;
tcnp = ap->a_tcnp;
fcnp = ap->a_fcnp;
doingdirectory = oldparent = newparent = error = 0;
#ifdef DIAGNOSTIC
if ((tcnp->cn_flags & HASBUF) == 0 ||
(fcnp->cn_flags & HASBUF) == 0)
panic("ufs_rename: no name");
#endif
/*
 * Check for cross-device rename.
 */
if ((fvp->v_mount != tdvp->v_mount) ||
(tvp && (fvp->v_mount != tvp->v_mount))) {
error = EXDEV;
/*
 * Shared early-failure exit.  It lives inside this if-body and is
 * reached by "goto abortit" from the checks below.  It runs before
 * fstrans_start() and before the journal is taken, so it releases
 * the vnodes and returns directly instead of going through out2.
 */
abortit:
VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */
if (tdvp == tvp)
vrele(tdvp);
else
vput(tdvp);
if (tvp)
vput(tvp);
VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */
vrele(fdvp);
vrele(fvp);
return (error);
}
/*
 * Refuse to replace a target that is immutable or append-only, and
 * refuse to change an existing entry in an append-only target directory.
 */
if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) ||
(VTOI(tdvp)->i_flags & APPEND))) {
error = EPERM;
goto abortit;
}
/*
 * Check if just deleting a link name: source and target are the same
 * vnode, so the rename reduces to removing the source name.
 */
if (fvp == tvp) {
if (fvp->v_type == VDIR) {
error = EINVAL;
goto abortit;
}
/* Release destination completely. */
VOP_ABORTOP(tdvp, tcnp);
vput(tdvp);
vput(tvp);
/* Delete source. */
vrele(fvp);
fcnp->cn_flags &= ~(MODMASK | SAVESTART);
fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
fcnp->cn_nameiop = DELETE;
vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
if ((error = relookup(fdvp, &fvp, fcnp))) {
vput(fdvp);
return (error);
}
return (VOP_REMOVE(fdvp, fvp, fcnp));
}
/* Lock the source vnode for the link-count and flag checks below. */
if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
goto abortit;
fdp = VTOI(fdvp);
ip = VTOI(fvp);
/* Step 1 below adds a temporary link, so there must be room for it. */
if ((nlink_t) ip->i_nlink >= LINK_MAX) {
VOP_UNLOCK(fvp, 0);
error = EMLINK;
goto abortit;
}
if ((ip->i_flags & (IMMUTABLE | APPEND)) ||
(fdp->i_flags & APPEND)) {
VOP_UNLOCK(fvp, 0);
error = EPERM;
goto abortit;
}
if ((ip->i_mode & IFMT) == IFDIR) {
/*
 * Avoid ".", "..", and aliases of "." for obvious reasons.
 * A directory already flagged IN_RENAME is rejected as well.
 */
if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
fdp == ip ||
(fcnp->cn_flags & ISDOTDOT) ||
(tcnp->cn_flags & ISDOTDOT) ||
(ip->i_flag & IN_RENAME)) {
VOP_UNLOCK(fvp, 0);
error = EINVAL;
goto abortit;
}
ip->i_flag |= IN_RENAME;
doingdirectory = 1;
}
oldparent = fdp->i_number;
VN_KNOTE(fdvp, NOTE_WRITE); /* XXXLUKEM/XXX: right place? */
/*
 * When the target exists, both the directory
 * and target vnodes are returned locked.
 */
tdp = VTOI(tdvp);
txp = NULL;
if (tvp)
txp = VTOI(tvp);
/*
 * From here on every exit must reach out2 so that fstrans_done(mp)
 * balances this fstrans_start().
 */
mp = fdvp->v_mount;
fstrans_start(mp, FSTRANS_SHARED);
/*
 * If ".." must be changed (ie the directory gets a new
 * parent) then the source directory must not be in the
 * directory hierarchy above the target, as this would
 * orphan everything below the source directory. Also
 * the user must have write permission in the source so
 * as to be able to change "..". We must repeat the call
 * to namei, as the parent directory is unlocked by the
 * call to checkpath().
 */
error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred);
VOP_UNLOCK(fvp, 0);
/* newparent stays 0 when source and target share a parent directory. */
if (oldparent != tdp->i_number)
newparent = tdp->i_number;
if (doingdirectory && newparent) {
if (error) /* write access check above */
goto out;
if (txp != NULL)
vput(tvp);
txp = NULL;
vref(tdvp); /* compensate for the ref checkpath loses */
if ((error = ufs_checkpath(ip, tdp, tcnp->cn_cred)) != 0) {
vrele(tdvp);
/* tdp = NULL keeps out2 from releasing tdvp a second time. */
tdp = NULL;
goto out;
}
tcnp->cn_flags &= ~SAVESTART;
tdp = NULL;
vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
error = relookup(tdvp, &tvp, tcnp);
if (error != 0) {
vput(tdvp);
goto out;
}
tdp = VTOI(tdvp);
if (tvp)
txp = VTOI(tvp);
}
/*
 * NOTE(review): when the block above is not taken, the VOP_ACCESS
 * result in "error" is overwritten by the source lookup below without
 * being checked -- confirm that this is intended.
 */
/*
 * XXX handle case where fdvp is parent of tdvp,
 * by unlocking tdvp and regrabbing it with vget after?
 */
/* save directory lookup information in case tdvp == fdvp */
saved_t_count = tdp->i_count;
saved_t_endoff = tdp->i_endoff;
saved_t_diroff = tdp->i_diroff;
saved_t_offset = tdp->i_offset;
saved_t_reclen = tdp->i_reclen;
/*
 * This was moved up to before the journal lock to
 * avoid potential deadlock
 */
fcnp->cn_flags &= ~(MODMASK | SAVESTART);
fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
if (newparent) {
vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
if ((error = relookup(fdvp, &fvp, fcnp))) {
vput(fdvp);
vrele(ap->a_fvp);
goto out2;
}
} else {
error = VOP_LOOKUP(fdvp, &fvp, fcnp);
if (error && (error != EJUSTRETURN)) {
vrele(ap->a_fvp);
goto out2;
}
error = 0;
}
if (fvp != NULL) {
fxp = VTOI(fvp);
fdp = VTOI(fdvp);
} else {
/*
 * From name has disappeared.
 */
if (doingdirectory)
panic("rename: lost dir entry");
vrele(ap->a_fvp);
error = ENOENT; /* XXX ufs_rename sets "0" here */
goto out2;
}
/* Drop the caller's original reference; fvp now comes from the lookup. */
vrele(ap->a_fvp);
/* save directory lookup information in case tdvp == fdvp */
saved_f_count = fdp->i_count;
saved_f_diroff = fdp->i_diroff;
saved_f_offset = fdp->i_offset;
saved_f_reclen = fdp->i_reclen;
/* restore directory lookup information in case tdvp == fdvp */
tdp->i_offset = saved_t_offset;
tdp->i_reclen = saved_t_reclen;
tdp->i_count = saved_t_count;
tdp->i_endoff = saved_t_endoff;
tdp->i_diroff = saved_t_diroff;
/*
 * Open the journal transaction covering steps 1-3.
 * NOTE(review): on failure this jumps to out2, which releases only
 * txp/tdp; fdvp and fvp as returned by the lookup above are not
 * released on this path -- confirm whether that leaks them.
 */
error = UFS_WAPBL_BEGIN(fdvp->v_mount);
if (error)
goto out2;
/*
 * 1) Bump link count while we're moving stuff
 * around. If we crash somewhere before
 * completing our work, the link count
 * may be wrong, but correctable.
 */
ip->i_ffs_effnlink++;
ip->i_nlink++;
DIP_ASSIGN(ip, nlink, ip->i_nlink);
ip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(fvp))
softdep_change_linkcnt(ip);
if ((error = UFS_UPDATE(fvp, NULL, NULL, UPDATE_DIROP)) != 0) {
goto bad;
}
/*
 * 2) If target doesn't exist, link the target
 * to the source and unlink the source.
 * Otherwise, rewrite the target directory
 * entry to reference the source inode and
 * expunge the original entry's existence.
 */
if (txp == NULL) {
if (tdp->i_dev != ip->i_dev)
panic("rename: EXDEV");
/*
 * Account for ".." in new directory.
 * When source and destination have the same
 * parent we don't fool with the link count.
 */
if (doingdirectory && newparent) {
if ((nlink_t)tdp->i_nlink >= LINK_MAX) {
error = EMLINK;
goto bad;
}
tdp->i_ffs_effnlink++;
tdp->i_nlink++;
DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
tdp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tdvp))
softdep_change_linkcnt(tdp);
if ((error = UFS_UPDATE(tdvp, NULL, NULL,
UPDATE_DIROP)) != 0) {
/* Undo the parent's link-count bump before bailing out. */
tdp->i_ffs_effnlink--;
tdp->i_nlink--;
DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
tdp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tdvp))
softdep_change_linkcnt(tdp);
goto bad;
}
}
/* Build the new entry for the source inode and enter it in tdvp. */
newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
ufs_makedirentry(ip, tcnp, newdir);
error = ufs_direnter(tdvp, NULL, newdir, tcnp, NULL);
pool_cache_put(ufs_direct_cache, newdir);
if (error != 0) {
if (doingdirectory && newparent) {
/* Undo the parent's link-count bump taken above. */
tdp->i_ffs_effnlink--;
tdp->i_nlink--;
DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
tdp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tdvp))
softdep_change_linkcnt(tdp);
(void)UFS_UPDATE(tdvp, NULL, NULL,
UPDATE_WAIT | UPDATE_DIROP);
}
goto bad;
}
VN_KNOTE(tdvp, NOTE_WRITE);
} else {
if (txp->i_dev != tdp->i_dev || txp->i_dev != ip->i_dev)
panic("rename: EXDEV");
/*
 * Short circuit rename(foo, foo).
 * The fvp == tvp case returned near the top of this function, so
 * finding the same inode number here is treated as fatal.
 */
if (txp->i_number == ip->i_number)
panic("rename: same file");
/*
 * If the parent directory is "sticky", then the user must
 * own the parent directory, or the destination of the rename,
 * otherwise the destination may not be changed (except by
 * root). This implements append-only directories.
 */
if ((tdp->i_mode & S_ISTXT) &&
kauth_authorize_generic(tcnp->cn_cred,
KAUTH_GENERIC_ISSUSER, NULL) != 0 &&
kauth_cred_geteuid(tcnp->cn_cred) != tdp->i_uid &&
txp->i_uid != kauth_cred_geteuid(tcnp->cn_cred)) {
error = EPERM;
goto bad;
}
/*
 * Target must be empty if a directory and have no links
 * to it. Also, ensure source and target are compatible
 * (both directories, or both not directories).
 */
if ((txp->i_mode & IFMT) == IFDIR) {
if (txp->i_ffs_effnlink > 2 ||
!ufs_dirempty(txp, tdp->i_number, tcnp->cn_cred)) {
error = ENOTEMPTY;
goto bad;
}
if (!doingdirectory) {
error = ENOTDIR;
goto bad;
}
cache_purge(tdvp);
} else if (doingdirectory) {
error = EISDIR;
goto bad;
}
/* Point the existing target entry at the source inode. */
if ((error = ufs_dirrewrite(tdp, txp, ip->i_number,
IFTODT(ip->i_mode), doingdirectory && newparent ?
newparent : doingdirectory, IN_CHANGE | IN_UPDATE)) != 0)
goto bad;
if (doingdirectory) {
if (!newparent) {
tdp->i_ffs_effnlink--;
if (DOINGSOFTDEP(tdvp))
softdep_change_linkcnt(tdp);
}
txp->i_ffs_effnlink--;
if (DOINGSOFTDEP(tvp))
softdep_change_linkcnt(txp);
}
if (doingdirectory && !DOINGSOFTDEP(tvp)) {
/*
 * Truncate inode. The only stuff left in the directory
 * is "." and "..". The "." reference is inconsequential
 * since we are quashing it. We have removed the "."
 * reference and the reference in the parent directory,
 * but there may be other hard links. The soft
 * dependency code will arrange to do these operations
 * after the parent directory entry has been deleted on
 * disk, so when running with that code we avoid doing
 * them now.
 */
if (!newparent) {
tdp->i_nlink--;
DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
tdp->i_flag |= IN_CHANGE;
UFS_WAPBL_UPDATE(tdvp, NULL, NULL, 0);
}
txp->i_nlink--;
DIP_ASSIGN(txp, nlink, txp->i_nlink);
txp->i_flag |= IN_CHANGE;
if ((error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC,
tcnp->cn_cred)))
goto bad;
}
VN_KNOTE(tdvp, NOTE_WRITE);
VN_KNOTE(tvp, NOTE_DELETE);
}
/* restore directory lookup information in case tdvp == fdvp */
fdp->i_offset = saved_f_offset;
fdp->i_reclen = saved_f_reclen;
fdp->i_count = saved_f_count;
fdp->i_diroff = saved_f_diroff;
/*
 * Handle case where the directory we need to remove may have
 * been moved when the directory insertion above performed compaction.
 * or when i_count may be wrong due to insertion before this entry.
 *
 * This applies only when source and target share a directory and the
 * source entry, or the entry preceding it, lies inside the region
 * [saved_t_offset, saved_t_offset + saved_t_count) used by the target
 * insertion.  The source entry is then searched for again by name.
 */
if ((tdp->i_number == fdp->i_number) &&
(((saved_f_offset >= saved_t_offset) &&
(saved_f_offset < saved_t_offset + saved_t_count)) ||
((saved_f_offset - saved_f_count >= saved_t_offset) &&
(saved_f_offset - saved_f_count <
saved_t_offset + saved_t_count)))) {
struct buf *bp;
struct direct *ep;
struct ufsmount *ump = fdp->i_ump;
doff_t endsearch; /* offset to end directory search */
int dirblksiz = ump->um_dirblksiz;
const int needswap = UFS_MPNEEDSWAP(ump);
u_long bmask;
int namlen, entryoffsetinblock;
char *dirbuf;
bmask = fdvp->v_mount->mnt_stat.f_iosize - 1;
/*
 * the fcnp entry will be somewhere between the start of
 * compaction and the original location.
 */
fdp->i_offset = saved_t_offset;
error = ufs_blkatoff(fdvp, (off_t)fdp->i_offset, &dirbuf, &bp,
false);
if (error)
goto bad;
/*
 * keep existing fdp->i_count in case
 * compaction started at the same location as the fcnp entry.
 */
endsearch = saved_f_offset + saved_f_reclen;
entryoffsetinblock = 0;
while (fdp->i_offset < endsearch) {
int reclen;
/*
 * If necessary, get the next directory block.
 */
if ((fdp->i_offset & bmask) == 0) {
if (bp != NULL)
brelse(bp, 0);
error = ufs_blkatoff(fdvp, (off_t)fdp->i_offset,
&dirbuf, &bp, false);
if (error)
goto bad;
entryoffsetinblock = 0;
}
KASSERT(bp != NULL);
ep = (struct direct *)(dirbuf + entryoffsetinblock);
reclen = ufs_rw16(ep->d_reclen, needswap);
/*
 * For FSFMT() directories the name length is read from d_type
 * when the on-disk byte order is little-endian (native little-
 * endian without swapping, or native big-endian with swapping).
 */
#if (BYTE_ORDER == LITTLE_ENDIAN)
if (FSFMT(fdvp) && needswap == 0)
namlen = ep->d_type;
else
namlen = ep->d_namlen;
#else
if (FSFMT(fdvp) && needswap != 0)
namlen = ep->d_type;
else
namlen = ep->d_namlen;
#endif
/* Stop at the first live, non-whiteout entry whose name matches. */
if ((ep->d_ino != 0) &&
(ufs_rw32(ep->d_ino, needswap) != WINO) &&
(namlen == fcnp->cn_namelen) &&
memcmp(ep->d_name, fcnp->cn_nameptr, namlen) == 0) {
fdp->i_reclen = reclen;
break;
}
/* Advance; i_count tracks the size of the entry just skipped. */
fdp->i_offset += reclen;
fdp->i_count = reclen;
entryoffsetinblock += reclen;
}
KASSERT(fdp->i_offset <= endsearch);
/*
 * If fdp->i_offset points to start of a directory block,
 * set fdp->i_count so ufs_dirremove() doesn't compact over
 * a directory block boundary.
 */
if ((fdp->i_offset & (dirblksiz - 1)) == 0)
fdp->i_count = 0;
brelse(bp, 0);
}
/*
 * 3) Unlink the source.
 */
/*
 * Ensure that the directory entry still exists and has not
 * changed while the new name has been entered. If the source is
 * a file then the entry may have been unlinked or renamed. In
 * either case there is no further work to be done. If the source
 * is a directory then it cannot have been rmdir'ed; The IN_RENAME
 * flag ensures that it cannot be moved by another rename or removed
 * by a rmdir.
 */
if (fxp != ip) {
if (doingdirectory)
panic("rename: lost dir entry");
} else {
/*
 * If the source is a directory with a
 * new parent, the link count of the old
 * parent directory must be decremented
 * and ".." set to point to the new parent.
 */
if (doingdirectory && newparent) {
KASSERT(fdp != NULL);
/* ".." sits right after "." in the moved directory. */
fxp->i_offset = mastertemplate.dot_reclen;
/* NOTE(review): the result of this ufs_dirrewrite() is not checked. */
ufs_dirrewrite(fxp, fdp, newparent, DT_DIR, 0, IN_CHANGE);
cache_purge(fdvp);
}
error = ufs_dirremove(fdvp, fxp, fcnp->cn_flags, 0);
fxp->i_flag &= ~IN_RENAME;
}
VN_KNOTE(fvp, NOTE_RENAME);
goto done;
/*
 * Exit for failures after fstrans_start() but before the source has been
 * looked up again: drop the caller's references on fvp/fdvp, then do the
 * common cleanup at out2.
 */
out:
vrele(fvp);
vrele(fdvp);
goto out2;
/* exit routines from steps 1 & 2 */
/*
 * Undo the temporary link-count bump from step 1 and fall through to
 * "done".  IN_RENAME is cleared here twice, and once more at out2.
 */
bad:
if (doingdirectory)
ip->i_flag &= ~IN_RENAME;
ip->i_ffs_effnlink--;
ip->i_nlink--;
DIP_ASSIGN(ip, nlink, ip->i_nlink);
ip->i_flag |= IN_CHANGE;
ip->i_flag &= ~IN_RENAME;
UFS_WAPBL_UPDATE(fvp, NULL, NULL, 0);
if (DOINGSOFTDEP(fvp))
softdep_change_linkcnt(ip);
/* Close the journal transaction and release the source dir and vnode. */
done:
UFS_WAPBL_END(fdvp->v_mount);
vput(fdvp);
vput(fvp);
out2:
/*
 * clear IN_RENAME - some exit paths happen too early to go
 * through the cleanup done in the "bad" case above, so we
 * always do this mini-cleanup here.
 */
ip->i_flag &= ~IN_RENAME;
if (txp)
vput(ITOV(txp));
if (tdp) {
/*
 * With a new parent the target directory is unlocked and released
 * (vput); otherwise only the reference is dropped (vrele).
 */
if (newparent)
vput(ITOV(tdp));
else
vrele(ITOV(tdp));
}
fstrans_done(mp);
return (error);
}
#ifdef WAPBL_DEBUG_INODES
/*
 * Check every buffer on vp's dirty list that is not B_BUSY: each must be
 * both B_DELWRI and B_LOCKED, otherwise panic.
 *
 * Called with vp->v_interlock held; runs the scan at splbio().  "who" is
 * spliced into the panic text so the device vnode can be told apart from
 * regular vnodes ("" or "devvp ").
 */
static void
ufs_wapbl_verify_dirtybufs(struct vnode *vp, const char *who)
{
	struct buf *bp, *nbp;
	int s;

	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		/* Fetch the successor first, before bp is examined. */
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if ((bp->b_flags & B_BUSY)) {
			simple_unlock(&bp->b_interlock);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("wapbl_verify: %snot dirty, bp %p", who, bp);
		if ((bp->b_flags & B_LOCKED) == 0)
			panic("wapbl_verify: %snot locked, bp %p", who, bp);
		simple_unlock(&bp->b_interlock);
	}
	splx(s);
}

/*
 * Debug-only consistency check, compiled under WAPBL_DEBUG_INODES.
 *
 * Walks every vnode on mp and panics if an inode still has IN_CHANGE or
 * IN_UPDATE set (i.e. an update was not pushed), then verifies the dirty
 * buffers of each vnode and finally those of the mount's device vnode.
 *
 * mp:  mount point to verify.
 * str: caller tag ("wapbl_begin"/"wapbl_end" at the call sites in
 *      ufs_wapbl.h); currently not used by this function.
 */
void
ufs_wapbl_verify_inodes(struct mount *mp, const char *str)
{
	struct vnode *vp;
	struct inode *ip;

	simple_lock(&mntvnode_slock);
 loop:
	TAILQ_FOREACH_REVERSE(vp, &mp->mnt_vnodelist, vnodelst, v_mntvnodes) {
		/*
		 * If the vnode that we are about to check is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;
		simple_lock(&vp->v_interlock);
		ip = VTOI(vp);
		if (vp->v_type == VNON) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/* verify that update has been called on all inodes */
		if (ip->i_flag & (IN_CHANGE | IN_UPDATE)) {
			panic("wapbl_verify: mp %p: dirty vnode %p (inode %p): 0x%x\n",
			    mp, vp, ip, ip->i_flag);
		}
		KDASSERT(ip->i_nlink == ip->i_ffs_effnlink);

		/*
		 * The vnode list lock is dropped around the buffer scan and
		 * re-taken before the iterator advances.
		 */
		simple_unlock(&mntvnode_slock);
		ufs_wapbl_verify_dirtybufs(vp, "");
		simple_unlock(&vp->v_interlock);
		simple_lock(&mntvnode_slock);
	}
	simple_unlock(&mntvnode_slock);

	/* Same check for the buffers hanging off the device vnode. */
	vp = VFSTOUFS(mp)->um_devvp;
	simple_lock(&vp->v_interlock);
	ufs_wapbl_verify_dirtybufs(vp, "devvp ");
	simple_unlock(&vp->v_interlock);
}
#endif /* WAPBL_DEBUG_INODES */

176
sys/ufs/ufs/ufs_wapbl.h Normal file
View File

@ -0,0 +1,176 @@
/* $NetBSD: ufs_wapbl.h,v 1.2 2008/07/31 05:38:07 simonb Exp $ */
/*-
* Copyright (c) 2003,2006,2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _UFS_UFS_UFS_WAPBL_H_
#define _UFS_UFS_UFS_WAPBL_H_
#if defined(_KERNEL_OPT)
#include "opt_wapbl.h"
#endif
/*
* Information for the journal location stored in the superblock.
* We store the journal version, some flags, the journal location
* type, and some location specific "locators" that identify where
* the log itself is located.
*/
/* fs->fs_journal_version */
#define UFS_WAPBL_VERSION 1
/*
 * fs->fs_journal_location: where the log lives.  Each location type has
 * its own meaning for the four "locator" slots.
 * NOTE(review): the slot numbers presumably index fs->fs_journallocs[] --
 * confirm against the superblock definition.
 */
#define UFS_WAPBL_JOURNALLOC_NONE 0
/* Log placed at the end of the partition. */
#define UFS_WAPBL_JOURNALLOC_END_PARTITION 1
#define UFS_WAPBL_EPART_ADDR 0 /* locator slots */
#define UFS_WAPBL_EPART_COUNT 1
#define UFS_WAPBL_EPART_BLKSZ 2
#define UFS_WAPBL_EPART_UNUSED 3
/* Log placed inside the file system; slot 3 carries an inode number. */
#define UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM 2
#define UFS_WAPBL_INFS_ADDR 0 /* locator slots */
#define UFS_WAPBL_INFS_COUNT 1
#define UFS_WAPBL_INFS_BLKSZ 2
#define UFS_WAPBL_INFS_INO 3
/* fs->fs_journal_flags (bit flags) */
#define UFS_WAPBL_FLAGS_CREATE_LOG 0x1
#define UFS_WAPBL_FLAGS_CLEAR_LOG 0x2
/*
 * The journal size is limited to between 1MB and 64MB.
 * The default journal size is the filesystem size divided by
 * the scale factor - this is 1M of journal per 1GB of filesystem
 * space.
 *
 * XXX: Is 64MB too limiting? If user explicitly asks for more, allow it?
 */
#define UFS_WAPBL_JOURNAL_SCALE 1024
#define UFS_WAPBL_MIN_JOURNAL_SIZE (1024 * 1024)
#define UFS_WAPBL_MAX_JOURNAL_SIZE (64 * 1024 * 1024)
#if defined(WAPBL)
#if defined(WAPBL_DEBUG)
#define WAPBL_DEBUG_INODES
#endif
int wapbl_ufs_rename(void *v);
#ifdef WAPBL_DEBUG_INODES
void ufs_wapbl_verify_inodes(struct mount *, const char *);
#endif
/*
 * Begin a journal transaction on mp, optionally holding up to two vnodes
 * for its duration.
 *
 * mp:        mount point; when mp->mnt_wapbl is not set this is a no-op.
 * vp1, vp2:  vnodes to take an extra reference on (either may be NULL);
 *            ufs_wapbl_end2() drops these references again.
 * file/line: caller location, passed through to wapbl_begin().
 *
 * Returns 0 on success (or when the mount is not journalled), otherwise the
 * error from wapbl_begin().  On error no transaction is open and no extra
 * vnode references are held, so the caller must not call ufs_wapbl_end2().
 */
static __inline int
ufs_wapbl_begin2(struct mount *mp, struct vnode *vp1, struct vnode *vp2,
		 const char *file, int line)
{
	if (mp->mnt_wapbl) {
		int error;

		if (vp1)
			vref(vp1);
		if (vp2)
			vref(vp2);
		error = wapbl_begin(mp->mnt_wapbl, file, line);
		if (error) {
			/*
			 * The matching ufs_wapbl_end2() will never run, so
			 * give back the references taken above; otherwise
			 * they would be leaked.
			 */
			if (vp2)
				vrele(vp2);
			if (vp1)
				vrele(vp1);
			return error;
		}
#ifdef WAPBL_DEBUG_INODES
		/* Only verify at the outermost level of the journal lock. */
		if (mp->mnt_wapbl->wl_lock.lk_exclusivecount == 1)
			ufs_wapbl_verify_inodes(mp, "wapbl_begin");
#endif
	}
	return 0;
}
/*
 * Close the journal transaction opened by ufs_wapbl_begin2() and drop the
 * vnode references that call took.  vp1/vp2 must be the same vnodes (or
 * NULL) that were handed to ufs_wapbl_begin2().  Does nothing when the
 * mount has no journal attached.
 */
static __inline void
ufs_wapbl_end2(struct mount *mp, struct vnode *vp1, struct vnode *vp2)
{
	/* Not journalled: nothing was begun, nothing was referenced. */
	if (!mp->mnt_wapbl)
		return;

#ifdef WAPBL_DEBUG_INODES
	if (mp->mnt_wapbl->wl_lock.lk_exclusivecount == 1)
		ufs_wapbl_verify_inodes(mp, "wapbl_end");
#endif
	wapbl_end(mp->mnt_wapbl);

	/* Release in the reverse of the order in which begin2 referenced. */
	if (vp2)
		vrele(vp2);
	if (vp1)
		vrele(vp1);
}
/*
 * Transaction brackets for journalled operations.
 *
 * UFS_WAPBL_BEGIN/BEGIN1 evaluate to the int returned by ufs_wapbl_begin2()
 * (0 on success) and record the calling function and line.  The "1" variants
 * additionally pass one vnode, which begin2 references and end2 releases.
 * Each BEGIN must be paired with the END of the same arity.
 */
#define UFS_WAPBL_BEGIN(mp) \
ufs_wapbl_begin2(mp, 0, 0, __FUNCTION__, __LINE__)
#define UFS_WAPBL_BEGIN1(mp, v1) \
ufs_wapbl_begin2(mp, v1, 0, __FUNCTION__, __LINE__)
#define UFS_WAPBL_END(mp) ufs_wapbl_end2(mp, 0, 0)
#define UFS_WAPBL_END1(mp, v1) ufs_wapbl_end2(mp, v1, 0)
/*
 * Call UFS_UPDATE() on vp, but only when vp's mount has a journal attached.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement:
 * it can be the body of an unbraced if/else without swallowing the else,
 * and it matches the shape of the non-WAPBL stub.
 */
#define UFS_WAPBL_UPDATE(vp, access, modify, flags)			\
	do {								\
		if ((vp)->v_mount->mnt_wapbl) {				\
			UFS_UPDATE(vp, access, modify, flags);		\
		}							\
	} while (/*CONSTCOND*/ 0)
/*
 * Journal-lock assertions, active only with UFS_WAPBL_DEBUG_JLOCK.
 *
 * Both variants expand to one do { } while (0) statement, so a call is
 * written the same way in either configuration and is safe as the body of
 * an unbraced if/else.  The mp argument is parenthesized so that any
 * pointer expression can be passed.
 */
#ifdef UFS_WAPBL_DEBUG_JLOCK
#define UFS_WAPBL_JLOCK_ASSERT(mp)					\
	do {								\
		if ((mp)->mnt_wapbl)					\
			wapbl_jlock_assert((mp)->mnt_wapbl);		\
	} while (/*CONSTCOND*/ 0)
#define UFS_WAPBL_JUNLOCK_ASSERT(mp)					\
	do {								\
		if ((mp)->mnt_wapbl)					\
			wapbl_junlock_assert((mp)->mnt_wapbl);		\
	} while (/*CONSTCOND*/ 0)
#else
#define UFS_WAPBL_JLOCK_ASSERT(mp)	do { } while (/*CONSTCOND*/ 0)
#define UFS_WAPBL_JUNLOCK_ASSERT(mp)	do { } while (/*CONSTCOND*/ 0)
#endif
/*
 * Forward inode registration and block deallocation to the journal, but only
 * when the mount has one attached.
 *
 * Each macro is a single do { } while (0) statement (no dangling-else
 * hazard), and mp is parenthesized so expressions such as &mnt or
 * vp->v_mount expand correctly.
 */
#define UFS_WAPBL_REGISTER_INODE(mp, ino, mode)				\
	do {								\
		if ((mp)->mnt_wapbl)					\
			wapbl_register_inode((mp)->mnt_wapbl, ino, mode); \
	} while (/*CONSTCOND*/ 0)
#define UFS_WAPBL_UNREGISTER_INODE(mp, ino, mode)			\
	do {								\
		if ((mp)->mnt_wapbl)					\
			wapbl_unregister_inode((mp)->mnt_wapbl, ino, mode); \
	} while (/*CONSTCOND*/ 0)
#define UFS_WAPBL_REGISTER_DEALLOCATION(mp, blk, len)			\
	do {								\
		if ((mp)->mnt_wapbl)					\
			wapbl_register_deallocation((mp)->mnt_wapbl,	\
			    blk, len);					\
	} while (/*CONSTCOND*/ 0)
#else /* ! WAPBL */
/*
 * Stubs for kernels built without WAPBL.
 *
 * The BEGIN macros evaluate to 0 ("no error").  Every statement-like macro
 * expands to the same empty do { } while (0) statement, so call sites read
 * identically in both configurations and an unbraced "if (x) MACRO(...);"
 * does not end up with an empty body.
 */
#define UFS_WAPBL_BEGIN(mp) 0
#define UFS_WAPBL_BEGIN1(mp, v1) 0
#define UFS_WAPBL_END(mp) do { } while (0)
#define UFS_WAPBL_END1(mp, v1) do { } while (0)
#define UFS_WAPBL_UPDATE(vp, access, modify, flags) do { } while (0)
#define UFS_WAPBL_JLOCK_ASSERT(mp) do { } while (0)
#define UFS_WAPBL_JUNLOCK_ASSERT(mp) do { } while (0)
#define UFS_WAPBL_REGISTER_INODE(mp, ino, mode) do { } while (0)
#define UFS_WAPBL_UNREGISTER_INODE(mp, ino, mode) do { } while (0)
#define UFS_WAPBL_REGISTER_DEALLOCATION(mp, blk, len) do { } while (0)
#endif
#endif /* !_UFS_UFS_UFS_WAPBL_H_ */

View File

@ -1,4 +1,4 @@
/* $NetBSD: dumpfs.c,v 1.49 2008/07/21 13:36:58 lukem Exp $ */
/* $NetBSD: dumpfs.c,v 1.50 2008/07/31 05:38:07 simonb Exp $ */
/*
* Copyright (c) 1983, 1992, 1993
@ -39,7 +39,7 @@ __COPYRIGHT("@(#) Copyright (c) 1983, 1992, 1993\
#if 0
static char sccsid[] = "@(#)dumpfs.c 8.5 (Berkeley) 4/29/95";
#else
__RCSID("$NetBSD: dumpfs.c,v 1.49 2008/07/21 13:36:58 lukem Exp $");
__RCSID("$NetBSD: dumpfs.c,v 1.50 2008/07/31 05:38:07 simonb Exp $");
#endif
#endif /* not lint */
@ -379,6 +379,13 @@ print_superblock(struct fs *fs, uint16_t *opostbl,
fs->fs_old_csshift, fs->fs_old_csmask);
printf("\ncgrotor\t%d\tfmod\t%d\tronly\t%d\tclean\t0x%02x\n",
fs->fs_cgrotor, fs->fs_fmod, fs->fs_ronly, fs->fs_clean);
printf("wapbl version 0x%x\tlocation %u\tflags 0x%x\n",
fs->fs_journal_version, fs->fs_journal_location,
fs->fs_journal_flags);
printf("wapbl loc0 %" PRIu64 "\tloc1 %" PRIu64,
fs->fs_journallocs[0], fs->fs_journallocs[1]);
printf("\tloc1 %" PRIu64 "\tloc2 %" PRIu64 "\n",
fs->fs_journallocs[2], fs->fs_journallocs[3]);
printf("flags\t");
if (fs->fs_flags == 0)
printf("none");
@ -396,8 +403,11 @@ print_superblock(struct fs *fs, uint16_t *opostbl,
printf("multilabel ");
if (fs->fs_flags & FS_FLAGS_UPDATED)
printf("fs_flags expanded ");
fsflags = fs->fs_flags & ~(FS_UNCLEAN | FS_DOSOFTDEP | FS_NEEDSFSCK | FS_INDEXDIRS |
FS_ACLS | FS_MULTILABEL | FS_FLAGS_UPDATED);
if (fs->fs_flags & FS_DOWAPBL)
printf("wapbl ");
fsflags = fs->fs_flags & ~(FS_UNCLEAN | FS_DOSOFTDEP | FS_NEEDSFSCK |
FS_INDEXDIRS | FS_ACLS | FS_MULTILABEL |
FS_FLAGS_UPDATED | FS_DOWAPBL);
if (fsflags != 0)
printf("unknown flags (%#x)", fsflags);
printf("\nfsmnt\t%s\n", fs->fs_fsmnt);