NetBSD/libexec/lfs_cleanerd/library.c

853 lines
22 KiB
C
Raw Normal View History

/* $NetBSD: library.c,v 1.31 2002/12/05 02:03:56 yamt Exp $ */
1997-10-07 17:39:56 +04:00
1994-06-08 22:42:09 +04:00
/*-
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
1997-10-07 17:39:56 +04:00
#include <sys/cdefs.h>
1994-06-08 22:42:09 +04:00
#ifndef lint
1997-10-07 17:39:56 +04:00
#if 0
1998-03-01 05:20:01 +03:00
static char sccsid[] = "@(#)library.c 8.3 (Berkeley) 5/24/95";
1997-10-07 17:39:56 +04:00
#else
__RCSID("$NetBSD: library.c,v 1.31 2002/12/05 02:03:56 yamt Exp $");
1997-10-07 17:39:56 +04:00
#endif
1994-06-08 22:42:09 +04:00
#endif /* not lint */
#include <sys/param.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <ufs/ufs/dinode.h>
#include <ufs/lfs/lfs.h>
1997-10-07 17:39:56 +04:00
#include <err.h>
1994-06-08 22:42:09 +04:00
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <syslog.h>
1994-06-08 22:42:09 +04:00
#include "clean.h"
void add_blocks(FS_INFO *, BLOCK_INFO_15 *, int *, SEGSUM *, caddr_t,
daddr_t, daddr_t);
void add_inodes(FS_INFO *, BLOCK_INFO_15 *, int *, SEGSUM *, caddr_t,
daddr_t);
Merge the short-lived perseant-lfsv2 branch into the trunk. Kernels and tools understand both v1 and v2 filesystems; newfs_lfs generates v2 by default. Changes for the v2 layout include: - Segments of non-PO2 size and arbitrary block offset, so these can be matched to convenient physical characteristics of the partition (e.g., stripe or track size and offset). - Address by fragment instead of by disk sector, paving the way for non-512-byte-sector devices. In theory fragments can be as large as you like, though in reality they must be smaller than MAXBSIZE in size. - Use serial number and filesystem identifier to ensure that roll-forward doesn't get old data and think it's new. Roll-forward is enabled for v2 filesystems, though not for v1 filesystems by default. - The inode free list is now a tailq, paving the way for undelete (undelete is not yet implemented, but can be without further non-backwards-compatible changes to disk structures). - Inode atime information is kept in the Ifile, instead of on the inode; that is, the inode is never written *just* because atime was changed. Because of this the inodes remain near the file data on the disk, rather than wandering all over as the disk is read repeatedly. This speeds up repeated reads by a small but noticeable amount. Other changes of note include: - The ifile written by newfs_lfs can now be of arbitrary length, it is no longer restricted to a single indirect block. - Fixed an old bug where ctime was changed every time a vnode was created. I need to look more closely to make sure that the times are only updated during write(2) and friends, not after-the-fact during a segment write, and certainly not by the cleaner.
2001-07-14 00:30:18 +04:00
int bi_compare(const void *, const void *);
int bi_toss(const void *, const void *, const void *);
void get_ifile(FS_INFO *, int);
int get_superblock(FS_INFO *, struct lfs *);
int pseg_valid(FS_INFO *, SEGSUM *, daddr_t);
int pseg_size(daddr_t, FS_INFO *, SEGSUM *);
1997-10-07 17:39:56 +04:00
extern int debug;
Merge the short-lived perseant-lfsv2 branch into the trunk. Kernels and tools understand both v1 and v2 filesystems; newfs_lfs generates v2 by default. Changes for the v2 layout include: - Segments of non-PO2 size and arbitrary block offset, so these can be matched to convenient physical characteristics of the partition (e.g., stripe or track size and offset). - Address by fragment instead of by disk sector, paving the way for non-512-byte-sector devices. In theory fragments can be as large as you like, though in reality they must be smaller than MAXBSIZE in size. - Use serial number and filesystem identifier to ensure that roll-forward doesn't get old data and think it's new. Roll-forward is enabled for v2 filesystems, though not for v1 filesystems by default. - The inode free list is now a tailq, paving the way for undelete (undelete is not yet implemented, but can be without further non-backwards-compatible changes to disk structures). - Inode atime information is kept in the Ifile, instead of on the inode; that is, the inode is never written *just* because atime was changed. Because of this the inodes remain near the file data on the disk, rather than wandering all over as the disk is read repeatedly. This speeds up repeated reads by a small but noticeable amount. Other changes of note include: - The ifile written by newfs_lfs can now be of arbitrary length, it is no longer restricted to a single indirect block. - Fixed an old bug where ctime was changed every time a vnode was created. I need to look more closely to make sure that the times are only updated during write(2) and friends, not after-the-fact during a segment write, and certainly not by the cleaner.
2001-07-14 00:30:18 +04:00
extern u_long cksum(void *, size_t); /* XXX */
1994-06-08 22:42:09 +04:00
/* Descriptor for the ifile; -1 until get_ifile() opens it, then reused. */
static int ifile_fd = -1;
/* Descriptor for the raw device; -1 until getdevfd() opens it, then reused. */
static int dev_fd = -1;
1994-06-08 22:42:09 +04:00
/*
* This function will get information on a a filesystem which matches
* the name and type given. If a "name" is in a filesystem of the given
* type, then buf is filled with that filesystem's info, and the
* a non-zero value is returned.
*/
int
Merge the short-lived perseant-lfsv2 branch into the trunk. Kernels and tools understand both v1 and v2 filesystems; newfs_lfs generates v2 by default. Changes for the v2 layout include: - Segments of non-PO2 size and arbitrary block offset, so these can be matched to convenient physical characteristics of the partition (e.g., stripe or track size and offset). - Address by fragment instead of by disk sector, paving the way for non-512-byte-sector devices. In theory fragments can be as large as you like, though in reality they must be smaller than MAXBSIZE in size. - Use serial number and filesystem identifier to ensure that roll-forward doesn't get old data and think it's new. Roll-forward is enabled for v2 filesystems, though not for v1 filesystems by default. - The inode free list is now a tailq, paving the way for undelete (undelete is not yet implemented, but can be without further non-backwards-compatible changes to disk structures). - Inode atime information is kept in the Ifile, instead of on the inode; that is, the inode is never written *just* because atime was changed. Because of this the inodes remain near the file data on the disk, rather than wandering all over as the disk is read repeatedly. This speeds up repeated reads by a small but noticeable amount. Other changes of note include: - The ifile written by newfs_lfs can now be of arbitrary length, it is no longer restricted to a single indirect block. - Fixed an old bug where ctime was changed every time a vnode was created. I need to look more closely to make sure that the times are only updated during write(2) and friends, not after-the-fact during a segment write, and certainly not by the cleaner.
2001-07-14 00:30:18 +04:00
fs_getmntinfo(struct statfs **buf, char *name, const char *type)
1994-06-08 22:42:09 +04:00
{
/* allocate space for the filesystem info */
if ((*buf = malloc(sizeof(struct statfs))) == NULL)
1994-06-08 22:42:09 +04:00
return 0;
/* grab the filesystem info */
if (ifile_fd == -1) {
if (statfs(name, *buf) == -1)
goto bad;
} else if(fstatfs(ifile_fd, *buf) == -1)
goto bad;
1994-06-08 22:42:09 +04:00
/* check to see if it's the one we want */
if (strncmp(type, (*buf)->f_fstypename, MFSNAMELEN) ||
strncmp(name, (*buf)->f_mntonname, MNAMELEN))
goto bad;
1994-06-08 22:42:09 +04:00
return 1;
bad:
free(*buf);
return 0;
1994-06-08 22:42:09 +04:00
}
/*
* Get all the information available on an LFS file system.
* Returns an pointer to an FS_INFO structure, NULL on error.
*/
FS_INFO *
get_fs_info(struct statfs *lstatfsp, int use_mmap)
1994-06-08 22:42:09 +04:00
{
FS_INFO *fsp;
if ((fsp = malloc(sizeof(FS_INFO))) == NULL)
1994-06-08 22:42:09 +04:00
return NULL;
memset(fsp, 0, sizeof(FS_INFO));
1994-06-08 22:42:09 +04:00
fsp->fi_statfsp = lstatfsp;
if (get_superblock(fsp, &fsp->fi_lfs)) {
syslog(LOG_ERR, "get_fs_info: get_superblock failed (%m)");
exit(1);
}
get_ifile(fsp, use_mmap);
1994-06-08 22:42:09 +04:00
return (fsp);
}
/*
* If we are reading the ifile then we need to refresh it. Even if
* we are mmapping it, it might have grown. Finally, we need to
1994-06-08 22:42:09 +04:00
* refresh the file system information (statfs) info.
*/
void
Merge the short-lived perseant-lfsv2 branch into the trunk. Kernels and tools understand both v1 and v2 filesystems; newfs_lfs generates v2 by default. Changes for the v2 layout include: - Segments of non-PO2 size and arbitrary block offset, so these can be matched to convenient physical characteristics of the partition (e.g., stripe or track size and offset). - Address by fragment instead of by disk sector, paving the way for non-512-byte-sector devices. In theory fragments can be as large as you like, though in reality they must be smaller than MAXBSIZE in size. - Use serial number and filesystem identifier to ensure that roll-forward doesn't get old data and think it's new. Roll-forward is enabled for v2 filesystems, though not for v1 filesystems by default. - The inode free list is now a tailq, paving the way for undelete (undelete is not yet implemented, but can be without further non-backwards-compatible changes to disk structures). - Inode atime information is kept in the Ifile, instead of on the inode; that is, the inode is never written *just* because atime was changed. Because of this the inodes remain near the file data on the disk, rather than wandering all over as the disk is read repeatedly. This speeds up repeated reads by a small but noticeable amount. Other changes of note include: - The ifile written by newfs_lfs can now be of arbitrary length, it is no longer restricted to a single indirect block. - Fixed an old bug where ctime was changed every time a vnode was created. I need to look more closely to make sure that the times are only updated during write(2) and friends, not after-the-fact during a segment write, and certainly not by the cleaner.
2001-07-14 00:30:18 +04:00
reread_fs_info(FS_INFO *fsp, int use_mmap)
1994-06-08 22:42:09 +04:00
{
if (ifile_fd != -1) {
if (fstatfs(ifile_fd, fsp->fi_statfsp) == -1) {
syslog(LOG_ERR, "reread_fs_info: fstatfs failed (%m)");
exit(1);
}
} else if (statfs(fsp->fi_statfsp->f_mntonname, fsp->fi_statfsp)) {
syslog(LOG_ERR, "reread_fs_info: statfs `%s' failed (%m)",
fsp->fi_statfsp->f_mntonname);
exit(1);
}
get_ifile(fsp, use_mmap);
}
/*
 * Return a read-only descriptor for the raw device under the file system.
 *
 * The descriptor is opened on first use and cached in dev_fd.  The raw
 * device name is derived from the mounted-from name by turning
 * "/dev/<name>" into "/dev/r<name>", so the mounted-from name must start
 * with "/dev/".  A name without that prefix, a derived name that does not
 * fit in the buffer, or an open() failure is logged and is fatal.
 */
static int
getdevfd(FS_INFO *fsp)
{
	static const char devdir[] = "/dev/";
	const size_t devdirlen = sizeof(devdir) - 1;
	const char *from;
	char rdev[MNAMELEN];
	int len;

	if (dev_fd != -1)
		return dev_fd;

	from = fsp->fi_statfsp->f_mntfromname;

	/* Skipping the prefix is only meaningful if it is really there. */
	if (strncmp(from, devdir, devdirlen) != 0) {
		syslog(LOG_ERR, "`%s' is not a device under /dev", from);
		exit(1);
	}

	len = snprintf(rdev, sizeof(rdev), "/dev/r%s", from + devdirlen);
	if (len < 0 || (size_t)len >= sizeof(rdev)) {
		/* Never try to open a truncated (i.e. different) name. */
		syslog(LOG_ERR, "raw device name for `%s' is too long", from);
		exit(1);
	}

	if ((dev_fd = open(rdev, O_RDONLY)) == -1) {
		syslog(LOG_ERR, "Cannot open `%s' (%m)", rdev);
		exit(1);
	}

	return dev_fd;
}
/*
 * Read "size" bytes from the raw device into "buf", starting at the byte
 * offset that corresponds to file system block address "daddr".
 *
 * Returns whatever pread() returned (the number of bytes read, or -1 on
 * error), converted to int.
 */
int
get_rawblock(FS_INFO *fsp, char *buf, size_t size, ufs_daddr_t daddr)
{
	int fd = getdevfd(fsp);
	off_t offset = fsbtob(&fsp->fi_lfs, (off_t)daddr);

	return pread(fd, buf, size, offset);
}
/*
 * Read an inode from disk.
 *
 * lfs_bmapv() is asked for the disk address of the inode block holding
 * inode "ino"; that block is read from the raw device and searched for
 * the matching on-disk inode.
 *
 * Returns a pointer to a copy of the inode, or NULL if the address lookup
 * fails or yields no address, memory cannot be allocated, the block
 * cannot be read in full, or the inode is not in the block.  The copy
 * lives in static storage and is overwritten by the next call.
 */
struct dinode *
get_dinode(FS_INFO *fsp, ino_t ino)
{
	static struct dinode dino;
	struct dinode *dip, *dib, *end;
	struct lfs *lfsp;
	BLOCK_INFO_15 bi;
	ssize_t nread;
	int fd;

	lfsp = &fsp->fi_lfs;

	/*
	 * Locate the inode block and find the inode.
	 * Use this to know how large the file is.
	 */
	memset(&bi, 0, sizeof(bi));
	bi.bi_inode = ino;
	bi.bi_lbn = LFS_UNUSED_LBN;		/* We want the inode */
	if (lfs_bmapv(&fsp->fi_statfsp->f_fsid, &bi, 1) < 0) {
		syslog(LOG_WARNING, "lfs_bmapv: %m");
		return NULL;
	}
	if (bi.bi_daddr <= 0)
		return NULL;

	/*
	 * pread() below does not depend on the descriptor's offset; the
	 * rewind is kept only so the offset is left as it always was.
	 */
	fd = getdevfd(fsp);
	(void)lseek(fd, (off_t)0, SEEK_SET);

	if ((dib = malloc(lfsp->lfs_ibsize)) == NULL) {
		syslog(LOG_WARNING, "get_dinode: malloc: %m");
		return NULL;
	}

	/* Never search a buffer that was not completely filled in. */
	nread = pread(fd, dib, lfsp->lfs_ibsize,
	    fsbtob(lfsp, (off_t)bi.bi_daddr));
	if (nread < 0) {
		syslog(LOG_WARNING, "get_dinode: pread: %m");
		free(dib);
		return NULL;
	}
	if ((size_t)nread != (size_t)lfsp->lfs_ibsize) {
		syslog(LOG_WARNING, "get_dinode: short read of inode block");
		free(dib);
		return NULL;
	}

	end = dib + lfsp->lfs_inopb;
	for (dip = dib; dip != end; ++dip)
		if (dip->di_u.inumber == ino)
			break;

	if (dip == end) {
		free(dib);
		syslog(LOG_WARNING, "dinode %d not found at fsb 0x%x",
		    (int)ino, (unsigned int)bi.bi_daddr);
		return NULL;
	}

	dino = *dip;	/* structure copy */
	free(dib);

	return &dino;
}
/*
 * Gets the superblock from disk (possibly in face of errors).
 *
 * The superblock is first read at byte offset LFS_LABELPAD.  If that copy
 * describes a file system whose blocks are larger than LFS_LABELPAD, the
 * superblock is read again from the address in lfs_sboffs[0].  The offset
 * that was finally used is remembered in a static and reused by later
 * calls.  For file systems older than version 2, the fields that only
 * exist in the newer layout are filled in from their older equivalents.
 *
 * As written this always returns 0; read errors are left to the get()
 * macro (NOTE(review): defined outside this file -- confirm it does not
 * return on failure).
 */
int
get_superblock(FS_INFO *fsp, struct lfs *sbp)
{
	static off_t sboff = LFS_LABELPAD;
	char sbbuf[LFS_SBPAD];
	int again;

	lseek(getdevfd(fsp), 0, SEEK_SET);

	do {
		get(dev_fd, sboff, sbbuf, LFS_SBPAD);
		memcpy(&(sbp->lfs_dlfs), sbbuf, sizeof(struct dlfs));

		again = (sboff == LFS_LABELPAD &&
		    fsbtob(sbp, 1) > LFS_LABELPAD);
		if (again)
			sboff = fsbtob(sbp, (off_t)sbp->lfs_sboffs[0]);
	} while (again);

	/* Compatibility */
	if (sbp->lfs_version < 2) {
		sbp->lfs_sumsize = LFS_V1_SUMMARY_SIZE;
		sbp->lfs_ibsize = sbp->lfs_bsize;
		sbp->lfs_start = sbp->lfs_sboffs[0];
		sbp->lfs_tstamp = sbp->lfs_otstamp;
		sbp->lfs_fsbtodb = 0;
	}

	return (0);
}
/*
 * This function will load the ifile into memory, either by mapping it
 * (use_mmap non-zero) or by reading it into a malloc'ed buffer, and point
 * the cleaner-info, segment-usage and inode-file fields of "fsp" at it.
 *
 * The ifile is opened on first use and the descriptor is cached in
 * ifile_fd.  With use_mmap set, nothing is remapped when the file size is
 * unchanged since the previous call.
 *
 * It causes a fatal error on failure.  The one exception is an fstat()
 * failure: unless the same file system is still found mounted at the
 * mount point, the process exits quietly with status 0.
 */
void
get_ifile(FS_INFO *fsp, int use_mmap)
{
	struct stat file_stat;
	struct statfs statfsbuf;
	caddr_t ifp;
	char *ifile_name;
	size_t namelen;
	ssize_t count;

	ifp = NULL;

	if (ifile_fd == -1) {
		/* The path is only needed to open the ifile the first time. */
		namelen = strlen(fsp->fi_statfsp->f_mntonname) +
		    strlen(IFILE_NAME) + 2;
		if ((ifile_name = malloc(namelen)) == NULL) {
			syslog(LOG_ERR, "get_ifile: malloc failed (%m)");
			exit(1);
		}
		(void)snprintf(ifile_name, namelen, "%s/%s",
		    fsp->fi_statfsp->f_mntonname, IFILE_NAME);

		/* XXX KS - Do we ever *write* to the ifile? */
		if ((ifile_fd = open(ifile_name, O_RDONLY)) == -1) {
			syslog(LOG_ERR, "get_ifile: cannot open `%s': %m",
			    ifile_name);
			exit(1);
		}
		free(ifile_name);
	} else
		lseek(ifile_fd, (off_t)0, SEEK_SET);

	if (fstat(ifile_fd, &file_stat) == -1) {
		/* If the fs was unmounted, don't complain */
		if (statfs(fsp->fi_statfsp->f_mntonname, &statfsbuf) != -1 &&
		    memcmp(&statfsbuf.f_fsid, &fsp->fi_statfsp->f_fsid,
		    sizeof(statfsbuf.f_fsid)) == 0) {
			/*
			 * Same fsid at the mount point: the filesystem is
			 * still mounted, this error is real.
			 */
			syslog(LOG_ERR, "get_ifile: fstat failed: %m");
			exit(1);
		}
		exit(0);
	}
	fsp->fi_fs_tstamp = file_stat.st_mtimespec.tv_sec;

	/* An unchanged size means the existing mapping is still good. */
	if (use_mmap && file_stat.st_size == fsp->fi_ifile_length)
		return;

	/* get the ifile */
	if (use_mmap) {
		if (fsp->fi_cip)
			munmap((caddr_t)fsp->fi_cip, fsp->fi_ifile_length);
		/* XXX KS - Do we ever *write* to the ifile? */
		ifp = mmap((caddr_t)0, file_stat.st_size,
		    PROT_READ, MAP_FILE|MAP_PRIVATE, ifile_fd, (off_t)0);
		if (ifp == (caddr_t)MAP_FAILED) {
			syslog(LOG_ERR, "get_ifile: mmap failed (%m)");
			exit(1);
		}
	} else {
		free(fsp->fi_cip);
		if ((ifp = malloc(file_stat.st_size)) == NULL) {
			syslog(LOG_ERR, "get_ifile: malloc failed (%m)");
			exit(1);
		}

		/* Retry from the start of the file until a full read. */
		for (;;) {
			count = read(ifile_fd, ifp, (size_t)file_stat.st_size);
			if (count < 0) {
				syslog(LOG_ERR,
				    "get_ifile: bad ifile read (%m)");
				exit(1);
			}
			if (count >= file_stat.st_size)
				break;

			syslog(LOG_WARNING, "get_ifile (%m)");
			if (lseek(ifile_fd, 0, SEEK_SET) < 0) {
				syslog(LOG_ERR,
				    "get_ifile: bad ifile lseek (%m)");
				exit(1);
			}
		}
	}
	fsp->fi_ifile_length = file_stat.st_size;

	/* The three tables sit back to back at the front of the ifile. */
	fsp->fi_cip = (CLEANERINFO *)ifp;
	fsp->fi_segusep = (SEGUSE *)(ifp + CLEANSIZE(fsp));
	fsp->fi_ifilep  = (IFILE *)((caddr_t)fsp->fi_segusep + SEGTABSIZE(fsp));

	/*
	 * The number of ifile entries is equal to the number of
	 * blocks in the ifile minus the ones allocated to cleaner info
	 * and segment usage table multiplied by the number of ifile
	 * entries per page.
	 */
	fsp->fi_ifile_count = ((fsp->fi_ifile_length >> fsp->fi_lfs.lfs_bshift)
	    - fsp->fi_lfs.lfs_cleansz - fsp->fi_lfs.lfs_segtabsz) *
	    fsp->fi_lfs.lfs_ifpb;
}
/*
* Return the size of the partial segment, in bytes.
*/
int
Merge the short-lived perseant-lfsv2 branch into the trunk. Kernels and tools understand both v1 and v2 filesystems; newfs_lfs generates v2 by default. Changes for the v2 layout include: - Segments of non-PO2 size and arbitrary block offset, so these can be matched to convenient physical characteristics of the partition (e.g., stripe or track size and offset). - Address by fragment instead of by disk sector, paving the way for non-512-byte-sector devices. In theory fragments can be as large as you like, though in reality they must be smaller than MAXBSIZE in size. - Use serial number and filesystem identifier to ensure that roll-forward doesn't get old data and think it's new. Roll-forward is enabled for v2 filesystems, though not for v1 filesystems by default. - The inode free list is now a tailq, paving the way for undelete (undelete is not yet implemented, but can be without further non-backwards-compatible changes to disk structures). - Inode atime information is kept in the Ifile, instead of on the inode; that is, the inode is never written *just* because atime was changed. Because of this the inodes remain near the file data on the disk, rather than wandering all over as the disk is read repeatedly. This speeds up repeated reads by a small but noticeable amount. Other changes of note include: - The ifile written by newfs_lfs can now be of arbitrary length, it is no longer restricted to a single indirect block. - Fixed an old bug where ctime was changed every time a vnode was created. I need to look more closely to make sure that the times are only updated during write(2) and friends, not after-the-fact during a segment write, and certainly not by the cleaner.
2001-07-14 00:30:18 +04:00
pseg_size(daddr_t pseg_addr, FS_INFO *fsp, SEGSUM *sp)
{
int i, ssize = 0;
struct lfs *lfsp;
FINFO *fp;
lfsp = &fsp->fi_lfs;
Merge the short-lived perseant-lfsv2 branch into the trunk. Kernels and tools understand both v1 and v2 filesystems; newfs_lfs generates v2 by default. Changes for the v2 layout include: - Segments of non-PO2 size and arbitrary block offset, so these can be matched to convenient physical characteristics of the partition (e.g., stripe or track size and offset). - Address by fragment instead of by disk sector, paving the way for non-512-byte-sector devices. In theory fragments can be as large as you like, though in reality they must be smaller than MAXBSIZE in size. - Use serial number and filesystem identifier to ensure that roll-forward doesn't get old data and think it's new. Roll-forward is enabled for v2 filesystems, though not for v1 filesystems by default. - The inode free list is now a tailq, paving the way for undelete (undelete is not yet implemented, but can be without further non-backwards-compatible changes to disk structures). - Inode atime information is kept in the Ifile, instead of on the inode; that is, the inode is never written *just* because atime was changed. Because of this the inodes remain near the file data on the disk, rather than wandering all over as the disk is read repeatedly. This speeds up repeated reads by a small but noticeable amount. Other changes of note include: - The ifile written by newfs_lfs can now be of arbitrary length, it is no longer restricted to a single indirect block. - Fixed an old bug where ctime was changed every time a vnode was created. I need to look more closely to make sure that the times are only updated during write(2) and friends, not after-the-fact during a segment write, and certainly not by the cleaner.
2001-07-14 00:30:18 +04:00
ssize = lfsp->lfs_sumsize
+ howmany(sp->ss_ninos, INOPB(lfsp)) * lfsp->lfs_ibsize;
if (lfsp->lfs_version == 1)
fp = (FINFO *)(((char *)sp) + sizeof(SEGSUM_V1));
else
fp = (FINFO *)(sp + 1);
for (i = 0; i < sp->ss_nfinfo; ++i) {
ssize += (fp->fi_nblocks-1) * lfsp->lfs_bsize
+ fp->fi_lastlength;
fp = (FINFO *)(&fp->fi_blocks[fp->fi_nblocks]);
}
return ssize;
}
1994-06-08 22:42:09 +04:00
/*
 * This function will scan a segment and return a list of
 * <inode, blocknum> pairs which indicate which blocks were
 * contained as live data within the segment when the segment
 * summary was read (it may have "died" since then).  Any given
 * pair will be listed at most once.
 *
 * fsp      - file system state (superblock copy, segment usage table).
 * seg      - number of the segment to scan.
 * seg_buf  - in-memory copy of the whole segment.
 * blocks   - on success, receives the malloc'd array of entries
 *            (presumably freed by the caller -- confirm at call sites).
 * bcount   - receives the number of valid entries in *blocks.
 *
 * Returns 0 on success.  On failure returns -1, sets *bcount to 0,
 * releases the working array and leaves *blocks untouched.
 */
int
lfs_segmapv(FS_INFO *fsp, int seg, caddr_t seg_buf, BLOCK_INFO_15 **blocks, int *bcount)
{
	BLOCK_INFO_15 *bip, *_bip, *nbip;
	SEGSUM *sp;
	SEGUSE *sup;
	FINFO *fip;
	struct lfs *lfsp;
	caddr_t s;
	daddr_t pseg_addr, seg_addr;
	int nelem, nblocks, nsegs, sumsize, i, ssize;

	i = 0;
	bip = NULL;
	lfsp = &fsp->fi_lfs;

	/* Initial guess at the array size; it is grown below when needed. */
	nelem = 2 * segtod(lfsp, 1);
	if (!(bip = malloc(nelem * sizeof(BLOCK_INFO_15)))) {
		syslog(LOG_DEBUG, "couldn't allocate %ld bytes in lfs_segmapv",
		    (long)(nelem * sizeof(BLOCK_INFO_15)));
		goto err0;
	}

	/* A segment that carries a superblock starts its data after it. */
	sup = SEGUSE_ENTRY(lfsp, fsp->fi_segusep, seg);
	s = seg_buf + (sup->su_flags & SEGUSE_SUPERBLOCK ? LFS_SBPAD : 0);

	seg_addr = sntod(lfsp, seg);
	pseg_addr = seg_addr + (sup->su_flags & SEGUSE_SUPERBLOCK ?
	    btofsb(lfsp, LFS_SBPAD) : 0);

	/*
	 * On v2 file systems segment 0 also has to leave room for the
	 * label pad in front of the superblock pad.
	 */
	if (seg == 0 && lfsp->lfs_version > 1) {
		if (fsbtob(lfsp, pseg_addr) < LFS_LABELPAD + LFS_SBPAD) {
			pseg_addr = btofsb(lfsp, LFS_LABELPAD + LFS_SBPAD);
			s = seg_buf + LFS_LABELPAD + LFS_SBPAD;
			syslog(LOG_DEBUG, "adj segment 0 offset to 0x%x\n",
			    pseg_addr);
		}
	}

	if (debug > 1)
		syslog(LOG_DEBUG, "\tsegment buffer at: %p\tseg_addr 0x%x", s, seg_addr);

	*bcount = 0;
	for (nsegs = 0; nsegs < sup->su_nsums; nsegs++) {
		sp = (SEGSUM *)s;

		nblocks = pseg_valid(fsp, sp, pseg_addr);
		if (nblocks <= 0) {
			syslog(LOG_DEBUG, "Warning: invalid segment summary at 0x%x",
			    pseg_addr);
			goto err0;
		}

#ifdef DIAGNOSTIC
		/*
		 * Verify size of summary block.
		 *
		 * NOTE(review): the inode-block term below adds a block
		 * count, not a byte count (it is not multiplied by the size
		 * of an on-disk address), so this check looks like it
		 * undercounts -- confirm before relying on it.
		 */
		sumsize = (lfsp->lfs_version == 1 ? sizeof(SEGSUM_V1) :
		    sizeof(SEGSUM)) +
		    (sp->ss_ninos + INOPB(lfsp) - 1) / INOPB(lfsp);
		if (lfsp->lfs_version == 1)
			fip = (FINFO *)(((char *)sp) + sizeof(SEGSUM_V1));
		else
			fip = (FINFO *)(sp + 1);
		for (i = 0; i < sp->ss_nfinfo; ++i) {
			sumsize += sizeof(FINFO) +
			    (fip->fi_nblocks - 1) * sizeof(daddr_t);
			fip = (FINFO *)(&fip->fi_blocks[fip->fi_nblocks]);
		}
		if (sumsize > lfsp->lfs_sumsize) {
			syslog(LOG_ERR,
			    "Segment %d summary block too big: %d",
			    seg, sumsize);
			exit(1);
		}
#endif

		/* Make room for every block and inode this summary can add. */
		if (*bcount + nblocks + sp->ss_ninos > nelem) {
			nelem = *bcount + nblocks + sp->ss_ninos;
			/*
			 * Go through a temporary: if realloc() fails the old
			 * array is still allocated and err0 must free it.
			 */
			nbip = realloc(bip, nelem * sizeof(BLOCK_INFO_15));
			if (nbip == NULL) {
				syslog(LOG_DEBUG,
				    "couldn't reallocate %ld bytes in lfs_segmapv",
				    (long)(nelem * sizeof(BLOCK_INFO_15)));
				goto err0;
			}
			bip = nbip;
		}

		add_blocks(fsp, bip, bcount, sp, seg_buf, seg_addr, pseg_addr);
		add_inodes(fsp, bip, bcount, sp, seg_buf, seg_addr);

		/* Step to the next partial segment. */
		ssize = pseg_size(pseg_addr, fsp, sp);
		s += ssize;
		pseg_addr += btofsb(lfsp, ssize);
	}

	/*
	 * The loop above contains no break, so this test cannot currently
	 * fire; it is kept as a guard in case an early exit is reintroduced.
	 */
	if (nsegs < sup->su_nsums) {
		syslog(LOG_WARNING,"only %d segment summaries in seg %d (expected %d)",
		    nsegs, seg, sup->su_nsums);
		goto err0;
	}

	/*
	 * Sort the entries, then let toss() drop the ones bi_toss rejects
	 * (presumably duplicates, per the "at most once" contract above).
	 */
	qsort(bip, *bcount, sizeof(BLOCK_INFO_15), bi_compare);
	toss(bip, bcount, sizeof(BLOCK_INFO_15), bi_toss, NULL);

	if (debug > 1) {
		syslog(LOG_DEBUG, "BLOCK INFOS");
		for (_bip = bip, i = 0; i < *bcount; ++_bip, ++i)
			PRINT_BINFO(_bip);
	}

	*blocks = bip;
	return (0);

err0:
	free(bip);
	*bcount = 0;
	return (-1);
}
/*
 * This will parse a partial segment and fill in BLOCK_INFO_15 structures
 * for each block described in the segment summary.  It will not include
 * blocks or inodes from files with new version numbers: an entry is kept
 * only when the version recorded in the FINFO matches the version in the
 * file's ifile entry.
 *
 * fsp      - file system state.
 * bip      - base of the output array; new entries go in at bip[*countp].
 *            No bounds check is done here, so the caller has to provide
 *            enough room.
 * countp   - in/out count of valid entries in bip.
 * sp       - segment summary of the partial segment being parsed.
 * seg_buf  - in-memory copy of the whole segment.
 * segaddr  - disk address of the start of the segment.
 * psegaddr - disk address of the start of this partial segment.
 */
void
add_blocks(FS_INFO *fsp, BLOCK_INFO_15 *bip, int *countp, SEGSUM *sp,
    caddr_t seg_buf, daddr_t segaddr, daddr_t psegaddr)
{
	IFILE *ifp;
	FINFO *fip;
	caddr_t bp;
	daddr_t *dp, *iaddrp;
	int fsb_per_block, fsb_per_iblock, i, j;
	int fsb_frag;
	u_long iblk_size, blk_size;
	struct lfs *lfsp;
	time_t segcreate;

	if (debug > 1)
		syslog(LOG_DEBUG, "FILE INFOS");

	lfsp = &fsp->fi_lfs;
	fsb_per_block = fragstofsb(lfsp, lfsp->lfs_frag);
	fsb_per_iblock = btofsb(lfsp, lfsp->lfs_ibsize);
	blk_size = lfsp->lfs_bsize;
	iblk_size = lfsp->lfs_ibsize;

	/* First data byte: just past this partial segment's summary. */
	bp = seg_buf + fsbtob(lfsp, psegaddr - segaddr) + lfsp->lfs_sumsize;
	bip += *countp;
	psegaddr += btofsb(lfsp, lfsp->lfs_sumsize);

	/*
	 * Start at the last address slot of the summary block and walk
	 * backwards through it as inode blocks are encountered.
	 */
	iaddrp = (daddr_t *)((caddr_t)sp + lfsp->lfs_sumsize);
	--iaddrp;

	/* The creation stamp is the same for every block of this summary. */
	if (lfsp->lfs_version == 1)
		segcreate = (time_t)(sp->ss_ident);
	else
		segcreate = (time_t)(sp->ss_create);

	/* The first FINFO follows the (version-dependent) summary header. */
	if (lfsp->lfs_version == 1)
		fip = (FINFO *)(((char *)sp) + sizeof(SEGSUM_V1));
	else
		fip = (FINFO *)(sp + 1);

	for (i = 0; i < sp->ss_nfinfo;
	    ++i, fip = (FINFO *)(&fip->fi_blocks[fip->fi_nblocks])) {
		ifp = IFILE_ENTRY(lfsp, fsp->fi_ifilep, fip->fi_ino);
		PRINT_FINFO(fip, ifp);

		dp = &(fip->fi_blocks[0]);
		for (j = 0; j < fip->fi_nblocks; j++, dp++) {
			/* Skip over intervening inode blocks */
			while (psegaddr == *iaddrp) {
				psegaddr += fsb_per_iblock;
				bp += iblk_size;
				--iaddrp;
			}

			bip->bi_inode = fip->fi_ino;
			bip->bi_lbn = *dp;
			bip->bi_daddr = psegaddr;
			bip->bi_segcreate = segcreate;
			bip->bi_bp = bp;
			bip->bi_version = ifp->if_version;

			if (j < fip->fi_nblocks - 1 ||
			    fip->fi_lastlength == blk_size) {
				/* Full-sized block. */
				bip->bi_size = blk_size;
				psegaddr += fsb_per_block;
				bp += blk_size;
			} else {
				/* Short last block: advance by its fragments. */
				fsb_frag = fragstofsb(lfsp,
				    numfrags(lfsp, fip->fi_lastlength));
				if (debug > 1) {
					syslog(LOG_DEBUG, "lastlength, frags: %d, %d",
					    fip->fi_lastlength, fsb_frag);
				}
				bip->bi_size = fip->fi_lastlength;
				bp += fip->fi_lastlength;
				psegaddr += fsb_frag;
			}

			/*
			 * Commit the entry only when the versions match;
			 * otherwise the slot is reused for the next block.
			 */
			if (ifp->if_version == fip->fi_version) {
				++bip;
				++(*countp);
			}
		}
	}
}
/*
 * For a particular segment summary, reads the inode blocks and adds
 * BLOCK_INFO_15 structures to the array.  The function returns nothing;
 * the running entry count is passed in through countp.
 */
void
add_inodes(FS_INFO *fsp, BLOCK_INFO_15 *bip, int *countp, SEGSUM *sp,
Merge the short-lived perseant-lfsv2 branch into the trunk. Kernels and tools understand both v1 and v2 filesystems; newfs_lfs generates v2 by default. Changes for the v2 layout include: - Segments of non-PO2 size and arbitrary block offset, so these can be matched to convenient physical characteristics of the partition (e.g., stripe or track size and offset). - Address by fragment instead of by disk sector, paving the way for non-512-byte-sector devices. In theory fragments can be as large as you like, though in reality they must be smaller than MAXBSIZE in size. - Use serial number and filesystem identifier to ensure that roll-forward doesn't get old data and think it's new. Roll-forward is enabled for v2 filesystems, though not for v1 filesystems by default. - The inode free list is now a tailq, paving the way for undelete (undelete is not yet implemented, but can be without further non-backwards-compatible changes to disk structures). - Inode atime information is kept in the Ifile, instead of on the inode; that is, the inode is never written *just* because atime was changed. Because of this the inodes remain near the file data on the disk, rather than wandering all over as the disk is read repeatedly. This speeds up repeated reads by a small but noticeable amount. Other changes of note include: - The ifile written by newfs_lfs can now be of arbitrary length, it is no longer restricted to a single indirect block. - Fixed an old bug where ctime was changed every time a vnode was created. I need to look more closely to make sure that the times are only updated during write(2) and friends, not after-the-fact during a segment write, and certainly not by the cleaner.
2001-07-14 00:30:18 +04:00
caddr_t seg_buf, daddr_t seg_addr)
1994-06-08 22:42:09 +04:00
{
1997-10-07 17:39:56 +04:00
struct dinode *di = NULL; /* XXX gcc */
1994-06-08 22:42:09 +04:00
struct lfs *lfsp;
IFILE *ifp;
BLOCK_INFO_15 *bp;
1994-06-08 22:42:09 +04:00
daddr_t *daddrp;
ino_t inum;
int i;
1994-06-08 22:42:09 +04:00
if (sp->ss_ninos <= 0)
return;
1994-06-08 22:42:09 +04:00
bp = bip + *countp;
lfsp = &fsp->fi_lfs;
if(debug > 1)
syslog(LOG_DEBUG, "INODES:");
Merge the short-lived perseant-lfsv2 branch into the trunk. Kernels and tools understand both v1 and v2 filesystems; newfs_lfs generates v2 by default. Changes for the v2 layout include: - Segments of non-PO2 size and arbitrary block offset, so these can be matched to convenient physical characteristics of the partition (e.g., stripe or track size and offset). - Address by fragment instead of by disk sector, paving the way for non-512-byte-sector devices. In theory fragments can be as large as you like, though in reality they must be smaller than MAXBSIZE in size. - Use serial number and filesystem identifier to ensure that roll-forward doesn't get old data and think it's new. Roll-forward is enabled for v2 filesystems, though not for v1 filesystems by default. - The inode free list is now a tailq, paving the way for undelete (undelete is not yet implemented, but can be without further non-backwards-compatible changes to disk structures). - Inode atime information is kept in the Ifile, instead of on the inode; that is, the inode is never written *just* because atime was changed. Because of this the inodes remain near the file data on the disk, rather than wandering all over as the disk is read repeatedly. This speeds up repeated reads by a small but noticeable amount. Other changes of note include: - The ifile written by newfs_lfs can now be of arbitrary length, it is no longer restricted to a single indirect block. - Fixed an old bug where ctime was changed every time a vnode was created. I need to look more closely to make sure that the times are only updated during write(2) and friends, not after-the-fact during a segment write, and certainly not by the cleaner.
2001-07-14 00:30:18 +04:00
daddrp = (daddr_t *)((caddr_t)sp + lfsp->lfs_sumsize);
1994-06-08 22:42:09 +04:00
for (i = 0; i < sp->ss_ninos; ++i) {
if (i % INOPB(lfsp) == 0) {
--daddrp;
Merge the short-lived perseant-lfsv2 branch into the trunk. Kernels and tools understand both v1 and v2 filesystems; newfs_lfs generates v2 by default. Changes for the v2 layout include: - Segments of non-PO2 size and arbitrary block offset, so these can be matched to convenient physical characteristics of the partition (e.g., stripe or track size and offset). - Address by fragment instead of by disk sector, paving the way for non-512-byte-sector devices. In theory fragments can be as large as you like, though in reality they must be smaller than MAXBSIZE in size. - Use serial number and filesystem identifier to ensure that roll-forward doesn't get old data and think it's new. Roll-forward is enabled for v2 filesystems, though not for v1 filesystems by default. - The inode free list is now a tailq, paving the way for undelete (undelete is not yet implemented, but can be without further non-backwards-compatible changes to disk structures). - Inode atime information is kept in the Ifile, instead of on the inode; that is, the inode is never written *just* because atime was changed. Because of this the inodes remain near the file data on the disk, rather than wandering all over as the disk is read repeatedly. This speeds up repeated reads by a small but noticeable amount. Other changes of note include: - The ifile written by newfs_lfs can now be of arbitrary length, it is no longer restricted to a single indirect block. - Fixed an old bug where ctime was changed every time a vnode was created. I need to look more closely to make sure that the times are only updated during write(2) and friends, not after-the-fact during a segment write, and certainly not by the cleaner.
2001-07-14 00:30:18 +04:00
di = (struct dinode *)(seg_buf + fsbtob(lfsp,
*daddrp - seg_addr));
} else
1994-06-08 22:42:09 +04:00
++di;
1994-06-08 22:42:09 +04:00
inum = di->di_inumber;
bp->bi_lbn = LFS_UNUSED_LBN;
bp->bi_inode = inum;
bp->bi_daddr = *daddrp;
bp->bi_bp = di;
Merge the short-lived perseant-lfsv2 branch into the trunk. Kernels and tools understand both v1 and v2 filesystems; newfs_lfs generates v2 by default. Changes for the v2 layout include: - Segments of non-PO2 size and arbitrary block offset, so these can be matched to convenient physical characteristics of the partition (e.g., stripe or track size and offset). - Address by fragment instead of by disk sector, paving the way for non-512-byte-sector devices. In theory fragments can be as large as you like, though in reality they must be smaller than MAXBSIZE in size. - Use serial number and filesystem identifier to ensure that roll-forward doesn't get old data and think it's new. Roll-forward is enabled for v2 filesystems, though not for v1 filesystems by default. - The inode free list is now a tailq, paving the way for undelete (undelete is not yet implemented, but can be without further non-backwards-compatible changes to disk structures). - Inode atime information is kept in the Ifile, instead of on the inode; that is, the inode is never written *just* because atime was changed. Because of this the inodes remain near the file data on the disk, rather than wandering all over as the disk is read repeatedly. This speeds up repeated reads by a small but noticeable amount. Other changes of note include: - The ifile written by newfs_lfs can now be of arbitrary length, it is no longer restricted to a single indirect block. - Fixed an old bug where ctime was changed every time a vnode was created. I need to look more closely to make sure that the times are only updated during write(2) and friends, not after-the-fact during a segment write, and certainly not by the cleaner.
2001-07-14 00:30:18 +04:00
if (lfsp->lfs_version == 1)
bp->bi_segcreate = sp->ss_ident;
else
bp->bi_segcreate = sp->ss_create;
bp->bi_size = i; /* XXX KS - kludge */
1994-06-08 22:42:09 +04:00
if (inum == LFS_IFILE_INUM) {
bp->bi_version = 1; /* Ifile version should be 1 */
bp++;
++(*countp);
PRINT_INODE(1, bp);
} else {
ifp = IFILE_ENTRY(lfsp, fsp->fi_ifilep, inum);
PRINT_INODE(ifp->if_daddr == *daddrp, bp);
bp->bi_version = ifp->if_version;
if (ifp->if_daddr == *daddrp) {
bp++;
++(*countp);
}
1994-06-08 22:42:09 +04:00
}
}
}
/*
* Checks the summary checksum and the data checksum to determine if the
* segment is valid or not. Returns the size of the partial segment if it
* is valid, and 0 otherwise. Use dump_summary to figure out size of the
1994-06-08 22:42:09 +04:00
* the partial as well as whether or not the checksum is valid.
*/
1994-06-08 22:42:09 +04:00
int
pseg_valid(FS_INFO *fsp, SEGSUM *ssp, daddr_t addr)
1994-06-08 22:42:09 +04:00
{
int nblocks;
#if 0
1994-06-08 22:42:09 +04:00
caddr_t p;
int i;
1994-06-08 22:42:09 +04:00
u_long *datap;
#endif
1994-06-08 22:42:09 +04:00
if (ssp->ss_magic != SS_MAGIC) {
syslog(LOG_WARNING, "Bad magic number: 0x%x instead of 0x%x", ssp->ss_magic, SS_MAGIC);
1998-03-01 05:20:01 +03:00
return(0);
}
1998-03-01 05:20:01 +03:00
if ((nblocks = dump_summary(&fsp->fi_lfs, ssp, 0, NULL, addr)) <= 0 ||
nblocks > (fsp->fi_lfs.lfs_ssize / fsp->fi_lfs.lfs_fsize) - 1)
1994-06-08 22:42:09 +04:00
return(0);
#if 0
1994-06-08 22:42:09 +04:00
/* check data/inode block(s) checksum too */
datap = (u_long *)malloc(nblocks * sizeof(u_long));
Merge the short-lived perseant-lfsv2 branch into the trunk. Kernels and tools understand both v1 and v2 filesystems; newfs_lfs generates v2 by default. Changes for the v2 layout include: - Segments of non-PO2 size and arbitrary block offset, so these can be matched to convenient physical characteristics of the partition (e.g., stripe or track size and offset). - Address by fragment instead of by disk sector, paving the way for non-512-byte-sector devices. In theory fragments can be as large as you like, though in reality they must be smaller than MAXBSIZE in size. - Use serial number and filesystem identifier to ensure that roll-forward doesn't get old data and think it's new. Roll-forward is enabled for v2 filesystems, though not for v1 filesystems by default. - The inode free list is now a tailq, paving the way for undelete (undelete is not yet implemented, but can be without further non-backwards-compatible changes to disk structures). - Inode atime information is kept in the Ifile, instead of on the inode; that is, the inode is never written *just* because atime was changed. Because of this the inodes remain near the file data on the disk, rather than wandering all over as the disk is read repeatedly. This speeds up repeated reads by a small but noticeable amount. Other changes of note include: - The ifile written by newfs_lfs can now be of arbitrary length, it is no longer restricted to a single indirect block. - Fixed an old bug where ctime was changed every time a vnode was created. I need to look more closely to make sure that the times are only updated during write(2) and friends, not after-the-fact during a segment write, and certainly not by the cleaner.
2001-07-14 00:30:18 +04:00
p = (caddr_t)ssp + lfsp->lfs_sumsize;
1994-06-08 22:42:09 +04:00
for (i = 0; i < nblocks; ++i) {
datap[i] = *((u_long *)p);
p += fsp->fi_lfs.lfs_bsize;
}
if (cksum ((void *)datap, nblocks * sizeof(u_long)) != ssp->ss_datasum) {
syslog(LOG_WARNING, "Bad data checksum");
2000-01-18 11:02:30 +03:00
free(datap);
return 0;
}
#endif
1994-06-08 22:42:09 +04:00
return (nblocks);
}
/* #define MMAP_SEGMENT */
/*
 * Read a segment into a memory buffer.
 *
 * fsp      - file system whose device the segment is read from
 * segment  - segment number to read
 * segbuf   - out: receives the address of the buffer
 * use_mmap - nonzero to mmap(2) the segment from the device, zero to
 *            malloc(3) a buffer and read(2) the segment into it
 *
 * The buffer is seg_size() bytes long and must be released with
 * munmap_segment() using the same use_mmap value.
 *
 * Returns 0 on success.  On any failure a warning is logged, *segbuf is
 * set to NULL and -1 is returned.
 */
int
mmap_segment(FS_INFO *fsp, int segment, caddr_t *segbuf, int use_mmap)
{
	struct lfs *lfsp;
	daddr_t seg_daddr;	/* base disk address of segment */
	off_t seg_byte;
	size_t ssize;

	lfsp = &fsp->fi_lfs;

	/* get the disk address of the beginning of the segment */
	seg_daddr = sntod(lfsp, segment);
	seg_byte = fsbtob(lfsp, (off_t)seg_daddr);
	ssize = seg_size(lfsp);

	/* Kept from the original: getdevfd() is called before dev_fd is used. */
	(void)lseek(getdevfd(fsp), 0, SEEK_SET);

	if (use_mmap) {
		*segbuf = mmap((caddr_t)0, ssize, PROT_READ,
		    MAP_FILE|MAP_SHARED, dev_fd, seg_byte);
		/*
		 * mmap reports failure with MAP_FAILED; testing the sign of
		 * the returned address would also reject valid mappings
		 * located in the upper half of the address space.
		 */
		if (*segbuf == (caddr_t)MAP_FAILED) {
			syslog(LOG_WARNING,"mmap_segment: mmap failed: %m");
			*segbuf = NULL;
			return (-1);
		}
	} else {
		if (debug > 1)
			syslog(LOG_DEBUG, "mmap_segment\tseg_daddr: %lu\tseg_size: %lu\tseg_offset: %llu",
			    (u_long)seg_daddr, (u_long)ssize,
			    (unsigned long long)seg_byte);

		/* malloc the space for the buffer */
		*segbuf = malloc(ssize);
		if (*segbuf == NULL) {
			syslog(LOG_WARNING,"mmap_segment: malloc failed: %m");
			return (-1);
		}

		/* read the segment data into the buffer */
		if (lseek(dev_fd, seg_byte, SEEK_SET) != seg_byte) {
			syslog(LOG_WARNING,"mmap_segment: bad lseek: %m");
			free(*segbuf);
			*segbuf = NULL;
			return (-1);
		}
		if (read(dev_fd, *segbuf, ssize) != (ssize_t)ssize) {
			syslog(LOG_WARNING,"mmap_segment: bad read: %m");
			free(*segbuf);
			*segbuf = NULL;
			return (-1);
		}
	}
	/* close (fid); */

	return (0);
}
/*
 * Release a segment buffer: munmap(2) it over seg_size() bytes when
 * use_mmap is nonzero, free(3) it otherwise.
 */
void
munmap_segment(FS_INFO *fsp, caddr_t seg_buf, int use_mmap)
{
	if (!use_mmap) {
		free(seg_buf);
		return;
	}

	munmap(seg_buf, seg_size(&fsp->fi_lfs));
}
/*
* USEFUL DEBUGGING TOOLS:
*/
/*
 * Log a segment summary through dump_summary() with DUMP_ALL, or log
 * "0x0" when no summary pointer is given.
 */
void
print_SEGSUM(struct lfs *lfsp, SEGSUM *p, daddr_t addr)
{
	if (p == NULL) {
		syslog(LOG_DEBUG, "0x0");
		return;
	}

	(void) dump_summary(lfsp, p, DUMP_ALL, NULL, addr);
}
int
Merge the short-lived perseant-lfsv2 branch into the trunk. Kernels and tools understand both v1 and v2 filesystems; newfs_lfs generates v2 by default. Changes for the v2 layout include: - Segments of non-PO2 size and arbitrary block offset, so these can be matched to convenient physical characteristics of the partition (e.g., stripe or track size and offset). - Address by fragment instead of by disk sector, paving the way for non-512-byte-sector devices. In theory fragments can be as large as you like, though in reality they must be smaller than MAXBSIZE in size. - Use serial number and filesystem identifier to ensure that roll-forward doesn't get old data and think it's new. Roll-forward is enabled for v2 filesystems, though not for v1 filesystems by default. - The inode free list is now a tailq, paving the way for undelete (undelete is not yet implemented, but can be without further non-backwards-compatible changes to disk structures). - Inode atime information is kept in the Ifile, instead of on the inode; that is, the inode is never written *just* because atime was changed. Because of this the inodes remain near the file data on the disk, rather than wandering all over as the disk is read repeatedly. This speeds up repeated reads by a small but noticeable amount. Other changes of note include: - The ifile written by newfs_lfs can now be of arbitrary length, it is no longer restricted to a single indirect block. - Fixed an old bug where ctime was changed every time a vnode was created. I need to look more closely to make sure that the times are only updated during write(2) and friends, not after-the-fact during a segment write, and certainly not by the cleaner.
2001-07-14 00:30:18 +04:00
bi_compare(const void *a, const void *b)
1994-06-08 22:42:09 +04:00
{
const BLOCK_INFO_15 *ba, *bb;
1994-06-08 22:42:09 +04:00
int diff;
ba = a;
bb = b;
1997-10-07 17:39:56 +04:00
if ((diff = (int)(ba->bi_inode - bb->bi_inode)))
1994-06-08 22:42:09 +04:00
return (diff);
1997-10-07 17:39:56 +04:00
if ((diff = (int)(ba->bi_lbn - bb->bi_lbn))) {
1994-06-08 22:42:09 +04:00
if (ba->bi_lbn == LFS_UNUSED_LBN)
return(-1);
else if (bb->bi_lbn == LFS_UNUSED_LBN)
return(1);
else if (ba->bi_lbn < 0 && bb->bi_lbn >= 0)
return(1);
else if (bb->bi_lbn < 0 && ba->bi_lbn >= 0)
return(-1);
else
return (diff);
}
if ((diff = (int)(ba->bi_daddr - bb->bi_daddr)))
return (diff);
if(ba->bi_inode != LFS_IFILE_INUM && debug)
syslog(LOG_DEBUG,"bi_compare: using kludge on ino %d!", ba->bi_inode);
diff = ba->bi_size - bb->bi_size;
return diff;
}
1994-06-08 22:42:09 +04:00
int
Merge the short-lived perseant-lfsv2 branch into the trunk. Kernels and tools understand both v1 and v2 filesystems; newfs_lfs generates v2 by default. Changes for the v2 layout include: - Segments of non-PO2 size and arbitrary block offset, so these can be matched to convenient physical characteristics of the partition (e.g., stripe or track size and offset). - Address by fragment instead of by disk sector, paving the way for non-512-byte-sector devices. In theory fragments can be as large as you like, though in reality they must be smaller than MAXBSIZE in size. - Use serial number and filesystem identifier to ensure that roll-forward doesn't get old data and think it's new. Roll-forward is enabled for v2 filesystems, though not for v1 filesystems by default. - The inode free list is now a tailq, paving the way for undelete (undelete is not yet implemented, but can be without further non-backwards-compatible changes to disk structures). - Inode atime information is kept in the Ifile, instead of on the inode; that is, the inode is never written *just* because atime was changed. Because of this the inodes remain near the file data on the disk, rather than wandering all over as the disk is read repeatedly. This speeds up repeated reads by a small but noticeable amount. Other changes of note include: - The ifile written by newfs_lfs can now be of arbitrary length, it is no longer restricted to a single indirect block. - Fixed an old bug where ctime was changed every time a vnode was created. I need to look more closely to make sure that the times are only updated during write(2) and friends, not after-the-fact during a segment write, and certainly not by the cleaner.
2001-07-14 00:30:18 +04:00
bi_toss(const void *dummy, const void *a, const void *b)
1994-06-08 22:42:09 +04:00
{
const BLOCK_INFO_15 *ba, *bb;
1994-06-08 22:42:09 +04:00
ba = a;
bb = b;
return(ba->bi_inode == bb->bi_inode && ba->bi_lbn == bb->bi_lbn);
}
/*
 * Walk an array of *nump elements of `size' bytes each, starting at p.
 * For each pair of neighbouring elements, dotoss(client, elem, next) is
 * called; when it returns nonzero, `elem' is removed by sliding the rest
 * of the array down over it and *nump is decremented.  The last element
 * is never removed.
 */
void
toss(void *p, int *nump, size_t size, int (*dotoss)(const void *, const void *, const void *), void *client)
{
	char *cur, *next;
	int remaining;

	if (*nump == 0)
		return;

	cur = p;
	/* Number of elements that follow `cur'. */
	remaining = *nump - 1;
	while (remaining > 0) {
		next = cur + size;
		if (dotoss(client, cur, next)) {
			memmove(cur, next, remaining * size);
			--(*nump);
		} else {
			cur = next;
		}
		remaining--;
	}
}