NetBSD/libexec/lfs_cleanerd/library.c

798 lines
21 KiB
C
Raw Normal View History

Various bug-fixes to LFS, to wit: Kernel: * Add runtime quantity lfs_ravail, the number of disk-blocks reserved for writing. Writes to the filesystem first reserve a maximum amount of blocks before their write is allowed to proceed; after the blocks are allocated the reserved total is reduced by a corresponding amount. If the lfs_reserve function cannot immediately reserve the requested number of blocks, the inode is unlocked, and the thread sleeps until the cleaner has made enough space available for the blocks to be reserved. In this way large files can be written to the filesystem (or, smaller files can be written to a nearly-full but thoroughly clean filesystem) and the cleaner can still function properly. * Remove explicit switching on dlfs_minfreeseg from the kernel code; it is now merely a fs-creation parameter used to compute dlfs_avail and dlfs_bfree (and used by fsck_lfs(8) to check their accuracy). Its former role is better assumed by a properly computed dlfs_avail. * Bounds-check inode numbers submitted through lfs_bmapv and lfs_markv. This prevents a panic, but, if the cleaner is feeding the filesystem the wrong data, you are still in a world of hurt. * Cleanup: remove explicit references of DEV_BSIZE in favor of btodb()/dbtob(). lfs_cleanerd: * Make -n mean "send N segments' blocks through a single call to lfs_markv". Previously it had meant "clean N segments though N calls to lfs_markv, before looking again to see if more need to be cleaned". The new behavior gives better packing of direct data on disk with as little metadata as possible, largely alleviating the problem that the cleaner can consume more disk through inefficient use of metadata than it frees by moving dirty data away from clean "holes" to produce entirely clean segments. * Make -b mean "read as many segments as necessary to write N segments of dirty data back to disk", rather than its former meaning of "read as many segments as necessary to free N segments worth of space". 
The new meaning, combined with the new -n behavior described above, further aids in cleaning storage efficiency as entire segments can be written at once, using as few blocks as possible for segment summaries and inode blocks. * Make the cleaner take note of segments which could not be cleaned due to error, and not attempt to clean them until they are entirely free of dirty blocks. This prevents the case in which a cleanerd running with -n 1 and without -b (formerly the default) would spin trying repeatedly to clean a corrupt segment, while the remaining space filled and deadlocked the filesystem. * Update the lfs_cleanerd manual page to describe all the options, including the changes mentioned here (in particular, the -b and -n flags were previously undocumented). fsck_lfs: * Check, and optionally fix, lfs_avail (to an exact figure) and lfs_bfree (within a margin of error) in pass 5. newfs_lfs: * Reduce the default dlfs_minfreeseg to 1/20 of the total segments. * Add a warning if the sgs disklabel field is 16 (the default for FFS' cpg, but not usually desirable for LFS' sgs: 5--8 is a better range). * Change the calculation of lfs_avail and lfs_bfree, corresponding to the kernel changes mentioned above. mount_lfs: * Add -N and -b options to pass corresponding -n and -b options to lfs_cleanerd. * Default to calling lfs_cleanerd with "-b -n 4". [All of these changes were largely tested in the 1.5 branch, with the idea that they (along with previous un-pulled-up work) could be applied to the branch while it was still in ALPHA2; however my test system has experienced corruption on another filesystem (/dev/console has gone missing :^), and, while I believe this unrelated to the LFS changes, I cannot with good conscience request that the changes be pulled up.]
2000-09-09 08:49:54 +04:00
/* $NetBSD: library.c,v 1.16 2000/09/09 04:49:56 perseant Exp $ */
1997-10-07 17:39:56 +04:00
1994-06-08 22:42:09 +04:00
/*-
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
1997-10-07 17:39:56 +04:00
#include <sys/cdefs.h>
1994-06-08 22:42:09 +04:00
#ifndef lint
1997-10-07 17:39:56 +04:00
#if 0
1998-03-01 05:20:01 +03:00
static char sccsid[] = "@(#)library.c 8.3 (Berkeley) 5/24/95";
1997-10-07 17:39:56 +04:00
#else
Various bug-fixes to LFS, to wit: Kernel: * Add runtime quantity lfs_ravail, the number of disk-blocks reserved for writing. Writes to the filesystem first reserve a maximum amount of blocks before their write is allowed to proceed; after the blocks are allocated the reserved total is reduced by a corresponding amount. If the lfs_reserve function cannot immediately reserve the requested number of blocks, the inode is unlocked, and the thread sleeps until the cleaner has made enough space available for the blocks to be reserved. In this way large files can be written to the filesystem (or, smaller files can be written to a nearly-full but thoroughly clean filesystem) and the cleaner can still function properly. * Remove explicit switching on dlfs_minfreeseg from the kernel code; it is now merely a fs-creation parameter used to compute dlfs_avail and dlfs_bfree (and used by fsck_lfs(8) to check their accuracy). Its former role is better assumed by a properly computed dlfs_avail. * Bounds-check inode numbers submitted through lfs_bmapv and lfs_markv. This prevents a panic, but, if the cleaner is feeding the filesystem the wrong data, you are still in a world of hurt. * Cleanup: remove explicit references of DEV_BSIZE in favor of btodb()/dbtob(). lfs_cleanerd: * Make -n mean "send N segments' blocks through a single call to lfs_markv". Previously it had meant "clean N segments though N calls to lfs_markv, before looking again to see if more need to be cleaned". The new behavior gives better packing of direct data on disk with as little metadata as possible, largely alleviating the problem that the cleaner can consume more disk through inefficient use of metadata than it frees by moving dirty data away from clean "holes" to produce entirely clean segments. * Make -b mean "read as many segments as necessary to write N segments of dirty data back to disk", rather than its former meaning of "read as many segments as necessary to free N segments worth of space". 
The new meaning, combined with the new -n behavior described above, further aids in cleaning storage efficiency as entire segments can be written at once, using as few blocks as possible for segment summaries and inode blocks. * Make the cleaner take note of segments which could not be cleaned due to error, and not attempt to clean them until they are entirely free of dirty blocks. This prevents the case in which a cleanerd running with -n 1 and without -b (formerly the default) would spin trying repeatedly to clean a corrupt segment, while the remaining space filled and deadlocked the filesystem. * Update the lfs_cleanerd manual page to describe all the options, including the changes mentioned here (in particular, the -b and -n flags were previously undocumented). fsck_lfs: * Check, and optionally fix, lfs_avail (to an exact figure) and lfs_bfree (within a margin of error) in pass 5. newfs_lfs: * Reduce the default dlfs_minfreeseg to 1/20 of the total segments. * Add a warning if the sgs disklabel field is 16 (the default for FFS' cpg, but not usually desirable for LFS' sgs: 5--8 is a better range). * Change the calculation of lfs_avail and lfs_bfree, corresponding to the kernel changes mentioned above. mount_lfs: * Add -N and -b options to pass corresponding -n and -b options to lfs_cleanerd. * Default to calling lfs_cleanerd with "-b -n 4". [All of these changes were largely tested in the 1.5 branch, with the idea that they (along with previous un-pulled-up work) could be applied to the branch while it was still in ALPHA2; however my test system has experienced corruption on another filesystem (/dev/console has gone missing :^), and, while I believe this unrelated to the LFS changes, I cannot with good conscience request that the changes be pulled up.]
2000-09-09 08:49:54 +04:00
__RCSID("$NetBSD: library.c,v 1.16 2000/09/09 04:49:56 perseant Exp $");
1997-10-07 17:39:56 +04:00
#endif
1994-06-08 22:42:09 +04:00
#endif /* not lint */
#include <sys/param.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/types.h>
#include <sys/mman.h>

#include <ufs/ufs/dinode.h>
#include <ufs/lfs/lfs.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <unistd.h>

#include "clean.h"
void add_blocks __P((FS_INFO *, BLOCK_INFO *, int *, SEGSUM *, caddr_t,
daddr_t, daddr_t));
void add_inodes __P((FS_INFO *, BLOCK_INFO *, int *, SEGSUM *, caddr_t,
daddr_t));
int bi_compare __P((const void *, const void *));
int bi_toss __P((const void *, const void *, const void *));
void get_ifile __P((FS_INFO *, int));
int get_superblock __P((FS_INFO *, struct lfs *));
int pseg_valid __P((FS_INFO *, SEGSUM *, daddr_t));
int pseg_size __P((daddr_t, FS_INFO *, SEGSUM *));
void print_SEGSUM __P((struct lfs *, SEGSUM *, daddr_t));
1997-10-07 17:39:56 +04:00
extern int debug;
1997-10-07 17:39:56 +04:00
extern u_long cksum __P((void *, size_t)); /* XXX */
1994-06-08 22:42:09 +04:00
static int ifile_fd;
static int dev_fd;
1994-06-08 22:42:09 +04:00
/*
* This function will get information on a a filesystem which matches
* the name and type given. If a "name" is in a filesystem of the given
* type, then buf is filled with that filesystem's info, and the
* a non-zero value is returned.
*/
int
fs_getmntinfo(buf, name, type)
struct statfs **buf;
char *name;
const char *type;
1994-06-08 22:42:09 +04:00
{
/* allocate space for the filesystem info */
*buf = (struct statfs *)malloc(sizeof(struct statfs));
if (*buf == NULL)
return 0;
/* grab the filesystem info */
if (ifile_fd <= 0) {
if (statfs(name, *buf) < 0) {
free(*buf);
return 0;
}
} else if(fstatfs(ifile_fd, *buf) < 0) {
1994-06-08 22:42:09 +04:00
free(*buf);
return 0;
}
1994-06-08 22:42:09 +04:00
/* check to see if it's the one we want */
if (strncmp(type, (*buf)->f_fstypename, MFSNAMELEN) ||
1994-06-08 22:42:09 +04:00
strncmp(name, (*buf)->f_mntonname, MNAMELEN)) {
/* "this is not the filesystem you're looking for" */
1994-06-08 22:42:09 +04:00
free(*buf);
return 0;
}
return 1;
}
/*
* Get all the information available on an LFS file system.
* Returns an pointer to an FS_INFO structure, NULL on error.
*/
FS_INFO *
get_fs_info (lstatfsp, use_mmap)
struct statfs *lstatfsp; /* IN: pointer to statfs struct */
int use_mmap; /* IN: mmap or read */
{
FS_INFO *fsp;
1994-06-08 22:42:09 +04:00
fsp = (FS_INFO *)malloc(sizeof(FS_INFO));
if (fsp == NULL)
return NULL;
memset(fsp, 0, sizeof(FS_INFO));
1994-06-08 22:42:09 +04:00
fsp->fi_statfsp = lstatfsp;
if (get_superblock (fsp, &fsp->fi_lfs)) {
syslog(LOG_ERR, "Exiting: get_fs_info: get_superblock failed: %m");
exit(1);
}
1994-06-08 22:42:09 +04:00
fsp->fi_daddr_shift =
fsp->fi_lfs.lfs_bshift - fsp->fi_lfs.lfs_fsbtodb;
get_ifile (fsp, use_mmap);
return (fsp);
}
/*
 * Refresh the cached view of the file system: re-fetch the statfs
 * information into fsp->fi_statfsp and then refresh the ifile through
 * get_ifile() (if we are reading the ifile it must be re-read, and even
 * if we are mmapping it, it might have grown).
 *
 *	fsp		IN: file system information to refresh
 *	use_mmap	IN: passed on to get_ifile()
 *
 * When the ifile descriptor is open it is used with fstatfs(2), as
 * fs_getmntinfo() does; only when it is not open do we fall back to
 * statfs(2) on the recorded mount point.  The process exits if the
 * statfs information cannot be obtained.
 */
void
reread_fs_info(fsp, use_mmap)
	FS_INFO *fsp;	/* IN: pointer fs_infos to reread */
	int use_mmap;
{
	if (ifile_fd > 0) {
		if (fstatfs(ifile_fd, fsp->fi_statfsp)) {
			syslog(LOG_ERR, "Exiting: reread_fs_info: fstatfs failed: %m");
			exit(1);
		}
	} else if (statfs(fsp->fi_statfsp->f_mntonname, fsp->fi_statfsp)) {
		syslog(LOG_ERR, "Exiting: reread_fs_info: statfs failed: %m");
		exit(1);
	}
	get_ifile (fsp, use_mmap);
}
/*
* Gets the superblock from disk (possibly in face of errors)
1994-06-08 22:42:09 +04:00
*/
int
get_superblock (fsp, sbp)
FS_INFO *fsp; /* local file system info structure */
struct lfs *sbp;
{
char mntfromname[MNAMELEN+1];
char buf[LFS_SBPAD];
1994-06-08 22:42:09 +04:00
strcpy(mntfromname, "/dev/r");
strcat(mntfromname, fsp->fi_statfsp->f_mntfromname+5);
if(dev_fd <= 0) {
if ((dev_fd = open(mntfromname, O_RDONLY, (mode_t)0)) < 0) {
syslog(LOG_WARNING,"get_superblock: bad open: %m");
return (-1);
}
} else
lseek(dev_fd, 0, SEEK_SET);
get(dev_fd, LFS_LABELPAD, buf, LFS_SBPAD);
1998-09-12 01:21:29 +04:00
memcpy(&(sbp->lfs_dlfs), buf, sizeof(struct dlfs));
/* close (fid); */
1994-06-08 22:42:09 +04:00
return (0);
}
/*
1994-06-08 22:42:09 +04:00
* This function will map the ifile into memory. It causes a
* fatal error on failure.
*/
void
get_ifile (fsp, use_mmap)
FS_INFO *fsp;
int use_mmap;
{
struct stat file_stat;
struct statfs statfsbuf;
1994-06-08 22:42:09 +04:00
caddr_t ifp;
char *ifile_name;
int count;
1994-06-08 22:42:09 +04:00
ifp = NULL;
ifile_name = malloc(strlen(fsp->fi_statfsp->f_mntonname) +
strlen(IFILE_NAME)+2);
strcat(strcat(strcpy(ifile_name, fsp->fi_statfsp->f_mntonname), "/"),
IFILE_NAME);
if(ifile_fd <= 0) {
/* XXX KS - Do we ever *write* to the ifile? */
if ((ifile_fd = open(ifile_name, O_RDONLY, (mode_t)0)) < 0) {
syslog(LOG_ERR, "Exiting: get_ifile: bad open: %m");
exit(1);
}
} else
lseek(ifile_fd, 0, SEEK_SET);
1994-06-08 22:42:09 +04:00
if (fstat (ifile_fd, &file_stat)) {
/* If the fs was unmounted, don't complain */
statfs(fsp->fi_statfsp->f_mntonname, &statfsbuf);
if(memcmp(&statfsbuf.f_fsid,&fsp->fi_statfsp->f_fsid,
sizeof(statfsbuf.f_fsid))!=0)
{
/* Filesystem still mounted, this error is real */
syslog(LOG_ERR, "Exiting: get_ifile: fstat failed: %m");
exit(1);
}
exit(0);
}
fsp->fi_fs_tstamp = file_stat.st_mtimespec.tv_sec;
1994-06-08 22:42:09 +04:00
if (use_mmap && file_stat.st_size == fsp->fi_ifile_length) {
/* (void) close(fid); */
2000-01-18 11:02:30 +03:00
free(ifile_name);
1994-06-08 22:42:09 +04:00
return;
}
/* get the ifile */
if (use_mmap) {
if (fsp->fi_cip)
munmap((caddr_t)fsp->fi_cip, fsp->fi_ifile_length);
/* XXX KS - Do we ever *write* to the ifile? */
1994-06-08 22:42:09 +04:00
ifp = mmap ((caddr_t)0, file_stat.st_size,
PROT_READ, MAP_FILE|MAP_PRIVATE, ifile_fd, (off_t)0);
if (ifp == (caddr_t)(-1)) {
syslog(LOG_ERR, "Exiting: get_ifile: mmap failed: %m");
exit(1);
}
1994-06-08 22:42:09 +04:00
} else {
if (fsp->fi_cip)
free(fsp->fi_cip);
if (!(ifp = malloc (file_stat.st_size))) {
syslog(LOG_ERR, "Exiting: get_ifile: malloc failed: %m");
exit(1);
}
1994-06-08 22:42:09 +04:00
redo_read:
count = read (ifile_fd, ifp, (size_t) file_stat.st_size);
1994-06-08 22:42:09 +04:00
if (count < 0) {
syslog(LOG_ERR, "Exiting: get_ifile: bad ifile read: %m");
exit(1);
}
1994-06-08 22:42:09 +04:00
else if (count < file_stat.st_size) {
syslog(LOG_WARNING, "get_ifile: %m");
if (lseek(ifile_fd, 0, SEEK_SET) < 0) {
syslog(LOG_ERR, "Exiting: get_ifile: bad ifile lseek: %m");
exit(1);
}
1994-06-08 22:42:09 +04:00
goto redo_read;
}
}
fsp->fi_ifile_length = file_stat.st_size;
/* close (fid); */
1994-06-08 22:42:09 +04:00
fsp->fi_cip = (CLEANERINFO *)ifp;
fsp->fi_segusep = (SEGUSE *)(ifp + CLEANSIZE(fsp));
fsp->fi_ifilep = (IFILE *)((caddr_t)fsp->fi_segusep + SEGTABSIZE(fsp));
/*
Various bug-fixes to LFS, to wit: Kernel: * Add runtime quantity lfs_ravail, the number of disk-blocks reserved for writing. Writes to the filesystem first reserve a maximum amount of blocks before their write is allowed to proceed; after the blocks are allocated the reserved total is reduced by a corresponding amount. If the lfs_reserve function cannot immediately reserve the requested number of blocks, the inode is unlocked, and the thread sleeps until the cleaner has made enough space available for the blocks to be reserved. In this way large files can be written to the filesystem (or, smaller files can be written to a nearly-full but thoroughly clean filesystem) and the cleaner can still function properly. * Remove explicit switching on dlfs_minfreeseg from the kernel code; it is now merely a fs-creation parameter used to compute dlfs_avail and dlfs_bfree (and used by fsck_lfs(8) to check their accuracy). Its former role is better assumed by a properly computed dlfs_avail. * Bounds-check inode numbers submitted through lfs_bmapv and lfs_markv. This prevents a panic, but, if the cleaner is feeding the filesystem the wrong data, you are still in a world of hurt. * Cleanup: remove explicit references of DEV_BSIZE in favor of btodb()/dbtob(). lfs_cleanerd: * Make -n mean "send N segments' blocks through a single call to lfs_markv". Previously it had meant "clean N segments though N calls to lfs_markv, before looking again to see if more need to be cleaned". The new behavior gives better packing of direct data on disk with as little metadata as possible, largely alleviating the problem that the cleaner can consume more disk through inefficient use of metadata than it frees by moving dirty data away from clean "holes" to produce entirely clean segments. * Make -b mean "read as many segments as necessary to write N segments of dirty data back to disk", rather than its former meaning of "read as many segments as necessary to free N segments worth of space". 
The new meaning, combined with the new -n behavior described above, further aids in cleaning storage efficiency as entire segments can be written at once, using as few blocks as possible for segment summaries and inode blocks. * Make the cleaner take note of segments which could not be cleaned due to error, and not attempt to clean them until they are entirely free of dirty blocks. This prevents the case in which a cleanerd running with -n 1 and without -b (formerly the default) would spin trying repeatedly to clean a corrupt segment, while the remaining space filled and deadlocked the filesystem. * Update the lfs_cleanerd manual page to describe all the options, including the changes mentioned here (in particular, the -b and -n flags were previously undocumented). fsck_lfs: * Check, and optionally fix, lfs_avail (to an exact figure) and lfs_bfree (within a margin of error) in pass 5. newfs_lfs: * Reduce the default dlfs_minfreeseg to 1/20 of the total segments. * Add a warning if the sgs disklabel field is 16 (the default for FFS' cpg, but not usually desirable for LFS' sgs: 5--8 is a better range). * Change the calculation of lfs_avail and lfs_bfree, corresponding to the kernel changes mentioned above. mount_lfs: * Add -N and -b options to pass corresponding -n and -b options to lfs_cleanerd. * Default to calling lfs_cleanerd with "-b -n 4". [All of these changes were largely tested in the 1.5 branch, with the idea that they (along with previous un-pulled-up work) could be applied to the branch while it was still in ALPHA2; however my test system has experienced corruption on another filesystem (/dev/console has gone missing :^), and, while I believe this unrelated to the LFS changes, I cannot with good conscience request that the changes be pulled up.]
2000-09-09 08:49:54 +04:00
* The number of ifile entries is equal to the number of
1994-06-08 22:42:09 +04:00
* blocks in the ifile minus the ones allocated to cleaner info
* and segment usage table multiplied by the number of ifile
* entries per page.
*/
1998-03-01 05:20:01 +03:00
fsp->fi_ifile_count = ((fsp->fi_ifile_length >> fsp->fi_lfs.lfs_bshift)
- fsp->fi_lfs.lfs_cleansz - fsp->fi_lfs.lfs_segtabsz) *
1994-06-08 22:42:09 +04:00
fsp->fi_lfs.lfs_ifpb;
free (ifile_name);
}
/*
 * Return the size of the partial segment, in bytes: the summary block,
 * the inode blocks needed for ss_ninos inodes, and for every FINFO its
 * full blocks plus the length of its last block.
 *
 *	pseg_addr	base address of the segsum (not used here)
 *	fsp		file system info
 *	sp		the segment summary
 */
int
pseg_size(pseg_addr, fsp, sp)
	daddr_t pseg_addr;	/* base address of the segsum */
	FS_INFO *fsp;		/* Filesystem info */
	SEGSUM *sp;		/* the segsum */
{
	struct lfs *fs;
	FINFO *finfo;
	int remaining, total;

	fs = &fsp->fi_lfs;

	/* summary block plus the inode blocks */
	total = LFS_SUMMARY_SIZE
	    + howmany(sp->ss_ninos, INOPB(fs)) * fs->lfs_bsize;

	/* the FINFOs are packed back to back right after the SEGSUM */
	finfo = (FINFO *)(sp + 1);
	for (remaining = sp->ss_nfinfo; remaining > 0; --remaining) {
		total += (finfo->fi_nblocks-1) * fs->lfs_bsize
		    + finfo->fi_lastlength;
		finfo = (FINFO *)(&finfo->fi_blocks[finfo->fi_nblocks]);
	}
	return total;
}
1994-06-08 22:42:09 +04:00
/*
* This function will scan a segment and return a list of
* <inode, blocknum> pairs which indicate which blocks were
* contained as live data within the segment when the segment
* summary was read (it may have "died" since then). Any given
* pair will be listed at most once.
*/
int
1994-06-08 22:42:09 +04:00
lfs_segmapv(fsp, seg, seg_buf, blocks, bcount)
FS_INFO *fsp; /* pointer to local file system information */
int seg; /* the segment number */
caddr_t seg_buf; /* the buffer containing the segment's data */
BLOCK_INFO **blocks; /* OUT: array of block_info for live blocks */
int *bcount; /* OUT: number of active blocks in segment */
{
BLOCK_INFO *bip, *_bip;
1994-06-08 22:42:09 +04:00
SEGSUM *sp;
SEGUSE *sup;
FINFO *fip;
struct lfs *lfsp;
1998-03-01 05:20:01 +03:00
caddr_t s;
1994-06-08 22:42:09 +04:00
daddr_t pseg_addr, seg_addr;
int nelem, nblocks, nsegs, sumsize, i, ssize;
1994-06-08 22:42:09 +04:00
1997-10-07 17:39:56 +04:00
i = 0;
1994-06-08 22:42:09 +04:00
lfsp = &fsp->fi_lfs;
nelem = 2 * lfsp->lfs_ssize;
if (!(bip = malloc(nelem * sizeof(BLOCK_INFO))))
goto err0;
sup = SEGUSE_ENTRY(lfsp, fsp->fi_segusep, seg);
s = seg_buf + (sup->su_flags & SEGUSE_SUPERBLOCK ? LFS_SBPAD : 0);
seg_addr = sntoda(lfsp, seg);
pseg_addr = seg_addr + (sup->su_flags & SEGUSE_SUPERBLOCK ? btodb(LFS_SBPAD) : 0);
if(debug > 1)
syslog(LOG_DEBUG, "\tsegment buffer at: %p\tseg_addr 0x%x", s, seg_addr);
1994-06-08 22:42:09 +04:00
*bcount = 0;
for (nsegs = 0; nsegs < sup->su_nsums; nsegs++) {
1994-06-08 22:42:09 +04:00
sp = (SEGSUM *)s;
nblocks = pseg_valid(fsp, sp, pseg_addr);
1998-03-01 05:20:01 +03:00
if (nblocks <= 0) {
syslog(LOG_DEBUG, "Warning: invalid segment summary at 0x%x",
1998-03-01 05:20:01 +03:00
pseg_addr);
*bcount = 0;
return -1;
/* break; */
1998-03-01 05:20:01 +03:00
}
1994-06-08 22:42:09 +04:00
#ifdef DIAGNOSTIC
/* Verify size of summary block */
1994-06-08 22:42:09 +04:00
sumsize = sizeof(SEGSUM) +
(sp->ss_ninos + INOPB(lfsp) - 1) / INOPB(lfsp);
1998-03-01 05:20:01 +03:00
for (i = 0, fip = (FINFO *)(sp + 1); i < sp->ss_nfinfo; ++i) {
1994-06-08 22:42:09 +04:00
sumsize += sizeof(FINFO) +
(fip->fi_nblocks - 1) * sizeof(daddr_t);
fip = (FINFO *)(&fip->fi_blocks[fip->fi_nblocks]);
}
if (sumsize > LFS_SUMMARY_SIZE) {
syslog(LOG_ERR,
"Exiting: Segment %d summary block too big: %d\n",
1994-06-08 22:42:09 +04:00
seg, sumsize);
exit(1);
}
#endif
if (*bcount + nblocks + sp->ss_ninos > nelem) {
nelem = *bcount + nblocks + sp->ss_ninos;
bip = realloc (bip, nelem * sizeof(BLOCK_INFO));
if (!bip)
goto err0;
}
add_blocks(fsp, bip, bcount, sp, seg_buf, seg_addr, pseg_addr);
add_inodes(fsp, bip, bcount, sp, seg_buf, seg_addr);
ssize = pseg_size(pseg_addr, fsp, sp);
s += ssize;
pseg_addr += btodb(ssize); /* XXX was bytetoda(fsp,ssize) */
}
if(nsegs < sup->su_nsums) {
syslog(LOG_NOTICE,"only %d segment summaries in seg %d (expected %d)",
nsegs, seg, sup->su_nsums);
goto err0;
1994-06-08 22:42:09 +04:00
}
qsort(bip, *bcount, sizeof(BLOCK_INFO), bi_compare);
toss(bip, bcount, sizeof(BLOCK_INFO), bi_toss, NULL);
if(debug > 1) {
syslog(LOG_DEBUG, "BLOCK INFOS");
for (_bip = bip, i=0; i < *bcount; ++_bip, ++i)
PRINT_BINFO(_bip);
}
1994-06-08 22:42:09 +04:00
*blocks = bip;
return (0);
err0: *bcount = 0;
return (-1);
1994-06-08 22:42:09 +04:00
}
/*
1994-06-08 22:42:09 +04:00
* This will parse a partial segment and fill in BLOCK_INFO structures
* for each block described in the segment summary. It will not include
* blocks or inodes from files with new version numbers.
1994-06-08 22:42:09 +04:00
*/
void
add_blocks (fsp, bip, countp, sp, seg_buf, segaddr, psegaddr)
FS_INFO *fsp; /* pointer to super block */
BLOCK_INFO *bip; /* Block info array */
int *countp; /* IN/OUT: number of blocks in array */
SEGSUM *sp; /* segment summmary pointer */
caddr_t seg_buf; /* buffer containing segment */
daddr_t segaddr; /* address of this segment */
daddr_t psegaddr; /* address of this partial segment */
{
IFILE *ifp;
FINFO *fip;
caddr_t bp;
daddr_t *dp, *iaddrp;
int db_per_block, i, j;
1998-03-01 05:20:01 +03:00
int db_frag;
1994-06-08 22:42:09 +04:00
u_long page_size;
if(debug > 1)
syslog(LOG_DEBUG, "FILE INFOS");
1994-06-08 22:42:09 +04:00
db_per_block = fsbtodb(&fsp->fi_lfs, 1);
page_size = fsp->fi_lfs.lfs_bsize;
bp = seg_buf + datobyte(fsp, psegaddr - segaddr) + LFS_SUMMARY_SIZE;
bip += *countp;
psegaddr += bytetoda(fsp, LFS_SUMMARY_SIZE);
iaddrp = (daddr_t *)((caddr_t)sp + LFS_SUMMARY_SIZE);
--iaddrp;
for (fip = (FINFO *)(sp + 1), i = 0; i < sp->ss_nfinfo;
++i, fip = (FINFO *)(&fip->fi_blocks[fip->fi_nblocks])) {
ifp = IFILE_ENTRY(&fsp->fi_lfs, fsp->fi_ifilep, fip->fi_ino);
PRINT_FINFO(fip, ifp);
if (ifp->if_version > fip->fi_version)
continue;
dp = &(fip->fi_blocks[0]);
for (j = 0; j < fip->fi_nblocks; j++, dp++) {
/* Skip over intervening inode blocks */
1994-06-08 22:42:09 +04:00
while (psegaddr == *iaddrp) {
psegaddr += db_per_block;
bp += page_size;
--iaddrp;
}
bip->bi_inode = fip->fi_ino;
bip->bi_lbn = *dp;
bip->bi_daddr = psegaddr;
bip->bi_segcreate = (time_t)(sp->ss_create);
bip->bi_bp = bp;
bip->bi_version = ifp->if_version;
if (j < fip->fi_nblocks-1
|| fip->fi_lastlength == page_size)
{
1998-03-01 05:20:01 +03:00
bip->bi_size = page_size;
psegaddr += db_per_block;
bp += page_size;
} else {
db_frag = fragstodb(&(fsp->fi_lfs),
numfrags(&(fsp->fi_lfs),
fip->fi_lastlength));
if(debug > 1) {
syslog(LOG_DEBUG, "lastlength, frags: %d, %d",
fip->fi_lastlength, db_frag);
}
1998-03-01 05:20:01 +03:00
bip->bi_size = fip->fi_lastlength;
bp += fip->fi_lastlength;
psegaddr += db_frag;
}
1994-06-08 22:42:09 +04:00
++bip;
++(*countp);
}
}
}
/*
* For a particular segment summary, reads the inode blocks and adds
* INODE_INFO structures to the array. Returns the number of inodes
* actually added.
*/
void
add_inodes (fsp, bip, countp, sp, seg_buf, seg_addr)
FS_INFO *fsp; /* pointer to super block */
BLOCK_INFO *bip; /* block info array */
int *countp; /* pointer to current number of inodes */
SEGSUM *sp; /* segsum pointer */
caddr_t seg_buf; /* the buffer containing the segment's data */
daddr_t seg_addr; /* disk address of seg_buf */
{
1997-10-07 17:39:56 +04:00
struct dinode *di = NULL; /* XXX gcc */
1994-06-08 22:42:09 +04:00
struct lfs *lfsp;
IFILE *ifp;
BLOCK_INFO *bp;
daddr_t *daddrp;
ino_t inum;
int i;
1994-06-08 22:42:09 +04:00
if (sp->ss_ninos <= 0)
return;
1994-06-08 22:42:09 +04:00
bp = bip + *countp;
lfsp = &fsp->fi_lfs;
if(debug > 1)
syslog(LOG_DEBUG, "INODES:");
1994-06-08 22:42:09 +04:00
daddrp = (daddr_t *)((caddr_t)sp + LFS_SUMMARY_SIZE);
for (i = 0; i < sp->ss_ninos; ++i) {
if (i % INOPB(lfsp) == 0) {
--daddrp;
di = (struct dinode *)(seg_buf +
((*daddrp - seg_addr) << fsp->fi_daddr_shift));
} else
1994-06-08 22:42:09 +04:00
++di;
1994-06-08 22:42:09 +04:00
inum = di->di_inumber;
bp->bi_lbn = LFS_UNUSED_LBN;
bp->bi_inode = inum;
bp->bi_daddr = *daddrp;
bp->bi_bp = di;
bp->bi_segcreate = sp->ss_create;
bp->bi_size = i; /* XXX KS - kludge */
1994-06-08 22:42:09 +04:00
if (inum == LFS_IFILE_INUM) {
bp->bi_version = 1; /* Ifile version should be 1 */
bp++;
++(*countp);
PRINT_INODE(1, bp);
} else {
ifp = IFILE_ENTRY(lfsp, fsp->fi_ifilep, inum);
PRINT_INODE(ifp->if_daddr == *daddrp, bp);
bp->bi_version = ifp->if_version;
if (ifp->if_daddr == *daddrp) {
bp++;
++(*countp);
}
1994-06-08 22:42:09 +04:00
}
}
}
/*
* Checks the summary checksum and the data checksum to determine if the
* segment is valid or not. Returns the size of the partial segment if it
* is valid, and 0 otherwise. Use dump_summary to figure out size of the
1994-06-08 22:42:09 +04:00
* the partial as well as whether or not the checksum is valid.
*/
1994-06-08 22:42:09 +04:00
int
pseg_valid (fsp, ssp, addr)
1994-06-08 22:42:09 +04:00
FS_INFO *fsp; /* pointer to file system info */
SEGSUM *ssp; /* pointer to segment summary block */
daddr_t addr; /* address of the summary block on disk */
1994-06-08 22:42:09 +04:00
{
int nblocks;
#if 0
1994-06-08 22:42:09 +04:00
caddr_t p;
int i;
1994-06-08 22:42:09 +04:00
u_long *datap;
#endif
1994-06-08 22:42:09 +04:00
if (ssp->ss_magic != SS_MAGIC) {
syslog(LOG_DEBUG, "Bad magic number: 0x%x instead of 0x%x", ssp->ss_magic, SS_MAGIC);
1998-03-01 05:20:01 +03:00
return(0);
}
1998-03-01 05:20:01 +03:00
if ((nblocks = dump_summary(&fsp->fi_lfs, ssp, 0, NULL, addr)) <= 0 ||
1994-06-08 22:42:09 +04:00
nblocks > fsp->fi_lfs.lfs_ssize - 1)
return(0);
#if 0
1994-06-08 22:42:09 +04:00
/* check data/inode block(s) checksum too */
datap = (u_long *)malloc(nblocks * sizeof(u_long));
p = (caddr_t)ssp + LFS_SUMMARY_SIZE;
for (i = 0; i < nblocks; ++i) {
datap[i] = *((u_long *)p);
p += fsp->fi_lfs.lfs_bsize;
}
if (cksum ((void *)datap, nblocks * sizeof(u_long)) != ssp->ss_datasum) {
syslog(LOG_DEBUG, "Bad data checksum");
2000-01-18 11:02:30 +03:00
free(datap);
return 0;
}
#endif
1994-06-08 22:42:09 +04:00
return (nblocks);
}
/* #define MMAP_SEGMENT */
/*
1994-06-08 22:42:09 +04:00
* read a segment into a memory buffer
*/
int
mmap_segment (fsp, segment, segbuf, use_mmap)
FS_INFO *fsp; /* file system information */
int segment; /* segment number */
caddr_t *segbuf; /* pointer to buffer area */
int use_mmap; /* mmap instead of read */
{
struct lfs *lfsp;
daddr_t seg_daddr; /* base disk address of segment */
off_t seg_byte;
size_t ssize;
char mntfromname[MNAMELEN+2];
lfsp = &fsp->fi_lfs;
/* get the disk address of the beginning of the segment */
seg_daddr = sntoda(lfsp, segment);
seg_byte = datobyte(fsp, seg_daddr);
ssize = seg_size(lfsp);
strcpy(mntfromname, "/dev/r");
strcat(mntfromname, fsp->fi_statfsp->f_mntfromname+5);
if(dev_fd <= 0) {
if ((dev_fd = open(mntfromname, O_RDONLY, (mode_t)0)) < 0) {
syslog(LOG_WARNING,"mmap_segment: bad open: %m");
return (-1);
}
} else
lseek(dev_fd, 0, SEEK_SET);
1994-06-08 22:42:09 +04:00
if (use_mmap) {
*segbuf = mmap ((caddr_t)0, seg_size(lfsp), PROT_READ,
MAP_FILE|MAP_SHARED, dev_fd, seg_byte);
1994-06-08 22:42:09 +04:00
if (*(long *)segbuf < 0) {
syslog(LOG_WARNING,"mmap_segment: mmap failed: %m");
return (0);
1994-06-08 22:42:09 +04:00
}
} else {
if(debug > 1)
syslog(LOG_DEBUG, "mmap_segment\tseg_daddr: %lu\tseg_size: %lu\tseg_offset: %qu",
(u_long)seg_daddr, (u_long)ssize, (long long)seg_byte);
1994-06-08 22:42:09 +04:00
/* malloc the space for the buffer */
*segbuf = malloc(ssize);
if (!*segbuf) {
syslog(LOG_WARNING,"mmap_segment: malloc failed: %m");
return (0);
1994-06-08 22:42:09 +04:00
}
/* read the segment data into the buffer */
if (lseek (dev_fd, seg_byte, SEEK_SET) != seg_byte) {
syslog(LOG_WARNING,"mmap_segment: bad lseek: %m");
1994-06-08 22:42:09 +04:00
free(*segbuf);
return (-1);
}
if (read (dev_fd, *segbuf, ssize) != ssize) {
syslog(LOG_WARNING,"mmap_segment: bad read: %m");
1994-06-08 22:42:09 +04:00
free(*segbuf);
return (-1);
}
}
/* close (fid); */
1994-06-08 22:42:09 +04:00
return (0);
}
/*
 * Release a segment buffer: munmap() it over the segment size when
 * use_mmap is set, free() it otherwise.
 */
void
munmap_segment (fsp, seg_buf, use_mmap)
	FS_INFO *fsp;		/* file system information */
	caddr_t seg_buf;	/* pointer to buffer area */
	int use_mmap;		/* mmap instead of read/write */
{
	if (!use_mmap) {
		free (seg_buf);
		return;
	}
	munmap (seg_buf, seg_size(&fsp->fi_lfs));
}
/*
* USEFUL DEBUGGING TOOLS:
*/
void
print_SEGSUM (lfsp, p, addr)
1994-06-08 22:42:09 +04:00
struct lfs *lfsp;
SEGSUM *p;
daddr_t addr;
1994-06-08 22:42:09 +04:00
{
if (p)
(void) dump_summary(lfsp, p, DUMP_ALL, NULL, addr);
else
syslog(LOG_DEBUG, "0x0");
1994-06-08 22:42:09 +04:00
}
int
bi_compare(a, b)
const void *a;
const void *b;
{
const BLOCK_INFO *ba, *bb;
int diff;
ba = a;
bb = b;
1997-10-07 17:39:56 +04:00
if ((diff = (int)(ba->bi_inode - bb->bi_inode)))
1994-06-08 22:42:09 +04:00
return (diff);
1997-10-07 17:39:56 +04:00
if ((diff = (int)(ba->bi_lbn - bb->bi_lbn))) {
1994-06-08 22:42:09 +04:00
if (ba->bi_lbn == LFS_UNUSED_LBN)
return(-1);
else if (bb->bi_lbn == LFS_UNUSED_LBN)
return(1);
else if (ba->bi_lbn < 0 && bb->bi_lbn >= 0)
return(1);
else if (bb->bi_lbn < 0 && ba->bi_lbn >= 0)
return(-1);
else
return (diff);
}
1997-10-07 17:39:56 +04:00
if ((diff = (int)(ba->bi_segcreate - bb->bi_segcreate)))
1994-06-08 22:42:09 +04:00
return (diff);
if ((diff = (int)(ba->bi_daddr - bb->bi_daddr)))
return (diff);
if(ba->bi_inode != LFS_IFILE_INUM)
syslog(LOG_DEBUG,"bi_compare: using kludge on ino %d!", ba->bi_inode);
diff = ba->bi_size - bb->bi_size;
return diff;
}
1994-06-08 22:42:09 +04:00
/*
 * toss() callback: reports whether two BLOCK_INFO entries refer to the
 * same inode and the same logical block.  The first argument is not used.
 *
 * Returns 1 if they do, 0 otherwise.
 */
int
bi_toss(dummy, a, b)
	const void *dummy;
	const void *a;
	const void *b;
{
	const BLOCK_INFO *first, *second;

	first = a;
	second = b;

	if (first->bi_inode != second->bi_inode)
		return(0);
	return(first->bi_lbn == second->bi_lbn);
}
/*
 * Squeeze unwanted elements out of an array, in place and in one pass.
 *
 *	p	IN/OUT: array of *nump elements of "size" bytes each
 *	nump	IN/OUT: number of elements; reduced by one for every
 *		element that is dropped
 *	size	IN: size of one element in bytes
 *	dotoss	IN: called as dotoss(client, e, next), where e is the most
 *		recently kept element and next is the element following it
 *		in the input; a non-zero result drops e, so that of a run
 *		of tossable elements only the last one survives
 *	client	IN: passed through to dotoss untouched
 *
 * The elements that are kept stay in their original order at the front of
 * the array; the contents beyond the new *nump are unspecified.
 */
void
toss(p, nump, size, dotoss, client)
	void *p;
	int *nump;
	size_t size;
	int (*dotoss)(const void *, const void *, const void *);
	void *client;
{
	char *keep, *next;
	int i, total;

	total = *nump;
	if (total <= 0)
		return;

	keep = p;	/* slot holding the most recently kept element */
	next = p;	/* element being examined */
	for (i = 1; i < total; i++) {
		next += size;
		if (dotoss(client, keep, next))
			--(*nump);	/* next takes over keep's slot */
		else
			keep += size;	/* next goes into the following slot */
		if (keep != next)
			memcpy(keep, next, size);
	}
}