f4fea25c9f
failures as well as successes when a run of clean_all_inodes completes. Explicitly cast to off_t in get_dinode and get_rawblock, to make sure we read the right block.
315 lines
8.5 KiB
C
315 lines
8.5 KiB
C
/* $NetBSD: coalesce.c,v 1.3 2002/06/14 05:21:21 perseant Exp $ */
|
|
|
|
/*-
|
|
* Copyright (c) 2002 The NetBSD Foundation, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* This code is derived from software contributed to The NetBSD Foundation
|
|
* by Konrad E. Schroder <perseant@hhhh.org>.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by the NetBSD
|
|
* Foundation, Inc. and its contributors.
|
|
* 4. Neither the name of The NetBSD Foundation nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/mount.h>
|
|
#include <sys/time.h>
|
|
#include <sys/resource.h>
|
|
#include <sys/types.h>
|
|
#include <sys/wait.h>
|
|
#include <sys/mman.h>
|
|
|
|
#include <ufs/ufs/dinode.h>
|
|
#include <ufs/lfs/lfs.h>
|
|
|
|
#include <fcntl.h>
|
|
#include <signal.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <time.h>
|
|
#include <unistd.h>
|
|
#include <util.h>
|
|
#include <errno.h>
|
|
#include <err.h>
|
|
|
|
#include <syslog.h>
|
|
|
|
#include "clean.h"
|
|
|
|
extern int debug, do_mmap;
|
|
|
|
static int
|
|
tossdead(const void *client, const void *a, const void *b)
|
|
{
|
|
return (((BLOCK_INFO_15 *)a)->bi_daddr <= 0 ||
|
|
((BLOCK_INFO_15 *)a)->bi_size == 0);
|
|
}
|
|
|
|
/*
 * Integer base-2 logarithm, rounded down: counts how many times n can
 * be halved before reaching zero, minus one.  Returns -1 when n is zero
 * or negative.
 */
static int log2int(int n)
{
	int bits;

	for (bits = -1; n > 0; n /= 2)
		bits++;
	return bits;
}
|
|
|
|
/*
 * Result codes produced by clean_inode().  The values are used as array
 * indices (message table, per-code tallies), so they are dense and start
 * at zero; COALESCE_MAXERROR is the count of real codes, not a result.
 */
enum coalesce_returncodes {
	COALESCE_OK		= 0,	/* blocks were rewritten */
	COALESCE_NOINODE	= 1,	/* no dinode found for the inode */
	COALESCE_TOOSMALL	= 2,	/* fewer than NDADDR blocks */
	COALESCE_BADSIZE	= 3,	/* di_size is negative */
	COALESCE_BADBLOCKSIZE	= 4,	/* size needs more blocks than held */
	COALESCE_NOMEM		= 5,	/* memory allocation failed */
	COALESCE_BADBMAPV	= 6,	/* lfs_bmapv reported an error */
	COALESCE_NOTWORTHIT	= 7,	/* not discontinuous enough */
	COALESCE_NOTHINGLEFT	= 8,	/* every block was tossed */
	COALESCE_NOTHINGLEFT2	= 9,	/* too many blocks were tossed */

	COALESCE_MAXERROR	= 10
};
|
|
|
|
/*
 * Human-readable text for each coalesce result code, indexed by the
 * code's numeric value.  The final slot is a catch-all message.
 */
char *coalesce_return[] = {
	[0]  = "Successfully coalesced",			/* COALESCE_OK */
	[1]  = "File not in use or inode not found",		/* COALESCE_NOINODE */
	[2]  = "Not large enough to coalesce",			/* COALESCE_TOOSMALL */
	[3]  = "Negative size",					/* COALESCE_BADSIZE */
	[4]  = "Not enough blocks to account for size",		/* COALESCE_BADBLOCKSIZE */
	[5]  = "Malloc failed",					/* COALESCE_NOMEM */
	[6]  = "lfs_bmapv failed",				/* COALESCE_BADBMAPV */
	[7]  = "Not broken enough to fix",			/* COALESCE_NOTWORTHIT */
	[8]  = "Too many blocks not found",			/* COALESCE_NOTHINGLEFT */
	[9]  = "Too many blocks found in active segments",	/* COALESCE_NOTHINGLEFT2 */

	[10] = "No such error"					/* COALESCE_MAXERROR */
};
|
|
|
|
/*
|
|
* Find out if this inode's data blocks are discontinuous; if they are,
|
|
* rewrite them using lfs_markv. Return the number of inodes rewritten.
|
|
*/
|
|
int clean_inode(struct fs_info *fsp, ino_t ino)
|
|
{
|
|
int i, error;
|
|
BLOCK_INFO_15 *bip, *tbip;
|
|
struct dinode *dip;
|
|
int nb, onb, noff;
|
|
ufs_daddr_t toff;
|
|
struct lfs *lfsp;
|
|
int bps;
|
|
SEGUSE *sup;
|
|
|
|
lfsp = &fsp->fi_lfs;
|
|
|
|
dip = get_dinode(fsp, ino);
|
|
if (dip == NULL)
|
|
return COALESCE_NOINODE;
|
|
|
|
/* Compute file block size, set up for lfs_bmapv */
|
|
onb = nb = btofsb(lfsp, dip->di_size);
|
|
|
|
/* XXX for now, don't do any file small enough to have fragments */
|
|
if (nb < NDADDR)
|
|
return COALESCE_TOOSMALL;
|
|
|
|
/* Sanity checks */
|
|
if (dip->di_size < 0) {
|
|
if (debug)
|
|
syslog(LOG_DEBUG, "ino %d, negative size (%lld)",
|
|
ino, (long long)dip->di_size);
|
|
return COALESCE_BADSIZE;
|
|
}
|
|
if (nb > dip->di_blocks) {
|
|
if (debug)
|
|
syslog(LOG_DEBUG, "ino %d, computed blocks %d > held blocks %d",
|
|
ino, nb, dip->di_blocks);
|
|
return COALESCE_BADBLOCKSIZE;
|
|
}
|
|
|
|
bip = (BLOCK_INFO_15 *)malloc(sizeof(BLOCK_INFO_15) * nb);
|
|
if (bip == NULL) {
|
|
syslog(LOG_WARNING, "ino %d, %d blocks: %m", ino, nb);
|
|
return COALESCE_NOMEM;
|
|
}
|
|
for (i = 0; i < nb; i++) {
|
|
memset(bip + i, 0, sizeof(BLOCK_INFO_15));
|
|
bip[i].bi_inode = ino;
|
|
bip[i].bi_lbn = i;
|
|
bip[i].bi_version = dip->di_gen;
|
|
/* Don't set the size, but let lfs_bmap fill it in */
|
|
}
|
|
if ((error = lfs_bmapv(&fsp->fi_statfsp->f_fsid, bip, nb)) < 0) {
|
|
syslog(LOG_WARNING, "lfs_bmapv: %m");
|
|
free(bip);
|
|
return COALESCE_BADBMAPV;
|
|
}
|
|
noff = toff = 0;
|
|
for (i = 1; i < nb; i++) {
|
|
if (bip[i].bi_daddr != bip[i - 1].bi_daddr + 1)
|
|
++noff;
|
|
toff += abs(bip[i].bi_daddr - bip[i - 1].bi_daddr - 1);
|
|
}
|
|
|
|
/*
|
|
* If this file is not discontinuous, there's no point in rewriting it.
|
|
*
|
|
* Explicitly allow a certain amount of discontinuity, since large
|
|
* files will be broken among segments and medium-sized files
|
|
* can have a break or two and it's okay.
|
|
*/
|
|
if (nb <= 1 || noff == 0 || noff < log2int(nb) ||
|
|
segtod(lfsp, noff) * 2 < nb) {
|
|
free(bip);
|
|
return COALESCE_NOTWORTHIT;
|
|
} else if (debug)
|
|
syslog(LOG_DEBUG, "ino %d total discontinuity "
|
|
"%d (%d) for %d blocks", ino, noff, toff, nb);
|
|
|
|
/* Search for blocks in active segments; don't move them. */
|
|
for (i = 0; i < nb; i++) {
|
|
if (bip[i].bi_daddr <= 0)
|
|
continue;
|
|
sup = SEGUSE_ENTRY(lfsp, fsp->fi_segusep,
|
|
dtosn(lfsp, bip[i].bi_daddr));
|
|
if (sup->su_flags & SEGUSE_ACTIVE)
|
|
bip[i].bi_daddr = LFS_UNUSED_DADDR; /* 0 */
|
|
}
|
|
/*
|
|
* Get rid of any we've marked dead. If this is an older
|
|
* kernel that doesn't have lfs_bmapv fill in the block
|
|
* sizes, we'll toss everything here.
|
|
*/
|
|
toss(bip, &nb, sizeof(BLOCK_INFO_15), tossdead, NULL);
|
|
if (nb && tossdead(NULL, bip + nb - 1, NULL))
|
|
--nb;
|
|
if (nb == 0) {
|
|
free(bip);
|
|
return COALESCE_NOTHINGLEFT;
|
|
}
|
|
|
|
/*
|
|
* We may have tossed enough blocks that it is no longer worthwhile
|
|
* to rewrite this inode.
|
|
*/
|
|
if (onb - nb > log2int(onb)) {
|
|
if (debug)
|
|
syslog(LOG_DEBUG, "too many blocks tossed, not rewriting");
|
|
return COALESCE_NOTHINGLEFT2;
|
|
}
|
|
|
|
/*
|
|
* We are going to rewrite this inode.
|
|
* For any remaining blocks, read in their contents.
|
|
*/
|
|
for (i = 0; i < nb; i++) {
|
|
bip[i].bi_bp = malloc(bip[i].bi_size);
|
|
get_rawblock(fsp, bip[i].bi_bp, bip[i].bi_size, bip[i].bi_daddr);
|
|
}
|
|
if (debug)
|
|
syslog(LOG_DEBUG, "ino %d markv %d blocks", ino, nb);
|
|
|
|
/*
|
|
* Write in segment-sized chunks. If at any point we'd write more
|
|
* than half of the available segments, sleep until that's not
|
|
* true any more.
|
|
*/
|
|
bps = segtod(lfsp, 1);
|
|
for (tbip = bip; tbip < bip + nb; tbip += bps) {
|
|
while (fsp->fi_cip->clean < 4) {
|
|
lfs_segwait(&fsp->fi_statfsp->f_fsid, NULL);
|
|
reread_fs_info(fsp, do_mmap);
|
|
/* XXX start over? */
|
|
}
|
|
lfs_markv(&fsp->fi_statfsp->f_fsid, tbip,
|
|
(tbip + bps < bip + nb ? bps : nb % bps));
|
|
}
|
|
|
|
for (i = 0; i < nb; i++)
|
|
if (bip[i].bi_bp)
|
|
free(bip[i].bi_bp);
|
|
free(bip);
|
|
return COALESCE_OK;
|
|
}
|
|
|
|
/*
|
|
* Try coalescing every inode in the filesystem.
|
|
* Return the number of inodes actually altered.
|
|
*/
|
|
int clean_all_inodes(struct fs_info *fsp)
|
|
{
|
|
int i, r;
|
|
int totals[COALESCE_MAXERROR];
|
|
|
|
memset(totals, 0, sizeof(totals));
|
|
for (i = 0; i < fsp->fi_ifile_count; i++) {
|
|
r = clean_inode(fsp, i);
|
|
++totals[r];
|
|
}
|
|
|
|
for (i = 0; i < COALESCE_MAXERROR; i++)
|
|
if (totals[i])
|
|
syslog(LOG_DEBUG, "%s: %d", coalesce_return[i],
|
|
totals[i]);
|
|
|
|
return totals[COALESCE_OK];
|
|
}
|
|
|
|
int fork_coalesce(struct fs_info *fsp)
|
|
{
|
|
static pid_t childpid;
|
|
int num;
|
|
|
|
reread_fs_info(fsp, do_mmap);
|
|
|
|
if (childpid) {
|
|
if (waitpid(childpid, NULL, WNOHANG) == childpid)
|
|
childpid = 0;
|
|
}
|
|
if (childpid && kill(childpid, 0) >= 0) {
|
|
/* already running a coalesce process */
|
|
if (debug)
|
|
syslog(LOG_DEBUG, "coalescing already in progress");
|
|
return 0;
|
|
}
|
|
childpid = fork();
|
|
if (childpid < 0) {
|
|
syslog(LOG_ERR, "fork: %m");
|
|
return 0;
|
|
} else if (childpid == 0) {
|
|
syslog(LOG_NOTICE, "new coalescing process, pid %d", getpid());
|
|
num = clean_all_inodes(fsp);
|
|
syslog(LOG_NOTICE, "coalesced %d discontiguous inodes", num);
|
|
exit(0);
|
|
}
|
|
return 0;
|
|
}
|