NetBSD/libexec/lfs_cleanerd/cleanerd.c
perseant 2f695b5476 Provide a resize_lfs(8), including kernel and cleaner support. The current
implementation requires the fs to be mounted while resizing.  Tested in both
directions, and everything appears to work happily, but ymmv.
2005-04-23 19:47:51 +00:00

1089 lines
29 KiB
C

/* $NetBSD: cleanerd.c,v 1.54 2005/04/23 19:47:51 perseant Exp $ */
/*-
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#ifndef lint
__COPYRIGHT("@(#) Copyright (c) 1992, 1993\n\
The Regents of the University of California. All rights reserved.\n");
#if 0
static char sccsid[] = "@(#)cleanerd.c 8.5 (Berkeley) 6/10/95";
#else
__RCSID("$NetBSD: cleanerd.c,v 1.54 2005/04/23 19:47:51 perseant Exp $");
#endif
#endif /* not lint */
#include <sys/fcntl.h>
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <ufs/ufs/dinode.h>
#include <ufs/lfs/lfs.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <util.h>
#include <errno.h>
#include <err.h>
#include <syslog.h>
#include "clean.h"
char *special = "cleanerd";
int do_small = 0;
int do_mmap = 0;
int do_quit = 0;
int stat_report = 0;
int debug = 0;
int segwait_timeout = 5*60; /* Five minutes */
double load_threshold = 0.2;
int use_fs_idle = 0;
struct cleaner_stats {
double util_tot;
double util_sos;
int blocks_read;
int blocks_written;
int segs_cleaned;
int segs_empty;
int segs_error;
} cleaner_stats;
struct seglist {
unsigned long sl_id; /* segment number */
unsigned long sl_cost; /* cleaning cost */
unsigned long sl_bytes; /* bytes in segment */
unsigned long sl_age; /* age in seconds */
};
struct tossstruct {
struct lfs *lfs;
int seg;
};
typedef struct {
int nsegs; /* number of segments */
struct seglist **segs; /* segment numbers, costs, etc */
int nb; /* total number of blocks */
BLOCK_INFO *ba; /* accumulated block_infos */
caddr_t *buf; /* segment buffers */
} SEGS_AND_BLOCKS;
#define CLEAN_BYTES 0x1
/* function prototypes */
int bi_tossold(const void *, const void *, const void *);
int choose_segments(FS_INFO *, struct seglist *,
unsigned long (*)(FS_INFO *, SEGUSE *));
void clean_fs(FS_INFO *, unsigned long (*)(FS_INFO *, SEGUSE *), int, long);
int invalidate_segment(FS_INFO *, int);
int clean_loop(FS_INFO *, int, long);
int add_segment(FS_INFO *, struct seglist *, SEGS_AND_BLOCKS *);
int clean_segments(FS_INFO *, SEGS_AND_BLOCKS *);
unsigned long cost_benefit(FS_INFO *, SEGUSE *);
int cost_compare(const void *, const void *);
void sig_report(int);
void just_exit(int);
int main(int, char *[]);
/*
* Emulate lfs_{bmapv,markv,segwait} using fcntl calls.
* NOTE: the old system calls still use BLOCK_INFO_15,
* while the fcntls use BLOCK_INFO.
*/
int
lfs_markv_emul(int fd, BLOCK_INFO *blkiov, int blkcnt)
{
struct lfs_fcntl_markv /* {
BLOCK_INFO *blkiov;
int blkcnt;
} */ lim;
lim.blkiov = blkiov;
lim.blkcnt = blkcnt;
return fcntl(fd, LFCNMARKV, &lim);
}
int
lfs_bmapv_emul(int fd, BLOCK_INFO *blkiov, int blkcnt)
{
struct lfs_fcntl_markv /* {
BLOCK_INFO *blkiov;
int blkcnt;
} */ lim;
lim.blkiov = blkiov;
lim.blkcnt = blkcnt;
return fcntl(fd, LFCNBMAPV, &lim);
}
int
lfs_segwait_emul(int fd, struct timeval *tv)
{
return fcntl(fd, LFCNSEGWAIT, tv);
}
/*
* Cleaning Cost Functions:
*
* These return the cost of cleaning a segment. The higher the cost value
* the better it is to clean the segment, so empty segments have the highest
* cost. (It is probably better to think of this as a priority value
* instead).
*
* This is the cost-benefit policy simulated and described in Rosenblum's
* 1991 SOSP paper.
*/
unsigned long
cost_benefit(FS_INFO *fsp, SEGUSE *su)
{
struct lfs *lfsp;
struct timeval t;
time_t age;
unsigned long live;
gettimeofday(&t, NULL);
live = su->su_nbytes;
age = t.tv_sec < su->su_lastmod ? 0 : t.tv_sec - su->su_lastmod;
lfsp = &fsp->fi_lfs;
if (live == 0) { /* No cost, only benefit. */
return lblkno(lfsp, seg_size(lfsp)) * t.tv_sec;
} else if (su->su_flags & SEGUSE_ERROR) {
/* No benefit: don't even try */
return 0;
} else {
/*
* from lfsSegUsage.c (Mendel's code).
* priority calculation is done using INTEGER arithmetic.
* sizes are in BLOCKS (that is why we use lblkno below).
* age is in seconds.
*
* priority = ((seg_size - live) * age) / (seg_size + live)
*/
if (live < 0 || live > seg_size(lfsp)) {
syslog(LOG_WARNING,"bad segusage count: %ld", live);
live = 0;
}
return (lblkno(lfsp, seg_size(lfsp) - live) * age)
/ lblkno(lfsp, seg_size(lfsp) + live);
}
}
int
main(int argc, char **argv)
{
FS_INFO *fsp;
struct statvfs *lstatvfsp; /* file system stats */
struct timeval timeout; /* sleep timeout */
fsid_t fsid;
long clean_opts; /* cleaning options */
int inval_segment;
int segs_per_clean;
int opt, cmd_err;
int do_coaleace;
pid_t childpid;
char *fs_name; /* name of filesystem to clean */
time_t now, lasttime;
int loopcount;
char *pidname; /* Name of pid file base */
char *cp;
cmd_err = debug = do_quit = 0;
do_coaleace = clean_opts = 0;
segs_per_clean = 1;
inval_segment = -1;
while ((opt = getopt(argc, argv, "bcdfi:l:mn:qr:st:")) != -1) {
switch (opt) {
case 'b':
/*
* Use live bytes to determine
* how many segs to clean.
*/
clean_opts |= CLEAN_BYTES;
break;
case 'c':
debug++;
do_coaleace++;
break;
case 'd': /* Debug mode. */
debug++;
break;
case 'f':
use_fs_idle = 1;
break;
case 'i':
inval_segment = atoi(optarg);
break;
case 'l': /* Load below which to clean */
load_threshold = atof(optarg);
break;
case 'm':
do_mmap = 1;
break;
case 'n': /* How many segs to clean at once */
segs_per_clean = atoi(optarg);
break;
case 'q': /* Quit after one run */
do_quit = 1;
break;
case 'r': /* Report every stat_report segments */
stat_report = atoi(optarg);
break;
case 's': /* small writes */
do_small = 1;
break;
case 't':
segwait_timeout = atoi(optarg);
break;
default:
++cmd_err;
}
}
argc -= optind;
argv += optind;
if (cmd_err || (argc != 1))
errx(1, "usage: lfs_cleanerd [-bcdfmsq] [-l load] [-n nsegs] [-r report_freq] [-t timeout] fs_name");
fs_name = argv[0];
if (fs_getmntinfo(&lstatvfsp, fs_name, MOUNT_LFS) == 0) {
/* didn't find the filesystem */
errx(1, "lfs_cleanerd: filesystem %s isn't an LFS!", fs_name);
}
/* should we become a daemon, chdir to / & close fd's */
if (debug == 0 && inval_segment < 0) {
if (daemon(0, 0) == -1)
err(1, "lfs_cleanerd: couldn't become a daemon!");
openlog("lfs_cleanerd", LOG_NDELAY | LOG_PID | 0, LOG_DAEMON);
lasttime=0;
loopcount=0;
loop:
if ((childpid=fork()) < 0)
log_exit(0, LOG_ERR,"%s: couldn't fork, exiting: %m",
fs_name);
if(childpid == 0) {
/* Record child's pid */
asprintf(&pidname, "lfs_cleanerd:s:%s", fs_name);
if (!pidname)
log_exit(0, LOG_WARNING,"malloc failed: %m");
while((cp = strchr(pidname, '/')) != NULL)
*cp = '|';
pidfile(pidname);
/* The cleaner wants to stay in core, really */
if (mlockall(MCL_FUTURE) != 0) {
syslog(LOG_WARNING, "mlockall failed: %m");
}
} else {
/* Record parent's pid */
asprintf(&pidname, "lfs_cleanerd:m:%s", fs_name);
if (!pidname)
log_exit(0, LOG_WARNING,"malloc failed: %m");
while((cp = strchr(pidname, '/')) != NULL)
*cp = '|';
pidfile(pidname);
signal(SIGINT, just_exit);
wait(NULL);
/* If the child is looping, give up */
++loopcount;
if((now=time(NULL)) - lasttime > TIME_THRESHOLD) {
loopcount=0;
}
lasttime = now;
if (loopcount > LOOP_THRESHOLD)
log_exit(0, LOG_ERR,
"%s: cleanerd looping, exiting",
fs_name);
/* Check for unmounted fs */
if (fs_getmntinfo(&lstatvfsp, fs_name, MOUNT_LFS) == 0)
log_exit(0, LOG_NOTICE,
"lfs_cleanerd: %s unmounted, exiting",
fs_name);
goto loop;
}
} else
openlog("lfs_cleanerd", LOG_NDELAY | LOG_PID | LOG_PERROR,
LOG_DAEMON);
signal(SIGINT, sig_report);
signal(SIGUSR1, sig_report);
signal(SIGUSR2, sig_report);
if(debug)
syslog(LOG_INFO,"Cleaner starting on filesystem %s", fs_name);
timeout.tv_sec = segwait_timeout;
timeout.tv_usec = 0;
(void)memset(&fsid, 0, sizeof(fsid));
for (fsp = get_fs_info(lstatvfsp, do_mmap); ;
reread_fs_info(fsp, do_mmap)) {
/*
* If the user specified '-F', he doesn't want us
* to do regular cleaning, only coalesce.
*/
if (do_coaleace) {
clean_all_inodes(fsp);
exit(0);
}
/* Likewise for invalidation */
if (inval_segment >= 0) {
invalidate_segment(fsp, inval_segment);
exit(0);
}
/*
* clean the filesystem, and, if it needed cleaning
* (i.e. it returned nonzero) try it again
* to make sure that some nasty process hasn't just
* filled the disk system up.
*/
if (clean_loop(fsp, segs_per_clean, clean_opts))
continue;
fsid = lstatvfsp->f_fsidx;
if(debug > 1)
syslog(LOG_DEBUG,"Cleaner going to sleep.");
if (lfs_segwait_emul(ifile_fd, &timeout) < 0) {
if (errno == ESHUTDOWN) /* FS unmounted */
exit(0);
syslog(LOG_WARNING,"LFCNSEGWAIT: %m");
}
if(debug > 1)
syslog(LOG_DEBUG,"Cleaner waking up.");
}
}
int
invalidate_segment(FS_INFO *fsp, int sn)
{
struct seglist sl;
SEGS_AND_BLOCKS sb;
/* Make the kernel back up the the beginning, and sync */
fcntl(ifile_fd, LFCNREWIND, &sn);
fsync(ifile_fd);
/* Clean the segment */
memset(&sb, 0, sizeof(SEGS_AND_BLOCKS));
sl.sl_id = sn;
if (add_segment(fsp, &sl, &sb) < 0)
err(1, "add_segment %d", sn);
clean_segments(fsp, &sb);
/* Now do the actual invalidation */
if (fcntl(ifile_fd, LFCNINVAL, &sn) < 0)
err(1, "LFCNINVAL segment %d", sn);
return 0;
}
/* return the number of segments cleaned */
int
clean_loop(FS_INFO *fsp, int nsegs, long options)
{
struct lfs *lfsp;
double loadavg[MAXLOADS];
time_t now;
u_long max_free_segs;
u_long fsb_per_seg;
lfsp = &fsp->fi_lfs;
/*
* Compute the maximum possible number of free segments, given the
* number of free blocks.
*/
fsb_per_seg = segtod(lfsp, 1);
max_free_segs = MAX(fsp->fi_cip->bfree, 0) / fsb_per_seg +
lfsp->lfs_minfreeseg;
/*
* We will clean if there are not enough free blocks or total clean
* space is less than BUSY_LIM % of possible clean space.
*/
now = time((time_t *)NULL);
if(debug > 1) {
syslog(LOG_DEBUG, "fsb_per_seg = %lu bfree = %d avail = %d",
fsb_per_seg, fsp->fi_cip->bfree, fsp->fi_cip->avail);
syslog(LOG_DEBUG, "clean segs = %d, max_free_segs = %ld",
fsp->fi_cip->clean, max_free_segs);
}
if ((fsp->fi_cip->bfree - fsp->fi_cip->avail > fsb_per_seg &&
fsp->fi_cip->avail < (long)fsb_per_seg &&
fsp->fi_cip->bfree > (long)fsb_per_seg) ||
(fsp->fi_cip->clean < max_free_segs &&
(fsp->fi_cip->clean <= lfsp->lfs_minfreeseg ||
fsp->fi_cip->clean < max_free_segs * BUSY_LIM)))
{
if(debug)
syslog(LOG_DEBUG, "Cleaner Running at %s"
" (%d of %lu segments available, avail = %d,"
" bfree = %d)",
ctime(&now), fsp->fi_cip->clean, max_free_segs,
fsp->fi_cip->avail, fsp->fi_cip->bfree);
clean_fs(fsp, cost_benefit, nsegs, options);
if(do_quit) {
if(debug)
syslog(LOG_INFO,"Cleaner shutting down");
exit(0);
}
return (1);
} else if(use_fs_idle) {
/*
* If we're using "filesystem idle" instead of system idle,
* clean if the fs has not been modified in segwait_timeout
* seconds.
*/
if(now-fsp->fi_fs_tstamp > segwait_timeout
&& fsp->fi_cip->clean < max_free_segs * IDLE_LIM) {
if(debug) {
syslog(LOG_DEBUG, "Cleaner Running at %s: "
"fs idle time %ld sec; %d of %lu segments available)",
ctime(&now), (long)now-fsp->fi_fs_tstamp,
fsp->fi_cip->clean, max_free_segs);
syslog(LOG_DEBUG, " filesystem idle since %s", ctime(&(fsp->fi_fs_tstamp)));
}
clean_fs(fsp, cost_benefit, nsegs, options);
if(do_quit) {
if(debug)
syslog(LOG_INFO,"Cleaner shutting down");
exit(0);
}
return (1);
}
} else {
/*
* We will also clean if the system is reasonably idle and
* the total clean space is less than IDLE_LIM % of possible
* clean space.
*/
if (getloadavg(loadavg, MAXLOADS) == -1) {
perror("getloadavg: failed");
return (-1);
}
if (loadavg[ONE_MIN] < load_threshold
&& fsp->fi_cip->clean < max_free_segs * IDLE_LIM)
{
if (debug)
syslog(LOG_DEBUG, "Cleaner Running at %s "
"(system load %.1f, %d of %lu segments available)",
ctime(&now), loadavg[ONE_MIN],
fsp->fi_cip->clean, max_free_segs);
clean_fs(fsp, cost_benefit, nsegs, options);
if (do_quit) {
if(debug)
syslog(LOG_INFO,"Cleaner shutting down");
exit(0);
}
return (1);
}
}
if (debug > 1) {
if (fsp->fi_cip->bfree - fsp->fi_cip->avail <= fsb_per_seg)
syslog(LOG_DEBUG, "condition 1 false");
if (fsp->fi_cip->avail >= (long)fsb_per_seg)
syslog(LOG_DEBUG, "condition 2 false");
if (fsp->fi_cip->clean >= max_free_segs)
syslog(LOG_DEBUG, "condition 3 false");
if (fsp->fi_cip->clean > lfsp->lfs_minfreeseg)
syslog(LOG_DEBUG, "condition 4 false");
if (fsp->fi_cip->clean >= max_free_segs * BUSY_LIM)
syslog(LOG_DEBUG, "condition 5 false");
syslog(LOG_DEBUG, "Cleaner Not Running at %s", ctime(&now));
}
return (0);
}
void
clean_fs(FS_INFO *fsp, unsigned long (*cost_func)(FS_INFO *, SEGUSE *),
int nsegs, long options)
{
struct seglist *segs, *sp;
long int to_clean, cleaned_bytes;
unsigned long i, j, total;
struct rusage ru;
fsid_t *fsidp;
int error;
SEGS_AND_BLOCKS *sbp;
fsidp = &fsp->fi_statvfsp->f_fsidx;
if ((segs =
malloc(fsp->fi_lfs.lfs_nseg * sizeof(struct seglist))) == NULL) {
syslog(LOG_WARNING,"malloc failed: %m");
return;
}
total = i = choose_segments(fsp, segs, cost_func);
/*
* If we can get lots of cleaning for free, do it now.
* The theory is that if empty dirty segments exist, we
* can afford to write a bunch of inodes, directory blocks,
* and Ifile blocks in order to clean them. (If things are
* so bad that we don't have enough segments to do this, though,
* we're in trouble.)
*/
sp = segs;
for (j = 0; j < total && sp->sl_bytes == 0; j++) {
if (debug)
syslog(LOG_DEBUG,"Not cleaning empty segment %ld",
sp->sl_id);
++cleaner_stats.segs_empty;
sp++;
i--;
}
if (j > 0) {
/* Call limited checkpoint to help clean empty segs */
fcntl(ifile_fd, LFCNRECLAIM, 0);
}
if (j > nsegs) {
free(segs);
return;
}
#if 0
/* If we relly need to clean a lot, do it now */
if(fsp->fi_cip->clean < 2 * fsp->fi_lfs.lfs_minfreeseg)
nsegs = MAX(nsegs, fsp->fi_lfs.lfs_minfreeseg);
#endif
/* But back down if we haven't got that many free to clean into */
if (nsegs > fsp->fi_cip->clean / 2)
nsegs = MAX(1, fsp->fi_cip->clean / 2);
if (debug > 1)
syslog(LOG_DEBUG, "clean_fs: found %ld segments to clean in %s",
i, fsp->fi_statvfsp->f_mntonname);
if (i) {
sbp = (SEGS_AND_BLOCKS *)malloc(sizeof(SEGS_AND_BLOCKS));
memset(sbp, 0, sizeof(SEGS_AND_BLOCKS));
/* Check which cleaning algorithm to use. */
if (options & CLEAN_BYTES) {
/*
* Count bytes. Be careful here not to run us out
* of segments to write into. We try to stay below
* our given number of segments minus 1/4 segment,
* unless we can't clean anything without going over
* the limit (e.g. nsegs == 1 and the preferred
* segment to clean is 80% full).
*/
cleaned_bytes = 0;
to_clean = nsegs * fsp->fi_lfs.lfs_ssize -
fsp->fi_lfs.lfs_ssize / 4;
for (; i && (cleaned_bytes == 0 ||
cleaned_bytes + sp->sl_bytes <= to_clean);
i--, ++sp) {
if (add_segment(fsp, sp, sbp) < 0) {
syslog(LOG_WARNING,"add_segment failed"
" segment %ld: %m", sp->sl_id);
if (sbp->nsegs == 0 && errno != ENOENT)
continue;
else
break;
}
cleaned_bytes += sp->sl_bytes;
}
} else {
/* Count segments */
for (i = MIN(i, nsegs); i-- ; ++sp) {
total--;
syslog(LOG_DEBUG, "Cleaning segment %ld"
" (of %ld choices)", sp->sl_id, i + 1);
if (add_segment(fsp, sp, sbp) != 0) {
syslog(LOG_WARNING,"add_segment failed"
" segment %ld: %m", sp->sl_id);
if (sbp->nsegs == 0 && errno != ENOENT)
continue;
else
break;
}
}
}
clean_segments(fsp, sbp);
if (sbp->buf)
free(sbp->buf);
if (sbp->segs)
free(sbp->segs);
free(sbp);
}
free(segs);
if(debug) {
error = getrusage(RUSAGE_SELF, &ru);
if(error) {
syslog(LOG_WARNING, "getrusage returned error: %m");
} else {
syslog(LOG_DEBUG, "Current usage: maxrss=%ld,"
" idrss=%ld, isrss=%ld", ru.ru_maxrss,
ru.ru_idrss, ru.ru_isrss);
}
}
}
/*
* Segment with the highest priority get sorted to the beginning of the
* list. This sort assumes that empty segments always have a higher
* cost/benefit than any used segment.
*/
int
cost_compare(const void *a, const void *b)
{
return ((struct seglist *)b)->sl_cost < ((struct seglist *)a)->sl_cost ? -1 : 1;
}
/*
* Returns the number of segments to be cleaned with the elements of seglist
* filled in.
*/
int
choose_segments(FS_INFO *fsp, struct seglist *seglist, unsigned long (*cost_func)(FS_INFO *, SEGUSE *))
{
struct lfs *lfsp;
struct seglist *sp;
SEGUSE *sup;
int i, nsegs;
lfsp = &fsp->fi_lfs;
if (debug > 1)
syslog(LOG_DEBUG,"Entering choose_segments");
dump_super(lfsp);
dump_cleaner_info(fsp->fi_cip);
for (sp = seglist, i = 0; i < lfsp->lfs_nseg; ++i) {
if (debug > 1) {
printf("%d...", i);
fflush(stdout);
}
sup = SEGUSE_ENTRY(lfsp, fsp->fi_segusep, i);
if (debug > 2)
PRINT_SEGUSE(sup, i);
if (!(sup->su_flags & SEGUSE_DIRTY) ||
sup->su_flags & SEGUSE_ACTIVE)
continue;
if (debug > 2)
syslog(LOG_DEBUG, "\tchoosing segment %d", i);
sp->sl_cost = (*cost_func)(fsp, sup);
sp->sl_id = i;
sp->sl_bytes = sup->su_nbytes;
sp->sl_age = time(NULL) - sup->su_lastmod;
++sp;
}
nsegs = sp - seglist;
if (debug > 1) {
putchar('\n');
syslog(LOG_DEBUG, "Sorting...");
}
qsort(seglist, nsegs, sizeof(struct seglist), cost_compare);
if (debug > 2)
for(i = 0; i < nsegs; i++) {
syslog(LOG_DEBUG, "%d: segment %lu age %lu"
" contains %lu priority %lu\n", i,
seglist[i].sl_age, seglist[i].sl_id,
seglist[i].sl_bytes, seglist[i].sl_cost);
}
if (debug > 1)
syslog(LOG_DEBUG,"Returning %d segments", nsegs);
return (nsegs);
}
/*
* Add still-valid blocks from the given segment to the block array,
* in preparation for sending through markv.
*/
int
add_segment(FS_INFO *fsp, struct seglist *slp, SEGS_AND_BLOCKS *sbp)
{
int id = slp->sl_id;
BLOCK_INFO *tba, *_bip;
SEGUSE *sp;
struct lfs *lfsp;
struct tossstruct t;
struct ufs1_dinode *dip;
caddr_t seg_buf;
caddr_t cmp_buf, cmp_dp;
size_t size;
daddr_t seg_addr;
int num_blocks, i, j, error;
int seg_isempty=0;
unsigned long *lp;
struct seglist **nsegs;
caddr_t *nbuf;
BLOCK_INFO *nba;
lfsp = &fsp->fi_lfs;
sp = SEGUSE_ENTRY(lfsp, fsp->fi_segusep, id);
seg_addr = sntod(lfsp,id);
error = 0;
tba = NULL;
syslog(LOG_DEBUG, "adding segment %d: contains %lu bytes", id,
(unsigned long)sp->su_nbytes);
/* XXX could add debugging to verify that segment is really empty */
if (sp->su_nbytes == 0) {
++cleaner_stats.segs_empty;
++seg_isempty;
}
/* Add a new segment to the accumulated list */
nsegs = (struct seglist **)realloc(sbp->segs,
sizeof(struct seglist *) * (sbp->nsegs + 1));
nbuf = (caddr_t *)realloc(sbp->buf, sizeof(caddr_t) * (sbp->nsegs + 1));
if (!nsegs || !nbuf)
err(1, "realloc");
sbp->segs = nsegs;
sbp->buf = nbuf;
sbp->nsegs++;
sbp->segs[sbp->nsegs - 1] = slp;
/* map the segment into a buffer */
if (mmap_segment(fsp, id, &seg_buf, do_mmap) < 0) {
syslog(LOG_WARNING,"add_segment: mmap_segment failed: %m");
++cleaner_stats.segs_error;
--sbp->nsegs;
return (-1);
}
sbp->buf[sbp->nsegs - 1] = seg_buf;
/* get a list of blocks that are contained by the segment */
if ((error = lfs_segmapv(fsp, id, seg_buf, &tba, &num_blocks)) < 0) {
syslog(LOG_WARNING,
"add_segment: lfs_segmapv failed for segment %d", id);
goto out;
}
cleaner_stats.blocks_read += segtod(lfsp, 1);
if (debug > 1)
syslog(LOG_DEBUG, "lfs_segmapv returned %d blocks", num_blocks);
/* get the current disk address of blocks contained by the segment */
if ((error = lfs_bmapv_emul(ifile_fd, tba, num_blocks)) < 0) {
if (errno != ESHUTDOWN)
syslog(LOG_WARNING, "add_segment: LFCNBMAPV failed");
goto out;
}
/* Now toss any blocks not in the current segment */
t.lfs = lfsp;
t.seg = id;
toss(tba, &num_blocks, sizeof(BLOCK_INFO), bi_tossold, &t);
/* Check if last element should be tossed */
if (num_blocks && bi_tossold(&t, tba + num_blocks - 1, NULL))
--num_blocks;
if(seg_isempty) {
if(num_blocks)
syslog(LOG_WARNING,"segment %d was supposed to be empty, but has %d live blocks!", id, num_blocks);
else
syslog(LOG_DEBUG,"segment %d is empty, as claimed", id);
}
/* XXX KS - check for misplaced blocks */
/* XXX ondisk32 */
for(i=0; i<num_blocks; i++) {
if(tba[i].bi_daddr
&& tba[i].bi_daddr != seg_addr +
btofsb(lfsp, (char *)(tba[i].bi_bp) - seg_buf)
&& dtosn(&(fsp->fi_lfs), tba[i].bi_daddr) == id)
{
syslog(LOG_ERR, "bi_daddr = 0x%x = %lldb; %p - %p = %ld",
tba[i].bi_daddr,
(long long)fsbtob(lfsp, tba[i].bi_daddr - seg_addr),
tba[i].bi_bp, seg_buf,
(long)(((char *)(tba[i].bi_bp) - seg_buf)));
syslog(LOG_ERR, "seg %d (0x%llx), ino %d lbn %d, 0x%x != 0x%llx",
id, (long long)seg_addr,
tba[i].bi_inode,
tba[i].bi_lbn,
tba[i].bi_daddr,
(long long)(seg_addr +
btofsb(lfsp, (char *)(tba[i].bi_bp) - seg_buf)));
error = EFAULT;
goto out;
/*
* XXX KS - have to be careful here about Inodes;
* if bmapv shows them somewhere else in the
* segment from where we thought, we need to reload
* the *right* inode, not the first one in the block.
*/
/* XXX ondisk32 */
if(tba[i].bi_lbn == LFS_UNUSED_LBN) {
dip = (struct ufs1_dinode *)(seg_buf + fsbtob(lfsp, tba[i].bi_daddr - seg_addr));
for(j=INOPB(lfsp)-1;j>=0;j--) {
if(dip[j].di_u.inumber == tba[i].bi_inode) {
tba[i].bi_bp = (char *)(dip+j);
break;
}
}
if(j<0) {
syslog(LOG_ERR, "lost inode %d in the shuffle! (blk %d)",
tba[i].bi_inode, tba[i].bi_daddr);
if (debug) {
syslog(LOG_DEBUG,
"inode numbers found were:");
for(j=INOPB(lfsp)-1;j>=0;j--) {
syslog(LOG_DEBUG, "%d",
dip[j].di_u.inumber);
}
}
errx(1, "lost inode");
} else if (debug > 1) {
syslog(LOG_DEBUG,"Ino %d corrected to 0x%x",
tba[i].bi_inode,
tba[i].bi_daddr);
}
} else {
tba[i].bi_bp = seg_buf + fsbtob(lfsp, tba[i].bi_daddr - seg_addr);
}
}
}
/* Update live bytes calc - XXX KS */
slp->sl_bytes = 0;
for(i=0; i<num_blocks; i++)
if(tba[i].bi_lbn == LFS_UNUSED_LBN)
slp->sl_bytes += sizeof(struct ufs1_dinode);
else
slp->sl_bytes += tba[i].bi_size;
if(debug > 1) {
syslog(LOG_DEBUG, "after bmapv still have %d blocks",
num_blocks);
if (num_blocks)
syslog(LOG_DEBUG, "BLOCK INFOS");
for (_bip = tba, i=0; i < num_blocks; ++_bip, ++i) {
PRINT_BINFO(_bip);
/* XXX ondisk32? */
lp = (u_long *)_bip->bi_bp;
}
}
/* Compress segment buffer, if necessary */
if (!do_mmap && slp->sl_bytes < seg_size(lfsp) / 2) {
if (debug > 1)
syslog(LOG_DEBUG, "compressing: %d < %d",
(int)slp->sl_bytes, seg_size(lfsp) / 2);
cmp_buf = malloc(slp->sl_bytes); /* XXX could do in-place */
if (cmp_buf == NULL) {
if (debug)
syslog(LOG_DEBUG, "can't compress segment: %m");
} else {
cmp_dp = cmp_buf;
for (i = 0; i < num_blocks; i++) {
if(tba[i].bi_lbn == LFS_UNUSED_LBN)
size = sizeof(struct ufs1_dinode);
else
size = tba[i].bi_size;
memcpy(cmp_dp, tba[i].bi_bp, size);
tba[i].bi_bp = cmp_dp;
cmp_dp += size;
}
free(seg_buf);
seg_buf = cmp_buf;
sbp->buf[sbp->nsegs - 1] = seg_buf;
}
}
/* Add these blocks to the accumulated list */
nba = realloc(sbp->ba, (sbp->nb + num_blocks) * sizeof(BLOCK_INFO));
if (!nba)
err(1, "realloc");
sbp->ba = nba;
memcpy(sbp->ba + sbp->nb, tba, num_blocks * sizeof(BLOCK_INFO));
sbp->nb += num_blocks;
free(tba);
return (0);
out:
--sbp->nsegs;
if (tba)
free(tba);
if (error) {
sp->su_flags |= SEGUSE_ERROR;
++cleaner_stats.segs_error;
}
munmap_segment(fsp, sbp->buf[sbp->nsegs], do_mmap);
if (stat_report && cleaner_stats.segs_cleaned % stat_report == 0)
sig_report(SIGUSR1);
return (error);
}
/* Call markv and clean up */
int
clean_segments(FS_INFO *fsp, SEGS_AND_BLOCKS *sbp)
{
int maxblocks, clean_blocks, icount, extra, ebytes, nbytes;
BLOCK_INFO *bp;
int i, error;
double util;
ino_t ino, inino;
error = 0;
cleaner_stats.segs_cleaned += sbp->nsegs;
cleaner_stats.blocks_written += sbp->nb;
/*
* Count up the number of indirect blocks and inodes we'll
* have to write to take care of this (if we are asked to do this).
* XXX this only cares about single indirect blocks.
*/
icount = 0;
ino = inino = 0;
extra = 0;
nbytes = 0;
for (i = sbp->nb, bp = sbp->ba; i > 0; bp++, i--) {
if (bp->bi_lbn != LFS_UNUSED_LBN)
nbytes += bp->bi_size;
if (ino != bp->bi_inode) {
ino = bp->bi_inode;
++icount;
}
if (bp->bi_lbn == -NDADDR)
inino = ino;
if (inino != ino && bp->bi_lbn > 0 && bp->bi_lbn > NDADDR) {
++extra;
inino = ino;
}
}
ebytes = 0 + INOPB(&fsp->fi_lfs) * fsp->fi_lfs.lfs_ibsize ;
ebytes += extra * fsp->fi_lfs.lfs_bsize;
if (debug) {
fprintf(stderr, "clean_segment: %d inodes %d indirect -> %d bytes + %d = %d total (to save %d)\n",
icount, extra, ebytes, nbytes, ebytes + nbytes,
fsp->fi_lfs.lfs_fsize * fsp->fi_lfs.lfs_fsbpseg * sbp->nsegs);
}
/* If we're writing more than we're saving, try coalescing */
if (ebytes + nbytes > fsp->fi_lfs.lfs_fsize * fsp->fi_lfs.lfs_fsbpseg * sbp->nsegs) {
fork_coalesce(fsp);
}
util = ((double)sbp->nb / segtod(&fsp->fi_lfs, 1));
cleaner_stats.util_tot += util;
cleaner_stats.util_sos += util * util;
if (do_small)
maxblocks = MAXPHYS / fsp->fi_lfs.lfs_bsize - 1;
else
maxblocks = sbp->nb;
for (bp = sbp->ba; sbp->nb > 0; bp += clean_blocks) {
clean_blocks = maxblocks < sbp->nb ? maxblocks : sbp->nb;
if ((error = lfs_markv_emul(ifile_fd, bp, clean_blocks)) < 0) {
if (errno != ESHUTDOWN)
syslog(LOG_WARNING,"clean_segment: LFCNMARKV failed: %m");
++cleaner_stats.segs_error;
if (errno == ENOENT) break;
}
sbp->nb -= clean_blocks;
}
/* Clean up */
if (sbp->ba)
free(sbp->ba);
if (error)
++cleaner_stats.segs_error;
for (i = 0; i < sbp->nsegs; i++)
munmap_segment(fsp, sbp->buf[i], do_mmap);
if (stat_report && cleaner_stats.segs_cleaned % stat_report == 0)
sig_report(SIGUSR1);
return (error);
}
int
bi_tossold(const void *client, const void *a, const void *b)
{
const struct tossstruct *t;
t = (struct tossstruct *)client;
return (((BLOCK_INFO *)a)->bi_daddr == LFS_UNUSED_DADDR ||
dtosn(t->lfs, ((BLOCK_INFO *)a)->bi_daddr) != t->seg);
}
void
sig_report(int sig)
{
double avg = 0.0;
syslog(LOG_INFO, "lfs_cleanerd:\t%s%d\n\t\t%s%d\n\t\t%s%d\n\t\t%s%d\n\t\t%s%d",
"blocks_read ", cleaner_stats.blocks_read,
"blocks_written ", cleaner_stats.blocks_written,
"segs_cleaned ", cleaner_stats.segs_cleaned,
"segs_empty ", cleaner_stats.segs_empty,
"seg_error ", cleaner_stats.segs_error);
syslog(LOG_INFO, "\t\t%s%5.2f\n\t\t%s%5.2f",
"util_tot ", cleaner_stats.util_tot,
"util_sos ", cleaner_stats.util_sos);
avg = cleaner_stats.util_tot / MAX(cleaner_stats.segs_cleaned, 1.0);
syslog(LOG_INFO, "\t\tavg util: %4.2f std dev: %9.6f", avg,
cleaner_stats.util_sos / MAX(cleaner_stats.segs_cleaned - avg * avg, 1.0));
if (sig == SIGUSR2) {
cleaner_stats.blocks_read = 0;
cleaner_stats.blocks_written = 0;
cleaner_stats.segs_cleaned = 0;
cleaner_stats.segs_empty = 0;
cleaner_stats.segs_error = 0;
cleaner_stats.util_tot = 0.0;
cleaner_stats.util_sos = 0.0;
}
if (sig == SIGINT)
exit(0);
}
void
just_exit(int sig)
{
exit(0);
}