5646727e5a
thinks it needs to clean and segments are tantalizingly lingering in the "empty but dirty" state.
1032 lines
28 KiB
C
1032 lines
28 KiB
C
/* $NetBSD: cleanerd.c,v 1.46 2003/03/02 04:38:20 perseant Exp $ */
|
|
|
|
/*-
|
|
* Copyright (c) 1992, 1993
|
|
* The Regents of the University of California. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by the University of
|
|
* California, Berkeley and its contributors.
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
#ifndef lint
|
|
__COPYRIGHT("@(#) Copyright (c) 1992, 1993\n\
|
|
The Regents of the University of California. All rights reserved.\n");
|
|
#if 0
|
|
static char sccsid[] = "@(#)cleanerd.c 8.5 (Berkeley) 6/10/95";
|
|
#else
|
|
__RCSID("$NetBSD: cleanerd.c,v 1.46 2003/03/02 04:38:20 perseant Exp $");
|
|
#endif
|
|
#endif /* not lint */
|
|
|
|
#include <sys/fcntl.h>
|
|
#include <sys/param.h>
|
|
#include <sys/mount.h>
|
|
#include <sys/time.h>
|
|
#include <sys/resource.h>
|
|
#include <sys/types.h>
|
|
#include <sys/wait.h>
|
|
#include <sys/mman.h>
|
|
|
|
#include <ufs/ufs/dinode.h>
|
|
#include <ufs/lfs/lfs.h>
|
|
|
|
#include <signal.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <time.h>
|
|
#include <unistd.h>
|
|
#include <util.h>
|
|
#include <errno.h>
|
|
#include <err.h>
|
|
|
|
#include <syslog.h>
|
|
|
|
#include "clean.h"
|
|
char *special = "cleanerd";
|
|
int do_small = 0;
|
|
int do_mmap = 0;
|
|
int do_quit = 0;
|
|
int stat_report = 0;
|
|
int debug = 0;
|
|
int segwait_timeout = 5*60; /* Five minutes */
|
|
double load_threshold = 0.2;
|
|
int use_fs_idle = 0;
|
|
struct cleaner_stats {
|
|
double util_tot;
|
|
double util_sos;
|
|
int blocks_read;
|
|
int blocks_written;
|
|
int segs_cleaned;
|
|
int segs_empty;
|
|
int segs_error;
|
|
} cleaner_stats;
|
|
|
|
struct seglist {
|
|
unsigned long sl_id; /* segment number */
|
|
unsigned long sl_cost; /* cleaning cost */
|
|
unsigned long sl_bytes; /* bytes in segment */
|
|
unsigned long sl_age; /* age in seconds */
|
|
};
|
|
|
|
struct tossstruct {
|
|
struct lfs *lfs;
|
|
int seg;
|
|
};
|
|
|
|
typedef struct {
|
|
int nsegs; /* number of segments */
|
|
struct seglist **segs; /* segment numbers, costs, etc */
|
|
int nb; /* total number of blocks */
|
|
BLOCK_INFO *ba; /* accumulated block_infos */
|
|
caddr_t *buf; /* segment buffers */
|
|
} SEGS_AND_BLOCKS;
|
|
|
|
#define CLEAN_BYTES 0x1
|
|
|
|
/* function prototypes */
|
|
int bi_tossold(const void *, const void *, const void *);
|
|
int choose_segments(FS_INFO *, struct seglist *,
|
|
unsigned long (*)(FS_INFO *, SEGUSE *));
|
|
void clean_fs(FS_INFO *, unsigned long (*)(FS_INFO *, SEGUSE *), int, long);
|
|
int clean_loop(FS_INFO *, int, long);
|
|
int add_segment(FS_INFO *, struct seglist *, SEGS_AND_BLOCKS *);
|
|
int clean_segments(FS_INFO *, SEGS_AND_BLOCKS *);
|
|
unsigned long cost_benefit(FS_INFO *, SEGUSE *);
|
|
int cost_compare(const void *, const void *);
|
|
void sig_report(int);
|
|
void just_exit(int);
|
|
int main(int, char *[]);
|
|
|
|
/*
|
|
* Emulate lfs_{bmapv,markv,segwait} using fcntl calls.
|
|
* NOTE: the old system calls still use BLOCK_INFO_15,
|
|
* while the fcntls use BLOCK_INFO.
|
|
*/
|
|
int
|
|
lfs_markv_emul(int fd, BLOCK_INFO *blkiov, int blkcnt)
|
|
{
|
|
struct lfs_fcntl_markv /* {
|
|
BLOCK_INFO *blkiov;
|
|
int blkcnt;
|
|
} */ lim;
|
|
|
|
lim.blkiov = blkiov;
|
|
lim.blkcnt = blkcnt;
|
|
return fcntl(fd, LFCNMARKV, &lim);
|
|
}
|
|
|
|
int
|
|
lfs_bmapv_emul(int fd, BLOCK_INFO *blkiov, int blkcnt)
|
|
{
|
|
struct lfs_fcntl_markv /* {
|
|
BLOCK_INFO *blkiov;
|
|
int blkcnt;
|
|
} */ lim;
|
|
|
|
lim.blkiov = blkiov;
|
|
lim.blkcnt = blkcnt;
|
|
return fcntl(fd, LFCNBMAPV, &lim);
|
|
}
|
|
|
|
int
|
|
lfs_segwait_emul(int fd, struct timeval *tv)
|
|
{
|
|
return fcntl(fd, LFCNSEGWAIT, tv);
|
|
}
|
|
|
|
/*
|
|
* Cleaning Cost Functions:
|
|
*
|
|
* These return the cost of cleaning a segment. The higher the cost value
|
|
* the better it is to clean the segment, so empty segments have the highest
|
|
* cost. (It is probably better to think of this as a priority value
|
|
* instead).
|
|
*
|
|
* This is the cost-benefit policy simulated and described in Rosenblum's
|
|
* 1991 SOSP paper.
|
|
*/
|
|
|
|
unsigned long
|
|
cost_benefit(FS_INFO *fsp, SEGUSE *su)
|
|
{
|
|
struct lfs *lfsp;
|
|
struct timeval t;
|
|
time_t age;
|
|
unsigned long live;
|
|
|
|
gettimeofday(&t, NULL);
|
|
|
|
live = su->su_nbytes;
|
|
age = t.tv_sec < su->su_lastmod ? 0 : t.tv_sec - su->su_lastmod;
|
|
|
|
lfsp = &fsp->fi_lfs;
|
|
if (live == 0) { /* No cost, only benefit. */
|
|
return lblkno(lfsp, seg_size(lfsp)) * t.tv_sec;
|
|
} else if (su->su_flags & SEGUSE_ERROR) {
|
|
/* No benefit: don't even try */
|
|
return 0;
|
|
} else {
|
|
/*
|
|
* from lfsSegUsage.c (Mendel's code).
|
|
* priority calculation is done using INTEGER arithmetic.
|
|
* sizes are in BLOCKS (that is why we use lblkno below).
|
|
* age is in seconds.
|
|
*
|
|
* priority = ((seg_size - live) * age) / (seg_size + live)
|
|
*/
|
|
if (live < 0 || live > seg_size(lfsp)) {
|
|
syslog(LOG_WARNING,"bad segusage count: %ld", live);
|
|
live = 0;
|
|
}
|
|
return (lblkno(lfsp, seg_size(lfsp) - live) * age)
|
|
/ lblkno(lfsp, seg_size(lfsp) + live);
|
|
}
|
|
}
|
|
|
|
int
|
|
main(int argc, char **argv)
|
|
{
|
|
FS_INFO *fsp;
|
|
struct statfs *lstatfsp; /* file system stats */
|
|
struct timeval timeout; /* sleep timeout */
|
|
fsid_t fsid;
|
|
long clean_opts; /* cleaning options */
|
|
int segs_per_clean;
|
|
int opt, cmd_err;
|
|
int do_coaleace;
|
|
pid_t childpid;
|
|
char *fs_name; /* name of filesystem to clean */
|
|
time_t now, lasttime;
|
|
int loopcount;
|
|
char *pidname; /* Name of pid file base */
|
|
char *cp;
|
|
|
|
cmd_err = debug = do_quit = 0;
|
|
do_coaleace = clean_opts = 0;
|
|
segs_per_clean = 1;
|
|
while ((opt = getopt(argc, argv, "bcdfl:mn:qr:st:")) != -1) {
|
|
switch (opt) {
|
|
case 'b':
|
|
/*
|
|
* Use live bytes to determine
|
|
* how many segs to clean.
|
|
*/
|
|
clean_opts |= CLEAN_BYTES;
|
|
break;
|
|
case 'c':
|
|
debug++;
|
|
do_coaleace++;
|
|
break;
|
|
case 'd': /* Debug mode. */
|
|
debug++;
|
|
break;
|
|
case 'f':
|
|
use_fs_idle = 1;
|
|
break;
|
|
case 'l': /* Load below which to clean */
|
|
load_threshold = atof(optarg);
|
|
break;
|
|
case 'm':
|
|
do_mmap = 1;
|
|
break;
|
|
case 'n': /* How many segs to clean at once */
|
|
segs_per_clean = atoi(optarg);
|
|
break;
|
|
case 'q': /* Quit after one run */
|
|
do_quit = 1;
|
|
break;
|
|
case 'r': /* Report every stat_report segments */
|
|
stat_report = atoi(optarg);
|
|
break;
|
|
case 's': /* small writes */
|
|
do_small = 1;
|
|
break;
|
|
case 't':
|
|
segwait_timeout = atoi(optarg);
|
|
break;
|
|
default:
|
|
++cmd_err;
|
|
}
|
|
}
|
|
argc -= optind;
|
|
argv += optind;
|
|
if (cmd_err || (argc != 1))
|
|
errx(1, "usage: lfs_cleanerd [-bdms] [-l load] [-n nsegs] [-r report_freq] [-t timeout] fs_name");
|
|
|
|
fs_name = argv[0];
|
|
|
|
if (fs_getmntinfo(&lstatfsp, fs_name, MOUNT_LFS) == 0) {
|
|
/* didn't find the filesystem */
|
|
errx(1, "lfs_cleanerd: filesystem %s isn't an LFS!", fs_name);
|
|
}
|
|
|
|
/* should we become a daemon, chdir to / & close fd's */
|
|
if (debug == 0) {
|
|
if (daemon(0, 0) == -1)
|
|
err(1, "lfs_cleanerd: couldn't become a daemon!");
|
|
openlog("lfs_cleanerd", LOG_NDELAY | LOG_PID | 0, LOG_DAEMON);
|
|
lasttime=0;
|
|
loopcount=0;
|
|
loop:
|
|
if((childpid=fork())<0) {
|
|
syslog(LOG_ERR,"%s: couldn't fork, exiting: %m",
|
|
fs_name);
|
|
exit(1);
|
|
}
|
|
if(childpid == 0) {
|
|
/* Record child's pid */
|
|
pidname = malloc(strlen(fs_name) + 16);
|
|
sprintf(pidname, "lfs_cleanerd:s:%s", fs_name);
|
|
while((cp = strchr(pidname, '/')) != NULL)
|
|
*cp = '|';
|
|
pidfile(pidname);
|
|
/* The cleaner wants to stay in core, really */
|
|
if (mlockall(MCL_FUTURE) != 0) {
|
|
syslog(LOG_WARNING, "mlockall failed: %m");
|
|
}
|
|
} else {
|
|
/* Record parent's pid */
|
|
pidname = malloc(strlen(fs_name) + 16);
|
|
sprintf(pidname, "lfs_cleanerd:m:%s", fs_name);
|
|
while((cp = strchr(pidname, '/')) != NULL)
|
|
*cp = '|';
|
|
pidfile(pidname);
|
|
signal(SIGINT, just_exit);
|
|
|
|
wait(NULL);
|
|
/* If the child is looping, give up */
|
|
++loopcount;
|
|
if((now=time(NULL)) - lasttime > TIME_THRESHOLD) {
|
|
loopcount=0;
|
|
}
|
|
lasttime = now;
|
|
if(loopcount > LOOP_THRESHOLD) {
|
|
syslog(LOG_ERR,"%s: cleanerd looping, exiting",
|
|
fs_name);
|
|
exit(1);
|
|
}
|
|
if (fs_getmntinfo(&lstatfsp, fs_name, MOUNT_LFS) == 0) {
|
|
/* fs has been unmounted(?); exit quietly */
|
|
syslog(LOG_ERR,"lfs_cleanerd: fs %s unmounted, exiting", fs_name);
|
|
exit(0);
|
|
}
|
|
goto loop;
|
|
}
|
|
} else
|
|
openlog("lfs_cleanerd", LOG_NDELAY | LOG_PID | LOG_PERROR,
|
|
LOG_DAEMON);
|
|
|
|
signal(SIGINT, sig_report);
|
|
signal(SIGUSR1, sig_report);
|
|
signal(SIGUSR2, sig_report);
|
|
|
|
if(debug)
|
|
syslog(LOG_INFO,"Cleaner starting on filesystem %s", fs_name);
|
|
|
|
timeout.tv_sec = segwait_timeout;
|
|
timeout.tv_usec = 0;
|
|
fsid.val[0] = 0;
|
|
fsid.val[1] = 0;
|
|
|
|
for (fsp = get_fs_info(lstatfsp, do_mmap); ;
|
|
reread_fs_info(fsp, do_mmap)) {
|
|
/*
|
|
* If the user specified '-F', he doesn't want us
|
|
* to do regular cleaning, only coalesce.
|
|
*/
|
|
if (do_coaleace) {
|
|
clean_all_inodes(fsp);
|
|
exit(0);
|
|
}
|
|
|
|
/*
|
|
* clean the filesystem, and, if it needed cleaning
|
|
* (i.e. it returned nonzero) try it again
|
|
* to make sure that some nasty process hasn't just
|
|
* filled the disk system up.
|
|
*/
|
|
if (clean_loop(fsp, segs_per_clean, clean_opts))
|
|
continue;
|
|
|
|
fsid = lstatfsp->f_fsid;
|
|
if(debug > 1)
|
|
syslog(LOG_DEBUG,"Cleaner going to sleep.");
|
|
if (lfs_segwait_emul(ifile_fd, &timeout) < 0)
|
|
syslog(LOG_WARNING,"LFCNSEGWAIT: %m");
|
|
if(debug > 1)
|
|
syslog(LOG_DEBUG,"Cleaner waking up.");
|
|
}
|
|
}
|
|
|
|
/* return the number of segments cleaned */
|
|
int
|
|
clean_loop(FS_INFO *fsp, int nsegs, long options)
|
|
{
|
|
struct lfs *lfsp;
|
|
double loadavg[MAXLOADS];
|
|
time_t now;
|
|
u_long max_free_segs;
|
|
u_long fsb_per_seg;
|
|
|
|
lfsp = &fsp->fi_lfs;
|
|
/*
|
|
* Compute the maximum possible number of free segments, given the
|
|
* number of free blocks.
|
|
*/
|
|
fsb_per_seg = segtod(lfsp, 1);
|
|
max_free_segs = fsp->fi_cip->bfree / fsb_per_seg + lfsp->lfs_minfreeseg;
|
|
|
|
/*
|
|
* We will clean if there are not enough free blocks or total clean
|
|
* space is less than BUSY_LIM % of possible clean space.
|
|
*/
|
|
now = time((time_t *)NULL);
|
|
|
|
if(debug > 1) {
|
|
syslog(LOG_DEBUG, "fsb_per_seg = %lu bfree = %u avail = %d",
|
|
fsb_per_seg, fsp->fi_cip->bfree, fsp->fi_cip->avail);
|
|
syslog(LOG_DEBUG, "clean segs = %d, max_free_segs = %ld",
|
|
fsp->fi_cip->clean, max_free_segs);
|
|
}
|
|
|
|
if ((fsp->fi_cip->bfree - fsp->fi_cip->avail > fsb_per_seg &&
|
|
fsp->fi_cip->avail < (long)fsb_per_seg &&
|
|
fsp->fi_cip->bfree > (long)fsb_per_seg) ||
|
|
(fsp->fi_cip->clean < max_free_segs &&
|
|
(fsp->fi_cip->clean <= lfsp->lfs_minfreeseg ||
|
|
fsp->fi_cip->clean < max_free_segs * BUSY_LIM)))
|
|
{
|
|
if(debug)
|
|
syslog(LOG_DEBUG, "Cleaner Running at %s"
|
|
" (%d of %lu segments available, avail = %d,"
|
|
" bfree = %u)",
|
|
ctime(&now), fsp->fi_cip->clean, max_free_segs,
|
|
fsp->fi_cip->avail, fsp->fi_cip->bfree);
|
|
clean_fs(fsp, cost_benefit, nsegs, options);
|
|
if(do_quit) {
|
|
if(debug)
|
|
syslog(LOG_INFO,"Cleaner shutting down");
|
|
exit(0);
|
|
}
|
|
return (1);
|
|
} else if(use_fs_idle) {
|
|
/*
|
|
* If we're using "filesystem idle" instead of system idle,
|
|
* clean if the fs has not been modified in segwait_timeout
|
|
* seconds.
|
|
*/
|
|
if(now-fsp->fi_fs_tstamp > segwait_timeout
|
|
&& fsp->fi_cip->clean < max_free_segs * IDLE_LIM) {
|
|
if(debug) {
|
|
syslog(LOG_DEBUG, "Cleaner Running at %s: "
|
|
"fs idle time %ld sec; %d of %lu segments available)",
|
|
ctime(&now), (long)now-fsp->fi_fs_tstamp,
|
|
fsp->fi_cip->clean, max_free_segs);
|
|
syslog(LOG_DEBUG, " filesystem idle since %s", ctime(&(fsp->fi_fs_tstamp)));
|
|
}
|
|
clean_fs(fsp, cost_benefit, nsegs, options);
|
|
if(do_quit) {
|
|
if(debug)
|
|
syslog(LOG_INFO,"Cleaner shutting down");
|
|
exit(0);
|
|
}
|
|
return (1);
|
|
}
|
|
} else {
|
|
/*
|
|
* We will also clean if the system is reasonably idle and
|
|
* the total clean space is less than IDLE_LIM % of possible
|
|
* clean space.
|
|
*/
|
|
if (getloadavg(loadavg, MAXLOADS) == -1) {
|
|
perror("getloadavg: failed");
|
|
return (-1);
|
|
}
|
|
if (loadavg[ONE_MIN] < load_threshold
|
|
&& fsp->fi_cip->clean < max_free_segs * IDLE_LIM)
|
|
{
|
|
if (debug)
|
|
syslog(LOG_DEBUG, "Cleaner Running at %s "
|
|
"(system load %.1f, %d of %lu segments available)",
|
|
ctime(&now), loadavg[ONE_MIN],
|
|
fsp->fi_cip->clean, max_free_segs);
|
|
clean_fs(fsp, cost_benefit, nsegs, options);
|
|
if (do_quit) {
|
|
if(debug)
|
|
syslog(LOG_INFO,"Cleaner shutting down");
|
|
exit(0);
|
|
}
|
|
return (1);
|
|
}
|
|
}
|
|
if (debug > 1) {
|
|
if (fsp->fi_cip->bfree - fsp->fi_cip->avail <= fsb_per_seg)
|
|
syslog(LOG_DEBUG, "condition 1 false");
|
|
if (fsp->fi_cip->avail >= (long)fsb_per_seg)
|
|
syslog(LOG_DEBUG, "condition 2 false");
|
|
if (fsp->fi_cip->clean >= max_free_segs)
|
|
syslog(LOG_DEBUG, "condition 3 false");
|
|
if (fsp->fi_cip->clean > lfsp->lfs_minfreeseg)
|
|
syslog(LOG_DEBUG, "condition 4 false");
|
|
if (fsp->fi_cip->clean >= max_free_segs * BUSY_LIM)
|
|
syslog(LOG_DEBUG, "condition 5 false");
|
|
|
|
syslog(LOG_DEBUG, "Cleaner Not Running at %s", ctime(&now));
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
|
|
void
|
|
clean_fs(FS_INFO *fsp, unsigned long (*cost_func)(FS_INFO *, SEGUSE *),
|
|
int nsegs, long options)
|
|
{
|
|
struct seglist *segs, *sp;
|
|
long int to_clean, cleaned_bytes;
|
|
unsigned long i, j, total;
|
|
struct rusage ru;
|
|
fsid_t *fsidp;
|
|
int error;
|
|
SEGS_AND_BLOCKS *sbp;
|
|
|
|
fsidp = &fsp->fi_statfsp->f_fsid;
|
|
|
|
if ((segs =
|
|
malloc(fsp->fi_lfs.lfs_nseg * sizeof(struct seglist))) == NULL) {
|
|
syslog(LOG_WARNING,"malloc failed: %m");
|
|
return;
|
|
}
|
|
total = i = choose_segments(fsp, segs, cost_func);
|
|
|
|
/*
|
|
* If we can get lots of cleaning for free, do it now.
|
|
* The theory is that if empty dirty segments exist, we
|
|
* can afford to write a bunch of inodes, directory blocks,
|
|
* and Ifile blocks in order to clean them. (If things are
|
|
* so bad that we don't have enough segments to do this, though,
|
|
* we're in trouble.)
|
|
*/
|
|
sp = segs;
|
|
for (j = 0; j < total && sp->sl_bytes == 0; j++) {
|
|
if (debug)
|
|
syslog(LOG_DEBUG,"Not cleaning empty segment %ld",
|
|
sp->sl_id);
|
|
++cleaner_stats.segs_empty;
|
|
sp++;
|
|
i--;
|
|
}
|
|
if (j > 0) {
|
|
/* Call limited checkpoint to help clean empty segs */
|
|
fcntl(ifile_fd, LFCNRECLAIM, 0);
|
|
}
|
|
if (j > nsegs) {
|
|
free(segs);
|
|
return;
|
|
}
|
|
|
|
#if 0
|
|
/* If we relly need to clean a lot, do it now */
|
|
if(fsp->fi_cip->clean < 2 * fsp->fi_lfs.lfs_minfreeseg)
|
|
nsegs = MAX(nsegs, fsp->fi_lfs.lfs_minfreeseg);
|
|
#endif
|
|
/* But back down if we haven't got that many free to clean into */
|
|
if(fsp->fi_cip->clean < nsegs)
|
|
nsegs = fsp->fi_cip->clean;
|
|
|
|
if (debug > 1)
|
|
syslog(LOG_DEBUG, "clean_fs: found %ld segments to clean in %s",
|
|
i, fsp->fi_statfsp->f_mntonname);
|
|
|
|
if (i) {
|
|
sbp = (SEGS_AND_BLOCKS *)malloc(sizeof(SEGS_AND_BLOCKS));
|
|
memset(sbp, 0, sizeof(SEGS_AND_BLOCKS));
|
|
|
|
/* Check which cleaning algorithm to use. */
|
|
if (options & CLEAN_BYTES) {
|
|
/* Count bytes */
|
|
cleaned_bytes = 0;
|
|
to_clean = nsegs * fsp->fi_lfs.lfs_ssize;
|
|
for (; i && cleaned_bytes < to_clean; i--, ++sp) {
|
|
if (add_segment(fsp, sp, sbp) < 0) {
|
|
syslog(LOG_WARNING,"add_segment failed"
|
|
" segment %ld: %m", sp->sl_id);
|
|
if (sbp->nsegs == 0 && errno != ENOENT)
|
|
continue;
|
|
else
|
|
break;
|
|
}
|
|
cleaned_bytes += sp->sl_bytes;
|
|
}
|
|
} else {
|
|
/* Count segments */
|
|
for (i = MIN(i, nsegs); i-- ; ++sp) {
|
|
total--;
|
|
syslog(LOG_DEBUG, "Cleaning segment %ld"
|
|
" (of %ld choices)", sp->sl_id, i + 1);
|
|
if (add_segment(fsp, sp, sbp) != 0) {
|
|
syslog(LOG_WARNING,"add_segment failed"
|
|
" segment %ld: %m", sp->sl_id);
|
|
if (sbp->nsegs == 0 && errno != ENOENT)
|
|
continue;
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
clean_segments(fsp, sbp);
|
|
|
|
if (sbp->buf)
|
|
free(sbp->buf);
|
|
if (sbp->segs)
|
|
free(sbp->segs);
|
|
free(sbp);
|
|
}
|
|
free(segs);
|
|
if(debug) {
|
|
error = getrusage(RUSAGE_SELF, &ru);
|
|
if(error) {
|
|
syslog(LOG_WARNING, "getrusage returned error: %m");
|
|
} else {
|
|
syslog(LOG_DEBUG, "Current usage: maxrss=%ld,"
|
|
" idrss=%ld, isrss=%ld", ru.ru_maxrss,
|
|
ru.ru_idrss, ru.ru_isrss);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Segment with the highest priority get sorted to the beginning of the
|
|
* list. This sort assumes that empty segments always have a higher
|
|
* cost/benefit than any used segment.
|
|
*/
|
|
int
|
|
cost_compare(const void *a, const void *b)
|
|
{
|
|
return ((struct seglist *)b)->sl_cost < ((struct seglist *)a)->sl_cost ? -1 : 1;
|
|
}
|
|
|
|
|
|
/*
|
|
* Returns the number of segments to be cleaned with the elements of seglist
|
|
* filled in.
|
|
*/
|
|
int
|
|
choose_segments(FS_INFO *fsp, struct seglist *seglist, unsigned long (*cost_func)(FS_INFO *, SEGUSE *))
|
|
{
|
|
struct lfs *lfsp;
|
|
struct seglist *sp;
|
|
SEGUSE *sup;
|
|
int i, nsegs;
|
|
|
|
lfsp = &fsp->fi_lfs;
|
|
|
|
if (debug > 1)
|
|
syslog(LOG_DEBUG,"Entering choose_segments");
|
|
dump_super(lfsp);
|
|
dump_cleaner_info(fsp->fi_cip);
|
|
|
|
for (sp = seglist, i = 0; i < lfsp->lfs_nseg; ++i) {
|
|
if (debug > 1) {
|
|
printf("%d...", i);
|
|
fflush(stdout);
|
|
}
|
|
sup = SEGUSE_ENTRY(lfsp, fsp->fi_segusep, i);
|
|
if (debug > 2)
|
|
PRINT_SEGUSE(sup, i);
|
|
if (!(sup->su_flags & SEGUSE_DIRTY) ||
|
|
sup->su_flags & SEGUSE_ACTIVE)
|
|
continue;
|
|
if (debug > 2)
|
|
syslog(LOG_DEBUG, "\tchoosing segment %d", i);
|
|
sp->sl_cost = (*cost_func)(fsp, sup);
|
|
sp->sl_id = i;
|
|
sp->sl_bytes = sup->su_nbytes;
|
|
sp->sl_age = time(NULL) - sup->su_lastmod;
|
|
++sp;
|
|
}
|
|
nsegs = sp - seglist;
|
|
if (debug > 1) {
|
|
putchar('\n');
|
|
syslog(LOG_DEBUG, "Sorting...");
|
|
}
|
|
qsort(seglist, nsegs, sizeof(struct seglist), cost_compare);
|
|
if (debug > 2)
|
|
for(i = 0; i < nsegs; i++) {
|
|
syslog(LOG_DEBUG, "%d: segment %lu age %lu"
|
|
" contains %lu priority %lu\n", i,
|
|
seglist[i].sl_age, seglist[i].sl_id,
|
|
seglist[i].sl_bytes, seglist[i].sl_cost);
|
|
}
|
|
|
|
if (debug > 1)
|
|
syslog(LOG_DEBUG,"Returning %d segments", nsegs);
|
|
|
|
return (nsegs);
|
|
}
|
|
|
|
/*
|
|
* Add still-valid blocks from the given segment to the block array,
|
|
* in preparation for sending through markv.
|
|
*/
|
|
int
|
|
add_segment(FS_INFO *fsp, struct seglist *slp, SEGS_AND_BLOCKS *sbp)
|
|
{
|
|
int id = slp->sl_id;
|
|
BLOCK_INFO *tba, *_bip;
|
|
SEGUSE *sp;
|
|
struct lfs *lfsp;
|
|
struct tossstruct t;
|
|
struct dinode *dip;
|
|
caddr_t seg_buf;
|
|
caddr_t cmp_buf, cmp_dp;
|
|
size_t size;
|
|
daddr_t seg_addr;
|
|
int num_blocks, i, j, error;
|
|
int seg_isempty=0;
|
|
unsigned long *lp;
|
|
|
|
lfsp = &fsp->fi_lfs;
|
|
sp = SEGUSE_ENTRY(lfsp, fsp->fi_segusep, id);
|
|
seg_addr = sntod(lfsp,id);
|
|
error = 0;
|
|
tba = NULL;
|
|
|
|
syslog(LOG_DEBUG, "adding segment %d: contains %lu bytes", id,
|
|
(unsigned long)sp->su_nbytes);
|
|
|
|
/* XXX could add debugging to verify that segment is really empty */
|
|
if (sp->su_nbytes == 0) {
|
|
++cleaner_stats.segs_empty;
|
|
++seg_isempty;
|
|
}
|
|
|
|
/* Add a new segment to the accumulated list */
|
|
sbp->nsegs++;
|
|
sbp->segs = (struct seglist **)realloc(sbp->segs, sizeof(struct seglist *) * sbp->nsegs);
|
|
sbp->buf = (caddr_t *)realloc(sbp->buf, sizeof(caddr_t) * sbp->nsegs);
|
|
sbp->segs[sbp->nsegs - 1] = slp;
|
|
|
|
/* map the segment into a buffer */
|
|
if (mmap_segment(fsp, id, &seg_buf, do_mmap) < 0) {
|
|
syslog(LOG_WARNING,"add_segment: mmap_segment failed: %m");
|
|
++cleaner_stats.segs_error;
|
|
--sbp->nsegs;
|
|
return (-1);
|
|
}
|
|
sbp->buf[sbp->nsegs - 1] = seg_buf;
|
|
/* get a list of blocks that are contained by the segment */
|
|
if ((error = lfs_segmapv(fsp, id, seg_buf, &tba, &num_blocks)) < 0) {
|
|
syslog(LOG_WARNING,
|
|
"add_segment: lfs_segmapv failed for segment %d", id);
|
|
goto out;
|
|
}
|
|
cleaner_stats.blocks_read += segtod(lfsp, 1);
|
|
|
|
if (debug > 1)
|
|
syslog(LOG_DEBUG, "lfs_segmapv returned %d blocks", num_blocks);
|
|
|
|
/* get the current disk address of blocks contained by the segment */
|
|
if ((error = lfs_bmapv_emul(ifile_fd, tba, num_blocks)) < 0) {
|
|
syslog(LOG_WARNING, "add_segment: LFCNBMAPV failed");
|
|
goto out;
|
|
}
|
|
|
|
/* Now toss any blocks not in the current segment */
|
|
t.lfs = lfsp;
|
|
t.seg = id;
|
|
toss(tba, &num_blocks, sizeof(BLOCK_INFO), bi_tossold, &t);
|
|
/* Check if last element should be tossed */
|
|
if (num_blocks && bi_tossold(&t, tba + num_blocks - 1, NULL))
|
|
--num_blocks;
|
|
|
|
if(seg_isempty) {
|
|
if(num_blocks)
|
|
syslog(LOG_WARNING,"segment %d was supposed to be empty, but has %d live blocks!", id, num_blocks);
|
|
else
|
|
syslog(LOG_DEBUG,"segment %d is empty, as claimed", id);
|
|
}
|
|
/* XXX KS - check for misplaced blocks */
|
|
/* XXX ondisk32 */
|
|
for(i=0; i<num_blocks; i++) {
|
|
if(tba[i].bi_daddr
|
|
&& tba[i].bi_daddr != seg_addr +
|
|
btofsb(lfsp, (char *)(tba[i].bi_bp) - seg_buf)
|
|
&& dtosn(&(fsp->fi_lfs), tba[i].bi_daddr) == id)
|
|
{
|
|
syslog(LOG_ERR, "bi_daddr = 0x%x = %lldb; %p - %p = %ld",
|
|
tba[i].bi_daddr,
|
|
(long long)fsbtob(lfsp, tba[i].bi_daddr - seg_addr),
|
|
tba[i].bi_bp, seg_buf,
|
|
(long)(((char *)(tba[i].bi_bp) - seg_buf)));
|
|
syslog(LOG_ERR, "seg %d (0x%llx), ino %d lbn %d, 0x%x != 0x%llx",
|
|
id, (long long)seg_addr,
|
|
tba[i].bi_inode,
|
|
tba[i].bi_lbn,
|
|
tba[i].bi_daddr,
|
|
(long long)(seg_addr +
|
|
btofsb(lfsp, (char *)(tba[i].bi_bp) - seg_buf)));
|
|
error = EFAULT;
|
|
goto out;
|
|
|
|
/*
|
|
* XXX KS - have to be careful here about Inodes;
|
|
* if bmapv shows them somewhere else in the
|
|
* segment from where we thought, we need to reload
|
|
* the *right* inode, not the first one in the block.
|
|
*/
|
|
/* XXX ondisk32 */
|
|
if(tba[i].bi_lbn == LFS_UNUSED_LBN) {
|
|
dip = (struct dinode *)(seg_buf + fsbtob(lfsp, tba[i].bi_daddr - seg_addr));
|
|
for(j=INOPB(lfsp)-1;j>=0;j--) {
|
|
if(dip[j].di_u.inumber == tba[i].bi_inode) {
|
|
tba[i].bi_bp = (char *)(dip+j);
|
|
break;
|
|
}
|
|
}
|
|
if(j<0) {
|
|
syslog(LOG_ERR, "lost inode %d in the shuffle! (blk %d)",
|
|
tba[i].bi_inode, tba[i].bi_daddr);
|
|
if (debug) {
|
|
syslog(LOG_DEBUG,
|
|
"inode numbers found were:");
|
|
for(j=INOPB(lfsp)-1;j>=0;j--) {
|
|
syslog(LOG_DEBUG, "%d",
|
|
dip[j].di_u.inumber);
|
|
}
|
|
}
|
|
errx(1, "lost inode");
|
|
} else if (debug > 1) {
|
|
syslog(LOG_DEBUG,"Ino %d corrected to 0x%x",
|
|
tba[i].bi_inode,
|
|
tba[i].bi_daddr);
|
|
}
|
|
} else {
|
|
tba[i].bi_bp = seg_buf + fsbtob(lfsp, tba[i].bi_daddr - seg_addr);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Update live bytes calc - XXX KS */
|
|
slp->sl_bytes = 0;
|
|
for(i=0; i<num_blocks; i++)
|
|
if(tba[i].bi_lbn == LFS_UNUSED_LBN)
|
|
slp->sl_bytes += sizeof(struct dinode);
|
|
else
|
|
slp->sl_bytes += tba[i].bi_size;
|
|
|
|
if(debug > 1) {
|
|
syslog(LOG_DEBUG, "after bmapv still have %d blocks",
|
|
num_blocks);
|
|
if (num_blocks)
|
|
syslog(LOG_DEBUG, "BLOCK INFOS");
|
|
for (_bip = tba, i=0; i < num_blocks; ++_bip, ++i) {
|
|
PRINT_BINFO(_bip);
|
|
/* XXX ondisk32? */
|
|
lp = (u_long *)_bip->bi_bp;
|
|
}
|
|
}
|
|
|
|
/* Compress segment buffer, if necessary */
|
|
if (!do_mmap && slp->sl_bytes < seg_size(lfsp) / 2) {
|
|
if (debug > 1)
|
|
syslog(LOG_DEBUG, "compressing: %d < %d",
|
|
(int)slp->sl_bytes, seg_size(lfsp) / 2);
|
|
cmp_buf = malloc(slp->sl_bytes); /* XXX could do in-place */
|
|
if (cmp_buf == NULL) {
|
|
if (debug)
|
|
syslog(LOG_DEBUG, "can't compress segment: %m");
|
|
} else {
|
|
cmp_dp = cmp_buf;
|
|
for (i = 0; i < num_blocks; i++) {
|
|
if(tba[i].bi_lbn == LFS_UNUSED_LBN)
|
|
size = sizeof(struct dinode);
|
|
else
|
|
size = tba[i].bi_size;
|
|
memcpy(cmp_dp, tba[i].bi_bp, size);
|
|
tba[i].bi_bp = cmp_dp;
|
|
cmp_dp += size;
|
|
}
|
|
free(seg_buf);
|
|
seg_buf = cmp_buf;
|
|
sbp->buf[sbp->nsegs - 1] = seg_buf;
|
|
}
|
|
}
|
|
|
|
/* Add these blocks to the accumulated list */
|
|
sbp->ba = realloc(sbp->ba, (sbp->nb + num_blocks) * sizeof(BLOCK_INFO));
|
|
memcpy(sbp->ba + sbp->nb, tba, num_blocks * sizeof(BLOCK_INFO));
|
|
sbp->nb += num_blocks;
|
|
|
|
free(tba);
|
|
return (0);
|
|
|
|
out:
|
|
--sbp->nsegs;
|
|
if (tba)
|
|
free(tba);
|
|
if (error) {
|
|
sp->su_flags |= SEGUSE_ERROR;
|
|
++cleaner_stats.segs_error;
|
|
}
|
|
munmap_segment(fsp, sbp->buf[sbp->nsegs], do_mmap);
|
|
if (stat_report && cleaner_stats.segs_cleaned % stat_report == 0)
|
|
sig_report(SIGUSR1);
|
|
return (error);
|
|
}
|
|
|
|
/* Call markv and clean up */
|
|
int
|
|
clean_segments(FS_INFO *fsp, SEGS_AND_BLOCKS *sbp)
|
|
{
|
|
int maxblocks, clean_blocks, icount, extra, ebytes, nbytes;
|
|
BLOCK_INFO *bp;
|
|
int i, error;
|
|
double util;
|
|
ino_t ino, inino;
|
|
|
|
error = 0;
|
|
|
|
cleaner_stats.segs_cleaned += sbp->nsegs;
|
|
cleaner_stats.blocks_written += sbp->nb;
|
|
/*
|
|
* Count up the number of indirect blocks and inodes we'll
|
|
* have to write to take care of this (if we are asked to do this).
|
|
* XXX this only cares about single indirect blocks.
|
|
*/
|
|
icount = 0;
|
|
ino = inino = 0;
|
|
extra = 0;
|
|
nbytes = 0;
|
|
for (i = sbp->nb, bp = sbp->ba; i > 0; bp++, i--) {
|
|
if (bp->bi_lbn != LFS_UNUSED_LBN)
|
|
nbytes += bp->bi_size;
|
|
if (ino != bp->bi_inode) {
|
|
ino = bp->bi_inode;
|
|
++icount;
|
|
}
|
|
if (bp->bi_lbn == -NDADDR)
|
|
inino = ino;
|
|
if (inino != ino && bp->bi_lbn > 0 && bp->bi_lbn > NDADDR) {
|
|
++extra;
|
|
inino = ino;
|
|
}
|
|
}
|
|
ebytes = 0 + INOPB(&fsp->fi_lfs) * fsp->fi_lfs.lfs_ibsize ;
|
|
ebytes += extra * fsp->fi_lfs.lfs_bsize;
|
|
if (debug) {
|
|
fprintf(stderr, "clean_segment: %d inodes %d indirect -> %d bytes + %d = %d total (to save %d)\n",
|
|
icount, extra, ebytes, nbytes, ebytes + nbytes,
|
|
fsp->fi_lfs.lfs_fsize * fsp->fi_lfs.lfs_fsbpseg * sbp->nsegs);
|
|
}
|
|
/* If we're writing more than we're saving, try coalescing */
|
|
if (ebytes + nbytes > fsp->fi_lfs.lfs_fsize * fsp->fi_lfs.lfs_fsbpseg * sbp->nsegs) {
|
|
fork_coalesce(fsp);
|
|
}
|
|
|
|
util = ((double)sbp->nb / segtod(&fsp->fi_lfs, 1));
|
|
cleaner_stats.util_tot += util;
|
|
cleaner_stats.util_sos += util * util;
|
|
if (do_small)
|
|
maxblocks = MAXPHYS / fsp->fi_lfs.lfs_bsize - 1;
|
|
else
|
|
maxblocks = sbp->nb;
|
|
|
|
for (bp = sbp->ba; sbp->nb > 0; bp += clean_blocks) {
|
|
clean_blocks = maxblocks < sbp->nb ? maxblocks : sbp->nb;
|
|
if ((error = lfs_markv_emul(ifile_fd, bp, clean_blocks)) < 0) {
|
|
syslog(LOG_WARNING,"clean_segment: LFCNMARKV failed: %m");
|
|
++cleaner_stats.segs_error;
|
|
if (errno == ENOENT) break;
|
|
}
|
|
sbp->nb -= clean_blocks;
|
|
}
|
|
|
|
/* Clean up */
|
|
if (sbp->ba)
|
|
free(sbp->ba);
|
|
if (error)
|
|
++cleaner_stats.segs_error;
|
|
for (i = 0; i < sbp->nsegs; i++)
|
|
munmap_segment(fsp, sbp->buf[i], do_mmap);
|
|
if (stat_report && cleaner_stats.segs_cleaned % stat_report == 0)
|
|
sig_report(SIGUSR1);
|
|
return (error);
|
|
}
|
|
|
|
|
|
int
|
|
bi_tossold(const void *client, const void *a, const void *b)
|
|
{
|
|
const struct tossstruct *t;
|
|
|
|
t = (struct tossstruct *)client;
|
|
|
|
return (((BLOCK_INFO *)a)->bi_daddr == LFS_UNUSED_DADDR ||
|
|
dtosn(t->lfs, ((BLOCK_INFO *)a)->bi_daddr) != t->seg);
|
|
}
|
|
|
|
void
|
|
sig_report(int sig)
|
|
{
|
|
double avg = 0.0;
|
|
|
|
syslog(LOG_INFO, "lfs_cleanerd:\t%s%d\n\t\t%s%d\n\t\t%s%d\n\t\t%s%d\n\t\t%s%d",
|
|
"blocks_read ", cleaner_stats.blocks_read,
|
|
"blocks_written ", cleaner_stats.blocks_written,
|
|
"segs_cleaned ", cleaner_stats.segs_cleaned,
|
|
"segs_empty ", cleaner_stats.segs_empty,
|
|
"seg_error ", cleaner_stats.segs_error);
|
|
syslog(LOG_INFO, "\t\t%s%5.2f\n\t\t%s%5.2f",
|
|
"util_tot ", cleaner_stats.util_tot,
|
|
"util_sos ", cleaner_stats.util_sos);
|
|
avg = cleaner_stats.util_tot / MAX(cleaner_stats.segs_cleaned, 1.0);
|
|
syslog(LOG_INFO, "\t\tavg util: %4.2f std dev: %9.6f", avg,
|
|
cleaner_stats.util_sos / MAX(cleaner_stats.segs_cleaned - avg * avg, 1.0));
|
|
|
|
|
|
if (sig == SIGUSR2) {
|
|
cleaner_stats.blocks_read = 0;
|
|
cleaner_stats.blocks_written = 0;
|
|
cleaner_stats.segs_cleaned = 0;
|
|
cleaner_stats.segs_empty = 0;
|
|
cleaner_stats.segs_error = 0;
|
|
cleaner_stats.util_tot = 0.0;
|
|
cleaner_stats.util_sos = 0.0;
|
|
}
|
|
if (sig == SIGINT)
|
|
exit(0);
|
|
}
|
|
|
|
void
|
|
just_exit(int sig)
|
|
{
|
|
exit(0);
|
|
}
|