NetBSD/usr.bin/vndcompress/vndcompress.c
riastradh 76d4b81251 Clarify compile-time and run-time arithmetic safety assertions.
This is an experiment with a handful of macros for writing the
checks, most of which are compile-time:

MUL_OK(t, a, b)         Does a*b avoid overflow in type t?
ADD_OK(t, a, b)         Does a + b avoid overflow in type t?
TOOMANY(t, x, b, m)     Are there more than m b-element blocks in x in type t?
                        (I.e., does ceiling(x/b) > m?)

Addenda that might make sense but are not needed here:

MUL(t, a, b, &p)        Set p = a*b and return 0, or return ERANGE if overflow.
ADD(t, a, b, &s)        Set s = a+b and return 0, or return ERANGE if overflow.

Example:

	uint32_t a = ..., b = ..., y = ..., z = ..., x, w;

        /* input validation */
        error = MUL(size_t, a, b, &x);
        if (error)
                fail;
        if (TOOMANY(uint32_t, x, BLKSIZ, MAX_NBLK))
                fail;
        y = HOWMANY(x, BLKSIZ);
        if (z > Z_MAX)
                fail;
        ...
        /* internal computation */
        __CTASSERT(MUL_OK(uint32_t, Z_MAX, MAX_NBLK));
        w = z*y;

Obvious shortcomings:

1. Nothing checks your ctassert matches your subsequent arithmetic.
   (Maybe we could have BOUNDED_MUL(t, x, xmax, y, ymax) with a
   ctassert inside.)

2. Nothing flows the bounds needed by the arithmetic you use back
   into candidate definitions of X_MAX/Y_MAX.

But at least the reviewer's job is only to make sure that (a) the
MUL_OK matches the *, and (b) the bounds in the assertion match the
bounds on the inputs -- in particular, the reviewer need not derive
the bounds from the context, only confirm they are supported by the
paths to it.

This is not meant to be a general-purpose proof assistant, or even a
special-purpose one like gfverif <http://gfverif.cryptojedi.org/>.
Rather, it is an experiment in adding a modicum of compile-time
verification with a simple C API change.

This also is not intended to serve as trapping arithmetic on
overflow.  The goal here is to enable writing the program with
explicit checks on input and compile-time annotations on computation
to gain confidence that overflow won't happen in the computation.
2017-07-29 21:04:07 +00:00

916 lines
27 KiB
C

/* $NetBSD: vndcompress.c,v 1.29 2017/07/29 21:04:07 riastradh Exp $ */
/*-
* Copyright (c) 2013 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Taylor R. Campbell.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__RCSID("$NetBSD: vndcompress.c,v 1.29 2017/07/29 21:04:07 riastradh Exp $");
#include <sys/endian.h>
#include <sys/stat.h>
#include <assert.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <signal.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <zlib.h>
#include "common.h"
#include "offtab.h"
#include "utils.h"
/*
* XXX Switch to control bug-for-bug byte-for-byte compatibility with
* NetBSD's vndcompress.
*/
#define VNDCOMPRESS_COMPAT 0
/* The offset table must begin immediately after the cloop2 header. */
__CTASSERT(sizeof(struct cloop2_header) == CLOOP2_OFFSET_TABLE_OFFSET);
/*
* State of one compression run. A single instance (global_state) is
* shared between the main code and the signal handlers, which is why
* `initialized' is a volatile sig_atomic_t.
*/
struct compress_state {
uint64_t size; /* uncompressed size */
uint64_t offset; /* output byte offset */
uint32_t blocksize; /* bytes per block */
uint32_t blkno; /* input block number */
uint32_t n_full_blocks; /* floor(size/blocksize) */
uint32_t n_blocks; /* ceiling(size/blocksize) */
uint32_t n_offsets; /* n_blocks + 1 */
uint32_t end_block; /* stop once blkno reaches this; 0 = no limit */
uint32_t checkpoint_blocks; /* checkpoint every this many blocks; 0 = never */
int image_fd; /* input image, opened read-only */
int cloop2_fd; /* output cloop2 file */
struct offtab offtab; /* offset table for the output blocks */
uint32_t n_checkpointed_blocks; /* value of blkno at the last checkpoint */
volatile sig_atomic_t
initialized; /* everything above initialized? */
};
/* Global compression state, shared with the info and checkpoint signal handlers. */
static struct compress_state global_state;
/* A signal number paired with its name, used in sigaction error messages. */
struct sigdesc {
int sd_signo;
const char *sd_name;
};
/* Signals handled by info_signal_handler. */
static const struct sigdesc info_signals[] = {
{ SIGINFO, "SIGINFO" },
{ SIGUSR1, "SIGUSR1" },
};
/* Signals handled by checkpoint_signal_handler. */
static const struct sigdesc checkpoint_signals[] = {
{ SIGUSR2, "SIGUSR2" },
};
/* Forward declarations for the file-local functions defined below. */
static void init_signals(void);
static void init_signal_handler(int, const struct sigdesc *, size_t,
void (*)(int));
static void info_signal_handler(int);
static void checkpoint_signal_handler(int);
static void compress_progress(struct compress_state *);
static void compress_init(int, char **, const struct options *,
struct compress_state *);
static bool compress_restart(struct compress_state *);
static uint32_t compress_block(int, int, uint32_t, uint32_t, uint32_t, void *,
void *);
static void compress_maybe_checkpoint(struct compress_state *);
static void compress_checkpoint(struct compress_state *);
static void compress_exit(struct compress_state *);
/*
* Compression entry point.
*
* argc, argv, and O are handed to compress_init, which fills in the
* global compression state. Each input block is then read, compressed,
* and appended to the output by compress_block, and the resulting
* output offset is recorded in the offset table. Once every block has
* been transferred the output is padded to a multiple of DEV_BSIZE.
* A final checkpoint commits the offset table before the buffers and
* the state are released.
*
* Returns 0. Failures are reported through err/errx.
*/
int
vndcompress(int argc, char **argv, const struct options *O)
{
/* Work on the global state so the signal handlers see the same data. */
struct compress_state *const S = &global_state;
/* Paranoia. The other fields either have no sentinel or use zero. */
S->image_fd = -1;
S->cloop2_fd = -1;
/* Set up signal handlers so we can handle SIGINFO ASAP. */
init_signals();
/*
* Parse the arguments to initialize our state.
*/
compress_init(argc, argv, O, S);
assert(MIN_BLOCKSIZE <= S->blocksize);
assert(S->blocksize <= MAX_BLOCKSIZE);
/*
* Allocate compression buffers.
*
* Compression may actually expand. From an overabundance of
* caution, assume it can expand by at most double.
*
* XXX Check and consider tightening this assumption.
*/
__CTASSERT(MAX_BLOCKSIZE <= SIZE_MAX);
void *const uncompbuf = malloc(S->blocksize);
if (uncompbuf == NULL)
err(1, "malloc uncompressed buffer");
/* XXX compression ratio bound */
__CTASSERT(MUL_OK(size_t, 2, MAX_BLOCKSIZE));
void *const compbuf = malloc(2 * (size_t)S->blocksize);
if (compbuf == NULL)
err(1, "malloc compressed buffer");
/*
* Compress the blocks. S->blkno specifies the input block
* we're about to transfer. S->offset is the current output
* offset.
*/
while (S->blkno < S->n_blocks) {
/* Report any progress. */
compress_progress(S);
/*
* Stop if we've done the requested partial transfer. This
* jumps past the final padding, straight to the checkpoint.
*/
if ((0 < S->end_block) && (S->end_block <= S->blkno))
goto out;
/* Checkpoint if appropriate. */
compress_maybe_checkpoint(S);
/* Get the table ready for the entry we store after this block. */
offtab_prepare_put(&S->offtab, (S->blkno + 1));
/* Choose read size: partial if last block, full if not. */
const uint32_t readsize = (S->blkno == S->n_full_blocks?
(S->size % S->blocksize) : S->blocksize);
assert(readsize > 0);
assert(readsize <= S->blocksize);
/* Fail noisily if we might be about to overflow. */
/* XXX compression ratio bound */
__CTASSERT(MUL_OK(uint64_t, 2, MAX_BLOCKSIZE));
__CTASSERT(MUL_OK(off_t, 2, MAX_BLOCKSIZE));
assert(S->offset <= MIN(UINT64_MAX, OFF_MAX));
if (!ADD_OK(uint64_t, S->offset, 2*(uintmax_t)readsize) ||
!ADD_OK(off_t, S->offset, 2*(uintmax_t)readsize))
errx(1, "blkno %"PRIu32" may overflow: %ju + 2*%ju",
S->blkno, (uintmax_t)S->offset,
(uintmax_t)readsize);
/* Process the block. */
const uint32_t complen =
compress_block(S->image_fd, S->cloop2_fd, S->blkno,
S->blocksize, readsize, uncompbuf, compbuf);
/*
* Signal-atomically update the state to reflect
* (a) what block number we are now at,
* (b) how far we are now in the output file, and
* (c) where the last block ended.
*/
assert(ADD_OK(uint32_t, S->blkno, 1));
assert(ADD_OK(uint64_t, S->offset, complen));
assert(ADD_OK(off_t, (off_t)S->offset, (off_t)complen));
assert((S->blkno + 1) < S->n_offsets);
{
sigset_t old_sigmask;
block_signals(&old_sigmask);
S->blkno += 1; /* (a) */
S->offset += complen; /* (b) */
offtab_put(&S->offtab, S->blkno, S->offset); /* (c) */
restore_sigmask(&old_sigmask);
}
}
/* Make sure we're all done. */
assert(S->blkno == S->n_blocks);
assert((S->blkno + 1) == S->n_offsets);
/* Pad to the disk block size. */
const uint32_t n_extra = (S->offset % DEV_BSIZE);
if (n_extra != 0) {
const uint32_t n_padding = (DEV_BSIZE - n_extra);
/* Reuse compbuf -- guaranteed to be large enough. */
(void)memset(compbuf, 0, n_padding);
const ssize_t n_written = write(S->cloop2_fd, compbuf,
n_padding);
if (n_written == -1)
err(1, "write final padding failed");
assert(n_written >= 0);
if ((size_t)n_written != n_padding)
errx(1, "partial write of final padding bytes"
": %zu != %"PRIu32,
(size_t)n_written, n_padding);
/* Account for the extra bytes in the output file. */
assert(ADD_OK(uint64_t, S->offset, n_padding));
assert(ADD_OK(off_t, (off_t)S->offset, (off_t)n_padding));
{
/* Keep the signal handlers from seeing a half-updated offset. */
sigset_t old_sigmask;
block_signals(&old_sigmask);
S->offset += n_padding;
restore_sigmask(&old_sigmask);
}
}
out:
/* One last checkpoint to commit the offset table. */
assert(S->offset <= OFF_MAX);
assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR));
compress_checkpoint(S);
/*
* Free the compression buffers and finalize the compression.
*/
free(compbuf);
free(uncompbuf);
compress_exit(S);
return 0;
}
/*
* Signal cruft.
*/
/*
 * Install the handlers for both signal groups: the info signals get
 * info_signal_handler and the checkpoint signals get
 * checkpoint_signal_handler.  Both are installed with SA_RESTART.
 */
static void
init_signals(void)
{
	const size_t n_info = __arraycount(info_signals);
	const size_t n_checkpoint = __arraycount(checkpoint_signals);

	init_signal_handler(SA_RESTART, info_signals, n_info,
	    &info_signal_handler);
	init_signal_handler(SA_RESTART, checkpoint_signals, n_checkpoint,
	    &checkpoint_signal_handler);
}
static void
init_signal_handler(int flags, const struct sigdesc *signals, size_t n,
void (*handler)(int))
{
static const struct sigaction zero_sa;
struct sigaction sa = zero_sa;
size_t i;
(void)sigemptyset(&sa.sa_mask);
for (i = 0; i < n; i++)
(void)sigaddset(&sa.sa_mask, signals[i].sd_signo);
sa.sa_flags = flags;
sa.sa_handler = handler;
for (i = 0; i < n; i++)
if (sigaction(signals[i].sd_signo, &sa, NULL) == -1)
err(1, "sigaction(%s)", signals[i].sd_name);
}
/*
 * Handler for the info signals: write a one-line status report to
 * stderr giving the bytes read, the bytes written, the ratio of the
 * two as a percentage, and the bytes covered by the last checkpoint.
 *
 * Before the global state is initialized, only "initializing" is
 * reported.  errno is saved on entry and restored on exit.
 */
static void
info_signal_handler(int signo __unused)
{
	/* Save errno. */
	const int error = errno;
	struct compress_state *const S = &global_state;
	char buf[128];

	/* Bail if the state is not yet initialized. */
	if (!S->initialized) {
		warnx_ss("initializing");
		goto out;
	}

	/* Carefully calculate our I/O position. */
	assert(S->blocksize > 0);
	__CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, MAX_BLOCKSIZE));
	const uint64_t nread = ((uint64_t)S->blkno * (uint64_t)S->blocksize);

	/*
	 * Report zero bytes written until the output offset has moved
	 * past the header and the offset table.
	 */
	assert(S->n_blocks > 0);
	__CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, sizeof(uint64_t)));
	__CTASSERT(ADD_OK(uint64_t, CLOOP2_OFFSET_TABLE_OFFSET,
		MAX_N_BLOCKS*sizeof(uint64_t)));
	const uint64_t nwritten = (S->offset <= (CLOOP2_OFFSET_TABLE_OFFSET +
		((uint64_t)S->n_blocks * sizeof(uint64_t)))?
	    0 : S->offset);

	/*
	 * snprintf_ss can't do floating-point, so do fixed-point instead.
	 * Divide first when multiplying by 100 could overflow.
	 */
	const uint64_t ratio_percent =
	    (nread > 0?
		((nwritten >= (UINT64_MAX / 100)) ?
		    ((nwritten / nread) * 100) : ((nwritten * 100) / nread))
		: 0);

	/* Format the status. */
	assert(S->n_checkpointed_blocks <= MAX_N_BLOCKS);
	assert(S->blocksize <= MAX_BLOCKSIZE);
	__CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, MAX_BLOCKSIZE));
	const int n = snprintf_ss(buf, sizeof(buf),
	    "vndcompress: read %"PRIu64" bytes, wrote %"PRIu64" bytes, "
	    "compression ratio %"PRIu64"%% (checkpointed %"PRIu64" bytes)\n",
	    nread, nwritten, ratio_percent,
	    ((uint64_t)S->n_checkpointed_blocks * (uint64_t)S->blocksize));
	if (n < 0) {
		const char msg[] = "vndcompress: can't format info\n";

		/* The array includes the terminating NUL; don't emit it. */
		(void)write(STDERR_FILENO, msg, __arraycount(msg) - 1);
	} else {
		__CTASSERT(INT_MAX <= SIZE_MAX);
		size_t len = (size_t)n;

		/*
		 * Assuming snprintf_ss reports the untruncated length
		 * like snprintf, never write more than what actually
		 * fit in buf (excluding the terminating NUL).
		 */
		if (len >= sizeof(buf))
			len = sizeof(buf) - 1;
		(void)write(STDERR_FILENO, buf, len);
	}

out:
	/* Restore errno. */
	errno = error;
}
/*
 * Handler for the checkpoint signals: announce how many input bytes
 * are being checkpointed and take a checkpoint.  Before the global
 * state is initialized there is nothing to checkpoint, and only a
 * warning is printed.  errno is preserved across the handler.
 */
static void
checkpoint_signal_handler(int signo __unused)
{
	const int saved_errno = errno;
	struct compress_state *const state = &global_state;

	if (state->initialized) {
		assert(state->image_fd >= 0);
		assert(state->cloop2_fd >= 0);

		/* The byte count below cannot overflow given these bounds. */
		assert(state->blkno <= MAX_N_BLOCKS);
		assert(state->blocksize <= MAX_BLOCKSIZE);
		__CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, MAX_BLOCKSIZE));
		const uint64_t n_bytes =
		    ((uint64_t)state->blkno * (uint64_t)state->blocksize);

		warnx_ss("checkpointing %"PRIu64" bytes", n_bytes);
		compress_checkpoint(state);
	} else {
		warnx_ss("nothing to checkpoint yet");
	}

	errno = saved_errno;
}
/*
* Report progress.
*
* Currently a placeholder: the body is empty and S is unused.
* vndcompress calls it at the top of every iteration of its block
* loop.
*
* XXX Should do a progress bar here.
*/
static void
compress_progress(struct compress_state *S __unused)
{
}
/*
* Parse arguments, open the files, and initialize the state.
*
* argv[0] names the input image and argv[1] the output cloop2 file.
* An optional argv[2] gives the block size when FLAG_b is not set.
* Any other argument count is handed to usage().
*
* On return S describes the transfer and S->initialized is set. Either
* compress_restart recovered the position of an earlier partial
* transfer (FLAG_r), or the output was started afresh with a zero
* header and a reset offset table. Failures are reported through
* err/errx.
*/
static void
compress_init(int argc, char **argv, const struct options *O,
struct compress_state *S)
{
if (!((argc == 2) || (argc == 3)))
usage();
const char *const image_pathname = argv[0];
const char *const cloop2_pathname = argv[1];
/* Grab the block size either from `-b' or from the last argument. */
__CTASSERT(0 < DEV_BSIZE);
__CTASSERT((MIN_BLOCKSIZE % DEV_BSIZE) == 0);
__CTASSERT(MIN_BLOCKSIZE <= DEF_BLOCKSIZE);
__CTASSERT((DEF_BLOCKSIZE % DEV_BSIZE) == 0);
__CTASSERT(DEF_BLOCKSIZE <= MAX_BLOCKSIZE);
__CTASSERT((MAX_BLOCKSIZE % DEV_BSIZE) == 0);
if (ISSET(O->flags, FLAG_b)) {
if (argc == 3) {
warnx("use -b or the extra argument, not both");
usage();
}
S->blocksize = O->blocksize;
} else {
S->blocksize = (argc == 2? DEF_BLOCKSIZE :
strsuftoll("block size", argv[2], MIN_BLOCKSIZE,
MAX_BLOCKSIZE));
}
/* Sanity-check the blocksize. (strsuftoll guarantees bounds.) */
__CTASSERT(DEV_BSIZE <= UINT32_MAX);
if ((S->blocksize % DEV_BSIZE) != 0)
errx(1, "bad blocksize: %"PRIu32
" (not a multiple of %"PRIu32")",
S->blocksize, (uint32_t)DEV_BSIZE);
assert(MIN_BLOCKSIZE <= S->blocksize);
assert((S->blocksize % DEV_BSIZE) == 0);
assert(S->blocksize <= MAX_BLOCKSIZE);
/* Grab the end block number if we have one. */
S->end_block = (ISSET(O->flags, FLAG_p)? O->end_block : 0);
/* Grab the checkpoint block count, if we have one. */
S->checkpoint_blocks =
(ISSET(O->flags, FLAG_k)? O->checkpoint_blocks : 0);
/* Open the input image file and the output cloop2 file. */
S->image_fd = open(image_pathname, O_RDONLY);
if (S->image_fd == -1)
err(1, "open(%s)", image_pathname);
/*
* Without FLAG_r, start the output from scratch. With FLAG_r,
* open it read-write and untruncated so that compress_restart can
* read what is already there. With FLAG_R as well, omit O_CREAT
* so that the output must already exist.
*/
int oflags;
if (!ISSET(O->flags, FLAG_r))
oflags = (O_WRONLY | O_TRUNC | O_CREAT);
else if (!ISSET(O->flags, FLAG_R))
oflags = (O_RDWR | O_CREAT);
else
oflags = O_RDWR;
S->cloop2_fd = open(cloop2_pathname, oflags, 0777);
if (S->cloop2_fd == -1)
err(1, "open(%s)", cloop2_pathname);
/* Find the size of the input image. */
if (ISSET(O->flags, FLAG_l)) {
/* The caller supplied the length explicitly. */
S->size = O->length;
} else {
static const struct stat zero_st;
struct stat st = zero_st;
if (fstat(S->image_fd, &st) == -1)
err(1, "stat(%s)", image_pathname);
if (st.st_size <= 0)
errx(1, "unknown image size");
assert(st.st_size >= 0);
__CTASSERT(OFF_MAX <= UINT64_MAX);
assert(__type_fit(uint64_t, st.st_size));
S->size = st.st_size;
}
assert(S->size <= OFF_MAX);
/* Find number of full blocks and whether there's a partial block. */
__CTASSERT(0 < MIN_BLOCKSIZE);
assert(0 < S->blocksize);
if (TOOMANY(off_t, (off_t)S->size, (off_t)S->blocksize,
(off_t)MAX_N_BLOCKS))
errx(1, "image too large for block size %"PRIu32": %"PRIu64,
S->blocksize, S->size);
__CTASSERT(MAX_N_BLOCKS <= UINT32_MAX);
S->n_full_blocks = S->size/S->blocksize;
S->n_blocks = HOWMANY(S->size, S->blocksize);
assert(S->n_full_blocks <= S->n_blocks);
assert(S->n_blocks <= MAX_N_BLOCKS);
/* Choose a window size. */
const uint32_t window_size = (ISSET(O->flags, FLAG_w)? O->window_size :
DEF_WINDOW_SIZE);
/* Create an offset table for the blocks; one extra for the end. */
__CTASSERT(ADD_OK(uint32_t, MAX_N_BLOCKS, 1));
S->n_offsets = (S->n_blocks + 1);
__CTASSERT(MAX_N_OFFSETS == (MAX_N_BLOCKS + 1));
__CTASSERT(MUL_OK(size_t, MAX_N_OFFSETS, sizeof(uint64_t)));
__CTASSERT(CLOOP2_OFFSET_TABLE_OFFSET <= OFFTAB_MAX_FDPOS);
offtab_init(&S->offtab, S->n_offsets, window_size, S->cloop2_fd,
CLOOP2_OFFSET_TABLE_OFFSET);
/* Attempt to restart a partial transfer if requested. */
if (ISSET(O->flags, FLAG_r)) {
if (compress_restart(S)) {
/*
* Restart succeeded. Truncate the output
* here, in case any garbage got appended. We
* are committed to making progress at this
* point. If the ftruncate fails, we don't
* lose anything valuable -- this is the last
* point at which we can restart anyway.
*/
if (ftruncate(S->cloop2_fd, S->offset) == -1)
err(1, "ftruncate failed");
/* All set! No more initialization to do. */
return;
} else {
/* Restart failed. Barf now if requested. */
if (ISSET(O->flags, FLAG_R))
errx(1, "restart failed, aborting");
/* Otherwise, truncate and start at the top. */
if (ftruncate(S->cloop2_fd, 0) == -1)
err(1, "truncate failed");
if (lseek(S->cloop2_fd, 0, SEEK_SET) == -1)
err(1, "lseek to cloop2 beginning failed");
/* If we seeked in the input, rewind. */
if (S->blkno != 0) {
if (lseek(S->image_fd, 0, SEEK_SET) == -1)
err(1,
"lseek to image beginning failed");
}
}
}
/* Write a bogus (zero) header for now, until we checkpoint. */
static const struct cloop2_header zero_header;
const ssize_t h_written = write(S->cloop2_fd, &zero_header,
sizeof(zero_header));
if (h_written == -1)
err(1, "write header");
assert(h_written >= 0);
if ((size_t)h_written != sizeof(zero_header))
errx(1, "partial write of header: %zu != %zu",
(size_t)h_written, sizeof(zero_header));
/* Reset the offset table to be empty and write it. */
offtab_reset_write(&S->offtab);
/*
* Start at the beginning of the image. The first compressed
* block goes right after the header and the offset table.
*/
S->blkno = 0;
S->offset = (sizeof(struct cloop2_header) +
((uint64_t)S->n_offsets * sizeof(uint64_t)));
S->n_checkpointed_blocks = 0;
/* Good to go and ready for interruption by a signal. */
S->initialized = 1;
}
/*
* Try to recover state from an existing output file.
*
* The header must carry the cloop2 magic and match S->blocksize and
* S->n_blocks. The offset table must start with the expected first
* offset, continue with strictly increasing offsets whose gaps are
* below twice the block size, and be all ones from the first unwritten
* entry onward.
*
* On success, fill the offset table with what's in the file, set
* S->blkno and S->offset to reflect our position, and seek to the
* respective positions in the input and output files.
*
* On failure, return false. May clobber the offset table, S->blkno,
* S->offset, and the file pointers.
*/
static bool
compress_restart(struct compress_state *S)
{
/* Read in the header. */
static const struct cloop2_header zero_header;
struct cloop2_header header = zero_header;
const ssize_t h_read = read_block(S->cloop2_fd, &header,
sizeof(header));
if (h_read == -1) {
warn("failed to read header");
return false;
}
assert(h_read >= 0);
if ((size_t)h_read != sizeof(header)) {
warnx("partial read of header");
return false;
}
/* Check that the header looks like a header. */
__CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic));
if (memcmp(header.cl2h_magic, cloop2_magic, sizeof(cloop2_magic))
!= 0) {
warnx("bad cloop2 shell script magic");
return false;
}
/* Check the header parameters. */
if (be32toh(header.cl2h_blocksize) != S->blocksize) {
warnx("mismatched block size: %"PRIu32
" (expected %"PRIu32")",
be32toh(header.cl2h_blocksize), S->blocksize);
return false;
}
if (be32toh(header.cl2h_n_blocks) != S->n_blocks) {
warnx("mismatched number of blocks: %"PRIu32
" (expected %"PRIu32")",
be32toh(header.cl2h_n_blocks), S->n_blocks);
return false;
}
/* Read in the partial offset table. */
if (!offtab_reset_read(&S->offtab, &warn, &warnx))
return false;
if (!offtab_prepare_get(&S->offtab, 0))
return false;
/* The first entry must point just past the header and the table. */
const uint64_t first_offset = offtab_get(&S->offtab, 0);
__CTASSERT(MUL_OK(uint64_t, MAX_N_OFFSETS, sizeof(uint64_t)));
__CTASSERT(ADD_OK(uint64_t, sizeof(struct cloop2_header),
MAX_N_OFFSETS*sizeof(uint64_t)));
const uint64_t expected = sizeof(struct cloop2_header) +
((uint64_t)S->n_offsets * sizeof(uint64_t));
if (first_offset != expected) {
warnx("first offset is not 0x%"PRIx64": 0x%"PRIx64,
expected, first_offset);
return false;
}
/*
* Find where we left off: scan until the first all-ones entry,
* checking that each entry is consistent with the one before it.
*/
__CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
uint32_t blkno = 0;
uint64_t last_offset = first_offset;
for (blkno = 0; blkno < S->n_blocks; blkno++) {
if (!offtab_prepare_get(&S->offtab, blkno))
return false;
const uint64_t offset = offtab_get(&S->offtab, blkno);
/* All ones marks an entry that was never filled in. */
if (offset == ~(uint64_t)0)
break;
if (0 < blkno) {
const uint64_t start = last_offset;
const uint64_t end = offset;
if (end <= start) {
warnx("bad offset table: 0x%"PRIx64
", 0x%"PRIx64, start, end);
return false;
}
/* XXX compression ratio bound */
__CTASSERT(MUL_OK(size_t, 2, MAX_BLOCKSIZE));
if ((2 * (size_t)S->blocksize) <= (end - start)) {
warnx("block %"PRIu32" too large:"
" %"PRIu64" bytes"
" from 0x%"PRIx64" to 0x%"PRIx64,
blkno, (end - start), start, end);
return false;
}
}
last_offset = offset;
}
if (blkno == 0) {
warnx("no blocks were written; nothing to restart");
return false;
}
/* Make sure the rest of the offset table is all ones. */
if (blkno < S->n_blocks) {
uint32_t nblkno;
for (nblkno = blkno; nblkno < S->n_blocks; nblkno++) {
if (!offtab_prepare_get(&S->offtab, nblkno))
return false;
const uint64_t offset = offtab_get(&S->offtab, nblkno);
if (offset != ~(uint64_t)0) {
warnx("bad partial offset table entry"
" at %"PRIu32": 0x%"PRIx64,
nblkno, offset);
return false;
}
}
}
/*
* XXX Consider decompressing some number of blocks to make
* sure they match.
*/
/*
* Back up by one. blkno counted the valid entries, so after the
* decrement it indexes the last valid entry, whose value is
* last_offset.
*/
assert(1 <= blkno);
blkno -= 1;
/* Seek to the output position. */
assert(last_offset <= OFF_MAX);
if (lseek(S->cloop2_fd, last_offset, SEEK_SET) == -1) {
warn("lseek output cloop2 to %"PRIx64" failed", last_offset);
return false;
}
/* Switch from reading to writing the offset table. */
if (!offtab_transmogrify_read_to_write(&S->offtab, blkno))
return false;
/*
* Seek to the input position last, after all other possible
* failures, because if the input is a pipe, we can't change
* our mind, rewind, and start at the beginning instead of
* restarting.
*/
assert(S->size <= OFF_MAX);
assert(blkno <= (S->size / S->blocksize));
const off_t restart_position = ((off_t)blkno * (off_t)S->blocksize);
assert(0 <= restart_position);
assert(restart_position <= (off_t)S->size);
if (lseek(S->image_fd, restart_position, SEEK_SET) == -1) {
if (errno != ESPIPE) {
warn("lseek input image failed");
return false;
}
/* Try read instead of lseek for a pipe/socket/fifo. */
void *const buffer = malloc(0x10000);
if (buffer == NULL)
err(1, "malloc temporary buffer");
/* Read and discard restart_position bytes, a buffer at a time. */
off_t left = restart_position;
while (left > 0) {
const size_t size = MIN(0x10000, left);
const ssize_t n_read = read_block(S->image_fd, buffer,
size);
if (n_read == -1) {
free(buffer);
warn("read of input image failed");
return false;
}
assert(n_read >= 0);
if ((size_t)n_read != size) {
free(buffer);
warnx("partial read of input image");
return false;
}
assert((off_t)size <= left);
left -= size;
}
free(buffer);
}
/* Start where we left off. */
S->blkno = blkno;
S->offset = last_offset;
S->n_checkpointed_blocks = blkno;
/* Good to go and ready for interruption by a signal. */
S->initialized = 1;
/* Success! */
return true;
}
/*
 * Transfer one block: read readsize bytes from in_fd into uncompbuf,
 * compress them into compbuf, and write the result to out_fd.
 *
 * Returns the number of compressed bytes written.  A failed or short
 * read, a compression error, and a failed or short write are all
 * fatal; blkno is used only in the error messages.
 */
static uint32_t
compress_block(int in_fd, int out_fd, uint32_t blkno, uint32_t blocksize,
    uint32_t readsize, void *uncompbuf, void *compbuf)
{
	assert(readsize <= blocksize);
	assert(blocksize <= MAX_BLOCKSIZE);

	/* Fetch the input; anything but exactly readsize bytes is fatal. */
	const ssize_t n_in = read_block(in_fd, uncompbuf, readsize);
	if (n_in == -1)
		err(1, "read block %"PRIu32, blkno);
	assert(n_in >= 0);
	if ((size_t)n_in != readsize)
		errx(1, "partial read of block %"PRIu32": %zu != %"PRIu32,
		    blkno, (size_t)n_in, readsize);

	/*
	 * Compress it, telling zlib the destination holds twice the
	 * source length.
	 */
	/* XXX compression ratio bound */
	__CTASSERT(MUL_OK(unsigned long, 2, MAX_BLOCKSIZE));
	const unsigned long src_len =
	    (VNDCOMPRESS_COMPAT? blocksize : readsize);	/* XXX */
	const unsigned long dst_cap = (src_len * 2);
	unsigned long dst_len = dst_cap;
	const int zerror = compress2(compbuf, &dst_len, uncompbuf, src_len,
	    Z_BEST_COMPRESSION);
	if (zerror != Z_OK)
		errx(1, "compressed failed at block %"PRIu32" (%d): %s", blkno,
		    zerror, zError(zerror));
	assert(dst_len <= dst_cap);

	/* Emit the compressed bytes; a short write is fatal too. */
	const ssize_t n_out = write(out_fd, compbuf, dst_len);
	if (n_out == -1)
		err(1, "write block %"PRIu32, blkno);
	assert(n_out >= 0);
	if ((size_t)n_out != dst_len)
		errx(1, "partial write of block %"PRIu32": %zu != %lu",
		    blkno, (size_t)n_out, dst_len);

	return (size_t)n_out;
}
/*
 * Take a checkpoint when one is due: periodic checkpoints must be
 * enabled (checkpoint_blocks nonzero), at least one block must have
 * been transferred, and the current block number must be a multiple
 * of the checkpoint interval.
 */
static void
compress_maybe_checkpoint(struct compress_state *S)
{
	/* Periodic checkpoints are turned off. */
	if (S->checkpoint_blocks == 0)
		return;
	/* Nothing has been transferred yet. */
	if (S->blkno == 0)
		return;
	/* Not on an interval boundary. */
	if ((S->blkno % S->checkpoint_blocks) != 0)
		return;

	assert(S->offset <= OFF_MAX);
	assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR));
	compress_checkpoint(S);
}
/*
* Write the prefix of the offset table that we have filled so far.
*
* We fsync the data blocks we have written, and then write the offset
* table, and then fsync the offset table and file metadata. This
* should help to avoid offset tables that point at garbage data.
*
* This may be called from a signal handler, so it must not use stdio,
* malloc, &c. -- it may only (a) handle signal-safe state in S, and
* (b) do file descriptor I/O / fsync.
*
* XXX This requires further thought and heavy testing to be sure.
*
* XXX Should have an option to suppress fsync.
*
* XXX Should have an option to fail on fsync failures.
*
* XXX Would be nice if we could just do a barrier rather than an
* fsync.
*
* XXX How might we automatically test the fsyncs?
*/
static void
compress_checkpoint(struct compress_state *S)
{
assert(S->blkno < S->n_offsets);
const uint32_t n_offsets = (S->blkno + 1);
assert(n_offsets <= S->n_offsets);
assert(S->offset <= OFF_MAX);
assert((off_t)S->offset <= lseek(S->cloop2_fd, 0, SEEK_CUR));
/* Make sure the data hits the disk before we say it's ready. */
if (fsync_range(S->cloop2_fd, (FFILESYNC | FDISKSYNC), 0, S->offset)
== -1)
warn_ss("fsync of output failed");
/* Say the data blocks are ready. */
offtab_checkpoint(&S->offtab, n_offsets,
(S->n_checkpointed_blocks == 0? OFFTAB_CHECKPOINT_SYNC : 0));
/*
* If this is the first checkpoint, initialize the header.
* Signal handler can race with main code here, but it is
* harmless -- just an extra fsync and write of the header,
* which are both idempotent.
*
* Once we have synchronously checkpointed the offset table,
* subsequent writes will preserve a valid state.
*/
if (S->n_checkpointed_blocks == 0) {
static const struct cloop2_header zero_header;
struct cloop2_header header = zero_header;
/* Format the header. */
__CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic));
(void)memcpy(header.cl2h_magic, cloop2_magic,
sizeof(cloop2_magic));
header.cl2h_blocksize = htobe32(S->blocksize);
header.cl2h_n_blocks = htobe32(S->n_blocks);
/* Write the header. */
const ssize_t h_written = pwrite(S->cloop2_fd, &header,
sizeof(header), 0);
if (h_written == -1)
err_ss(1, "write header");
assert(h_written >= 0);
if ((size_t)h_written != sizeof(header))
errx_ss(1, "partial write of header: %zu != %zu",
(size_t)h_written, sizeof(header));
}
/* Record how many blocks we've checkpointed. */
{
sigset_t old_sigmask;
block_signals(&old_sigmask);
S->n_checkpointed_blocks = S->blkno;
restore_sigmask(&old_sigmask);
}
}
/*
 * Tear down what compress_init set up: destroy the offset table and
 * close the output and input file descriptors.  A failed close only
 * produces a warning.
 */
static void
compress_exit(struct compress_state *S)
{
	/* The offset table goes first. */
	offtab_destroy(&S->offtab);

	/* Then the output file, then the input image. */
	const int out_rc = close(S->cloop2_fd);
	if (out_rc == -1)
		warn("close(cloop2 fd)");

	const int in_rc = close(S->image_fd);
	if (in_rc == -1)
		warn("close(image fd)");
}