476 lines
12 KiB
C
476 lines
12 KiB
C
/* $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $ */
|
|
|
|
/*-
|
|
* Copyright (c) 2011 The NetBSD Foundation, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* This code is derived from software contributed to The NetBSD Foundation
|
|
* by Christos Zoulas.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
#include <sys/cdefs.h>
|
|
__RCSID("$NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $");
|
|
|
|
#include <stdarg.h>
|
|
#include <errno.h>
|
|
#include <stdio.h>
|
|
#include <unistd.h>
|
|
#include <lzma.h>
|
|
|
|
static off_t
|
|
unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in)
|
|
{
|
|
lzma_stream strm = LZMA_STREAM_INIT;
|
|
static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED;
|
|
lzma_ret ret;
|
|
lzma_action action = LZMA_RUN;
|
|
off_t bytes_out, bp;
|
|
uint8_t ibuf[BUFSIZ];
|
|
uint8_t obuf[BUFSIZ];
|
|
|
|
if (bytes_in == NULL)
|
|
bytes_in = &bp;
|
|
|
|
strm.next_in = ibuf;
|
|
memcpy(ibuf, pre, prelen);
|
|
strm.avail_in = read(i, ibuf + prelen, sizeof(ibuf) - prelen);
|
|
if (strm.avail_in == (size_t)-1)
|
|
maybe_err("read failed");
|
|
infile_newdata(strm.avail_in);
|
|
strm.avail_in += prelen;
|
|
*bytes_in = strm.avail_in;
|
|
|
|
if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK)
|
|
maybe_errx("Can't initialize decoder (%d)", ret);
|
|
|
|
strm.next_out = NULL;
|
|
strm.avail_out = 0;
|
|
if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK)
|
|
maybe_errx("Can't read headers (%d)", ret);
|
|
|
|
bytes_out = 0;
|
|
strm.next_out = obuf;
|
|
strm.avail_out = sizeof(obuf);
|
|
|
|
for (;;) {
|
|
check_siginfo();
|
|
if (strm.avail_in == 0) {
|
|
strm.next_in = ibuf;
|
|
strm.avail_in = read(i, ibuf, sizeof(ibuf));
|
|
switch (strm.avail_in) {
|
|
case (size_t)-1:
|
|
maybe_err("read failed");
|
|
/*NOTREACHED*/
|
|
case 0:
|
|
action = LZMA_FINISH;
|
|
break;
|
|
default:
|
|
infile_newdata(strm.avail_in);
|
|
*bytes_in += strm.avail_in;
|
|
break;
|
|
}
|
|
}
|
|
|
|
ret = lzma_code(&strm, action);
|
|
|
|
// Write and check write error before checking decoder error.
|
|
// This way as much data as possible gets written to output
|
|
// even if decoder detected an error.
|
|
if (strm.avail_out == 0 || ret != LZMA_OK) {
|
|
const size_t write_size = sizeof(obuf) - strm.avail_out;
|
|
|
|
if (write(o, obuf, write_size) != (ssize_t)write_size)
|
|
maybe_err("write failed");
|
|
|
|
strm.next_out = obuf;
|
|
strm.avail_out = sizeof(obuf);
|
|
bytes_out += write_size;
|
|
}
|
|
|
|
if (ret != LZMA_OK) {
|
|
if (ret == LZMA_STREAM_END) {
|
|
// Check that there's no trailing garbage.
|
|
if (strm.avail_in != 0 || read(i, ibuf, 1))
|
|
ret = LZMA_DATA_ERROR;
|
|
else {
|
|
lzma_end(&strm);
|
|
return bytes_out;
|
|
}
|
|
}
|
|
|
|
const char *msg;
|
|
switch (ret) {
|
|
case LZMA_MEM_ERROR:
|
|
msg = strerror(ENOMEM);
|
|
break;
|
|
|
|
case LZMA_FORMAT_ERROR:
|
|
msg = "File format not recognized";
|
|
break;
|
|
|
|
case LZMA_OPTIONS_ERROR:
|
|
// FIXME: Better message?
|
|
msg = "Unsupported compression options";
|
|
break;
|
|
|
|
case LZMA_DATA_ERROR:
|
|
msg = "File is corrupt";
|
|
break;
|
|
|
|
case LZMA_BUF_ERROR:
|
|
msg = "Unexpected end of input";
|
|
break;
|
|
|
|
case LZMA_MEMLIMIT_ERROR:
|
|
msg = "Reached memory limit";
|
|
break;
|
|
|
|
default:
|
|
maybe_errx("Unknown error (%d)", ret);
|
|
break;
|
|
}
|
|
maybe_errx("%s", msg);
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
#include <stdbool.h>
|
|
|
|
/*
|
|
* Copied various bits and pieces from xz support code or brute force
|
|
* replacements.
|
|
*/
|
|
|
|
/* Smaller of two values.  Each argument may be evaluated more than once. */
#define my_min(X,Y) (((X) < (Y)) ? (X) : (Y))

/*
 * Size in bytes of the I/O buffer.
 *
 * A BUFSIZ of 1024 or less is considered too small and is replaced by
 * 8192.  Otherwise BUFSIZ is rounded down to a multiple of 8
 * (sizeof(uint64_t)) so that the uint64_t view of the buffer below covers
 * it exactly.
 */
#if BUFSIZ > 1024
#	define IO_BUFFER_SIZE (BUFSIZ & ~7U)
#else
#	define IO_BUFFER_SIZE 8192
#endif

/*
 * I/O buffer that can be viewed as bytes, 32-bit words or 64-bit words.
 * Declaring the wider views in a union keeps the byte array suitably
 * aligned for 32-bit and 64-bit accesses.
 */
typedef union {
	uint8_t u8[IO_BUFFER_SIZE];
	uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)];
	uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)];
} io_buf;
|
|
|
|
|
|
static bool
|
|
io_pread(int fd, io_buf *buf, size_t size, off_t pos)
|
|
{
|
|
// Using lseek() and read() is more portable than pread() and
|
|
// for us it is as good as real pread().
|
|
if (lseek(fd, pos, SEEK_SET) != pos) {
|
|
return true;
|
|
}
|
|
|
|
const size_t amount = read(fd, buf, size);
|
|
if (amount == SIZE_MAX)
|
|
return true;
|
|
|
|
if (amount != size) {
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Most of the following is copied (mostly verbatim) from the xz
|
|
* distribution, from file src/xz/list.c
|
|
*/
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
/// \file list.c
|
|
/// \brief Listing information about .xz files
|
|
//
|
|
// Author: Lasse Collin
|
|
//
|
|
// This file has been put into the public domain.
|
|
// You can do whatever you want with this file.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
/// Information about a .xz file
|
|
typedef struct {
|
|
/// Combined Index of all Streams in the file
|
|
lzma_index *idx;
|
|
|
|
/// Total amount of Stream Padding
|
|
uint64_t stream_padding;
|
|
|
|
/// Highest memory usage so far
|
|
uint64_t memusage_max;
|
|
|
|
/// True if all Blocks so far have Compressed Size and
|
|
/// Uncompressed Size fields
|
|
bool all_have_sizes;
|
|
|
|
/// Oldest XZ Utils version that will decompress the file
|
|
uint32_t min_version;
|
|
|
|
} xz_file_info;
|
|
|
|
#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
|
|
|
|
|
|
/// \brief Parse the Index(es) from the given .xz file
|
|
///
|
|
/// \param xfi Pointer to structure where the decoded information
|
|
/// is stored.
|
|
/// \param pair Input file
|
|
///
|
|
/// \return On success, false is returned. On error, true is returned.
|
|
///
|
|
// TODO: This function is pretty big. liblzma should have a function that
|
|
// takes a callback function to parse the Index(es) from a .xz file to make
|
|
// it easy for applications.
|
|
static bool
|
|
parse_indexes(xz_file_info *xfi, int src_fd)
|
|
{
|
|
struct stat st;
|
|
|
|
fstat(src_fd, &st);
|
|
if (st.st_size <= 0) {
|
|
return true;
|
|
}
|
|
|
|
if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
|
|
return true;
|
|
}
|
|
|
|
io_buf buf;
|
|
lzma_stream_flags header_flags;
|
|
lzma_stream_flags footer_flags;
|
|
lzma_ret ret;
|
|
|
|
// lzma_stream for the Index decoder
|
|
lzma_stream strm = LZMA_STREAM_INIT;
|
|
|
|
// All Indexes decoded so far
|
|
lzma_index *combined_index = NULL;
|
|
|
|
// The Index currently being decoded
|
|
lzma_index *this_index = NULL;
|
|
|
|
// Current position in the file. We parse the file backwards so
|
|
// initialize it to point to the end of the file.
|
|
off_t pos = st.st_size;
|
|
|
|
// Each loop iteration decodes one Index.
|
|
do {
|
|
// Check that there is enough data left to contain at least
|
|
// the Stream Header and Stream Footer. This check cannot
|
|
// fail in the first pass of this loop.
|
|
if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
|
|
goto error;
|
|
}
|
|
|
|
pos -= LZMA_STREAM_HEADER_SIZE;
|
|
lzma_vli stream_padding = 0;
|
|
|
|
// Locate the Stream Footer. There may be Stream Padding which
|
|
// we must skip when reading backwards.
|
|
while (true) {
|
|
if (pos < LZMA_STREAM_HEADER_SIZE) {
|
|
goto error;
|
|
}
|
|
|
|
if (io_pread(src_fd, &buf,
|
|
LZMA_STREAM_HEADER_SIZE, pos))
|
|
goto error;
|
|
|
|
// Stream Padding is always a multiple of four bytes.
|
|
int i = 2;
|
|
if (buf.u32[i] != 0)
|
|
break;
|
|
|
|
// To avoid calling io_pread() for every four bytes
|
|
// of Stream Padding, take advantage that we read
|
|
// 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
|
|
// check them too before calling io_pread() again.
|
|
do {
|
|
stream_padding += 4;
|
|
pos -= 4;
|
|
--i;
|
|
} while (i >= 0 && buf.u32[i] == 0);
|
|
}
|
|
|
|
// Decode the Stream Footer.
|
|
ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
|
|
if (ret != LZMA_OK) {
|
|
goto error;
|
|
}
|
|
|
|
// Check that the Stream Footer doesn't specify something
|
|
// that we don't support. This can only happen if the xz
|
|
// version is older than liblzma and liblzma supports
|
|
// something new.
|
|
//
|
|
// It is enough to check Stream Footer. Stream Header must
|
|
// match when it is compared against Stream Footer with
|
|
// lzma_stream_flags_compare().
|
|
if (footer_flags.version != 0) {
|
|
goto error;
|
|
}
|
|
|
|
// Check that the size of the Index field looks sane.
|
|
lzma_vli index_size = footer_flags.backward_size;
|
|
if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
|
|
goto error;
|
|
}
|
|
|
|
// Set pos to the beginning of the Index.
|
|
pos -= index_size;
|
|
|
|
// Decode the Index.
|
|
ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX);
|
|
if (ret != LZMA_OK) {
|
|
goto error;
|
|
}
|
|
|
|
do {
|
|
// Don't give the decoder more input than the
|
|
// Index size.
|
|
strm.avail_in = my_min(IO_BUFFER_SIZE, index_size);
|
|
if (io_pread(src_fd, &buf, strm.avail_in, pos))
|
|
goto error;
|
|
|
|
pos += strm.avail_in;
|
|
index_size -= strm.avail_in;
|
|
|
|
strm.next_in = buf.u8;
|
|
ret = lzma_code(&strm, LZMA_RUN);
|
|
|
|
} while (ret == LZMA_OK);
|
|
|
|
// If the decoding seems to be successful, check also that
|
|
// the Index decoder consumed as much input as indicated
|
|
// by the Backward Size field.
|
|
if (ret == LZMA_STREAM_END)
|
|
if (index_size != 0 || strm.avail_in != 0)
|
|
ret = LZMA_DATA_ERROR;
|
|
|
|
if (ret != LZMA_STREAM_END) {
|
|
// LZMA_BUFFER_ERROR means that the Index decoder
|
|
// would have liked more input than what the Index
|
|
// size should be according to Stream Footer.
|
|
// The message for LZMA_DATA_ERROR makes more
|
|
// sense in that case.
|
|
if (ret == LZMA_BUF_ERROR)
|
|
ret = LZMA_DATA_ERROR;
|
|
|
|
goto error;
|
|
}
|
|
|
|
// Decode the Stream Header and check that its Stream Flags
|
|
// match the Stream Footer.
|
|
pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
|
|
if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
|
|
goto error;
|
|
}
|
|
|
|
pos -= lzma_index_total_size(this_index);
|
|
if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos))
|
|
goto error;
|
|
|
|
ret = lzma_stream_header_decode(&header_flags, buf.u8);
|
|
if (ret != LZMA_OK) {
|
|
goto error;
|
|
}
|
|
|
|
ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
|
|
if (ret != LZMA_OK) {
|
|
goto error;
|
|
}
|
|
|
|
// Store the decoded Stream Flags into this_index. This is
|
|
// needed so that we can print which Check is used in each
|
|
// Stream.
|
|
ret = lzma_index_stream_flags(this_index, &footer_flags);
|
|
if (ret != LZMA_OK)
|
|
goto error;
|
|
|
|
// Store also the size of the Stream Padding field. It is
|
|
// needed to show the offsets of the Streams correctly.
|
|
ret = lzma_index_stream_padding(this_index, stream_padding);
|
|
if (ret != LZMA_OK)
|
|
goto error;
|
|
|
|
if (combined_index != NULL) {
|
|
// Append the earlier decoded Indexes
|
|
// after this_index.
|
|
ret = lzma_index_cat(
|
|
this_index, combined_index, NULL);
|
|
if (ret != LZMA_OK) {
|
|
goto error;
|
|
}
|
|
}
|
|
|
|
combined_index = this_index;
|
|
this_index = NULL;
|
|
|
|
xfi->stream_padding += stream_padding;
|
|
|
|
} while (pos > 0);
|
|
|
|
lzma_end(&strm);
|
|
|
|
// All OK. Make combined_index available to the caller.
|
|
xfi->idx = combined_index;
|
|
return false;
|
|
|
|
error:
|
|
// Something went wrong, free the allocated memory.
|
|
lzma_end(&strm);
|
|
lzma_index_end(combined_index, NULL);
|
|
lzma_index_end(this_index, NULL);
|
|
return true;
|
|
}
|
|
|
|
/***************** end of copy from list.c *************************/
|
|
|
|
/*
|
|
* Small wrapper to extract total length of a file
|
|
*/
|
|
off_t
|
|
unxz_len(int fd)
|
|
{
|
|
xz_file_info xfi = XZ_FILE_INFO_INIT;
|
|
if (!parse_indexes(&xfi, fd)) {
|
|
off_t res = lzma_index_uncompressed_size(xfi.idx);
|
|
lzma_index_end(xfi.idx, NULL);
|
|
return res;
|
|
}
|
|
return 0;
|
|
}
|
|
|