From 40b41259f0d3ac57f437015c62be7e69c388fc1e Mon Sep 17 00:00:00 2001
From: christos
Date: Sun, 19 Jun 2011 00:43:54 +0000
Subject: [PATCH] Add lzma (.xz) support.

---
Review notes (ignored by git-am):
 - unxz(): count the caller-supplied prefix in strm.avail_in; previously
   the last `prelen' bytes of the first read were never decoded.
 - unxz(): accept bytes_in == NULL and pre == NULL, which is how
   file_uncompress() calls it.
 - unxz(): drop the stray lzma_alone_decoder() initialisation, pass
   LZMA_FINISH once read() reports EOF, keep *bytes_in up to date inside
   the loop, and report read errors with errno.
 - The "index" line for unxz.c is omitted because the blob changed.

 usr.bin/gzip/Makefile |   6 +--
 usr.bin/gzip/gzip.c   |  65 ++++++++++++++++----
 usr.bin/gzip/unxz.c   | 136 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 191 insertions(+), 16 deletions(-)
 create mode 100644 usr.bin/gzip/unxz.c

diff --git a/usr.bin/gzip/Makefile b/usr.bin/gzip/Makefile
index a66e77d4d744..2135a2a0946f 100644
--- a/usr.bin/gzip/Makefile
+++ b/usr.bin/gzip/Makefile
@@ -1,12 +1,12 @@
-#	$NetBSD: Makefile,v 1.13 2009/04/14 22:15:20 lukem Exp $
+#	$NetBSD: Makefile,v 1.14 2011/06/19 00:43:54 christos Exp $
 
 USE_FORT?= yes	# data-driven bugs?
 
 PROG=		gzip
 MAN=		gzip.1 gzexe.1 zdiff.1 zforce.1 zgrep.1 zmore.1 znew.1
 
-DPADD=		${LIBZ} ${LIBBZ2}
-LDADD=		-lz -lbz2
+DPADD=		${LIBZ} ${LIBBZ2} ${LIBLZMA}
+LDADD=		-lz -lbz2 -llzma
 
 SCRIPTS=	gzexe zdiff zforce zgrep zmore znew
 
diff --git a/usr.bin/gzip/gzip.c b/usr.bin/gzip/gzip.c
index bd5f2ef32c17..4b771ec87039 100644
--- a/usr.bin/gzip/gzip.c
+++ b/usr.bin/gzip/gzip.c
@@ -1,4 +1,4 @@
-/*	$NetBSD: gzip.c,v 1.99 2011/03/23 12:59:44 tsutsui Exp $	*/
+/*	$NetBSD: gzip.c,v 1.100 2011/06/19 00:43:54 christos Exp $	*/
 
 /*
  * Copyright (c) 1997, 1998, 2003, 2004, 2006 Matthew R. Green
@@ -30,7 +30,7 @@
 #ifndef lint
 __COPYRIGHT("@(#) Copyright (c) 1997, 1998, 2003, 2004, 2006\
  Matthew R. Green.  All rights reserved.");
-__RCSID("$NetBSD: gzip.c,v 1.99 2011/03/23 12:59:44 tsutsui Exp $");
+__RCSID("$NetBSD: gzip.c,v 1.100 2011/06/19 00:43:54 christos Exp $");
 #endif /* not lint */
 
 /*
@@ -80,6 +80,9 @@ enum filetype {
 #endif
 #ifndef NO_PACK_SUPPORT
 	FT_PACK,
+#endif
+#ifndef NO_XZ_SUPPORT
+	FT_XZ,
 #endif
 	FT_LAST,
 	FT_UNKNOWN
@@ -101,6 +104,12 @@ enum filetype {
 #define PACK_MAGIC	"\037\036"
 #endif
 
+#ifndef NO_XZ_SUPPORT
+#include <lzma.h>
+#define XZ_SUFFIX	".xz"
+#define XZ_MAGIC	"\3757zXZ"
+#endif
+
 #define GZ_SUFFIX	".gz"
 
 #define BUFLEN		(64 * 1024)
@@ -227,6 +236,10 @@ static off_t zuncompress(FILE *, FILE *, char *, size_t, off_t *);
 static off_t	unpack(int, int, char *, size_t, off_t *);
 #endif
 
+#ifndef NO_XZ_SUPPORT
+static off_t	unxz(int, int, char *, size_t, off_t *);
+#endif
+
 int main(int, char *p[]);
 
 #ifdef SMALL
@@ -1096,6 +1109,11 @@ file_gettype(u_char *buf)
 	if (memcmp(buf, PACK_MAGIC, 2) == 0)
 		return FT_PACK;
 	else
+#endif
+#ifndef NO_XZ_SUPPORT
+	if (memcmp(buf, XZ_MAGIC, 4) == 0)	/* XXX: We only have 4 bytes */
+		return FT_XZ;
+	else
 #endif
 		return FT_UNKNOWN;
 }
@@ -1326,7 +1344,6 @@ file_uncompress(char *file, char *outfile, size_t outsize)
 	}
 
 	method = file_gettype(header1);
-
 #ifndef SMALL
 	if (fflag == 0 && method == FT_UNKNOWN) {
 		maybe_warnx("%s: not in gzip format", file);
@@ -1401,9 +1418,9 @@ file_uncompress(char *file, char *outfile, size_t outsize)
 	} else
 		zfd = STDOUT_FILENO;
 
+	switch (method) {
 #ifndef NO_BZIP2_SUPPORT
-	if (method == FT_BZIP2) {
-
+	case FT_BZIP2:
 		/* XXX */
 		if (lflag) {
 			maybe_warnx("no -l with bzip2 files");
@@ -1411,11 +1428,11 @@ file_uncompress(char *file, char *outfile, size_t outsize)
 		}
 
 		size = unbzip2(fd, zfd, NULL, 0, NULL);
-	} else
+		break;
 #endif
 
 #ifndef NO_COMPRESS_SUPPORT
-	if (method == FT_Z) {
+	case FT_Z: {
 		FILE *in, *out;
 
 		/* XXX */
@@ -1448,30 +1465,42 @@ file_uncompress(char *file, char *outfile, size_t outsize)
 			unlink(outfile);
 			goto lose;
 		}
-	} else
+		break;
+	}
 #endif
 
 #ifndef NO_PACK_SUPPORT
-	if (method == FT_PACK) {
+	case FT_PACK:
 		if (lflag) {
 			maybe_warnx("no -l with packed files");
 			goto lose;
 		}
 
 		size = unpack(fd, zfd, NULL, 0, NULL);
-	} else
+		break;
+#endif
+
+#ifndef NO_XZ_SUPPORT
+	case FT_XZ:
+		if (lflag) {
+			maybe_warnx("no -l with xz files");
+			goto lose;
+		}
+
+		size = unxz(fd, zfd, NULL, 0, NULL);
+		break;
 #endif
 
 #ifndef SMALL
-	if (method == FT_UNKNOWN) {
+	case FT_UNKNOWN:
 		if (lflag) {
 			maybe_warnx("no -l for unknown filetypes");
 			goto lose;
 		}
 		size = cat_fd(NULL, 0, NULL, fd);
-	} else
+		break;
 #endif
-	{
+	default:
 		if (lflag) {
 			print_list(fd, isb.st_size, outfile, isb.st_mtime);
 			close(fd);
@@ -1479,6 +1508,7 @@ file_uncompress(char *file, char *outfile, size_t outsize)
 		}
 
 		size = gz_uncompress(fd, zfd, NULL, 0, NULL, file);
+		break;
 	}
 
 	if (close(fd) != 0)
@@ -1660,6 +1690,12 @@ handle_stdin(void)
 		usize = unpack(STDIN_FILENO, STDOUT_FILENO,
 			       (char *)header1, sizeof header1, &gsize);
 		break;
+#endif
+#ifndef NO_XZ_SUPPORT
+	case FT_XZ:
+		usize = unxz(STDIN_FILENO, STDOUT_FILENO,
+			     (char *)header1, sizeof header1, &gsize);
+		break;
 #endif
 	}
 
@@ -2037,6 +2073,9 @@ display_version(void)
 #ifndef NO_PACK_SUPPORT
 #include "unpack.c"
 #endif
+#ifndef NO_XZ_SUPPORT
+#include "unxz.c"
+#endif
 
 static ssize_t
 read_retry(int fd, void *buf, size_t sz)
diff --git a/usr.bin/gzip/unxz.c b/usr.bin/gzip/unxz.c
new file mode 100644
--- /dev/null
+++ b/usr.bin/gzip/unxz.c
@@ -0,0 +1,136 @@
+
+#include <stdarg.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <lzma.h>
+
+/*
+ * Decode the .xz stream read from descriptor `i' and write the result
+ * to descriptor `o'.
+ *
+ * `pre'/`prelen' describe bytes the caller already consumed from `i'
+ * (the magic it sniffed); they are fed to the decoder ahead of anything
+ * read here.  If `bytes_in' is not NULL it receives the number of
+ * compressed bytes consumed, `prelen' included.
+ *
+ * Returns the number of bytes written to `o'.  Every failure is reported
+ * through maybe_err()/maybe_errx().
+ */
+static off_t
+unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in)
+{
+	lzma_stream strm = LZMA_STREAM_INIT;
+	lzma_action action = LZMA_RUN;
+	lzma_ret ret;
+	off_t bytes_out = 0, scratch;
+	ssize_t n;
+	const char *msg;
+	uint8_t ibuf[BUFSIZ];
+	uint8_t obuf[BUFSIZ];
+
+	/* file_uncompress() passes NULL when it does not want the count. */
+	if (bytes_in == NULL)
+		bytes_in = &scratch;
+
+	if (prelen > sizeof(ibuf))
+		maybe_errx("Internal error (bug)");
+
+	/* memcpy() with a NULL source is undefined even for zero bytes. */
+	if (prelen != 0)
+		memcpy(ibuf, pre, prelen);
+
+	n = read(i, ibuf + prelen, sizeof(ibuf) - prelen);
+	if (n == -1)
+		maybe_err("Read failed");
+	if (n == 0)
+		action = LZMA_FINISH;
+
+	/* The prefix is input too; without it the tail of ibuf is dropped. */
+	strm.next_in = ibuf;
+	strm.avail_in = prelen + (size_t)n;
+	*bytes_in = (off_t)strm.avail_in;
+
+	strm.next_out = obuf;
+	strm.avail_out = sizeof(obuf);
+
+	if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, 0)) != LZMA_OK)
+		maybe_errx("Can't initialize decoder");
+
+	for (;;) {
+		if (strm.avail_in == 0 && action == LZMA_RUN) {
+			n = read(i, ibuf, sizeof(ibuf));
+			if (n == -1)
+				maybe_err("Read failed");
+			/* At EOF tell liblzma that no more input will come. */
+			if (n == 0)
+				action = LZMA_FINISH;
+			strm.next_in = ibuf;
+			strm.avail_in = (size_t)n;
+			*bytes_in += n;
+		}
+
+		ret = lzma_code(&strm, action);
+
+		/*
+		 * Write and check write error before checking decoder error.
+		 * This way as much data as possible gets written to output
+		 * even if decoder detected an error.
+		 */
+		if (strm.avail_out == 0 || ret != LZMA_OK) {
+			const size_t write_size = sizeof(obuf) - strm.avail_out;
+
+			if (write(o, obuf, write_size) != (ssize_t)write_size)
+				maybe_err("write failed");
+
+			strm.next_out = obuf;
+			strm.avail_out = sizeof(obuf);
+			bytes_out += write_size;
+		}
+
+		if (ret == LZMA_OK)
+			continue;
+
+		if (ret == LZMA_STREAM_END) {
+			/* Check that there's no trailing garbage. */
+			if (strm.avail_in == 0 && read(i, ibuf, 1) == 0) {
+				lzma_end(&strm);
+				return bytes_out;
+			}
+			ret = LZMA_DATA_ERROR;
+		}
+
+		switch (ret) {
+		case LZMA_MEM_ERROR:
+			msg = strerror(ENOMEM);
+			break;
+
+		case LZMA_FORMAT_ERROR:
+			msg = "File format not recognized";
+			break;
+
+		case LZMA_OPTIONS_ERROR:
+			/* FIXME: Better message? */
+			msg = "Unsupported compression options";
+			break;
+
+		case LZMA_DATA_ERROR:
+			msg = "File is corrupt";
+			break;
+
+		case LZMA_BUF_ERROR:
+			msg = "Unexpected end of input";
+			break;
+
+		case LZMA_MEMLIMIT_ERROR:
+			msg = "Reached memory limit";
+			break;
+
+		default:
+			msg = "Internal error (bug)";
+			break;
+		}
+
+		maybe_errx("%s", msg);
+	}
+}