From 40b41259f0d3ac57f437015c62be7e69c388fc1e Mon Sep 17 00:00:00 2001
From: christos <christos@NetBSD.org>
Date: Sun, 19 Jun 2011 00:43:54 +0000
Subject: [PATCH] Add lzma (.xz) support. Somehow this does not decode after
 the first read yet.

---
 usr.bin/gzip/Makefile |   6 +--
 usr.bin/gzip/gzip.c   |  65 +++++++++++++++++++-----
 usr.bin/gzip/unxz.c   | 113 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 168 insertions(+), 16 deletions(-)
 create mode 100644 usr.bin/gzip/unxz.c

diff --git a/usr.bin/gzip/Makefile b/usr.bin/gzip/Makefile
index a66e77d4d744..2135a2a0946f 100644
--- a/usr.bin/gzip/Makefile
+++ b/usr.bin/gzip/Makefile
@@ -1,12 +1,12 @@
-#	$NetBSD: Makefile,v 1.13 2009/04/14 22:15:20 lukem Exp $
+#	$NetBSD: Makefile,v 1.14 2011/06/19 00:43:54 christos Exp $
 
 USE_FORT?= yes	# data-driven bugs?
 
 PROG=		gzip
 MAN=		gzip.1 gzexe.1 zdiff.1 zforce.1 zgrep.1 zmore.1 znew.1
 
-DPADD=		${LIBZ} ${LIBBZ2}
-LDADD=		-lz -lbz2
+DPADD=		${LIBZ} ${LIBBZ2} ${LIBLZMA}
+LDADD=		-lz -lbz2 -llzma
 
 SCRIPTS=	gzexe zdiff zforce zgrep zmore znew
 
diff --git a/usr.bin/gzip/gzip.c b/usr.bin/gzip/gzip.c
index bd5f2ef32c17..4b771ec87039 100644
--- a/usr.bin/gzip/gzip.c
+++ b/usr.bin/gzip/gzip.c
@@ -1,4 +1,4 @@
-/*	$NetBSD: gzip.c,v 1.99 2011/03/23 12:59:44 tsutsui Exp $	*/
+/*	$NetBSD: gzip.c,v 1.100 2011/06/19 00:43:54 christos Exp $	*/
 
 /*
  * Copyright (c) 1997, 1998, 2003, 2004, 2006 Matthew R. Green
@@ -30,7 +30,7 @@
 #ifndef lint
 __COPYRIGHT("@(#) Copyright (c) 1997, 1998, 2003, 2004, 2006\
  Matthew R. Green.  All rights reserved.");
-__RCSID("$NetBSD: gzip.c,v 1.99 2011/03/23 12:59:44 tsutsui Exp $");
+__RCSID("$NetBSD: gzip.c,v 1.100 2011/06/19 00:43:54 christos Exp $");
 #endif /* not lint */
 
 /*
@@ -80,6 +80,9 @@ enum filetype {
 #endif
 #ifndef NO_PACK_SUPPORT
 	FT_PACK,
+#endif
+#ifndef NO_XZ_SUPPORT
+	FT_XZ,
 #endif
 	FT_LAST,
 	FT_UNKNOWN
@@ -101,6 +104,12 @@ enum filetype {
 #define PACK_MAGIC	"\037\036"
 #endif
 
+#ifndef NO_XZ_SUPPORT
+#include <lzma.h>
+#define XZ_SUFFIX	".xz"
+#define XZ_MAGIC	"\3757zXZ"
+#endif
+
 #define GZ_SUFFIX	".gz"
 
 #define BUFLEN		(64 * 1024)
@@ -227,6 +236,10 @@ static	off_t	zuncompress(FILE *, FILE *, char *, size_t, off_t *);
 static	off_t	unpack(int, int, char *, size_t, off_t *);
 #endif
 
+#ifndef NO_XZ_SUPPORT
+static	off_t	unxz(int, int, char *, size_t, off_t *);
+#endif
+
 int main(int, char *p[]);
 
 #ifdef SMALL
@@ -1096,6 +1109,11 @@ file_gettype(u_char *buf)
 	if (memcmp(buf, PACK_MAGIC, 2) == 0)
 		return FT_PACK;
 	else
+#endif
+#ifndef NO_XZ_SUPPORT
+	if (memcmp(buf, XZ_MAGIC, 4) == 0)	/* XXX: We only have 4 bytes */
+		return FT_XZ;
+	else
 #endif
 		return FT_UNKNOWN;
 }
@@ -1326,7 +1344,6 @@ file_uncompress(char *file, char *outfile, size_t outsize)
 	}
 
 	method = file_gettype(header1);
-
 #ifndef SMALL
 	if (fflag == 0 && method == FT_UNKNOWN) {
 		maybe_warnx("%s: not in gzip format", file);
@@ -1401,9 +1418,9 @@ file_uncompress(char *file, char *outfile, size_t outsize)
 	} else
 		zfd = STDOUT_FILENO;
 
+	switch (method) {
 #ifndef NO_BZIP2_SUPPORT
-	if (method == FT_BZIP2) {
-
+	case FT_BZIP2:
 		/* XXX */
 		if (lflag) {
 			maybe_warnx("no -l with bzip2 files");
@@ -1411,11 +1428,11 @@ file_uncompress(char *file, char *outfile, size_t outsize)
 		}
 
 		size = unbzip2(fd, zfd, NULL, 0, NULL);
-	} else
+		break;
 #endif
 
 #ifndef NO_COMPRESS_SUPPORT
-	if (method == FT_Z) {
+	case FT_Z: {
 		FILE *in, *out;
 
 		/* XXX */
@@ -1448,30 +1465,42 @@ file_uncompress(char *file, char *outfile, size_t outsize)
 			unlink(outfile);
 			goto lose;
 		}
-	} else
+		break;
+	}
 #endif
 
 #ifndef NO_PACK_SUPPORT
-	if (method == FT_PACK) {
+	case FT_PACK:
 		if (lflag) {
 			maybe_warnx("no -l with packed files");
 			goto lose;
 		}
 
 		size = unpack(fd, zfd, NULL, 0, NULL);
-	} else
+		break;
+#endif
+
+#ifndef NO_XZ_SUPPORT
+	case FT_XZ:
+		if (lflag) {
+			maybe_warnx("no -l with xz files");
+			goto lose;
+		}
+
+		size = unxz(fd, zfd, NULL, 0, NULL);
+		break;
 #endif
 
 #ifndef SMALL
-	if (method == FT_UNKNOWN) {
+	case FT_UNKNOWN:
 		if (lflag) {
 			maybe_warnx("no -l for unknown filetypes");
 			goto lose;
 		}
 		size = cat_fd(NULL, 0, NULL, fd);
-	} else
+		break;
 #endif
-	{
+	default:
 		if (lflag) {
 			print_list(fd, isb.st_size, outfile, isb.st_mtime);
 			close(fd);
@@ -1479,6 +1508,7 @@ file_uncompress(char *file, char *outfile, size_t outsize)
 		}
 
 		size = gz_uncompress(fd, zfd, NULL, 0, NULL, file);
+		break;
 	}
 
 	if (close(fd) != 0)
@@ -1660,6 +1690,12 @@ handle_stdin(void)
 		usize = unpack(STDIN_FILENO, STDOUT_FILENO,
 			       (char *)header1, sizeof header1, &gsize);
 		break;
+#endif
+#ifndef NO_XZ_SUPPORT
+	case FT_XZ:
+		usize = unxz(STDIN_FILENO, STDOUT_FILENO,
+			     (char *)header1, sizeof header1, &gsize);
+		break;
 #endif
 	}
 
@@ -2037,6 +2073,9 @@ display_version(void)
 #ifndef NO_PACK_SUPPORT
 #include "unpack.c"
 #endif
+#ifndef NO_XZ_SUPPORT
+#include "unxz.c"
+#endif
 
 static ssize_t
 read_retry(int fd, void *buf, size_t sz)
diff --git a/usr.bin/gzip/unxz.c b/usr.bin/gzip/unxz.c
new file mode 100644
index 000000000000..02ce0ed2e217
--- /dev/null
+++ b/usr.bin/gzip/unxz.c
@@ -0,0 +1,113 @@
+
+#include <stdarg.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <lzma.h>
+
+/*
+ * Decompress the xz stream on descriptor `i' to descriptor `o'.
+ * `pre'/`prelen' are stream bytes the caller already consumed from `i';
+ * they are decoded ahead of anything read here.  A non-NULL `bytes_in'
+ * receives the number of compressed bytes read, `prelen' included.
+ * Returns the number of bytes written to `o'.  Failures are reported
+ * via maybe_err()/maybe_errx(); -1 is returned if those ever return.
+ */
+static off_t
+unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in)
+{
+	lzma_stream strm = LZMA_STREAM_INIT;
+	lzma_action action = LZMA_RUN;
+	lzma_ret ret;
+	uint8_t ibuf[BUFSIZ], obuf[BUFSIZ];
+	const char *msg;
+	off_t bytes_out = 0, unused;
+	ssize_t n;
+
+	/* Callers that do not want the input count pass NULL. */
+	if (bytes_in == NULL)
+		bytes_in = &unused;
+
+	/* Rebuild the head of the stream: pre-read bytes, then new data. */
+	if (prelen != 0)
+		memcpy(ibuf, pre, prelen);
+	n = read(i, ibuf + prelen, sizeof(ibuf) - prelen);
+	if (n == -1) {
+		maybe_err("Read failed");
+		return -1;
+	}
+	strm.next_in = ibuf;
+	strm.avail_in = prelen + (size_t)n;
+	*bytes_in = (off_t)strm.avail_in;
+
+	if (lzma_stream_decoder(&strm, UINT64_MAX, 0) != LZMA_OK) {
+		maybe_errx("Can't initialize decoder");
+		return -1;
+	}
+	strm.next_out = obuf;
+	strm.avail_out = sizeof(obuf);
+
+	for (;;) {
+		if (strm.avail_in == 0 && action == LZMA_RUN) {
+			n = read(i, ibuf, sizeof(ibuf));
+			if (n == -1) {
+				maybe_err("Read failed");
+				lzma_end(&strm);
+				return -1;
+			}
+			if (n == 0)
+				action = LZMA_FINISH;	/* end of input */
+			strm.next_in = ibuf;
+			strm.avail_in = (size_t)n;
+			*bytes_in += n;
+		}
+		ret = lzma_code(&strm, action);
+		/* Flush first: emit as much output as possible on error. */
+		if (strm.avail_out == 0 || ret != LZMA_OK) {
+			const size_t write_size = sizeof(obuf) - strm.avail_out;
+
+			if (write(o, obuf, write_size) != (ssize_t)write_size)
+				maybe_err("write failed");
+			strm.next_out = obuf;
+			strm.avail_out = sizeof(obuf);
+			bytes_out += (off_t)write_size;
+		}
+		if (ret == LZMA_OK)
+			continue;
+		if (ret == LZMA_STREAM_END) {
+			/* Anything following the stream is garbage. */
+			if (strm.avail_in == 0 && read(i, ibuf, 1) == 0) {
+				lzma_end(&strm);
+				return bytes_out;
+			}
+			ret = LZMA_DATA_ERROR;
+		}
+		break;
+	}
+	lzma_end(&strm);
+	switch (ret) {
+	case LZMA_MEM_ERROR:
+		msg = strerror(ENOMEM);
+		break;
+	case LZMA_FORMAT_ERROR:
+		msg = "File format not recognized";
+		break;
+	case LZMA_OPTIONS_ERROR:
+		msg = "Unsupported compression options";
+		break;
+	case LZMA_DATA_ERROR:
+		msg = "File is corrupt";
+		break;
+	case LZMA_BUF_ERROR:
+		msg = "Unexpected end of input";
+		break;
+	case LZMA_MEMLIMIT_ERROR:
+		msg = "Reached memory limit";
+		break;
+	default:
+		msg = "Internal error (bug)";
+		break;
+	}
+	maybe_errx("%s", msg);
+	return -1;
+}