From 8bcb00aa88e30786342581ddcc13c52445abfd5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Duval?= Date: Mon, 7 Jun 2004 21:23:10 +0000 Subject: [PATCH] Added compress-4.0 git-svn-id: file:///srv/svn/repos/haiku/trunk/current@7815 a95241bf-73f2-0310-859d-f6bbb57e9c96 --- src/apps/bin/compress/Jamfile | 7 + src/apps/bin/compress/README | 283 +++++ src/apps/bin/compress/compress.1 | 264 +++++ src/apps/bin/compress/compress.c | 1775 ++++++++++++++++++++++++++++++ src/apps/bin/compress/usermem.sh | 83 ++ 5 files changed, 2412 insertions(+) create mode 100644 src/apps/bin/compress/Jamfile create mode 100644 src/apps/bin/compress/README create mode 100644 src/apps/bin/compress/compress.1 create mode 100644 src/apps/bin/compress/compress.c create mode 100644 src/apps/bin/compress/usermem.sh diff --git a/src/apps/bin/compress/Jamfile b/src/apps/bin/compress/Jamfile new file mode 100644 index 0000000000..e07dcd5432 --- /dev/null +++ b/src/apps/bin/compress/Jamfile @@ -0,0 +1,7 @@ +SubDir OBOS_TOP src apps bin compress ; + +SubDirCcFlags -DSACREDMEM=256000 -D_FPOS_T -DUSERMEM=2097152 -DBEOS=1 ; + +BinCommand compress : + compress.c + ; diff --git a/src/apps/bin/compress/README b/src/apps/bin/compress/README new file mode 100644 index 0000000000..4eb44d1882 --- /dev/null +++ b/src/apps/bin/compress/README @@ -0,0 +1,283 @@ + + @(#)README 1.1 86/09/25 SMI; from UCB 5.3 85/09/17 + +Compress version 4.0 improvements over 3.0: + o compress() speedup (10-50%) by changing division hash to xor + o decompress() speedup (5-10%) + o Memory requirements reduced (3-30%) + o Stack requirements reduced to less than 4kb + o Removed 'Big+Fast' compress code (FBITS) because of compress speedup + o Portability mods for Z8000 and PC/XT (but not zeus 3.2) + o Default to 'quiet' mode + o Unification of 'force' flags + o Manual page overhaul + o Portability enhancement for M_XENIX + o Removed text on #else and #endif + o Added "-V" switch to print version and options + o Added 
#defines for SIGNED_COMPARE_SLOW + o Added Makefile and "usermem" program + o Removed all floating point computations + o New programs: [deleted] + +The "usermem" script attempts to determine the maximum process size. Some +editing of the script may be necessary (see the comments). [It should work +fine on 4.3 bsd.] If you can't get it to work at all, just create file +"USERMEM" containing the maximum process size in decimal. + +The following preprocessor symbols control the compilation of "compress.c": + + o USERMEM Maximum process memory on the system + o SACREDMEM Amount to reserve for other processes + o SIGNED_COMPARE_SLOW Unsigned compare instructions are faster + o NO_UCHAR Don't use "unsigned char" types + o BITS Overrules default set by USERMEM-SACREDMEM + o vax Generate inline assembler + o interdata Defines SIGNED_COMPARE_SLOW + o M_XENIX Makes arrays < 65536 bytes each + o pdp11 BITS=12, NO_UCHAR + o z8000 BITS=12 + o pcxt BITS=12 + o BSD4_2 Allow long filenames ( > 14 characters) & + Call setlinebuf(stderr) + +The difference "usermem-sacredmem" determines the maximum BITS that can be +specified with the "-b" flag. + +memory: at least BITS +------ -- ----- ---- + 433,484 16 + 229,600 15 + 127,536 14 + 73,464 13 + 0 12 + +The default is BITS=16. + +The maximum bits can be overruled by specifying "-DBITS=bits" at +compilation time. + +WARNING: files compressed on a large machine with more bits than allowed by +a version of compress on a smaller machine cannot be decompressed! Use the +"-b12" flag to generate a file on a large machine that can be uncompressed +on a 16-bit machine. + +The output of compress 4.0 is fully compatible with that of compress 3.0. +In other words, the output of compress 4.0 may be fed into uncompress 3.0 or +the output of compress 3.0 may be fed into uncompress 4.0. + +The output of compress 4.0 is not compatible with that of +compress 2.0. However, compress 4.0 still accepts the output of +compress 2.0.
To generate output that is compatible with compress +2.0, use the undocumented "-C" flag. + + -from mod.sources, submitted by vax135!petsd!joe (Joe Orost), 8/1/85 +-------------------------------- + +Enclosed is compress version 3.0 with the following changes: + +1. "Block" compression is performed. After the BITS run out, the + compression ratio is checked every so often. If it is decreasing, + the table is cleared and a new set of substrings are generated. + + This makes the output of compress 3.0 not compatible with that of + compress 2.0. However, compress 3.0 still accepts the output of + compress 2.0. To generate output that is compatible with compress + 2.0, use the undocumented "-C" flag. + +2. A quiet "-q" flag has been added for use by the news system. + +3. The character chaining has been deleted and the program now uses + hashing. This improves the speed of the program, especially + during decompression. Other speed improvements have been made, + such as using putc() instead of fwrite(). + +4. A large table is used on large machines when a relatively small + number of bits is specified. This saves much time when compressing + for a 16-bit machine on a 32-bit virtual machine. Note that the + speed improvement only occurs when the input file is > 30000 + characters, and the -b BITS is less than or equal to the cutoff + described below. + +Most of these changes were made by James A. Woods (ames!jaw). Thank you +James! + +To compile compress: + + cc -O -DUSERMEM=usermem -o compress compress.c + +Where "usermem" is the amount of physical user memory available (in bytes). +If any physical memory is to be reserved for other processes, put in +"-DSACREDMEM sacredmem", where "sacredmem" is the amount to be reserved. + +The difference "usermem-sacredmem" determines the maximum BITS that can be +specified, and the cutoff bits where the large+fast table is used. 
+ +memory: at least BITS cutoff +------ -- ----- ---- ------ + 4,718,592 16 13 + 2,621,440 16 12 + 1,572,864 16 11 + 1,048,576 16 10 + 631,808 16 -- + 329,728 15 -- + 178,176 14 -- + 99,328 13 -- + 0 12 -- + +The default memory size is 750,000 which gives a maximum BITS=16 and no +large+fast table. + +The maximum bits can be overruled by specifying "-DBITS=bits" at +compilation time. + +If your machine doesn't support unsigned characters, define "NO_UCHAR" +when compiling. + +If your machine has "int" as 16-bits, define "SHORT_INT" when compiling. + +After compilation, move "compress" to a standard executable location, such +as /usr/local. Then: + cd /usr/local + ln compress uncompress + ln compress zcat + +On machines that have a fixed stack size (such as Perkin-Elmer), set the +stack to at least 12kb. ("setstack compress 12" on Perkin-Elmer). + +Next, install the manual (compress.l). + cp compress.l /usr/man/manl + cd /usr/man/manl + ln compress.l uncompress.l + ln compress.l zcat.l + + - or - + + cp compress.l /usr/man/man1/compress.1 + cd /usr/man/man1 + ln compress.1 uncompress.1 + ln compress.1 zcat.1 + + regards, + petsd!joe + +Here is a note from the net: + +>From hplabs!pesnta!amd!turtlevax!ken Sat Jan 5 03:35:20 1985 +Path: ames!hplabs!pesnta!amd!turtlevax!ken +From: ken@turtlevax.UUCP (Ken Turkowski) +Newsgroups: net.sources +Subject: Re: Compress release 3.0 : sample Makefile +Organization: CADLINC, Inc. @ Menlo Park, CA + +In the compress 3.0 source recently posted to mod.sources, there is a +#define variable which can be set for optimum performance on a machine +with a large amount of memory. A program (usermem) to calculate the +useable amount of physical user memory is enclosed, as well as a sample +4.2bsd Vax Makefile for compress. + +Here is the README file from the previous version of compress (2.0): + +>Enclosed is compress.c version 2.0 with the following bugs fixed: +> +>1. 
The packed files produced by compress are different on different +> machines and dependent on the vax sysgen option. +> The bug was in the different byte/bit ordering on the +> various machines. This has been fixed. +> +> This version is NOT compatible with the original vax posting +> unless the '-DCOMPATIBLE' option is specified to the C +> compiler. The original posting has a bug which I fixed, +> causing incompatible files. I recommend you NOT to use this +> option unless you already have a lot of packed files from +> the original posting by thomas. +>2. The exit status is not well defined (on some machines) causing the +> scripts to fail. +> The exit status is now 0,1 or 2 and is documented in +> compress.l. +>3. The function getopt() is not available in all C libraries. +> The function getopt() is no longer referenced by the +> program. +>4. Error status is not being checked on the fwrite() and fflush() calls. +> Fixed. +> +>The following enhancements have been made: +> +>1. Added facilities of "compact" into the compress program. "Pack", +> "Unpack", and "Pcat" are no longer required (no longer supplied). +>2. Installed work around for C compiler bug with "-O". +>3. Added a magic number header (\037\235). Put the bits specified +> in the file. +>4. Added "-f" flag to force overwrite of output file. +>5. Added "-c" flag and "zcat" program. 'ln compress zcat' after you +> compile. +>6. The 'uncompress' script has been deleted; simply +> 'ln compress uncompress' after you compile and it will work. +>7. Removed extra bit masking for machines that support unsigned +> characters. If your machine doesn't support unsigned characters, +> define "NO_UCHAR" when compiling. +> +>Compile "compress.c" with "-O -o compress" flags. Move "compress" to a +>standard executable location, such as /usr/local. Then: +> cd /usr/local +> ln compress uncompress +> ln compress zcat +> +>On machines that have a fixed stack size (such as Perkin-Elmer), set the +>stack to at least 12kb. 
("setstack compress 12" on Perkin-Elmer). +> +>Next, install the manual (compress.l). +> cp compress.l /usr/man/manl - or - +> cp compress.l /usr/man/man1/compress.1 +> +>Here is the README that I sent with my first posting: +> +>>Enclosed is a modified version of compress.c, along with scripts to make it +>>run identically to pack(1), unpack(1), and pcat(1). Here is what I +>>(petsd!joe) and a colleague (petsd!peora!srd) did: +>> +>>1. Removed VAX dependencies. +>>2. Changed the struct to separate arrays; saves mucho memory. +>>3. Did comparisons in unsigned, where possible. (Faster on Perkin-Elmer.) +>>4. Sorted the character next chain and changed the search to stop +>>prematurely. This saves a lot on the execution time when compressing. +>> +>>This version is totally compatible with the original version. Even though +>>lint(1) -p has no complaints about compress.c, it won't run on a 16-bit +>>machine, due to the size of the arrays. +>> +>>Here is the README file from the original author: +>> +>>>Well, with all this discussion about file compression (for news batching +>>>in particular) going around, I decided to implement the text compression +>>>algorithm described in the June Computer magazine. The author claimed +>>>blinding speed and good compression ratios. It's certainly faster than +>>>compact (but, then, what wouldn't be), but it's also the same speed as +>>>pack, and gets better compression than both of them. On 350K bytes of +>>>unix-wizards, compact took about 8 minutes of CPU, pack took about 80 +>>>seconds, and compress (herein) also took 80 seconds. But, compact and +>>>pack got about 30% compression, whereas compress got over 50%. So, I +>>>decided I had something, and that others might be interested, too. +>>> +>>>As is probably true of compact and pack (although I haven't checked), +>>>the byte order within a word is probably relevant here, but as long as +>>>you stay on a single machine type, you should be ok.
(Can anybody +>>>elucidate on this?) There are a couple of asm's in the code (extv and +>>>insv instructions), so anyone porting it to another machine will have to +>>>deal with this anyway (and could probably make it compatible with Vax +>>>byte order at the same time). Anyway, I've linted the code (both with +>>>and without -p), so it should run elsewhere. Note the longs in the +>>>code, you can take these out if you reduce BITS to <= 15. +>>> +>>>Have fun, and as always, if you make good enhancements, or bug fixes, +>>>I'd like to see them. +>>> +>>>=Spencer (thomas@utah-20, {harpo,hplabs,arizona}!utah-cs!thomas) +>> +>> regards, +>> joe +>> +>>-- +>>Full-Name: Joseph M. Orost +>>UUCP: ..!{decvax,ucbvax,ihnp4}!vax135!petsd!joe +>>US Mail: MS 313; Perkin-Elmer; 106 Apple St; Tinton Falls, NJ 07724 +>>Phone: (201) 870-5844 diff --git a/src/apps/bin/compress/compress.1 b/src/apps/bin/compress/compress.1 new file mode 100644 index 0000000000..335eedd4da --- /dev/null +++ b/src/apps/bin/compress/compress.1 @@ -0,0 +1,264 @@ +.PU +.TH COMPRESS 1 local +.SH NAME +compress, uncompress, zcat \- compress and uncompress files +.SH SYNOPSIS +.ll +8 +.B compress +[ +.B \-c +] [ +.B \-C +] [ +.B \-d +] [ +.B \-f +] [ +.B \-v +] [ +.B \-b +.I bits +] [ +.I "filename \&..." +] +.ll -8 +.br +.B uncompress +[ +.B \-c +] [ +.B \-f +] [ +.B \-v +] [ +.B \-V +] [ +.I "filename \&..." +] +.br +.B zcat +[ +.I "filename \&..." +] +.SH DESCRIPTION +Compresses the specified files or standard input. +Each file is replaced by a file with the extension +.B "\&.Z," +but only if the file got smaller. +If no files are specified, +the compression is applied to the standard input +and is written to standard output +regardless of the results. +Compressed files can be restored +to their original form by specifying the +.B \-d +option, or by running +.I uncompress +(linked to +.IR compress ), +on the +.B "\&.Z" +files or the standard input. 
+.PP +If the output file exists, it will not be overwritten unless the +.B \-f +flag is given. If +.B \-f +is not specified and +.I compress +is run in the foreground, +the user is prompted +as to whether the file should be overwritten. +.PP +If the +.B \-f +flag is given, all files specified are replaced with +.B "\&.Z" +files \- even if the file didn't get smaller. +.PP +When file names are given, the ownership (if run by root), modes, accessed +and modified times are maintained between the file and its +.B "\&.Z" +version. In this respect, +.I compress +can be used for archival purposes, yet can still be used with +.IR make "(1)" +after uncompression. +.PP +The +.B \-c +option causes the results of the compress/uncompress operation to be written +to stdout; no files are changed. The +.I zcat +program is the same as specifying +.B \-c +to +.I uncompress +(all files are unpacked and written to stdout). +.PP +.I Compress +uses the modified Lempel-Ziv algorithm described in +"A Technique for High Performance Data Compression", +Terry A. Welch, +.I "IEEE Computer" +Vol 17, No 6 (June 1984), pp 8-19. +Common substrings in the file are first replaced by 9-bit codes 257 and up. +When code 512 is reached, the algorithm switches to 10-bit codes and +continues to use more bits until the +.I bits +limit as specified by the +.B \-b +flag is reached (default 16). +.I Bits +must be between 9 and 16. The default can be changed in the source to allow +.I compress +to be run on a smaller machine. +.PP +After the +.I bits +limit is reached, +.I compress +periodically checks the compression ratio. If it is increasing, +.I compress +continues to use the codes that were previously found in the file. However, +if the compression ratio decreases, +.I compress +discards the table of substrings and rebuilds it from scratch. This allows +the algorithm to adapt to the next "block" of the file. 
The +.B \-C +(compatibility) flag prevents subdivision of the file into blocks; +this produces an output file that old versions of +.I compress +can read. +.PP +A two byte magic number is prepended to the file +to ensure that neither uncompression of random text nor recompression of +compressed text are attempted. In addition, the +.I bits +specified during +.I compress +is written to the file so that the +.B \-b +flag can be omitted for +.IR uncompress \. +.PP +.ne 8 +The amount of compression obtained depends on the size of the +input file, the amount of +.I bits +per code, and the distribution of character substrings. +Typically, text files, such as C programs, +are reduced by 50\-60%. +Compression is generally much better than that achieved by +Huffman coding (as used in +.IR pack ), +or adaptive Huffman coding +.RI ( compact ), +and takes less time to compute. +.PP +.PP +If the +.B \-v +(verbose) flag is given, then +after each file is compressed, a message is printed giving the percentage of +the input file that has been saved by compression. +.PP +If the +.B \-V +(version) flag is given, the program's version number is printed. +.PP +The exit status is normally 0; +if the last file gets bigger after compression, the exit status is 2; +if an error occurs, the exit status is 1. +.SH "SEE ALSO" +compact(1), pack(1) +.SH "DIAGNOSTICS" +Usage: compress [-cCdfvV] [-b maxbits] [file ...] +.in +8 +Invalid options were specified on the command line. +.in -8 +Missing maxbits +.in +8 +Maxbits must follow +.BR \-b \. +.in -8 +Unknown flag: +.I "\'x\';" +.in +8 +Invalid flags were specified on the command line. +.in -8 +.IR file : +not in compressed format +.in +8 +The specified file has not been compressed. +.in -8 +.IR file : +compressed with +.I xx +bits, can only handle +.I yy +bits +.in +8 +The specified file was compressed by a compress program that could handle +more +.I bits +than the current compress program. Recompress the file with a smaller +.IR bits \. 
+.in -8 +.IR file : +already has .Z suffix -- no change +.in +8 +Cannot compress a file that has a ".Z" suffix. +.IR mv "(1)" +the file to a different name and try again. +.in -8 +.IR file : +filename too long to tack on .Z +.in +8 +The specified file cannot be compressed because its filename is longer than +12 characters. +.IR mv "(1)" +the file to a different name and try again. This message does not occur on +4.2BSD systems. +.in -8 +.I file +already exists; do you wish to overwrite (y or n)? +.in +8 +Respond "y" if you want the output file to be replaced; "n" if you want it +to be left alone. +.in -8 +.IR file : +.in +8 +This message fragment is written during the processing of a file. +.in -8 +Compression: +.I "xx.xx%" +.in +8 +This message fragment gives the percentage of the input file that has been +saved by compression. +.in -8 +-- not a regular file: unchanged +.in +8 +This message fragment is written when the input file is not a regular file. +The input file is left unchanged. +.in -8 +-- has +.I xx +other links: unchanged +.in +8 +This message fragment is written when the input file has links. The input +file is left unchanged. See +.IR ln "(1)" +for more information. +.in -8 +-- file unchanged +.in +8 +This message fragment is written when no savings are achieved by +compression. The input file is left unchanged. +.in -8 +-- replaced with +.I file +.in +8 +This message fragment is written when a file has been successfully +compressed/uncompressed. +.in -8 diff --git a/src/apps/bin/compress/compress.c b/src/apps/bin/compress/compress.c new file mode 100644 index 0000000000..7c6da280c4 --- /dev/null +++ b/src/apps/bin/compress/compress.c @@ -0,0 +1,1775 @@ +/* + * Compress - data compression program + */ + +/* + * machine variants which require cc -Dmachine: pdp11, z8000, pcxt + */ + +/* + * Set USERMEM to the maximum amount of physical user memory available + * in bytes. USERMEM is used to determine the maximum BITS that can be used + * for compression.
+ * + * SACREDMEM is the amount of physical memory saved for others; compress + * will hog the rest. + */ +#ifndef SACREDMEM +#define SACREDMEM 0 +#endif + +#ifndef USERMEM +# define USERMEM 450000 /* default user memory */ +#endif + +#ifdef interdata /* (Perkin-Elmer) */ +#define SIGNED_COMPARE_SLOW /* signed compare is slower than unsigned */ +#endif + +#ifdef pdp11 +# define BITS 12 /* max bits/code for 16-bit machine */ +# define NO_UCHAR /* also if "unsigned char" functions as signed char */ +# undef USERMEM +#endif /* pdp11 */ /* don't forget to compile with -i */ + +#ifdef z8000 +# define BITS 12 +# undef vax /* weird preprocessor */ +# undef USERMEM +#endif /* z8000 */ + +#ifdef MSDOS /* Microsoft C 3.0 for MS-DOS */ +# undef USERMEM +# ifdef BIG /* then this is a large data compilation */ +# undef DEBUG /* DEBUG makes the executible too big */ +# define BITS 16 +# define XENIX_16 +# else /* this is a small model compilation */ +# define BITS 12 +# endif +#else +#undef BIG +#endif /* MSDOS */ + +#ifdef pcxt +# define BITS 12 +# undef USERMEM +#endif /* pcxt */ + +#ifdef USERMEM +# if USERMEM >= (433484+SACREDMEM) +# define PBITS 16 +# else +# if USERMEM >= (229600+SACREDMEM) +# define PBITS 15 +# else +# if USERMEM >= (127536+SACREDMEM) +# define PBITS 14 +# else +# if USERMEM >= (73464+SACREDMEM) +# define PBITS 13 +# else +# define PBITS 12 +# endif +# endif +# endif +# endif +# undef USERMEM +#endif /* USERMEM */ + +#ifdef PBITS /* Preferred BITS for this memory size */ +# ifndef BITS +# define BITS PBITS +# endif BITS +#endif /* PBITS */ + +#if BITS == 16 +# define HSIZE 69001 /* 95% occupancy */ +#endif +#if BITS == 15 +# define HSIZE 35023 /* 94% occupancy */ +#endif +#if BITS == 14 +# define HSIZE 18013 /* 91% occupancy */ +#endif +#if BITS == 13 +# define HSIZE 9001 /* 91% occupancy */ +#endif +#if BITS <= 12 +# define HSIZE 5003 /* 80% occupancy */ +#endif + +#ifdef M_XENIX /* Stupid compiler can't handle arrays with */ +# if BITS == 16 /* more 
than 65535 bytes - so we fake it */ +# define XENIX_16 +# else +# if BITS > 13 /* Code only handles BITS = 12, 13, or 16 */ +# define BITS 13 +# endif +# endif +#endif + +/* + * a code_int must be able to hold 2**BITS values of type int, and also -1 + */ +#if BITS > 15 +typedef long int code_int; +#else +typedef int code_int; +#endif + +#ifdef SIGNED_COMPARE_SLOW +typedef unsigned long int count_int; +typedef unsigned short int count_short; +#else +typedef long int count_int; +#endif + +#ifdef NO_UCHAR + typedef char char_type; +#else + typedef unsigned char char_type; +#endif /* UCHAR */ +char_type magic_header[] = { "\037\235" }; /* 1F 9D */ + +/* Defines for third byte of header */ +#define BIT_MASK 0x1f +#define BLOCK_MASK 0x80 +/* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is + a fourth header byte (for expansion). +*/ +#define INIT_BITS 9 /* initial number of bits/code */ + +#define min(a, b) ((a) > (b) ? (b) : (a)) + +/* + * compress.c - File compression ala IEEE Computer, June 1984. + * + * Authors: Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas) + * Jim McKie (decvax!mcvax!jim) + * Steve Davies (decvax!vax135!petsd!peora!srd) + * Ken Turkowski (decvax!decwrl!turtlevax!ken) + * James A. Woods (decvax!ihnp4!ames!jaw) + * Joe Orost (decvax!vax135!petsd!joe) + * + * $Header: /tmp/bonefish/open-beos/current/src/apps/bin/compress/compress.c,v 1.1 2004/06/07 21:23:09 korli Exp $ + * $Log: compress.c,v $ + * Revision 1.1 2004/06/07 21:23:09 korli + * Added compress-4.0 + * + * Revision 1.12 1996/02/24 02:45:03 cyril + * got rid of Unix.h + * + * Revision 1.11 1996/02/18 23:50:39 cyril + * final clean up for new resource policy. + * + * Revision 1.10 1996/01/25 09:53:14 dbg + * deal with the now correct posix headers + * + * Revision 1.9 1996/01/23 19:17:39 btaylor + * prep for CW8 + * + * Revision 1.8 1996/01/11 18:28:19 dbg + * deal with the new posix environment (i.e. 
we can now + * include and because they + * exist and we don't need as many workarounds for missing + * things in the posix environment). + * + * Revision 1.7 1995/12/21 18:58:15 erich + * Add macro for isascii, which doesn't really exist. + * + * Revision 1.1 1995/12/13 22:51:52 ming + * DR6 FROZEN ON 12/13/95 14:00:00 + * + * Revision 1.6 1995/11/08 22:49:32 robert + * FILE_NAME_LENGTH -> B_FILE_NAME_LENGTH + * + * Revision 1.5 1995/08/30 02:42:55 herold + * ### API ### remove all traces of int type from OS.h. Change + * FILENAME_LENGTH to FILE_NAME_LENGTH. Lotsa buck for not much + * bang, but there it is... + * + * Revision 1.4 1995/06/05 22:34:07 erich + * Attempt to make stuff in ./gnu compile...sort of. + * + * Revision 1.3 1995/02/22 19:38:43 peter + * Update 'tar' and 'compress' tools to work with new resource/data files. + * 'tar' as some glitches (when using the 'u' flag), but we'll worry about + * that later. + * + * Revision 1.2 1994/10/04 17:38:00 herold + * don't depend on /usr/include being around - use Unix.h instead + * + * Revision 1.1 1993/10/14 22:46:10 moofie + * Initial revision + * + * Revision 4.0.1 87/11/27 21:45:00 rms (Richard Stallman) + * Once copystat has run, don't delete output file on interrupt. + * Mention more flags in the Usage string. + * + * Revision 4.0 85/07/30 12:50:00 joe + * Removed ferror() calls in output routine on every output except first. + * Prepared for release to the world. + * + * Revision 3.6 85/07/04 01:22:21 joe + * Remove much wasted storage by overlaying hash table with the tables + * used by decompress: tab_suffix[1<putc] and + * added signal catcher [plus beef in writeerr()] to delete effluvia. + * + * Revision 2.0 84/08/28 22:00:00 petsd!joe + * Add check for foreground before prompting user. Insert maxbits into + * compressed file. Force file being uncompressed to end with ".Z". + * Added "-c" flag and "zcat". Prepared for release. 
+ * + * Revision 1.10 84/08/24 18:28:00 turtlevax!ken + * Will only compress regular files (no directories), added a magic number + * header (plus an undocumented -n flag to handle old files without headers), + * added -f flag to force overwriting of possibly existing destination file, + * otherwise the user is prompted for a response. Will tack on a .Z to a + * filename if it doesn't have one when decompressing. Will only replace + * file if it was compressed. + * + * Revision 1.9 84/08/16 17:28:00 turtlevax!ken + * Removed scanargs(), getopt(), added .Z extension and unlimited number of + * filenames to compress. Flags may be clustered (-Ddvb12) or separated + * (-D -d -v -b 12), or combination thereof. Modes and other status is + * copied with copystat(). -O bug for 4.2 seems to have disappeared with + * 1.8. + * + * Revision 1.8 84/08/09 23:15:00 joe + * Made it compatible with vax version, installed jim's fixes/enhancements + * + * Revision 1.6 84/08/01 22:08:00 joe + * Sped up algorithm significantly by sorting the compress chain. + * + * Revision 1.5 84/07/13 13:11:00 srd + * Added C version of vax asm routines. Changed structure to arrays to + * save much memory. Do unsigned compares where possible (faster on + * Perkin-Elmer) + * + * Revision 1.4 84/07/05 03:11:11 thomas + * Clean up the code a little and lint it. (Lint complains about all + * the regs used in the asm, but I'm not going to "fix" this.) + * + * Revision 1.3 84/07/05 02:06:54 thomas + * Minor fixes. + * + * Revision 1.2 84/07/05 00:27:27 thomas + * Add variable bit length output. 
+ * + */ +static char rcs_ident[] = "$Header: /tmp/bonefish/open-beos/current/src/apps/bin/compress/compress.c,v 1.1 2004/06/07 21:23:09 korli Exp $"; + +#include +#include +#include +#include +#include +#include + +#ifdef MSDOS +#include +#endif + +#ifdef __HOBBIT__ +# undef putc +#endif + +#define ARGVAL() (*++(*argv) || (--argc && *++argv)) + +int n_bits; /* number of bits/code */ +int maxbits = BITS; /* user settable max # bits/code */ +code_int maxcode; /* maximum code, given n_bits */ +code_int maxmaxcode = (code_int)1 << BITS; /* should NEVER generate this code */ +#ifdef COMPATIBLE /* But wrong! */ +# define MAXCODE(n_bits) ((code_int) 1 << (n_bits) - 1) +#else +# define MAXCODE(n_bits) (((code_int) 1 << (n_bits)) - 1) +#endif /* COMPATIBLE */ + +#ifdef XENIX_16 +# ifdef MSDOS + +count_int far htab0[8192]; +count_int far htab1[8192]; +count_int far htab2[8192]; +count_int far htab3[8192]; +count_int far htab4[8192]; +count_int far htab5[8192]; +count_int far htab6[8192]; +count_int far htab7[8192]; +count_int far htab8[HSIZE-65536]; +count_int far * htab[9] = { + htab0, htab1, htab2, htab3, htab4, htab5, htab6, htab7, htab8 }; + +unsigned short far code0tab[16384]; +unsigned short far code1tab[16384]; +unsigned short far code2tab[16384]; +unsigned short far code3tab[16384]; +unsigned short far code4tab[16384]; +unsigned short far * codetab[5] = { + code0tab, code1tab, code2tab, code3tab, code4tab }; + +# else + +count_int htab0[8192]; +count_int htab1[8192]; +count_int htab2[8192]; +count_int htab3[8192]; +count_int htab4[8192]; +count_int htab5[8192]; +count_int htab6[8192]; +count_int htab7[8192]; +count_int htab8[HSIZE-65536]; +count_int * htab[9] = { + htab0, htab1, htab2, htab3, htab4, htab5, htab6, htab7, htab8 }; + +unsigned short code0tab[16384]; +unsigned short code1tab[16384]; +unsigned short code2tab[16384]; +unsigned short code3tab[16384]; +unsigned short code4tab[16384]; +unsigned short * codetab[5] = { + code0tab, code1tab, code2tab, code3tab, 
code4tab }; + +# endif /* MSDOS */ + +#define htabof(i) (htab[(i) >> 13][(i) & 0x1fff]) +#define codetabof(i) (codetab[(i) >> 14][(i) & 0x3fff]) + +#else /* Normal machine */ +count_int htab [HSIZE]; +unsigned short codetab [HSIZE]; +#define htabof(i) htab[i] +#define codetabof(i) codetab[i] + +#endif /* XENIX_16 */ +code_int hsize = HSIZE; /* for dynamic table sizing */ +count_int fsize; + +/* + * To save much memory, we overlay the table used by compress() with those + * used by decompress(). The tab_prefix table is the same size and type + * as the codetab. The tab_suffix table needs 2**BITS characters. We + * get this from the beginning of htab. The output stack uses the rest + * of htab, and contains characters. There is plenty of room for any + * possible stack (stack used to be 8000 characters). + */ + +#define tab_prefixof(i) codetabof(i) + +#ifdef XENIX_16 +# ifdef MSDOS +# define tab_suffixof(i) ((char_type far *)htab[(i)>>15])[(i) & 0x7fff] +# define de_stack ((char_type far *)(htab2)) +# else +# define tab_suffixof(i) ((char_type *)htab[(i)>>15])[(i) & 0x7fff] +# define de_stack ((char_type *)(htab2)) +# endif /* MSDOS */ +#else /* Normal machine */ +# define tab_suffixof(i) ((char_type *)(htab))[i] +# define de_stack ((char_type *)&tab_suffixof((code_int)1< image (binary) mode; 2 <=> text mode */ +#else +# define PATH_SEP '/' +#endif + +#ifdef DEBUG +int verbose = 0; +#endif /* DEBUG */ +void (*bgnd_flag)(); + +int do_decomp = 0; + +/***************************************************************** + * TAG( main ) + * + * Algorithm from "A Technique for High Performance Data Compression", + * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. + * + * Usage: compress [-cdfivV] [-b bits] [file ...] + * Inputs: + * + * -c: Write output on stdout, don't remove original. + * + * -d: If given, decompression is done instead. 
+ * + * -f: Forces output file to be generated, even if one already + * exists, and even if no space is saved by compressing. + * If -f is not used, the user will be prompted if stdin is + * a tty, otherwise, the output file will not be overwritten. + * + * -i: Image mode (defined only under MS-DOS). Prevents + * conversion between UNIX text representation (LF line + * termination) in compressed form and MS-DOS text + * representation (CR-LF line termination) in uncompressed + * form. Useful with non-text files. + * + * -v: Write compression statistics + * + * -V: Write version and compilation options. + * + * -b: Parameter limits the max number of bits/code. + * + * file ...: Files to be compressed. If none specified, stdin + * is used. + * Outputs: + * file.Z: Compressed form of file with same mode, owner, and utimes + * or stdout (if stdin used as input) + * + * Assumptions: + * When filenames are given, replaces with the compressed version + * (.Z suffix) only if the file decreases in size. + * Algorithm: + * Modified Lempel-Ziv method (LZW). Basically finds common + * substrings and replaces them with a variable size code. This is + * deterministic, and can be done on the fly. Thus, the decompression + * procedure needs no input table, but tracks the way the table was built. 
+ */ + +main( argc, argv ) +register int argc; char **argv; +{ + int overwrite = 0; /* Do not overwrite unless given -f flag */ + char tempname[100]; + char **filelist, **fileptr; + char *cp, *rindex(), *malloc(); + struct stat statbuf; + extern void onintr(); + +#ifdef MSDOS + char *sufp; +#else + extern void oops(); +#endif + +#ifndef MSDOS + if ( (bgnd_flag = signal ( SIGINT, SIG_IGN )) != SIG_IGN ) { +#endif + + signal ( SIGINT, onintr ); + +#ifndef MSDOS + signal ( SIGSEGV, oops ); + } +#endif + +#ifdef COMPATIBLE + nomagic = 1; /* Original didn't have a magic number */ +#endif /* COMPATIBLE */ + + filelist = fileptr = (char **)(malloc(argc * sizeof(*argv))); + *filelist = NULL; + + if((cp = rindex(argv[0], PATH_SEP)) != 0) { + cp++; + } else { + cp = argv[0]; + } + +#ifdef MSDOS + if(strcmp(cp, "UNCOMPRE.EXE") == 0) { +#else + if(strcmp(cp, "uncompress") == 0) { +#endif + + do_decomp = 1; + +#ifdef MSDOS + } else if(strcmp(cp, "ZCAT.EXE") == 0) { +#else + } else if(strcmp(cp, "zcat") == 0) { +#endif + + do_decomp = 1; + zcat_flg = 1; + } + +#ifdef BSD4_2 + /* 4.2BSD dependent - take it out if not */ + setlinebuf( stderr ); +#endif /* BSD4_2 */ + + /* Argument Processing + * All flags are optional. + * -D => debug + * -V => print Version; debug verbose + * -d => do_decomp + * -v => unquiet + * -f => force overwrite of output file + * -n => no header: useful to uncompress old files + * -b maxbits => maxbits. If -b is specified, then maxbits MUST be + * given also. + * -c => cat all output to stdout + * -C => generate output compatible with compress 2.0. + * if a string is left, must be an input filename. 
+ */ + for (argc--, argv++; argc > 0; argc--, argv++) { + if (**argv == '-') { /* A flag argument */ + while (*++(*argv)) { /* Process all flags in this arg */ + switch (**argv) { +#ifdef DEBUG + case 'D': + debug = 1; + break; + case 'V': + verbose = 1; + version(); + break; +#else + case 'V': + version(); + break; +#endif /* DEBUG */ + +#ifdef MSDOS + case 'i': + image = 1; + break; +#endif + + case 'v': + quiet = 0; + break; + case 'd': + do_decomp = 1; + break; + case 'f': + case 'F': + overwrite = 1; + force = 1; + break; + case 'n': + nomagic = 1; + break; + case 'C': + block_compress = 0; + break; + case 'b': + if (!ARGVAL()) { + fprintf(stderr, "Missing maxbits\n"); + Usage(); + exit(1); + } + maxbits = atoi(*argv); + goto nextarg; + case 'c': + zcat_flg = 1; + break; + case 'q': + quiet = 1; + break; + default: + fprintf(stderr, "Unknown flag: '%c'; ", **argv); + Usage(); + exit(1); + } + } + } + else { /* Input file name */ + *fileptr++ = *argv; /* Build input file list */ + *fileptr = NULL; + /* process nextarg; */ + } + nextarg: continue; + } + + if(maxbits < INIT_BITS) maxbits = INIT_BITS; + if (maxbits > BITS) maxbits = BITS; + maxmaxcode = (code_int) 1 << maxbits; + + if (*filelist != NULL) { + for (fileptr = filelist; *fileptr; fileptr++) { + exit_stat = 0; + if (do_decomp != 0) { /* DECOMPRESSION */ + +#ifdef MSDOS + /* Check for .Z or XZ suffix; add one if necessary */ + cp = *fileptr + strlen(*fileptr) - 2; + if ((*cp != '.' 
&& *cp != 'X' && *cp != 'x') || + (*(++cp) != 'Z' && *cp != 'z')) { + strcpy(tempname, *fileptr); + if ((cp=rindex(tempname,'.')) == NULL) + strcat(tempname, ".Z"); + else if(*(++cp) == '\0') strcat(tempname, "Z"); + else { + *(++cp) = '\0'; + strcat(tempname, "XZ"); + } + *fileptr = tempname; + } +#else + /* Check for .Z suffix */ + if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") != 0) { + /* No .Z: tack one on */ + strcpy(tempname, *fileptr); + strcat(tempname, ".Z"); + *fileptr = tempname; + } +#endif /*MSDOS */ + + /* Open input file for decompression */ + +#ifdef MSDOS + if ((freopen(*fileptr, "rb", stdin)) == NULL) { +#else + if ((freopen(*fileptr, "r", stdin)) == NULL) { +#endif + + perror(*fileptr); continue; + } + /* Check the magic number */ + if (nomagic == 0) { + if ((getchar() != (magic_header[0] & 0xFF)) + || (getchar() != (magic_header[1] & 0xFF))) { + fprintf(stderr, "%s: not in compressed format\n", + *fileptr); + continue; + } + maxbits = getchar(); /* set -b from file */ + block_compress = maxbits & BLOCK_MASK; + maxbits &= BIT_MASK; + maxmaxcode = (code_int) 1 << maxbits; + if(maxbits > BITS) { + fprintf(stderr, + "%s: compressed with %d bits, can only handle %d bits\n", + *fileptr, maxbits, BITS); + continue; + } + } + /* Generate output filename */ + strcpy(ofname, *fileptr); + ofname[strlen(*fileptr) - 2] = '\0'; /* Strip off .Z */ + } else { /* COMPRESSION */ + +#ifdef MSDOS + cp = *fileptr + strlen(*fileptr) - 2; + if ((*cp == '.' || *cp == 'X' || *cp == 'x') && + (*(++cp) == 'Z' || *cp == 'z')) { + fprintf(stderr,"%s: already has %s suffix -- no change\n", + *fileptr,--cp); +#else + if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") == 0) { + fprintf(stderr, "%s: already has .Z suffix -- no change\n", + *fileptr); +#endif /* MSDOS */ + + continue; + } + /* Open input file for compression */ + +#ifdef MSDOS + if ((freopen(*fileptr, image == 2 ? 
"rt" : "rb", stdin)) + == NULL) { +#else + if ((freopen(*fileptr, "r", stdin)) == NULL) { +#endif + + perror(*fileptr); continue; + } + stat ( *fileptr, &statbuf ); + fsize = (long) statbuf.st_size; + /* + * tune hash table size for small files -- ad hoc, + * but the sizes match earlier #defines, which + * serve as upper bounds on the number of output codes. + */ + hsize = HSIZE; + if ( fsize < (1 << 12) ) + hsize = min ( 5003, HSIZE ); + else if ( fsize < (1 << 13) ) + hsize = min ( 9001, HSIZE ); + else if ( fsize < (1 << 14) ) + hsize = min ( 18013, HSIZE ); + else if ( fsize < (1 << 15) ) + hsize = min ( 35023, HSIZE ); + else if ( fsize < 47000 ) + hsize = min ( 50021, HSIZE ); + + /* Generate output filename */ + strcpy(ofname, *fileptr); +#if !defined BSD4_2 && !defined BEOS /* Short filenames */ + if ((cp = rindex(ofname, PATH_SEP)) != NULL) cp++; + else cp = ofname; +# ifdef MSDOS + if (zcat_flg == 0 && (sufp = rindex(cp, '.')) != NULL && + strlen(sufp) > 2) fprintf(stderr, + "%s: part of filename extension will be replaced by XZ\n", + cp); +# else + if (strlen(cp) > 12) { + fprintf(stderr,"%s: filename too long to tack on .Z\n",cp); + continue; + } +# endif +#endif /* BSD4_2 Long filenames allowed */ + +#ifdef MSDOS + if ((cp = rindex(ofname, '.')) == NULL) strcat(ofname, ".Z"); + else { + if(*(++cp) != '\0') *(++cp) = '\0'; + strcat(ofname, "XZ"); + } +#else + strcat(ofname, ".Z"); +#endif /* MSDOS */ + + } + precious = 0; + /* Check for overwrite of existing file */ + if (overwrite == 0 && zcat_flg == 0) { + if (stat(ofname, &statbuf) == 0) { + char response[2]; + response[0] = 'n'; + fprintf(stderr, "%s already exists;", ofname); +#ifndef MSDOS + if (foreground()) { +#endif + fprintf(stderr, + " do you wish to overwrite %s (y or n)? ", ofname); + fflush(stderr); + read(2, response, 2); + while (response[1] != '\n') { + if (read(2, response+1, 1) < 0) { /* Ack! 
*/ + perror("stderr"); break; + } + } +#ifndef MSDOS + } +#endif + if (response[0] != 'y') { + fprintf(stderr, "\tnot overwritten\n"); + continue; + } + } + } + if(zcat_flg == 0) { /* Open output file */ + +#ifdef MSDOS + if (freopen(ofname, do_decomp && image == 2 ? "wt" : "wb", + stdout) == NULL) { +#else + if (freopen(ofname, "w", stdout) == NULL) { +#endif + + perror(ofname); continue; + } + if(!quiet) + fprintf(stderr, "%s: ", *fileptr); + } + + /* Actually do the compression/decompression */ + if (do_decomp == 0) compress(); +#ifndef DEBUG + else decompress(); +#else + else if (debug == 0) decompress(); + else printcodes(); + if (verbose) dump_tab(); +#endif /* DEBUG */ + if(zcat_flg == 0) { + copystat(*fileptr, ofname); /* Copy stats */ + if((exit_stat == 1) || (!quiet)) + putc('\n', stderr); + } + } + } else { /* Standard input */ + if (do_decomp == 0) { + compress(); +#ifdef DEBUG + if(verbose) dump_tab(); +#endif /* DEBUG */ + if(!quiet) + putc('\n', stderr); + } else { + /* Check the magic number */ + if (nomagic == 0) { + if ((getchar()!=(magic_header[0] & 0xFF)) + || (getchar()!=(magic_header[1] & 0xFF))) { + fprintf(stderr, "stdin: not in compressed format\n"); + exit(1); + } + maxbits = getchar(); /* set -b from file */ + block_compress = maxbits & BLOCK_MASK; + maxbits &= BIT_MASK; + maxmaxcode = (code_int) 1 << maxbits; + fsize = 100000; /* assume stdin large for USERMEM */ + if(maxbits > BITS) { + fprintf(stderr, + "stdin: compressed with %d bits, can only handle %d bits\n", + maxbits, BITS); + exit(1); + } + } +#ifndef DEBUG + decompress(); +#else + if (debug == 0) decompress(); + else printcodes(); + if (verbose) dump_tab(); +#endif /* DEBUG */ + } + } + exit(exit_stat); +} + +static int offset; +long int in_count = 1; /* length of input */ +long int bytes_out; /* length of compressed output */ +long int out_count = 0; /* # of codes output (for debugging) */ + +/* + * compress stdin to stdout + * + * Algorithm: use open addressing double hashing 
(no chaining) on the + * prefix code / next character combination. We do a variant of Knuth's + * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime + * secondary probe. Here, the modular division first probe is gives way + * to a faster exclusive-or manipulation. Also do block compression with + * an adaptive reset, whereby the code table is cleared when the compression + * ratio decreases, but after the table fills. The variable-length output + * codes are re-sized at this point, and a special CLEAR code is generated + * for the decompressor. Late addition: construct the table according to + * file size for noticeable speed improvement on small files. Please direct + * questions about this implementation to ames!jaw. + */ + +compress() { + register long fcode; + register code_int i = 0; + register int c; + register code_int ent; + register code_int disp; + register code_int hsize_reg; + register int hshift; + +#ifndef COMPATIBLE + if (nomagic == 0) { + putchar(magic_header[0]); putchar(magic_header[1]); + putchar((char)(maxbits | block_compress)); + if(ferror(stdout)) + writeerr(); + } +#endif /* COMPATIBLE */ + + offset = 0; + bytes_out = 3; /* includes 3-byte header mojo */ + out_count = 0; + clear_flg = 0; + ratio = 0; + in_count = 1; + checkpoint = CHECK_GAP; + maxcode = MAXCODE(n_bits = INIT_BITS); + free_ent = ((block_compress) ? 
FIRST : 256 ); + + ent = getchar (); + + hshift = 0; + for ( fcode = (long) hsize; fcode < 65536L; fcode *= 2L ) + hshift++; + hshift = 8 - hshift; /* set hash code range bound */ + + hsize_reg = hsize; + cl_hash( (count_int) hsize_reg); /* clear hash table */ + +#ifdef SIGNED_COMPARE_SLOW + while ( (c = getchar()) != (unsigned) EOF ) { +#else + while ( (c = getchar()) != EOF ) { +#endif + +#ifdef MSDOS + if (c == '\n') in_count += image; else /* include CR if text mode */ +#endif + + in_count++; + + fcode = (long) (((long) c << maxbits) + ent); + i = (((code_int)c << hshift) ^ ent); /* xor hashing */ + + if ( htabof (i) == fcode ) { + ent = codetabof (i); + continue; + } else if ( (long)htabof (i) < 0 ) /* empty slot */ + goto nomatch; + disp = hsize_reg - i; /* secondary hash (after G. Knott) */ + if ( i == 0 ) + disp = 1; +probe: + if ( (i -= disp) < 0 ) + i += hsize_reg; + + if ( htabof (i) == fcode ) { + ent = codetabof (i); + continue; + } + if ( (long)htabof (i) > 0 ) + goto probe; +nomatch: + output ( (code_int) ent ); + out_count++; + ent = c; +#ifdef SIGNED_COMPARE_SLOW + if ( (unsigned) free_ent < (unsigned) maxmaxcode) { +#else + if ( free_ent < maxmaxcode ) { +#endif + codetabof (i) = free_ent++; /* code -> hashtable */ + htabof (i) = fcode; + } + else if ( (count_int)in_count >= checkpoint && block_compress ) + cl_block (); + } + /* + * Put out the final code. 
+ */ + output( (code_int)ent ); + out_count++; + output( (code_int)-1 ); + + /* + * Print out stats on stderr + */ + if(zcat_flg == 0 && !quiet) { +#ifdef DEBUG + fprintf( stderr, + "%ld chars in, %ld codes (%ld bytes) out, compression factor: ", + in_count, out_count, bytes_out ); + prratio( stderr, in_count, bytes_out ); + fprintf( stderr, "\n"); + fprintf( stderr, "\tCompression as in compact: " ); + prratio( stderr, in_count-bytes_out, in_count ); + fprintf( stderr, "\n"); + fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n", + free_ent - 1, n_bits ); +#else /* !DEBUG */ + fprintf( stderr, "Compression: " ); + prratio( stderr, in_count-bytes_out, in_count ); +#endif /* DEBUG */ + } + if(bytes_out > in_count) /* exit(2) if no savings */ + exit_stat = 2; + return; +} + +/***************************************************************** + * TAG( output ) + * + * Output the given code. + * Inputs: + * code: A n_bits-bit integer. If == -1, then EOF. This assumes + * that n_bits =< (long)wordsize - 1. + * Outputs: + * Outputs code to the file. + * Assumptions: + * Chars are 8 bits long. + * Algorithm: + * Maintain a BITS character long buffer (so that 8 codes will + * fit in it exactly). Use the VAX insv instruction to insert each + * code in turn. When the buffer fills up empty it and start over. + */ + +static char buf[BITS]; + +#ifndef vax +char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00}; +char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; +#endif /* vax */ + +output( code ) +code_int code; +{ +#ifdef DEBUG + static int col = 0; +#endif /* DEBUG */ + + /* + * On the VAX, it is important to have the register declarations + * in exactly the order given, or the asm will break. + */ + register int r_off = offset, bits= n_bits; + register char * bp = buf; + +#ifdef DEBUG + if ( verbose ) + fprintf( stderr, "%5d%c", code, + (col+=6) >= 74 ? 
(col = 0, '\n') : ' ' ); +#endif /* DEBUG */ + if ( code >= 0 ) { +#ifdef vax + /* VAX DEPENDENT!! Implementation on other machines is below. + * + * Translation: Insert BITS bits from the argument starting at + * offset bits from the beginning of buf. + */ + 0; /* Work around for pcc -O bug with asm and if stmt */ + asm( "insv 4(ap),r11,r10,(r9)" ); +#else /* not a vax */ +/* + * byte/bit numbering on the VAX is simulated by the following code + */ + /* + * Get to the first byte. + */ + bp += (r_off >> 3); + r_off &= 7; + /* + * Since code is always >= 8 bits, only need to mask the first + * hunk on the left. + */ + *bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off]; + bp++; + bits -= (8 - r_off); + code >>= 8 - r_off; + /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ + if ( bits >= 8 ) { + *bp++ = code; + code >>= 8; + bits -= 8; + } + /* Last bits. */ + if(bits) + *bp = code; +#endif /* vax */ + offset += n_bits; + if ( offset == (n_bits << 3) ) { + bp = buf; + bits = n_bits; + bytes_out += bits; + do + putchar(*bp++); + while(--bits); + offset = 0; + } + + /* + * If the next entry is going to be too big for the code size, + * then increase it, if possible. + */ + if ( free_ent > maxcode || (clear_flg > 0)) + { + /* + * Write the whole buffer, because the input side won't + * discover the size increase until after it has read it. + */ + if ( offset > 0 ) { + if( fwrite( buf, 1, n_bits, stdout ) != n_bits) + writeerr(); + bytes_out += n_bits; + } + offset = 0; + + if ( clear_flg ) { + maxcode = MAXCODE (n_bits = INIT_BITS); + clear_flg = 0; + } + else { + n_bits++; + if ( n_bits == maxbits ) + maxcode = maxmaxcode; + else + maxcode = MAXCODE(n_bits); + } +#ifdef DEBUG + if ( debug ) { + fprintf( stderr, "\nChange to %d bits\n", n_bits ); + col = 0; + } +#endif /* DEBUG */ + } + } else { + /* + * At EOF, write the rest of the buffer. 
+ */ + if ( offset > 0 ) + fwrite( buf, 1, (offset + 7) / 8, stdout ); + bytes_out += (offset + 7) / 8; + offset = 0; + fflush( stdout ); +#ifdef DEBUG + if ( verbose ) + fprintf( stderr, "\n" ); +#endif /* DEBUG */ + if( ferror( stdout ) ) + writeerr(); + } +} + +/* + * Decompress stdin to stdout. This routine adapts to the codes in the + * file building the "string" table on-the-fly; requiring no table to + * be stored in the compressed file. The tables used herein are shared + * with those of the compress() routine. See the definitions above. + */ + +decompress() { + +#ifdef BIG + register char_type far *stackp; +#else + register char_type *stackp; +#endif + + register int finchar; + register code_int code, oldcode, incode; + + /* + * As above, initialize the first 256 entries in the table. + */ + maxcode = MAXCODE(n_bits = INIT_BITS); + for ( code = 255; code >= 0; code-- ) { + tab_prefixof(code) = 0; + tab_suffixof(code) = (char_type)code; + } + free_ent = ((block_compress) ? FIRST : 256 ); + + finchar = oldcode = getcode(); + if(oldcode == -1) /* EOF already? */ + return; /* Get out of here */ + putchar( (char)finchar ); /* first code must be 8 bits = char */ + if(ferror(stdout)) /* Crash if can't write */ + writeerr(); + stackp = de_stack; + + while ( (code = getcode()) > -1 ) { + + if ( (code == CLEAR) && block_compress ) { + for ( code = 255; code >= 0; code-- ) + tab_prefixof(code) = 0; + clear_flg = 1; + free_ent = FIRST - 1; + if ( (code = getcode ()) == -1 ) /* O, untimely death! */ + break; + } + incode = code; + /* + * Special case for KwKwK string. 
+ */ + if ( code >= free_ent ) { + *stackp++ = finchar; + code = oldcode; + } + + /* + * Generate output characters in reverse order + */ +#ifdef SIGNED_COMPARE_SLOW + while ( ((unsigned long)code) >= ((unsigned long)256) ) { +#else + while ( code >= 256 ) { +#endif + *stackp++ = tab_suffixof(code); + code = tab_prefixof(code); + } + *stackp++ = finchar = tab_suffixof(code); + + /* + * And put them out in forward order + */ + do + putchar ( *--stackp ); + while ( stackp > de_stack ); + + /* + * Generate the new entry. + */ + if ( (code=free_ent) < maxmaxcode ) { + tab_prefixof(code) = (unsigned short)oldcode; + tab_suffixof(code) = finchar; + free_ent = code+1; + } + /* + * Remember previous code. + */ + oldcode = incode; + } + fflush( stdout ); + if(ferror(stdout)) + writeerr(); +} + +/***************************************************************** + * TAG( getcode ) + * + * Read one code from the standard input. If EOF, return -1. + * Inputs: + * stdin + * Outputs: + * code or -1 is returned. + */ + +code_int +getcode() { + /* + * On the VAX, it is important to have the register declarations + * in exactly the order given, or the asm will break. + */ + register code_int code; + static int offset = 0, size = 0; + static char_type buf[BITS]; + register int r_off, bits; + register char_type *bp = buf; + + if ( clear_flg > 0 || offset >= size || free_ent > maxcode ) { + /* + * If the next entry will be too big for the current code + * size, then we must increase the size. This implies reading + * a new buffer full, too. 
+         */
+        if ( free_ent > maxcode ) {
+            n_bits++;
+            if ( n_bits == maxbits )
+                maxcode = maxmaxcode;   /* won't get any bigger now */
+            else
+                maxcode = MAXCODE(n_bits);
+        }
+        if ( clear_flg > 0) {
+            /* a CLEAR code was seen by the caller: back to the initial width */
+            maxcode = MAXCODE (n_bits = INIT_BITS);
+            clear_flg = 0;
+        }
+        /* refill: ask for n_bits bytes, i.e. room for eight n_bits-bit codes */
+        size = fread( buf, 1, n_bits, stdin );
+        if ( size <= 0 )
+            return -1;                  /* end of file */
+        offset = 0;
+        /*
+         * Round size down to integral number of codes: size becomes a bit
+         * count, and a code may only start at an offset below it.
+         */
+        size = (size << 3) - (n_bits - 1);
+    }
+    r_off = offset;
+    bits = n_bits;
+#ifdef vax
+    asm( "extzv r10,r9,(r8),r11" );
+#else /* not a vax */
+    /*
+     * Get to the first byte.
+     */
+    bp += (r_off >> 3);
+    r_off &= 7;
+    /* Get first part (low order bits) */
+#ifdef NO_UCHAR
+    code = ((*bp++ >> r_off) & rmask[8 - r_off]) & 0xff;
+#else
+    code = (*bp++ >> r_off);
+#endif /* NO_UCHAR */
+    bits -= (8 - r_off);
+    r_off = 8 - r_off;          /* now, offset into code word */
+    /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
+    if ( bits >= 8 ) {
+#ifdef NO_UCHAR
+        code |= (*bp++ & 0xff) << r_off;
+#else
+        code |= *bp++ << r_off;
+#endif /* NO_UCHAR */
+        r_off += 8;
+        bits -= 8;
+    }
+    /* high order bits. */
+    code |= (*bp & rmask[bits]) << r_off;
+#endif /* vax */
+    /* advance the bit cursor past the code just extracted */
+    offset += n_bits;
+
+    return code;
+}
+
+/*
+ * rindex: return a pointer to the last occurrence of character c in the
+ * NUL-terminated string s, or NULL when c does not occur.  The scan stops
+ * at the terminating NUL without comparing it, so c == '\0' always
+ * yields NULL here.
+ */
+char *
+rindex(s, c)            /* For those who don't have it in libc.a */
+register char *s, c;
+{
+    char *p;
+    /* walk the whole string, remembering the most recent match */
+    for (p = NULL; *s; s++)
+        if (*s == c)
+            p = s;
+    return(p);
+}
+
+#ifdef DEBUG
+printcodes()
+{
+    /*
+     * Just print out codes from input file.  For debugging.
+     * free_ent is advanced once per code read (and reset on CLEAR) so
+     * that getcode() widens n_bits at the same points as a real
+     * decompression would.
+     */
+    code_int code;
+    int col = 0, bits;
+
+    bits = n_bits = INIT_BITS;
+    maxcode = MAXCODE(n_bits);
+    free_ent = ((block_compress) ? FIRST : 256 );
+    while ( ( code = getcode() ) >= 0 ) {
+        if ( (code == CLEAR) && block_compress ) {
+            free_ent = FIRST - 1;
+            clear_flg = 1;
+        }
+        else if ( free_ent < maxmaxcode )
+            free_ent++;
+        /* report whenever getcode() has switched to a new code width */
+        if ( bits != n_bits ) {
+            fprintf(stderr, "\nChange to %d bits\n", n_bits );
+            bits = n_bits;
+            col = 0;
+        }
+        fprintf(stderr, "%5d%c", code, (col+=6) >= 74 ?
(col = 0, '\n') : ' ' ); + } + putc( '\n', stderr ); + exit( 0 ); +} + +code_int sorttab[1<= 0) { + sorttab[codetabof(i)] = i; + } + } + first = block_compress ? FIRST : 256; + for(i = first; i < free_ent; i++) { + fprintf(stderr, "%5d: \"", i); + de_stack[--stack_top] = '\n'; + de_stack[--stack_top] = '"'; + stack_top = in_stack((htabof(sorttab[i])>>maxbits)&0xff, + stack_top); + for(ent=htabof(sorttab[i]) & ((1< 256; + ent=htabof(sorttab[ent]) & ((1<> maxbits, + stack_top); + } + stack_top = in_stack(ent, stack_top); + fwrite( &de_stack[stack_top], 1, STACK_SIZE-stack_top, stderr); + stack_top = STACK_SIZE; + } + } else if(!debug) { /* decompressing */ + + for ( i = 0; i < free_ent; i++ ) { + ent = i; + c = tab_suffixof(ent); + if ( isascii(c) && isprint(c) ) + fprintf( stderr, "%5d: %5d/'%c' \"", + ent, tab_prefixof(ent), c ); + else + fprintf( stderr, "%5d: %5d/\\%03o \"", + ent, tab_prefixof(ent), c ); + de_stack[--stack_top] = '\n'; + de_stack[--stack_top] = '"'; + for ( ; ent != NULL; + ent = (ent >= FIRST ? 
tab_prefixof(ent) : NULL) ) {
+                stack_top = in_stack(tab_suffixof(ent), stack_top);
+            }
+            fwrite( &de_stack[stack_top], 1, STACK_SIZE - stack_top, stderr );
+            stack_top = STACK_SIZE;
+        }
+    }
+}
+
+/*
+ * in_stack: push a printable rendering of character c onto de_stack, which
+ * fills downward from index stack_top, and return the new top index.
+ * Printable ASCII other than backslash, and space, are stored as-is;
+ * \n \t \b \f \r and \\ are stored as their two-character C escapes;
+ * everything else is stored as a backslash plus three octal digits.
+ * Pieces are pushed last character first, so the text reads forward from
+ * the returned index.  No bounds check is made on stack_top.
+ */
+int
+in_stack(c, stack_top)
+    register c, stack_top;
+{
+    if ( (isascii(c) && isprint(c) && c != '\\') || c == ' ' ) {
+        de_stack[--stack_top] = c;
+    } else {
+        switch( c ) {
+        case '\n': de_stack[--stack_top] = 'n'; break;
+        case '\t': de_stack[--stack_top] = 't'; break;
+        case '\b': de_stack[--stack_top] = 'b'; break;
+        case '\f': de_stack[--stack_top] = 'f'; break;
+        case '\r': de_stack[--stack_top] = 'r'; break;
+        case '\\': de_stack[--stack_top] = '\\'; break;
+        default:
+            /*
+             * Reduce to one byte first: a sign-extended (negative) value
+             * would otherwise produce characters outside '0'..'7'.
+             */
+            c &= 0xff;
+            de_stack[--stack_top] = '0' + c % 8;
+            de_stack[--stack_top] = '0' + (c / 8) % 8;
+            de_stack[--stack_top] = '0' + c / 64;
+            break;
+        }
+        de_stack[--stack_top] = '\\';
+    }
+    return stack_top;
+}
+#endif /* DEBUG */
+
+/*
+ * writeerr: report a write failure on the output and exit with status 1.
+ * The partial output file is removed, but only when this run created one:
+ * with zcat_flg set the data goes to the caller's stdout and ofname merely
+ * holds the name that would have been used, so unlinking it could delete
+ * an unrelated existing file.
+ */
+writeerr()
+{
+    perror ( ofname );
+    if ( zcat_flg == 0 )
+        unlink ( ofname );
+    exit ( 1 );
+}
+
+/*
+ * copystat: finish processing one named file.  Closes stdout (the output
+ * file), stats the input, and then either keeps the output -- giving it
+ * the input's mode, owner and times and removing the input -- or removes
+ * the output and leaves the input unchanged.  The output is rejected when
+ * the input is not a regular file, has other links, or (without -f)
+ * exit_stat is 2, meaning nothing was saved.  exit_stat is updated to
+ * reflect the outcome.
+ */
+copystat(ifname, ofname)
+char *ifname, *ofname;
+{
+    struct stat statbuf;
+    int mode;
+    struct utimbuf timep;
+
+#ifdef MSDOS
+    if (_osmajor < 3) freopen("CON","at",stdout); else  /* MS-DOS 2.xx bug */
+#endif
+
+    fclose(stdout);
+    if (stat(ifname, &statbuf)) {               /* Get stat on input file */
+        perror(ifname);
+        return;
+    }
+
+#ifndef MSDOS
+    if ((statbuf.st_mode & S_IFMT/*0170000*/) != S_IFREG/*0100000*/) {
+        /* in quiet mode the "name: " prefix has not been printed yet */
+        if(quiet)
+            fprintf(stderr, "%s: ", ifname);
+        fprintf(stderr, " -- not a regular file: unchanged");
+        exit_stat = 1;
+    } else if (statbuf.st_nlink > 1) {
+        if(quiet)
+            fprintf(stderr, "%s: ", ifname);
+        /* st_nlink need not be an int; convert explicitly for %d */
+        fprintf(stderr, " -- has %d other links: unchanged",
+                (int) (statbuf.st_nlink - 1));
+        exit_stat = 1;
+    } else if (exit_stat == 2 && (!force)) { /* No compression: remove file.Z */
+#else
+    if (exit_stat == 2 && (!force)) { /* No compression: remove file.Z */
+#endif /* MSDOS */
+
+        if(!quiet)
+            fprintf(stderr, " -- file unchanged");
+    } else {                    /* ***** Successful Compression ***** */
+        exit_stat = 0;
+        mode =
statbuf.st_mode & 07777;
+        if (chmod(ofname, mode))                /* Copy modes */
+            perror(ofname);
+
+#ifndef MSDOS
+        chown(ofname, statbuf.st_uid, statbuf.st_gid);  /* Copy ownership */
+#endif
+
+        timep.actime = statbuf.st_atime;
+        timep.modtime = statbuf.st_mtime;
+        utime(ofname, &timep);  /* Update last accessed and modified times */
+        /*
+         * The input is about to be removed, so the output must survive an
+         * interrupt from here on: onintr() leaves it alone once precious
+         * is set.
+         */
+        precious = 1;
+        fclose(stdin);
+        if (unlink(ifname)) {   /* Remove input file */
+            perror(ifname);
+        }
+        if(!quiet)
+            fprintf(stderr, " -- replaced with %s", ofname);
+        return;         /* Successful return */
+    }
+
+    /* Unsuccessful return -- one of the tests failed */
+    if (unlink(ofname)) {
+        perror(ofname);
+    }
+}
+
+#ifndef MSDOS
+/*
+ * This routine returns 1 if we are running in the foreground and stderr
+ * is a tty.  "Foreground" is judged from bgnd_flag, the SIGINT disposition
+ * that main() found in place at startup: anything other than SIG_DFL is
+ * treated as running in the background.
+ */
+foreground()
+{
+    if(bgnd_flag != SIG_DFL) {  /* background? */
+        return(0);
+    } else {                    /* foreground */
+        if(isatty(2)) {         /* and stderr is a tty */
+            return(1);
+        } else {
+            return(0);
+        }
+    }
+}
+#endif
+
+/*
+ * onintr: SIGINT handler.  Removes the unfinished output file and exits
+ * with status 1.  Nothing is removed when the output has already replaced
+ * the input (precious), nor when writing to stdout (zcat_flg): in that
+ * mode no output file was created and ofname may name an unrelated
+ * existing file.
+ */
+void
+onintr ( )
+{
+    if (!precious && zcat_flg == 0)
+        unlink ( ofname );
+    exit ( 1 );
+}
+
+#ifndef MSDOS
+/*
+ * oops: SIGSEGV handler.  A wild pointer during decompression is taken to
+ * mean corrupt input.  The partial output file is removed unless writing
+ * to stdout (zcat_flg), where no output file was created.
+ */
+void
+oops ( )        /* wild pointer -- assume bad input */
+{
+    if ( do_decomp == 1 )
+        fprintf ( stderr, "uncompress: corrupt input\n" );
+    if ( zcat_flg == 0 )
+        unlink ( ofname );
+    exit ( 1 );
+}
+#endif /* MSDOS */
+
+/*
+ * cl_block: decide whether to reset the string table during block
+ * compression.  compress() calls this once the table is full and in_count
+ * has reached checkpoint.  The input/output ratio is computed with 8
+ * fractional bits; while it keeps improving the table is kept, otherwise
+ * the hash table is cleared and a CLEAR code is sent to the decompressor.
+ */
+cl_block ()             /* table clear for block compress */
+{
+    register long int rat;
+
+    /* schedule the next check CHECK_GAP input characters from now */
+    checkpoint = in_count + CHECK_GAP;
+#ifdef DEBUG
+    if ( debug ) {
+        fprintf ( stderr, "count: %ld, ratio: ", in_count );
+        prratio ( stderr, in_count, bytes_out );
+        fprintf ( stderr, "\n");
+    }
+#endif /* DEBUG */
+
+    if(in_count > 0x007fffff) { /* shift will overflow */
+        /* scale the divisor down instead of scaling the dividend up */
+        rat = bytes_out >> 8;
+        if(rat == 0) {          /* Don't divide by zero */
+            rat = 0x7fffffff;
+        } else {
+            rat = in_count / rat;
+        }
+    } else {
+        rat = (in_count << 8) / bytes_out;      /* 8 fractional bits */
+    }
+    if ( rat > ratio ) {
+        ratio = rat;
+    } else {
+        ratio = 0;
+#ifdef DEBUG
+        if(verbose)
+            dump_tab(); /* dump string table */
+#endif
+        cl_hash ( (count_int) hsize );
+        free_ent = FIRST;
+
clear_flg = 1;
+        output ( (code_int) CLEAR );
+#ifdef DEBUG
+        if(debug)
+            fprintf ( stderr, "clear\n" );
+#endif /* DEBUG */
+    }
+}
+
+/*
+ * cl_hash: mark the first hsize entries of htab as empty by storing -1 in
+ * each.  The table is filled from the top down, sixteen entries per pass
+ * of the unrolled loop, and the final loop handles the remainder of fewer
+ * than sixteen.  Under XENIX_16 htab is a set of separate segments and the
+ * outer loop visits them in steps of 8192 entries.
+ * The do-while body always runs once, so hsize is expected to be at
+ * least 16 (the callers in this file pass the hash table size).
+ */
+cl_hash(hsize)          /* reset code table */
+    register count_int hsize;
+{
+
+#ifdef XENIX_16
+    register j;
+    register long k = hsize;
+
+#  ifdef MSDOS
+    register count_int far *htab_p;
+#  else
+    register count_int *htab_p;
+#  endif /* MSDOS */
+
+#else /* Normal machine */
+    register count_int *htab_p = htab+hsize;    /* one past the last entry */
+#endif /* XENIX_16 */
+
+    register long i;
+    register long m1 = -1;      /* the "empty slot" marker */
+
+#ifdef XENIX_16
+    for(j=0; j<=8 && k>=0; j++,k-=8192) {
+        i = 8192;
+        if(k < 8192) {
+            i = k;
+        }
+        htab_p = &(htab[j][i]);
+        i -= 16;
+        if(i > 0) {
+#else
+    i = hsize - 16;
+#endif
+    do {                        /* might use Sys V memset(3) here */
+        *(htab_p-16) = m1;
+        *(htab_p-15) = m1;
+        *(htab_p-14) = m1;
+        *(htab_p-13) = m1;
+        *(htab_p-12) = m1;
+        *(htab_p-11) = m1;
+        *(htab_p-10) = m1;
+        *(htab_p-9) = m1;
+        *(htab_p-8) = m1;
+        *(htab_p-7) = m1;
+        *(htab_p-6) = m1;
+        *(htab_p-5) = m1;
+        *(htab_p-4) = m1;
+        *(htab_p-3) = m1;
+        *(htab_p-2) = m1;
+        *(htab_p-1) = m1;
+        htab_p -= 16;
+    } while ((i -= 16) >= 0);
+#ifdef XENIX_16
+        }
+    }
+#endif
+    /* i is now in -16..-1; i + 16 entries are still unset below htab_p */
+    for ( i += 16; i > 0; i-- )
+        *--htab_p = m1;
+}
+
+/*
+ * prratio: print num/den on stream as a percentage with two decimals,
+ * e.g. "37.50%", preceded by '-' when num is negative and the rounded
+ * value is not zero.  Only integer arithmetic is used, arranged so that
+ * no intermediate value exceeds 32 bits:
+ *   - a small num is scaled up before dividing;
+ *   - a large num with a large den scales den down instead;
+ *   - a large num with den < 10000 (where den / 10000 would be zero) is
+ *     split into whole part and remainder.
+ * A den of zero or less prints 100.00% rather than dividing by zero.
+ */
+prratio(stream, num, den)
+FILE *stream;
+long int num, den;
+{
+
+#ifdef DEBUG
+    register long q;            /* permits |result| > 655.36% */
+#else
+    register int q;             /* Doesn't need to be long */
+#endif
+    register long whole;        /* integer part of num / den */
+    int negative = 0;
+
+    /* work on the magnitude so the overflow tests below cover both signs */
+    if (num < 0) {
+        negative = 1;
+        num = -num;
+    }
+
+    if (den <= 0) {
+        q = 10000;                      /* nothing to divide by */
+    } else if (num <= 214748L) {        /* 2147483647/10000 */
+        q = 10000L * num / den;         /* Long calculations, though */
+    } else if (den >= 10000L) {
+        q = num / (den / 10000L);
+    } else {
+        /* (num % den) < 10000 here, so the product stays below 10^8 */
+        whole = num / den;
+        if (whole > 214747L)            /* keep whole * 10000 in range */
+            whole = 214747L;
+        q = whole * 10000L + (num % den) * 10000L / den;
+    }
+    if (negative && q != 0)
+        putc('-', stream);
+    fprintf(stream, "%d.%02d%%", (int)(q / 100), (int)(q % 100));
+}
+
+/*
+ * version: print the program identification string followed by the list
+ * of compile-time options that were in effect, all on stderr.
+ */
+version()
+{
+    fprintf(stderr, "%s\n", rcs_ident);
+    fprintf(stderr, "Options: ");
+#ifdef vax
+    fprintf(stderr, "vax, ");
+#endif
+#ifdef NO_UCHAR
+    fprintf(stderr, "NO_UCHAR, ");
+#endif
+#ifdef SIGNED_COMPARE_SLOW
+    fprintf(stderr, "SIGNED_COMPARE_SLOW, ");
+#endif
+#ifdef MSDOS
+    fprintf(stderr, "MSDOS, ");
+#endif
+#ifdef
XENIX_16
+    fprintf(stderr, "XENIX_16, ");
+#endif
+#ifdef COMPATIBLE
+    fprintf(stderr, "COMPATIBLE, ");
+#endif
+#ifdef DEBUG
+    fprintf(stderr, "DEBUG, ");
+#endif
+#ifdef BSD4_2
+    fprintf(stderr, "BSD4_2, ");
+#endif
+    fprintf(stderr, "BITS = %d\n", BITS);
+}
diff --git a/src/apps/bin/compress/usermem.sh b/src/apps/bin/compress/usermem.sh
new file mode 100644
index 0000000000..ba63b8b423
--- /dev/null
+++ b/src/apps/bin/compress/usermem.sh
@@ -0,0 +1,83 @@
+#! /bin/sh
+#
+#	@(#)usermem.sh 1.1 86/09/25 SMI; from UCB 5.4 85/09/17
+#
+# This shell script snoops around to find the maximum amount of available
+# user memory and prints it in bytes (prints 0 and exits 1 if unknown).
+# KMEM, UNIX and CLICKSIZE are needed only if /usr/adm/messages is absent;
+# they can be set on the command line, if desired, e.g. UNIX=/unix
+KMEM=/dev/kmem		# User needs read access to KMEM
+UNIX=
+# VAX			CLICKSIZE=512,	UNIX=/vmunix
+# PDP-11		CLICKSIZE=64,	UNIX=/unix
+# CADLINC 68000		CLICKSIZE=4096,	UNIX=/unix
+# Perkin-Elmer 3205	CLICKSIZE=4096,	UNIX=/edition7
+# Perkin-Elmer all others, CLICKSIZE=2048, UNIX=/edition7
+CLICKSIZE=512
+eval "$@"		# apply NAME=value overrides given as arguments
+
+if test -n "$UNIX"
+then
+    :			# User must have specified it already.
+elif test -r /vmunix
+then
+    UNIX=/vmunix
+    if [ -r /bin/sun2 ] && /bin/sun2
+    then
+        CLICKSIZE=2048		# Sun-2
+    elif [ -r /bin/sun3 ] && /bin/sun3
+    then
+        CLICKSIZE=8192		# Sun-3
+    else
+        CLICKSIZE=512		# Probably VAX
+    fi
+elif test -r /edition7
+then
+    UNIX=/edition7
+    CLICKSIZE=2048		# Perkin-Elmer: change to 4096 on a 3205
+elif test -r /unix
+then
+    UNIX=/unix			# Could be anything
+fi
+
+SIZE=0
+# messages: probably the most transportable (last field of last "avail" line)
+if test -r /usr/adm/messages && test -s /usr/adm/messages
+then
+    SIZE=$(grep avail /usr/adm/messages | sed -n '$s/.*[ ]//p')
+fi
+
+if test "0$SIZE" -le 0	# no SIZE in /usr/adm/messages
+then
+    if test -r "$KMEM"	# Readable KMEM
+    then
+        if test -n "$UNIX"
+        then
+            SIZE=$(echo maxmem/D | adb "$UNIX" "$KMEM" | sed -n '$s/.*[ ]//p')
+            if test "0$SIZE" -le 0	# no maxmem symbol: fall back to physmem
+            then
+                SIZE=$(echo physmem/D | adb "$UNIX" "$KMEM" | sed -n '$s/.*[ ]//p')
+            fi
+            SIZE=$(expr "0$SIZE" '*' "$CLICKSIZE")	# clicks -> bytes
+        fi
+    fi
+fi
+
+case "$UNIX" in
+    /vmunix)		# Assume 4.2bsd: check for resource limits
+        MAXSIZE=$(csh -c limit | awk 'BEGIN	{ MAXSIZE = 1000000 }
+/datasize|memoryuse/ && NF == 3	{ if ($2 < MAXSIZE) MAXSIZE = $2 }
+END	{ print MAXSIZE * 1000 }')
+        if test -n "$MAXSIZE" && test "$MAXSIZE" -lt "0$SIZE"	# SIZE may be empty
+        then
+            SIZE=$MAXSIZE
+        fi
+        ;;
+esac
+
+if test "0$SIZE" -le 0
+then
+    echo 0;exit 1
+else
+    echo "$SIZE"
+fi