Upgrade to Gawk 2.15.4.

1994-02-17 01:21:51 +00:00 · 1994-02-17 01:21:51 +00:00 · dff3317321
parent 652a63ee06
commit dff3317321
27 changed files with 7395 additions and 4436 deletions
--- a/gnu/usr.bin/gawk/NEWS
+++ b/gnu/usr.bin/gawk/NEWS
@ -1,3 +1,53 @@
+Changes from 2.15.3 to 2.15.4
+-----------------------------
+
+Lots of lint fixes, and do_sprintf made mostly ANSI C compatible.
+
+Man page updated and edited.
+
+Copyrights updated.
+
+Arrays now grow dynamically, initially scaling up by an order of magnitude
+  and then doubling, up to ~ 64K.  This should keep gawk's performance
+  graceful under heavy load.
+
+New `delete array' feature added.  Only documented in the man page.
+
+Switched to dfa and regex suites from grep-2.0. These offer the ability to
+  move to POSIX regexps in the next release.
+
+Disabled GNU regex ops.
+
+Research awk -m option now recognized. It does nothing in gawk, since gawk
+  has no static limits.  Only documented in the man page.
+
+New bionic (faster, better, stronger than before) hashing function.
+
+Bug fix in argument handling. `gawk -X' now notices there was no program.
+  Additional bug fixes to make --compat and --lint work again.
+
+Many changes for 16-bit cleanliness.
+
+Add explicit alloca(0) in io.c to recover space from C alloca.
+
+Fixed file descriptor leak in io.c.
+
+The --version option now follows the GNU coding standards and exits.
+
+Fixed several prototypes in protos.h.
+
+Several tests updated. On Solaris, warn that the out? tests will fail.
+
+Configuration files for SunOS with cc and Solaris 2.x added.
+
+Improved error messages in awk.y on gawk extensions if do_unix or do_compat.
+
+INSTALL file added.
+
+Fixed Atari Makefile and several VMS specific changes.
+
+Better conversion of numbers to strings on systems with broken sprintfs.
+
 Changes from 2.15.2 to 2.15.3
 -----------------------------

--- a/gnu/usr.bin/gawk/PROBLEMS
+++ b/gnu/usr.bin/gawk/PROBLEMS
@ -3,4 +3,8 @@ Hopefully they will all be fixed in the next major release of gawk.

 Please keep in mind that the code is still undergoing significant evolution.

-1. Gawk's printf is probably still not POSIX compliant.
+1. The interactions with the lexer and yyerror need reworking. It is possible
+   to get line numbers that are one line off if --compat or --posix is
+   true and either `next file' or `delete array' is used.
+
+   Really the whole lexical analysis stuff needs reworking.
--- a/gnu/usr.bin/gawk/README
+++ b/gnu/usr.bin/gawk/README
@ -10,7 +10,7 @@ See the installation instructions, below.

 Known problems are given in the PROBLEMS file.  Work to be done is
 described briefly in the FUTURES file.  Verified ports are listed in
-the PORTS file.  Changes in this version are summarized in the CHANGES file.
+the PORTS file.  Changes in this version are summarized in the NEWS file.
 Please read the LIMITATIONS and ACKNOWLEDGMENT files.

 Read the file POSIX for a discussion of how the standard says comparisons
@ -28,6 +28,8 @@ INSTALLATION:

 Check whether there is a system-specific README file for your system.

+A quick overview of the installation process is in the file INSTALL.
+
 Makefile.in may need some tailoring.  The only changes necessary should
 be to change installation targets or to change compiler flags.
 The changes to make in Makefile.in are commented and should be obvious.
@ -69,7 +71,7 @@ problem.

 PRINTING THE MANUAL

-The 'support' directory contains texinfo.tex 2.65, which will be necessary
+The 'support' directory contains texinfo.tex 2.115, which will be necessary
 for printing the manual, and the texindex.c program from the texinfo
 distribution which is also necessary.  See the makefile for the steps needed
 to get a DVI file from the manual.
@ -93,7 +95,7 @@ INTERNET:	david@cs.dal.ca

 Arnold Robbins
 1736 Reindeer Drive
-Atlanta, GA, 30329, USA
+Atlanta, GA, 30329-3528, USA

 INTERNET:	arnold@skeeve.atl.ga.us
 UUCP:		{ gatech, emory, emoryu1 }!skeeve!arnold
@ -115,8 +117,10 @@ VMS:

 Atari ST:
 	Michal Jaegermann
-	NTOMCZAK@vm.ucs.UAlberta.CA  (e-mail only)
+	michal@gortel.phys.ualberta.ca (e-mail only)

 OS/2:
 	Kai Uwe Rommel
 	rommel@ars.muc.de (e-mail only)
+	Darrel Hankerson
+	hankedr@mail.auburn.edu (e-mail only)
--- a/gnu/usr.bin/gawk/VERSION
+++ b/gnu/usr.bin/gawk/VERSION
@ -1 +1 @@
-2.15.3
+2.15.4
--- a/gnu/usr.bin/gawk/array.c
+++ b/gnu/usr.bin/gawk/array.c
@ -3,7 +3,7 @@
 */

 /* 
- * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
 * 
 * This file is part of GAWK, the GNU implementation of the
 * AWK Progamming Language.
@ -24,12 +24,27 @@
 */

 #ifndef lint
-static char rcsid[] = "$Id: array.c,v 1.3 1993/11/13 02:26:15 jtc Exp $";
-#endif /* not lint */
+static char rcsid[] = "$Id: array.c,v 1.4 1994/02/17 01:21:57 jtc Exp $";
+#endif
+
+/*
+ * Tree walks (``for (iggy in foo)'') and array deletions use expensive
+ * linear searching.  So what we do is start out with small arrays and
+ * grow them as needed, so that our arrays are hopefully small enough,
+ * most of the time, that they're pretty full and we're not looking at
+ * wasted space.
+ *
+ * The decision is made to grow the array if the average chain length is
+ * ``too big''. This is defined as the total number of entries in the table
+ * divided by the size of the array being greater than some constant.
+ */
+
+#define AVG_CHAIN_MAX	10   /* don't want to linear search more than this */

 #include "awk.h"

 static NODE *assoc_find P((NODE *symbol, NODE *subs, int hash1));
+static void grow_table P((NODE *symbol));

 NODE *
 concat_exp(tree)
@ -88,7 +103,7 @@ NODE *symbol;

 	if (symbol->var_array == 0)
 		return;
-	for (i = 0; i < HASHSIZE; i++) {
+	for (i = 0; i < symbol->array_size; i++) {
 		for (bucket = symbol->var_array[i]; bucket; bucket = next) {
 			next = bucket->ahnext;
 			unref(bucket->ahname);
@ -97,17 +112,25 @@ NODE *symbol;
 		}
 		symbol->var_array[i] = 0;
 	}
+	free(symbol->var_array);
+	symbol->var_array = NULL;
+	symbol->array_size = symbol->table_size = 0;
 }

 /*
 * calculate the hash function of the string in subs
 */
 unsigned int
-hash(s, len)
-register char *s;
+hash(s, len, hsize)
+register const char *s;
 register size_t len;
+unsigned long hsize;
 {
-	register unsigned long h = 0, g;
+	register unsigned long h = 0;
+
+#ifdef this_is_really_slow
+
+	register unsigned long g;

 	while (len--) {
 		h = (h << 4) + *s++;
@ -117,10 +140,84 @@ register size_t len;
 			h = h ^ g;
 		}
 	}
-	if (h < HASHSIZE)
-		return h;
-	else
-		return h%HASHSIZE;
+
+#else /* this_is_really_slow */
+/*
+ * This is INCREDIBLY ugly, but fast.  We break the string up into 8 byte
+ * units.  On the first time through the loop we get the "leftover bytes"
+ * (strlen % 8).  On every other iteration, we perform 8 HASHC's so we handle
+ * all 8 bytes.  Essentially, this saves us 7 cmp & branch instructions.  If
+ * this routine is heavily used enough, it's worth the ugly coding.
+ *
+ * OZ's original sdbm hash, copied from Margo Seltzer's db package.
+ *
+ */
+
+/* Even more speed: */
+/* #define HASHC   h = *s++ + 65599 * h */
+/* Because 65599 = pow(2,6) + pow(2,16) - 1 we multiply by shifts */
+#define HASHC   htmp = (h << 6);  \
+		h = *s++ + htmp + (htmp << 10) - h
+
+	unsigned long htmp;
+
+	h = 0;
+
+#if defined(VAXC)
+/*	
+ * [This was an implementation of "Duff's Device", but it has been
+ * redone, separating the switch for extra iterations from the loop.
+ * This is necessary because the DEC VAX-C compiler is STOOPID.]
+ */
+	switch (len & (8 - 1)) {
+	case 7:		HASHC;
+	case 6:		HASHC;
+	case 5:		HASHC;
+	case 4:		HASHC;
+	case 3:		HASHC;
+	case 2:		HASHC;
+	case 1:		HASHC;
+	default:	break;
+	}
+
+	if (len > (8 - 1)) {
+		register size_t loop = len >> 3;
+		do {
+			HASHC;
+			HASHC;
+			HASHC;
+			HASHC;
+			HASHC;
+			HASHC;
+			HASHC;
+			HASHC;
+		} while (--loop);
+	}
+#else /* !VAXC */
+	/* "Duff's Device" for those who can handle it */
+	if (len > 0) {
+		register size_t loop = (len + 8 - 1) >> 3;
+
+		switch (len & (8 - 1)) {
+		case 0:
+			do {	/* All fall throughs */
+				HASHC;
+		case 7:		HASHC;
+		case 6:		HASHC;
+		case 5:		HASHC;
+		case 4:		HASHC;
+		case 3:		HASHC;
+		case 2:		HASHC;
+		case 1:		HASHC;
+			} while (--loop);
+		}
+	}
+#endif /* !VAXC */
+#endif /* this_is_really_slow - not */
+
+	if (h >= hsize)
+		h %= hsize;
+	return h;
 }

 /*
@ -162,7 +259,7 @@ NODE *symbol, *subs;
 	if (symbol->var_array == 0)
 		return 0;
 	subs = concat_exp(subs);	/* concat_exp returns a string node */
-	hash1 = hash(subs->stptr, subs->stlen);
+	hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size);
 	if (assoc_find(symbol, subs, hash1) == NULL) {
 		free_temp(subs);
 		return 0;
@ -187,17 +284,16 @@ NODE *symbol, *subs;
 	register NODE *bucket;

 	(void) force_string(subs);
-	hash1 = hash(subs->stptr, subs->stlen);

-	if (symbol->var_array == 0) {	/* this table really should grow
-					 * dynamically */
-		size_t size;
-
-		size = sizeof(NODE *) * HASHSIZE;
-		emalloc(symbol->var_array, NODE **, size, "assoc_lookup");
-		memset((char *)symbol->var_array, 0, size);
+	if (symbol->var_array == 0) {
 		symbol->type = Node_var_array;
+		symbol->array_size = symbol->table_size = 0;	/* sanity */
+		grow_table(symbol);
+		hash1 = hash(subs->stptr, subs->stlen,
+				(unsigned long) symbol->array_size);
 	} else {
+		hash1 = hash(subs->stptr, subs->stlen,
+				(unsigned long) symbol->array_size);
 		bucket = assoc_find(symbol, subs, hash1);
 		if (bucket != NULL) {
 			free_temp(subs);
@ -209,6 +305,17 @@ NODE *symbol, *subs;
 	if (do_lint && subs->stlen == 0)
 		warning("subscript of array `%s' is null string",
 			symbol->vname);
+
+	/* first see if we would need to grow the array, before installing */
+	symbol->table_size++;
+	if ((symbol->flags & ARRAYMAXED) == 0
+	    && symbol->table_size/symbol->array_size > AVG_CHAIN_MAX) {
+		grow_table(symbol);
+		/* have to recompute hash value for new size */
+		hash1 = hash(subs->stptr, subs->stlen,
+				(unsigned long) symbol->array_size);
+	}
+
 	getnode(bucket);
 	bucket->type = Node_ahash;
 	if (subs->flags & TEMP)
@ -244,7 +351,7 @@ NODE *symbol, *tree;
 	if (symbol->var_array == 0)
 		return;
 	subs = concat_exp(tree);	/* concat_exp returns string node */
-	hash1 = hash(subs->stptr, subs->stlen);
+	hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size);

 	last = NULL;
 	for (bucket = symbol->var_array[hash1]; bucket; last = bucket, bucket = bucket->ahnext)
@ -260,6 +367,14 @@ NODE *symbol, *tree;
 	unref(bucket->ahname);
 	unref(bucket->ahvalue);
 	freenode(bucket);
+	symbol->table_size--;
+	if (symbol->table_size <= 0) {
+		memset(symbol->var_array, '\0',
+			sizeof(NODE *) * symbol->array_size);
+		symbol->table_size = symbol->array_size = 0;
+		free(symbol->var_array);
+		symbol->var_array = NULL;
+	}
 }

 void
@ -267,12 +382,12 @@ assoc_scan(symbol, lookat)
 NODE *symbol;
 struct search *lookat;
 {
-	if (!symbol->var_array) {
+	if (symbol->var_array == NULL) {
 		lookat->retval = NULL;
 		return;
 	}
 	lookat->arr_ptr = symbol->var_array;
-	lookat->arr_end = lookat->arr_ptr + HASHSIZE;	/* added */
+	lookat->arr_end = lookat->arr_ptr + symbol->array_size;
 	lookat->bucket = symbol->var_array[0];
 	assoc_next(lookat);
 }
@ -295,3 +410,77 @@ struct search *lookat;
 	}
 	return;
 }
+
+/*
+ * grow_table --- move an array's hash table up to the next size in sizes[],
+ * rehashing existing entries; at the largest size it only sets ARRAYMAXED.
+ */
+
+static void
+grow_table(symbol)
+NODE *symbol;
+{
+	NODE **old, **new, *chain, *next;
+	int i, j;
+	unsigned long hash1;
+	unsigned long oldsize, newsize;
+	/*
+	 * This is an array of primes. We grow the table by an order of
+	 * magnitude each time (not just doubling) so that growing is a
+	 * rare operation. We expect, on average, that it won't happen
+	 * more than twice.  The final size is also chosen to be small
+	 * enough so that MS-DOG mallocs can handle it. When things are
+	 * very large (> 8K), we just double more or less, instead of
+	 * just jumping from 8K to 64K.
+	 */
+	static long sizes[] = { 13, 127, 1021, 8191, 16381, 32749, 65497 };
+
+	/* find next biggest hash size */
+	oldsize = symbol->array_size;
+	newsize = oldsize;	/* stays equal if no bigger prime is left */
+	for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) {
+		if (oldsize < sizes[i]) {
+			newsize = sizes[i];
+			break;
+		}
+	}
+
+	if (newsize == oldsize) {	/* table already at max (!) */
+		symbol->flags |= ARRAYMAXED;
+		return;
+	}
+
+	/* allocate new table */
+	emalloc(new, NODE **, newsize * sizeof(NODE *), "grow_table");
+	memset(new, '\0', newsize * sizeof(NODE *));
+
+	/* brand new hash table, set things up and return */
+	if (symbol->var_array == NULL) {
+		symbol->table_size = 0;
+		goto done;
+	}
+
+	/* old hash table there, move stuff to new, free old */
+	old = symbol->var_array;
+	for (i = 0; i < oldsize; i++) {
+		if (old[i] == NULL)
+			continue;
+		for (chain = old[i]; chain != NULL; chain = next) {
+			next = chain->ahnext;	/* save before relinking */
+			hash1 = hash(chain->ahname->stptr,
+					chain->ahname->stlen, newsize);
+			/* remove from old list, add to head of new chain */
+			chain->ahnext = new[hash1];
+			new[hash1] = chain;
+		}
+	}
+	free(old);
+
+done:
+	/*
+	 * note that symbol->table_size does not change if an old array,
+	 * and is explicitly set to 0 if a new one.
+	 */
+	symbol->var_array = new;
+	symbol->array_size = newsize;
+}
--- a/gnu/usr.bin/gawk/awk.1
+++ b/gnu/usr.bin/gawk/awk.1
@ -1,8 +1,8 @@
-.\"	$Id: awk.1,v 1.3 1993/11/13 02:26:18 jtc Exp $	-*- nroff -*-
+.\"	$Id: awk.1,v 1.4 1994/02/17 01:21:59 jtc Exp $	-*- nroff -*-
 .ds PX \s-1POSIX\s+1
 .ds UX \s-1UNIX\s+1
 .ds AN \s-1ANSI\s+1
-.TH GAWK 1 "Nov 4 1993" "Free Software Foundation" "Utility Commands"
+.TH GAWK 1 "Dec 24 1993" "Free Software Foundation" "Utility Commands"
 .SH NAME
 gawk \- pattern scanning and processing language
 .SH SYNOPSIS
@ -72,6 +72,11 @@ option.
 Each
 .B \-W
 option has a corresponding GNU style long option, as detailed below.
+Arguments to GNU style long options are either joined with the option
+by an
+.B =
+sign, with no intervening spaces, or they may be provided in the
+next command line argument.
 .PP
 .I Gawk
 accepts the following options.
@ -115,6 +120,26 @@ Multiple
 (or
 .BR \-\^\-file )
 options may be used.
+.TP
+.PD 0
+.BI \-mf= NNN
+.TP
+.BI \-mr= NNN
+Set various memory limits to the value
+.IR NNN .
+The
+.B f
+flag sets the maximum number of fields, and the
+.B r
+flag sets the maximum record size.  These two flags and the
+.B \-m
+option are from the AT&T Bell Labs research version of \*(UX
+.IR awk .
+They are ignored by
+.IR gawk ,
+since
+.I gawk
+has no pre-defined limits.
 .TP \w'\fB\-\^\-copyright\fR'u+1n
 .PD 0
 .B "\-W compat"
@ -159,6 +184,8 @@ the error output.
 .B \-\^\-usage
 Print a relatively short summary of the available options on
 the error output.
+Per the GNU Coding Standards, these options cause an immediate,
+successful exit.
 .TP
 .PD 0
 .B "\-W lint"
@ -249,6 +276,8 @@ This is useful mainly for knowing if the current copy of
 on your system
 is up to date with respect to whatever the Free Software Foundation
 is distributing.
+Per the GNU Coding Standards, these options cause an immediate,
+successful exit.
 .TP
 .B \-\^\-
 Signal the end of options. This is useful to allow further arguments to the
@ -256,7 +285,13 @@ AWK program itself to start with a ``\-''.
 This is mainly for consistency with the argument parsing convention used
 by most other \*(PX programs.
 .PP
-Any other options are flagged as illegal, but are otherwise ignored.
+In compatibility mode,
+any other options are flagged as illegal, but are otherwise ignored.
+In normal operation, as long as program text has been supplied, unknown
+options are passed on to the AWK program in the
+.B ARGV
+array for processing.  This is particularly useful for running AWK
+programs via the ``#!'' executable interpreter mechanism.
 .SH AWK PROGRAM EXECUTION
 .PP
 An AWK program consists of a sequence of pattern-action statements
@ -271,23 +306,23 @@ and optional function definitions.
 .I Gawk
 first reads the program source from the
 .IR program-file (s)
-if specified, or from the first non-option argument on the command line.
+if specified,
+from arguments to
+.BR "\-W source=" ,
+or from the first non-option argument on the command line.
 The
 .B \-f
-option may be used multiple times on the command line.
+and
+.B "\-W source="
+options may be used multiple times on the command line.
 .I Gawk
 will read the program text as if all the
 .IR program-file s
+and command line source texts
 had been concatenated together.  This is useful for building libraries
 of AWK functions, without having to include them in each new AWK
-program that uses them.  To use a library function in a file from a
-program typed in on the command line, specify
-.B /dev/tty
-as one of the
-.IR program-file s,
-type your program, and end it with a
-.B ^D
-(control-d).
+program that uses them.  It also provides the ability to mix library
+functions with command line programs.
 .PP
 The environment variable
 .B AWKPATH
@ -303,11 +338,13 @@ option contains a ``/'' character, no path search is performed.
 .I Gawk
 executes AWK programs in the following order.
 First,
+all variable assignments specified via the
+.B \-v
+option are performed.
+Next,
 .I gawk
 compiles the program into an internal form.
-Next, all variable assignments specified via the
-.B \-v
-option are performed.  Then,
+Then,
 .I gawk
 executes the code in the
 .B BEGIN
@ -360,8 +397,8 @@ block(s) (if any).
 AWK variables are dynamic; they come into existence when they are
 first used. Their values are either floating-point numbers or strings,
 or both,
-depending upon how they are used. AWK also has one dimension
-arrays; multiply dimensioned arrays may be simulated.
+depending upon how they are used. AWK also has one dimensional
+arrays; arrays with multiple dimensions may be simulated.
 Several pre-defined variables are set as a program
 runs; these will be described as needed and summarized below.
 .SS Fields
@ -436,6 +473,7 @@ cause the value of
 .B $0
 to be recomputed, with the fields being separated by the value of
 .BR OFS .
+References to negative numbered fields cause a fatal error.
 .SS Built-in Variables
 .PP
 AWK's built-in variables are:
@ -483,7 +521,7 @@ If a system error occurs either doing a redirection for
 during a read for
 .BR getline ,
 or during a
-.BR close ,
+.BR close() ,
 then
 .B ERRNO
 will contain
@ -650,6 +688,9 @@ loop to iterate over all the elements of an array.
 An element may be deleted from an array using the
 .B delete
 statement.
+The
+.B delete
+statement may also be used to delete the entire contents of an array.
 .SS Variable Typing And Conversion
 .PP
 Variables and fields
@ -686,7 +727,7 @@ b = a ""
 .PP
 the variable
 .B b
-has a value of \fB"12"\fR and not \fB"12.00"\fR.
+has a string value of \fB"12"\fR and not \fB"12.00"\fR.
 .PP
 .I Gawk
 performs comparisons as follows:
@ -815,7 +856,8 @@ the third. Only one of the second and third patterns is evaluated.
 .PP
 The 
 .IB pattern1 ", " pattern2
-form of an expression is called a range pattern.
+form of an expression is called a
+.IR "range pattern" .
 It matches all input records starting with a line that matches
 .IR pattern1 ,
 and continuing until a record that matches
@ -988,6 +1030,7 @@ as follows:
 \fBbreak\fR
 \fBcontinue\fR
 \fBdelete \fIarray\^\fB[\^\fIindex\^\fB]\fR
+\fBdelete \fIarray\^\fR
 \fBexit\fR [ \fIexpression\fR ]
 \fB{ \fIstatements \fB}
 .fi
@ -1052,10 +1095,20 @@ Prints the current record.
 .TP
 .BI print " expr-list"
 Prints expressions.
+Each expression is separated by the value of the
+.B OFS
+variable. The output record is terminated with the value of the
+.B ORS
+variable.
 .TP
 .BI print " expr-list" " >" file
 Prints expressions on
 .IR file .
+Each expression is separated by the value of the
+.B OFS
+variable. The output record is terminated with the value of the
+.B ORS
+variable.
 .TP
 .BI printf " fmt, expr-list"
 Format and print.
@ -1084,8 +1137,9 @@ In a similar fashion,
 .IB command " | getline"
 pipes into
 .BR getline .
-.BR Getline
-will return 0 on end of file, and \-1 on an error.
+The
+.BR getline
+command will return 0 on end of file, and \-1 on an error.
 .SS The \fIprintf\fP\^ Statement
 .PP
 The AWK versions of the
@ -1159,6 +1213,7 @@ The expression should be left-justified within its field.
 The field should be padded to this width. If the number has a leading
 zero, then the field will be padded with zeros.
 Otherwise it is padded with blanks.
+This applies even to the non-numeric output formats.
 .TP
 .BI . prec
 A number indicating the maximum width of strings or digits to the right
@ -1235,7 +1290,7 @@ is the value of the
 system call.
 If there are any additional fields, they are the group IDs returned by
 .IR getgroups (2).
-(Multiple groups may not be supported on all systems.)
+Multiple groups may not be supported on all systems.
 .TP
 .B /dev/stdin
 The standard input.
@ -1366,6 +1421,9 @@ and returns the number of fields. If
 is omitted,
 .B FS
 is used instead.
+The array
+.I a
+is cleared first.
 .TP
 .BI sprintf( fmt , " expr-list" )
 prints
@ -1483,11 +1541,11 @@ the
 As in \*(AN C, all following hexadecimal digits are considered part of
 the escape sequence.
 (This feature should tell us something about language design by committee.)
-E.g., "\ex1B" is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
+E.g., \fB"\ex1B"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
 .TP
 .BI \e ddd
 The character represented by the 1-, 2-, or 3-digit sequence of octal
-digits. E.g. "\e033" is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
+digits. E.g. \fB"\e033"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
 .TP
 .BI \e c
 The literal character
@ -1568,7 +1626,15 @@ Concatenate and line number (a variation on a theme):
 .ft R
 .fi
 .SH SEE ALSO
-.IR egrep (1)
+.IR egrep (1),
+.IR getpid (2),
+.IR getppid (2),
+.IR getpgrp (2),
+.IR getuid (2),
+.IR geteuid (2),
+.IR getgid (2),
+.IR getegid (2),
+.IR getgroups (2)
 .PP
 .IR "The AWK Programming Language" ,
 Alfred V. Aho, Brian W. Kernighan, Peter J. Weinberger,
@ -1606,7 +1672,7 @@ block was run.  Applications came to depend on this ``feature.''
 When
 .I awk
 was changed to match its documentation, this option was added to
-accomodate applications that depended upon the old behavior.
+accommodate applications that depended upon the old behavior.
 (This feature was agreed upon by both the AT&T and GNU developers.)
 .PP
 The
@ -1616,7 +1682,11 @@ option for implementation specific features is from the \*(PX standard.
 When processing arguments,
 .I gawk
 uses the special option ``\fB\-\^\-\fP'' to signal the end of
-arguments, and warns about, but otherwise ignores, undefined options.
+arguments.
+In compatibility mode, it will warn about, but otherwise ignore,
+undefined options.
+In normal operation, such arguments are passed on to the AWK program for
+it to process.
 .PP
 The AWK book does not define the return value of
 .BR srand() .
@ -1712,6 +1782,11 @@ environment variable is not special.
 The use of
 .B "next file"
 to abandon processing of the current input file.
+.TP
+\(bu
+The use of
+.BI delete " array"
+to delete the entire contents of an array.
 .RE
 .PP
 The AWK book does not define the return value of the
@ -1739,7 +1814,7 @@ option is ``t'', then
 will be set to the tab character.
 Since this is a rather ugly special case, it is not the default behavior.
 This behavior also does not occur if
-.B \-Wposix
+.B "\-W posix"
 has been specified.
 .ig
 .PP
@ -1791,7 +1866,7 @@ a = length($0)
 This feature is marked as ``deprecated'' in the \*(PX standard, and
 .I gawk
 will issue a warning about its use if
-.B \-Wlint
+.B "\-W lint"
 is specified on the command line.
 .PP
 The other feature is the use of the
@ -1807,7 +1882,7 @@ equivalent to the
 statement.
 .I Gawk
 will support this usage if
-.B \-Wposix
+.B "\-W posix"
 has not been specified.
 .SH BUGS
 The
@ -1850,6 +1925,7 @@ the
 and
 .B \-e
 options of the 2.11 version are no longer recognized.
+This fact will not even be documented in the manual page for version 2.16.
 .SH AUTHORS
 The original version of \*(UX
 .I awk
@ -1873,6 +1949,8 @@ compatible with the new version of \*(UX
 The initial DOS port was done by Conrad Kwok and Scott Garfinkle.
 Scott Deifik is the current DOS maintainer.  Pat Rankin did the
 port to VMS, and Michal Jaegermann did the port to the Atari ST.
+The port to OS/2 was done by Kai Uwe Rommel, with contributions and
+help from Darrel Hankerson.
 .SH ACKNOWLEDGEMENTS
 Brian Kernighan of Bell Labs
 provided valuable assistance during testing and debugging.
--- a/gnu/usr.bin/gawk/awk.h
+++ b/gnu/usr.bin/gawk/awk.h
@ -3,7 +3,7 @@
 */

 /* 
- * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
 * 
 * This file is part of GAWK, the GNU implementation of the
 * AWK Progamming Language.
@ -22,7 +22,7 @@
 * along with GAWK; see the file COPYING.  If not, write to
 * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
- *	$Id: awk.h,v 1.3 1993/11/13 02:26:21 jtc Exp $
+ *	$Id: awk.h,v 1.4 1994/02/17 01:22:01 jtc Exp $
 */

 /* ------------------------------ Includes ------------------------------ */
@ -174,7 +174,7 @@ extern int getpgrp P((void));
 typedef struct Regexp {
 	struct re_pattern_buffer pat;
 	struct re_registers regs;
-	struct regexp dfareg;
+	struct dfa dfareg;
 	int dfa;
 } Regexp;
 #define	RESTART(rp,s)	(rp)->regs.start[0]
@ -198,6 +198,22 @@ extern int _text_read (int, char *, int);
 #define ENVSEP	':'
 #endif

+#define DEFAULT_G_PRECISION 6
+
+/* semi-temporary hack, mostly to gracefully handle VMS */
+#ifdef GFMT_WORKAROUND
+extern void sgfmt P((char *, const char *, int, int, int, double)); /* builtin.c */
+
+/* Partial fix, to handle the most common case.  */
+#define NUMTOSTR(str, format, num)					   \
+	if (strcmp((format), "%.6g") == 0 || strcmp((format), "%g") == 0)  \
+		sgfmt(str, "%*.*g", 0, 1, DEFAULT_G_PRECISION, num);	   \
+	else								   \
+		(void) sprintf(str, format, num) /* NOTE: no semi-colon! */
+#else
+#define NUMTOSTR(str, format, num) (void) sprintf(str, format, num)
+#endif /* GFMT_WORKAROUND */
+
 /* ------------------ Constants, Structures, Typedefs  ------------------ */
 #define AWKNUM	double

@ -335,6 +351,7 @@ typedef struct exp_node {
 			union {
 				struct exp_node *lptr;
 				char *param_name;
+				long ll;
 			} l;
 			union {
 				struct exp_node *rptr;
@ -347,6 +364,7 @@ typedef struct exp_node {
 			union {
 				char *name;
 				struct exp_node *extra;
+				long xl;
 			} x;
 			short number;
 			unsigned char reflags;
@ -392,8 +410,8 @@ typedef struct exp_node {
 #			define	NUM	32	/* numeric value is current */
 #			define	NUMBER	64	/* assigned as number */
 #			define	MAYBE_NUM 128	/* user input:  if NUMERIC then
-						 * a NUMBER
-						 */
+						 * a NUMBER */
+#			define	ARRAYMAXED 256	/* array is at max size */
 	char *vname;	/* variable's name */
 } NODE;

@ -426,6 +444,8 @@ typedef struct exp_node {

 #define var_value lnode
 #define var_array sub.nodep.r.av
+#define array_size sub.nodep.l.ll
+#define table_size sub.nodep.x.xl

 #define condpair lnode
 #define triggered sub.nodep.r.r_ent
@ -433,8 +453,6 @@ typedef struct exp_node {
 #ifdef DONTDEF
 int primes[] = {31, 61, 127, 257, 509, 1021, 2053, 4099, 8191, 16381};
 #endif
-/* a quick profile suggests that the following is a good value */
-#define	HASHSIZE	1021

 typedef struct for_loop_header {
 	NODE *init;
@ -628,7 +646,7 @@ extern double _msc51bug;
 /* array.c */
 extern NODE *concat_exp P((NODE *tree));
 extern void assoc_clear P((NODE *symbol));
-extern unsigned int hash P((char *s, size_t len));
+extern unsigned int hash P((const char *s, size_t len, unsigned long hsize));
 extern int in_array P((NODE *symbol, NODE *subs));
 extern NODE **assoc_lookup P((NODE *symbol, NODE *subs));
 extern void do_delete P((NODE *symbol, NODE *tree));
@ -639,7 +657,7 @@ extern char *tokexpand P((void));
 extern char nextc P((void));
 extern NODE *node P((NODE *left, NODETYPE op, NODE *right));
 extern NODE *install P((char *name, NODE *value));
-extern NODE *lookup P((char *name));
+extern NODE *lookup P((const char *name));
 extern NODE *variable P((char *name, int can_free));
 extern int yyparse P((void));
 /* builtin.c */
@ -695,8 +713,8 @@ extern struct redirect *redirect P((NODE *tree, int *errflg));
 extern NODE *do_close P((NODE *tree));
 extern int flush_io P((void));
 extern int close_io P((void));
-extern int devopen P((char *name, char *mode));
-extern int pathopen P((char *file));
+extern int devopen P((const char *name, const char *mode));
+extern int pathopen P((const char *file));
 extern NODE *do_getline P((NODE *tree));
 extern void do_nextfile P((void));
 /* iop.c */
@ -710,7 +728,7 @@ extern void load_environ P((void));
 extern char *arg_assign P((char *arg));
 extern SIGTYPE catchsig P((int sig, int code));
 /* msg.c */
-extern void err P((char *s, char *emsg, va_list argp));
+extern void err P((const char *s, const char *emsg, va_list argp));
 #if _MSC_VER == 510
 extern void msg P((va_list va_alist, ...));
 extern void warning P((va_list va_alist, ...));
@ -734,8 +752,9 @@ extern void freenode P((NODE *it));
 extern void unref P((NODE *tmp));
 extern int parse_escape P((char **string_ptr));
 /* re.c */
-extern Regexp *make_regexp P((char *s, int len, int ignorecase, int dfa));
-extern int research P((Regexp *rp, char *str, int start, int len, int need_start));
+extern Regexp *make_regexp P((char *s, size_t len, int ignorecase, int dfa));
+extern int research P((Regexp *rp, char *str, int start,
+		       size_t len, int need_start));
 extern void refree P((Regexp *rp));
 extern void reg_error P((const char *s));
 extern Regexp *re_update P((NODE *t));
--- a/gnu/usr.bin/gawk/awk.y
+++ b/gnu/usr.bin/gawk/awk.y
@ -3,7 +3,7 @@
 */

 /* 
- * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
 * 
 * This file is part of GAWK, the GNU implementation of the
 * AWK Progamming Language.
@ -21,6 +21,8 @@
 * You should have received a copy of the GNU General Public License
 * along with GAWK; see the file COPYING.  If not, write to
 * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *	$Id: awk.y,v 1.3 1994/02/17 01:22:02 jtc Exp $
 */

 %{
@ -56,9 +58,10 @@ static char *thisline = NULL;
 #define YYDEBUG_LEXER_TEXT (lexeme)
 static int param_counter;
 static char *tokstart = NULL;
-static char *token = NULL;
+static char *tok = NULL;
 static char *tokend;

+#define HASHSIZE	1021	/* this constant only used here */
 NODE *variables[HASHSIZE];

 extern char *source;
@ -291,7 +294,7 @@ regexp
 	  REGEXP '/'
 		{
 		  NODE *n;
-		  int len;
+		  size_t len;

 		  getnode(n);
 		  n->type = Node_regex;
@ -386,10 +389,19 @@ statement
 		  if ($2 && $2 == lookup("file")) {
 			if (do_lint)
 				warning("`next file' is a gawk extension");
-			else if (do_unix || do_posix)
-				yyerror("`next file' is a gawk extension");
-			 else if (! io_allowed)
-				yyerror("`next file' used in BEGIN or END action");
+			if (do_unix || do_posix) {
+				/*
+				 * can't use yyerror, since may have overshot
+				 * the source line
+				 */
+				errcount++;
+				msg("`next file' is a gawk extension");
+			}
+			if (! io_allowed) {
+				/* same thing */
+				errcount++;
+				msg("`next file' used in BEGIN or END action");
+			}
 			type = Node_K_nextfile;
 		  } else {
 			if (! io_allowed)
@ -406,6 +418,20 @@ statement
 		{ $$ = node ($3, Node_K_return, (NODE *)NULL); }
 	| LEX_DELETE NAME '[' expression_list ']' statement_term
 		{ $$ = node (variable($2,1), Node_K_delete, $4); }
+	| LEX_DELETE NAME  statement_term
+		{
+		  if (do_lint)
+			warning("`delete array' is a gawk extension");
+		  if (do_unix || do_posix) {
+			/*
+			 * can't use yyerror, since may have overshot
+			 * the source line
+			 */
+			errcount++;
+			msg("`delete array' is a gawk extension");
+		  }
+		  $$ = node (variable($2,1), Node_K_delete, (NODE *) NULL);
+		}
 	| exp statement_term
 		{ $$ = $1; }
 	;
@ -746,7 +772,7 @@ comma	: ',' opt_nls	{ yyerrok; }
 %%

 struct token {
-	char *operator;		/* text to match */
+	const char *operator;		/* text to match */
 	NODETYPE value;		/* node type */
 	int class;		/* lexical class */
 	unsigned flags;		/* # of args. allowed and compatability */
@ -820,10 +846,11 @@ yyerror(va_alist)
 va_dcl
 {
 	va_list args;
-	char *mesg = NULL;
+	const char *mesg = NULL;
 	register char *bp, *cp;
 	char *scan;
 	char buf[120];
+	static char end_of_file_line[] = "(END OF FILE)";

 	errcount++;
 	/* Find the current line in the input file */
@ -845,8 +872,8 @@ va_dcl
 		while (bp < lexend && *bp && *bp != '\n')
 			bp++;
 	} else {
-		thisline = "(END OF FILE)";
-		bp = thisline + 13;
+		thisline = end_of_file_line;
+		bp = thisline + strlen(thisline);
 	}
 	msg("%.*s", (int) (bp - thisline), thisline);
 	bp = buf;
@ -982,7 +1009,7 @@ get_src_buf()
 	return buf;
 }

-#define	tokadd(x) (*token++ = (x), token == tokend ? tokexpand() : token)
+#define	tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok)

 char *
 tokexpand()
@ -990,15 +1017,15 @@ tokexpand()
 	static int toksize = 60;
 	int tokoffset;

-	tokoffset = token - tokstart;
+	tokoffset = tok - tokstart;
 	toksize *= 2;
 	if (tokstart)
 		erealloc(tokstart, char *, toksize, "tokexpand");
 	else
 		emalloc(tokstart, char *, toksize, "tokexpand");
 	tokend = tokstart + toksize;
-	token = tokstart + tokoffset;
-	return token;
+	tok = tokstart + tokoffset;
+	return tok;
 }

 #if DEBUG
@ -1053,7 +1080,7 @@ yylex()
 		int in_brack = 0;

 		want_regexp = 0;
-		token = tokstart;
+		tok = tokstart;
 		while ((c = nextc()) != 0) {
 			switch (c) {
 			case '[':
@ -1094,7 +1121,7 @@ retry:

 	lexeme = lexptr ? lexptr - 1 : lexptr;
 	thisline = NULL;
-	token = tokstart;
+	tok = tokstart;
 	yylval.nodetypeval = Node_illegal;

 	switch (c) {
@ -1115,13 +1142,23 @@ retry:

 	case '\\':
 #ifdef RELAXED_CONTINUATION
-		if (!do_unix) {	/* strip trailing white-space and/or comment */
-			while ((c = nextc()) == ' ' || c == '\t') continue;
+		/*
+		 * This code purports to allow comments and/or whitespace
+		 * after the `\' at the end of a line used for continuation.
+		 * Use it at your own risk. We think it's a bad idea, which
+		 * is why it's not on by default.
+		 */
+		if (!do_unix) {
+			/* strip trailing white-space and/or comment */
+			while ((c = nextc()) == ' ' || c == '\t')
+				continue;
 			if (c == '#')
-				while ((c = nextc()) != '\n') if (!c) break;
+				while ((c = nextc()) != '\n')
+					if (c == '\0')
+						break;
 			pushback();
 		}
-#endif /*RELAXED_CONTINUATION*/
+#endif /* RELAXED_CONTINUATION */
 		if (nextc() == '\n') {
 			sourceline++;
 			goto retry;
@ -1307,7 +1344,7 @@ retry:
 			tokadd(c);
 		}
 		yylval.nodeval = make_str_node(tokstart,
-					token - tokstart, esc_seen ? SCAN : 0);
+					tok - tokstart, esc_seen ? SCAN : 0);
 		yylval.nodeval->flags |= PERM;
 		return YSTRING;

@ -1443,14 +1480,14 @@ retry:
 		yyerror("Invalid char '%c' in expression\n", c);

 	/* it's some type of name-type-thing.  Find its length */
-	token = tokstart;
+	tok = tokstart;
 	while (is_identchar(c)) {
 		tokadd(c);
 		c = nextc();
 	}
 	tokadd('\0');
-	emalloc(tokkey, char *, token - tokstart, "yylex");
-	memcpy(tokkey, tokstart, token - tokstart);
+	emalloc(tokkey, char *, tok - tokstart, "yylex");
+	memcpy(tokkey, tokstart, tok - tokstart);
 	pushback();

 	/* See if it is a special token.  */
@ -1653,7 +1690,7 @@ NODE *value;
 	register int bucket;

 	len = strlen(name);
-	bucket = hash(name, len);
+	bucket = hash(name, len, (unsigned long) HASHSIZE);
 	getnode(hp);
 	hp->type = Node_hashnode;
 	hp->hnext = variables[bucket];
@ -1668,13 +1705,13 @@ NODE *value;
 /* find the most recent hash node for name installed by install */
 NODE *
 lookup(name)
-char *name;
+const char *name;
 {
 	register NODE *bucket;
 	register size_t len;

 	len = strlen(name);
-	bucket = variables[hash(name, len)];
+	bucket = variables[hash(name, len, (unsigned long) HASHSIZE)];
 	while (bucket) {
 		if (bucket->hlength == len && STREQN(bucket->hname, name, len))
 			return bucket->hvalue;
@ -1738,7 +1775,7 @@ int freeit;

 	name = np->param;
 	len = strlen(name);
-	save = &(variables[hash(name, len)]);
+	save = &(variables[hash(name, len, (unsigned long) HASHSIZE)]);
 	for (bucket = *save; bucket; bucket = bucket->hnext) {
 		if (len == bucket->hlength && STREQN(bucket->hname, name, len)) {
 			*save = bucket->hnext;
--- a/gnu/usr.bin/gawk/builtin.c
+++ b/gnu/usr.bin/gawk/builtin.c
@ -3,7 +3,7 @@
 */

 /* 
- * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
 * 
 * This file is part of GAWK, the GNU implementation of the
 * AWK Programming Language.
@ -24,12 +24,11 @@
 */

 #ifndef lint
-static char rcsid[] = "$Id: builtin.c,v 1.3 1993/11/13 02:26:27 jtc Exp $";
-#endif /* not lint */
+static char rcsid[] = "$Id: builtin.c,v 1.4 1994/02/17 01:22:04 jtc Exp $";
+#endif

 #include "awk.h"

-
 #ifndef SRANDOM_PROTO
 extern void srandom P((int seed));
 #endif
@ -44,10 +43,6 @@ extern int output_is_tty;

 static NODE *sub_common P((NODE *tree, int global));

-#ifdef GFMT_WORKAROUND
-char *gfmt P((double g, int prec, char *buf));
-#endif
-
 #ifdef _CRAY
 /* Work around a problem in conversion of doubles to exact integers. */
 #include <float.h>
@ -66,20 +61,18 @@ double (*Log)() = log;
 #define Ceil(n) ceil(n)
 #endif

-#if __STDC__
-static void
-efwrite(void *ptr, size_t size, size_t count, FILE *fp,
-	char *from, struct redirect *rp,int flush)
-#else
+
+static void efwrite P((const void *ptr, size_t size, size_t count, FILE *fp,
+		       const char *from, struct redirect *rp,int flush));
+
 static void
 efwrite(ptr, size, count, fp, from, rp, flush)
-void *ptr;
+const void *ptr;
 size_t size, count;
 FILE *fp;
-char *from;
+const char *from;
 struct redirect *rp;
 int flush;
-#endif
 {
 	errno = 0;
 	if (fwrite(ptr, size, count, fp) != count)
@ -220,22 +213,41 @@ NODE *tree;
 	return tmp_number((AWKNUM) d);
 }

-/* %e and %f formats are not properly implemented.  Someone should fix them */
-/* Actually, this whole thing should be reimplemented. */
+/*
+ * do_sprintf does the sprintf function. It is one of the uglier parts of
+ * gawk.  Thanks to Michal Jaegerman for taming this beast and making it
+ * compatible with ANSI C.
+ */

 NODE *
 do_sprintf(tree)
 NODE *tree;
 {
+/* copy 'l' bytes from 's' to 'obufout' checking for space in the process */
+/* difference of pointers should be of ptrdiff_t type, but let us be kind */
 #define bchunk(s,l) if(l) {\
    while((l)>ofre) {\
+      long olen = obufout - obuf;\
      erealloc(obuf, char *, osiz*2, "do_sprintf");\
      ofre+=osiz;\
      osiz*=2;\
+      obufout = obuf + olen;\
    }\
-    memcpy(obuf+olen,s,(size_t)(l));\
-    olen+=(l);\
+    memcpy(obufout,s,(size_t)(l));\
+    obufout+=(l);\
    ofre-=(l);\
+  }
+/* copy one byte from 's' to 'obufout' checking for space in the process */
+#define bchunk_one(s) {\
+    if(ofre <= 0) {\
+      long olen = obufout - obuf;\
+      erealloc(obuf, char *, osiz*2, "do_sprintf");\
+      ofre+=osiz;\
+      osiz*=2;\
+      obufout = obuf + olen;\
+    }\
+    *obufout++ = *s;\
+    --ofre;\
  }

 	/* Is there space for something L big in the buffer? */
@ -259,15 +271,16 @@ NODE *tree;

 	NODE *r;
 	int toofew = 0;
-	char *obuf;
-	size_t osiz, ofre, olen;
-	static char chbuf[] = "0123456789abcdef";
-	static char sp[] = " ";
+	char *obuf, *obufout;
+	size_t osiz, ofre;
+	char *chbuf;
 	char *s0, *s1;
+	int cs1;
 	int n0;
 	NODE *sfmt, *arg;
 	register NODE *carg;
-	long fw, prec, lj, alt, big;
+	long fw, prec;
+	int lj, alt, big;
 	long *cur;
 	long val;
 #ifdef sun386		/* Can't cast unsigned (int/long) from ptr->value */
@ -281,16 +294,17 @@ NODE *tree;
 	char *cp;
 	char *fill;
 	double tmpval;
-	char *pr_str;
-	int ucasehex = 0;
 	char signchar = 0;
 	size_t len;
-
+	static char sp[] = " ";
+	static char zero_string[] = "0";
+	static char lchbuf[] = "0123456789abcdefx";
+	static char Uchbuf[] = "0123456789ABCDEFX";

 	emalloc(obuf, char *, 120, "do_sprintf");
+	obufout = obuf;
 	osiz = 120;
 	ofre = osiz - 1;
-	olen = 0;
 	sfmt = tree_eval(tree->lnode);
 	sfmt = force_string(sfmt);
 	carg = tree->rnode;
@ -311,17 +325,17 @@ NODE *tree;

 retry:
 		--n0;
-		switch (*s1++) {
+		switch (cs1 = *s1++) {
 		case '%':
-			bchunk("%", 1);
+			bchunk_one("%");
 			s0 = s1;
 			break;

 		case '0':
-			if (fill != sp || lj)
-				goto lose;
+			if (lj)
+				goto retry;
 			if (cur == &fw)
-				fill = "0";	/* FALL through */
+				fill = zero_string;	/* FALL through */
 		case '1':
 		case '2':
 		case '3':
@ -332,42 +346,58 @@ retry:
 		case '8':
 		case '9':
 			if (cur == 0)
-				goto lose;
-			*cur = s1[-1] - '0';
+				/* goto lose; */
+				break;
+			if (prec >= 0)  /* this happens only when we have */
+					/* a negative precision		  */
+				*cur = cs1 - '0';
 			while (n0 > 0 && *s1 >= '0' && *s1 <= '9') {
 				--n0;
 				*cur = *cur * 10 + *s1++ - '0';
 			}
+			if (prec < 0) {	/* negative precision is discarded */
+				prec = 0;
+				cur = 0;
+			}
 			goto retry;
 		case '*':
 			if (cur == 0)
-				goto lose;
+				/* goto lose; */
+				break;
 			parse_next_arg();
 			*cur = force_number(arg);
 			free_temp(arg);
 			goto retry;
 		case ' ':		/* print ' ' or '-' */
+					/* 'space' flag is ignored */
+					/* if '+' already present  */
+			if (signchar != 0) 
+				goto retry;
+			/* FALL THROUGH */
 		case '+':		/* print '+' or '-' */
-			signchar = *(s1-1);
+			signchar = cs1;
 			goto retry;
 		case '-':
-			if (lj || fill != sp)
-				goto lose;
-			lj++;
+			if (cur == &prec) {
+				prec = -1;
+				goto retry;
+			}
+			fill = sp;      /* if left justified then other */
+			lj++; 		/* filling is ignored */
 			goto retry;
 		case '.':
 			if (cur != &fw)
-				goto lose;
+				break;
 			cur = &prec;
 			goto retry;
 		case '#':
-			if (alt)
-				goto lose;
+			if (cur != &fw)
+				break;
 			alt++;
 			goto retry;
 		case 'l':
 			if (big)
-				goto lose;
+				break;
 			big++;
 			goto retry;
 		case 'c':
@ -381,44 +411,26 @@ retry:
 #endif
 				cpbuf[0] = uval;
 				prec = 1;
-				pr_str = cpbuf;
-				goto dopr_string;
+				cp = cpbuf;
+				goto pr_tail;
 			}
-			if (! prec)
+			if (prec == 0)
 				prec = 1;
 			else if (prec > arg->stlen)
 				prec = arg->stlen;
-			pr_str = arg->stptr;
-			goto dopr_string;
+			cp = arg->stptr;
+			goto pr_tail;
 		case 's':
 			parse_next_arg();
 			arg = force_string(arg);
-			if (!prec || prec > arg->stlen)
+			if (prec == 0 || prec > arg->stlen)
 				prec = arg->stlen;
-			pr_str = arg->stptr;
-
-	dopr_string:
-			if (fw > prec && !lj) {
-				while (fw > prec) {
-					bchunk(fill, 1);
-					fw--;
-				}
-			}
-			bchunk(pr_str, (int) prec);
-			if (fw > prec) {
-				while (fw > prec) {
-					bchunk(fill, 1);
-					fw--;
-				}
-			}
-			s0 = s1;
-			free_temp(arg);
-			break;
+			cp = arg->stptr;
+			goto pr_tail;
 		case 'd':
 		case 'i':
 			parse_next_arg();
 			val = (long) force_number(arg);
-			free_temp(arg);
 			if (val < 0) {
 				sgn = 1;
 				val = -val;
@ -432,30 +444,19 @@ retry:
 				*--cp = '-';
 			else if (signchar)
 				*--cp = signchar;
+			if (prec != 0)		/* ignore '0' flag if */
+				fill = sp; 	/* precision given    */
 			if (prec > fw)
 				fw = prec;
 			prec = cend - cp;
-			if (fw > prec && !lj) {
-				if (fill != sp && (*cp == '-' || signchar)) {
-					bchunk(cp, 1);
-					cp++;
-					prec--;
-					fw--;
-				}
-				while (fw > prec) {
-					bchunk(fill, 1);
-					fw--;
-				}
+			if (fw > prec && ! lj && fill != sp
+			    && (*cp == '-' || signchar)) {
+				bchunk_one(cp);
+				cp++;
+				prec--;
+				fw--;
 			}
-			bchunk(cp, (int) prec);
-			if (fw > prec) {
-				while (fw > prec) {
-					bchunk(fill, 1);
-					fw--;
-				}
-			}
-			s0 = s1;
-			break;
+			goto pr_tail;
 		case 'u':
 			base = 10;
 			goto pr_unsigned;
@ -463,140 +464,91 @@ retry:
 			base = 8;
 			goto pr_unsigned;
 		case 'X':
-			ucasehex = 1;
 		case 'x':
 			base = 16;
-			goto pr_unsigned;
 	pr_unsigned:
+			if (cs1 == 'X')
+				chbuf = Uchbuf;
+			else
+				chbuf = lchbuf;
+			if (prec != 0)		/* ignore '0' flag if */
+				fill = sp; 	/* precision given    */
 			parse_next_arg();
 			uval = (unsigned long) force_number(arg);
-			free_temp(arg);
 			do {
 				*--cp = chbuf[uval % base];
-				if (ucasehex && isalpha(*cp))
-					*cp = toupper(*cp);
 				uval /= base;
 			} while (uval);
-			if (alt && (base == 8 || base == 16)) {
+			if (alt) {
 				if (base == 16) {
-					if (ucasehex)
-						*--cp = 'X';
-					else
-						*--cp = 'x';
-				}
-				*--cp = '0';
+					*--cp = cs1;
+					*--cp = '0';
+					if (fill != sp) {
+						bchunk(cp, 2);
+						cp += 2;
+						fw -= 2;
+					}
+				} else if (base == 8)
+					*--cp = '0';
 			}
 			prec = cend - cp;
-			if (fw > prec && !lj) {
+	pr_tail:
+			if (! lj) {
 				while (fw > prec) {
-					bchunk(fill, 1);
+			    		bchunk_one(fill);
 					fw--;
 				}
 			}
 			bchunk(cp, (int) prec);
-			if (fw > prec) {
-				while (fw > prec) {
-					bchunk(fill, 1);
-					fw--;
-				}
+			while (fw > prec) {
+				bchunk_one(fill);
+				fw--;
 			}
 			s0 = s1;
-			break;
-		case 'g':
-			parse_next_arg();
-			tmpval = force_number(arg);
 			free_temp(arg);
-			chksize(fw + prec + 9);	/* 9==slop */
-
-			cp = cpbuf;
-			*cp++ = '%';
-			if (lj)
-				*cp++ = '-';
-			if (fill != sp)
-				*cp++ = '0';
-#ifndef GFMT_WORKAROUND
-			if (cur != &fw) {
-				(void) strcpy(cp, "*.*g");
-				(void) sprintf(obuf + olen, cpbuf, (int) fw, (int) prec, (double) tmpval);
-			} else {
-				(void) strcpy(cp, "*g");
-				(void) sprintf(obuf + olen, cpbuf, (int) fw, (double) tmpval);
-			}
-#else	/* GFMT_WORKAROUND */
-		      {
-			char *gptr, gbuf[120];
-#define DEFAULT_G_PRECISION 6
-			if (fw + prec + 9 > sizeof gbuf) {	/* 9==slop */
-				emalloc(gptr, char *, fw+prec+9, "do_sprintf(gfmt)");
-			} else
-				gptr = gbuf;
-			(void) gfmt((double) tmpval, cur != &fw ?
-				    (int) prec : DEFAULT_G_PRECISION, gptr);
-			*cp++ = '*',  *cp++ = 's',  *cp = '\0';
-			(void) sprintf(obuf + olen, cpbuf, (int) fw, gptr);
-			if (fill != sp && *gptr == ' ') {
-				char *p = gptr;
-				do { *p++ = '0'; } while (*p == ' ');
-			}
-			if (gptr != gbuf) free(gptr);
-		      }
-#endif	/* GFMT_WORKAROUND */
-			len = strlen(obuf + olen);
-			ofre -= len;
-			olen += len;
-			s0 = s1;
-			break;
-
-		case 'f':
-			parse_next_arg();
-			tmpval = force_number(arg);
-			free_temp(arg);
-			chksize(fw + prec + 9);	/* 9==slop */
-
-			cp = cpbuf;
-			*cp++ = '%';
-			if (lj)
-				*cp++ = '-';
-			if (fill != sp)
-				*cp++ = '0';
-			if (cur != &fw) {
-				(void) strcpy(cp, "*.*f");
-				(void) sprintf(obuf + olen, cpbuf, (int) fw, (int) prec, (double) tmpval);
-			} else {
-				(void) strcpy(cp, "*f");
-				(void) sprintf(obuf + olen, cpbuf, (int) fw, (double) tmpval);
-			}
-			len = strlen(obuf + olen);
-			ofre -= len;
-			olen += len;
-			s0 = s1;
 			break;
 		case 'e':
+		case 'f':
+		case 'g':
+		case 'E':
+		case 'G':
 			parse_next_arg();
 			tmpval = force_number(arg);
 			free_temp(arg);
 			chksize(fw + prec + 9);	/* 9==slop */
+
 			cp = cpbuf;
 			*cp++ = '%';
 			if (lj)
 				*cp++ = '-';
+			if (signchar)
+				*cp++ = signchar;
+			if (alt)
+				*cp++ = '#';
 			if (fill != sp)
 				*cp++ = '0';
-			if (cur != &fw) {
-				(void) strcpy(cp, "*.*e");
-				(void) sprintf(obuf + olen, cpbuf, (int) fw, (int) prec, (double) tmpval);
-			} else {
-				(void) strcpy(cp, "*e");
-				(void) sprintf(obuf + olen, cpbuf, (int) fw, (double) tmpval);
-			}
-			len = strlen(obuf + olen);
+			cp = strcpy(cp, "*.*") + 3;
+			*cp++ = cs1;
+			*cp   = '\0';
+			if (prec <= 0)
+				prec = DEFAULT_G_PRECISION;
+#ifndef GFMT_WORKAROUND
+			(void) sprintf(obufout, cpbuf,
+				       (int) fw, (int) prec, (double) tmpval);
+#else	/* GFMT_WORKAROUND */
+			if (cs1 == 'g' || cs1 == 'G')
+				(void) sgfmt(obufout, cpbuf, (int) alt,
+				       (int) fw, (int) prec, (double) tmpval);
+			else
+				(void) sprintf(obufout, cpbuf,
+				       (int) fw, (int) prec, (double) tmpval);
+#endif	/* GFMT_WORKAROUND */
+			len = strlen(obufout);
 			ofre -= len;
-			olen += len;
+			obufout += len;
 			s0 = s1;
 			break;
-
 		default:
-	lose:
 			break;
 		}
 		if (toofew)
@ -610,7 +562,7 @@ retry:
 		warning("too many arguments supplied for format string");
 	bchunk(s0, s1 - s0);
 	free_temp(sfmt);
-	r = make_str_node(obuf, olen, ALREADY_MALLOCED);
+	r = make_str_node(obuf, obufout - obuf, ALREADY_MALLOCED);
 	r->flags |= TEMP;
 	return r;
 }
@ -799,7 +751,8 @@ register NODE *tree;
 			else {
 				char buf[100];

-				sprintf(buf, OFMT, t1->numbr);
+				NUMTOSTR(buf, OFMT, t1->numbr);
+				free_temp(t1);
 				t1 = tmp_string(buf, strlen(buf));
 			}
 		}
@ -1128,41 +1081,75 @@ NODE *tree;
 }

 #ifdef GFMT_WORKAROUND
-	/*
-	 *	printf's %g format [can't rely on gcvt()]
-	 *		caveat: don't use as argument to *printf()!
-	 */
-char *
-gfmt(g, prec, buf)
-double g;	/* value to format */
-int prec;	/* indicates desired significant digits, not decimal places */
+/*
+ * printf's %g format [can't rely on gcvt()]
+ *	caveat: don't use as argument to *printf()!
+ * 'format' string HAS to be of "<flags>*.*g" kind, or we bomb!
+ */
+void
+sgfmt(buf, format, alt, fwidth, prec, g)
 char *buf;	/* return buffer; assumed big enough to hold result */
+const char *format;
+int alt;	/* use alternate form flag */
+int fwidth;	/* field width in a format */
+int prec;	/* indicates desired significant digits, not decimal places */
+double g;	/* value to format */
 {
-	if (g == 0.0) {
-		(void) strcpy(buf, "0");	/* easy special case */
-	} else {
-		register char *d, *e, *p;
+	char dform[40];
+	register char *gpos;
+	register char *d, *e, *p;
+	int again = 0;

-		/* start with 'e' format (it'll provide nice exponent) */
-		if (prec < 1) prec = 1;	    /* at least 1 significant digit */
-		(void) sprintf(buf, "%.*e", prec - 1, g);
-		if ((e = strchr(buf, 'e')) != 0) {	/* find exponent  */
-			int exp = atoi(e+1);		/* fetch exponent */
-			if (exp >= -4 && exp < prec) {	/* per K&R2, B1.2 */
-				/* switch to 'f' format and re-do */
-				prec -= (exp + 1);	/* decimal precision */
-				(void) sprintf(buf, "%.*f", prec, g);
-				e = buf + strlen(buf);
-			}
-			if ((d = strchr(buf, '.')) != 0) {
-				/* remove trailing zeroes and decimal point */
-				for (p = e; p > d && *--p == '0'; ) continue;
-				if (*p == '.') --p;
-				if (++p < e)	/* copy exponent and NUL */
-					while ((*p++ = *e++) != '\0') continue;
-			}
-		}
+	strncpy(dform, format, sizeof dform - 1);
+	dform[sizeof dform - 1] = '\0';
+	gpos = strrchr(dform, '.');
+
+	if (g == 0.0 && alt == 0) {	/* easy special case */
+		*gpos++ = 'd';
+		*gpos = '\0';
+		(void) sprintf(buf, dform, fwidth, 0);
+		return;
+	}
+	gpos += 2;  /* advance to location of 'g' in the format */
+
+	if (prec <= 0)	      /* negative precision is ignored */
+		prec = (prec < 0 ?  DEFAULT_G_PRECISION : 1);
+
+	if (*gpos == 'G')
+		again = 1;
+	/* start with 'e' format (it'll provide nice exponent) */
+	*gpos = 'e';
+	prec -= 1;
+	(void) sprintf(buf, dform, fwidth, prec, g);
+	if ((e = strrchr(buf, 'e')) != NULL) {	/* find exponent  */
+		int exp = atoi(e+1);		/* fetch exponent */
+		if (exp >= -4 && exp <= prec) {	/* per K&R2, B1.2 */
+			/* switch to 'f' format and re-do */
+			*gpos = 'f';
+			prec -= exp;		/* decimal precision */
+			(void) sprintf(buf, dform, fwidth, prec, g);
+			e = buf + strlen(buf);
+			while (*--e == ' ')
+				continue;
+			e += 1;
+		}
+		else if (again != 0)
+			*gpos = 'E';
+
+		/* if 'alt' in force, then trailing zeros are not removed */
+		if (alt == 0 && (d = strrchr(buf, '.')) != NULL) {
+			/* throw away an excess of precision */
+			for (p = e; p > d && *--p == '0'; )
+				prec -= 1;
+			if (d == p)
+				prec -= 1;
+			if (prec < 0)
+				prec = 0;
+			/* and do that once again */
+			again = 1;
+		}
+		if (again != 0)
+			(void) sprintf(buf, dform, fwidth, prec, g);
 	}
-	return buf;
 }
 #endif	/* GFMT_WORKAROUND */
--- a/gnu/usr.bin/gawk/dfa.c
+++ b/gnu/usr.bin/gawk/dfa.c
--- a/gnu/usr.bin/gawk/dfa.h
+++ b/gnu/usr.bin/gawk/dfa.h
@ -1,333 +1,133 @@
 /* dfa.h - declarations for GNU deterministic regexp compiler
   Copyright (C) 1988 Free Software Foundation, Inc.
-                      Written June, 1988 by Mike Haertel

-		       NO WARRANTY
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.

-  BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY
-NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW.  EXCEPT
-WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC,
-RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS"
-WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
-BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
-FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY
-AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE PROGRAM PROVE
-DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
-CORRECTION.
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.

- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M.
-STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY
-WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE
-LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR
-OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR
-DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR
-A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS
-PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

-		GENERAL PUBLIC LICENSE TO COPY
-
-  1. You may copy and distribute verbatim copies of this source file
-as you receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy a valid copyright notice "Copyright
- (C) 1988 Free Software Foundation, Inc."; and include following the
-copyright notice a verbatim copy of the above disclaimer of warranty
-and of this License.  You may charge a distribution fee for the
-physical act of transferring a copy.
-
-  2. You may modify your copy or copies of this source file or
-any portion of it, and copy and distribute such modifications under
-the terms of Paragraph 1 above, provided that you also do the following:
-
-    a) cause the modified files to carry prominent notices stating
-    that you changed the files and the date of any change; and
-
-    b) cause the whole of any work that you distribute or publish,
-    that in whole or in part contains or is a derivative of this
-    program or any part thereof, to be licensed at no charge to all
-    third parties on terms identical to those contained in this
-    License Agreement (except that you may choose to grant more extensive
-    warranty protection to some or all third parties, at your option).
-
-    c) You may charge a distribution fee for the physical act of
-    transferring a copy, and you may at your option offer warranty
-    protection in exchange for a fee.
-
-Mere aggregation of another unrelated program with this program (or its
-derivative) on a volume of a storage or distribution medium does not bring
-the other program under the scope of these terms.
-
-  3. You may copy and distribute this program or any portion of it in
-compiled, executable or object code form under the terms of Paragraphs
-1 and 2 above provided that you do the following:
-
-    a) accompany it with the complete corresponding machine-readable
-    source code, which must be distributed under the terms of
-    Paragraphs 1 and 2 above; or,
-
-    b) accompany it with a written offer, valid for at least three
-    years, to give any third party free (except for a nominal
-    shipping charge) a complete machine-readable copy of the
-    corresponding source code, to be distributed under the terms of
-    Paragraphs 1 and 2 above; or,
-
-    c) accompany it with the information you received as to where the
-    corresponding source code may be obtained.  (This alternative is
-    allowed only for noncommercial distribution and only if you
-    received the program in object code or executable form alone.)
-
-For an executable file, complete source code means all the source code for
-all modules it contains; but, as a special exception, it need not include
-source code for modules which are standard libraries that accompany the
-operating system on which the executable file runs.
-
-  4. You may not copy, sublicense, distribute or transfer this program
-except as expressly provided under this License Agreement.  Any attempt
-otherwise to copy, sublicense, distribute or transfer this program is void and
-your rights to use the program under this License agreement shall be
-automatically terminated.  However, parties who have received computer
-software programs from you with this License Agreement will not have
-their licenses terminated so long as such parties remain in full compliance.
-
-  5. If you wish to incorporate parts of this program into other free
-programs whose distribution conditions are different, write to the Free
-Software Foundation at 675 Mass Ave, Cambridge, MA 02139.  We have not yet
-worked out a simple rule that can be stated here, but we will often permit
-this.  We will be guided by the two goals of preserving the free status of
-all derivatives our free software and of promoting the sharing and reuse of
-software.
-
-
-In other words, you are welcome to use, share and improve this program.
-You are forbidden to forbid anyone else to use, share and improve
-what you give them.   Help stamp out software-hoarding!
-
-	$Id: dfa.h,v 1.3 1993/11/13 02:26:36 jtc Exp $
+	$Id: dfa.h,v 1.4 1994/02/17 01:22:09 jtc Exp $
 */
-
-#ifdef __STDC__

-#ifdef SOMEDAY
-#define ISALNUM(c) isalnum(c)
-#define ISALPHA(c) isalpha(c)
-#define ISUPPER(c) isupper(c)
-#else
-#define ISALNUM(c) (isascii(c) && isalnum(c))
-#define ISALPHA(c) (isascii(c) && isalpha(c))
-#define ISUPPER(c) (isascii(c) && isupper(c))
-#endif
+/* Written June, 1988 by Mike Haertel */

-#else /* ! __STDC__ */
-
-#define const
-
-#define ISALNUM(c) (isascii(c) && isalnum(c))
-#define ISALPHA(c) (isascii(c) && isalpha(c))
-#define ISUPPER(c) (isascii(c) && isupper(c))
-
-#endif /* ! __STDC__ */
-
-/* 1 means plain parentheses serve as grouping, and backslash
-     parentheses are needed for literal searching.
-   0 means backslash-parentheses are grouping, and plain parentheses
-     are for literal searching.  */
-#ifndef RE_NO_BK_PARENS
-#define RE_NO_BK_PARENS 1L
-#endif
-
-/* 1 means plain | serves as the "or"-operator, and \| is a literal.
-   0 means \| serves as the "or"-operator, and | is a literal.  */
-#ifndef RE_NO_BK_VBAR
-#define	RE_NO_BK_VBAR	 (1L << 1)
-#endif
-
-/* 0 means plain + or ? serves as an operator, and \+, \? are literals.
-   1 means \+, \? are operators and plain +, ? are literals.  */
-#ifndef RE_BK_PLUS_QM
-#define	RE_BK_PLUS_QM	 (1L << 2)
-#endif
-
-/* 1 means | binds tighter than ^ or $.
-   0 means the contrary.  */
-#ifndef RE_TIGHT_VBAR
-#define	RE_TIGHT_VBAR	 (1L << 3)
-#endif
-
-/* 1 means treat \n as an _OR operator
-   0 means treat it as a normal character */
-#ifndef RE_NEWLINE_OR
-#define	RE_NEWLINE_OR	 (1L << 4)
-#endif
-
-/* 0 means that a special characters (such as *, ^, and $) always have
-     their special meaning regardless of the surrounding context.
-   1 means that special characters may act as normal characters in some
-     contexts.  Specifically, this applies to:
-	^ - only special at the beginning, or after ( or |
-	$ - only special at the end, or before ) or |
-	*, +, ? - only special when not after the beginning, (, or | */
-#ifndef RE_CONTEXT_INDEP_OPS
-#define	RE_CONTEXT_INDEP_OPS	 (1L << 5)
-#endif
-
-/* 1 means that \ in a character class escapes the next character (typically
-   a hyphen.  It also is overloaded to mean that hyphen at the end of the range
-   is allowable and means that the hyphen is to be taken literally. */
-#define	RE_AWK_CLASS_HACK (1L << 6)
-
-/* Now define combinations of bits for the standard possibilities.  */
-#ifdef notdef
-#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS)
-#define RE_SYNTAX_EGREP (RE_SYNTAX_AWK | RE_NEWLINE_OR)
-#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
-#define RE_SYNTAX_EMACS 0
-#endif
-
-/* The NULL pointer. */
-#ifndef NULL
-#define NULL 0
-#endif
+/* FIXME:
+   2.  We should not export so much of the DFA internals.
+   In addition to clobbering modularity, we eat up valuable
+   name space. */

 /* Number of bits in an unsigned char. */
-#ifndef CHARBITS
 #define CHARBITS 8
-#endif

 /* First integer value that is greater than any character code. */
-#define _NOTCHAR (1 << CHARBITS)
+#define NOTCHAR (1 << CHARBITS)

 /* INTBITS need not be exact, just a lower bound. */
-#ifndef INTBITS
 #define INTBITS (CHARBITS * sizeof (int))
-#endif

 /* Number of ints required to hold a bit for every character. */
-#define _CHARSET_INTS ((_NOTCHAR + INTBITS - 1) / INTBITS)
+#define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS)

 /* Sets of unsigned characters are stored as bit vectors in arrays of ints. */
-typedef int _charset[_CHARSET_INTS];
+typedef int charclass[CHARCLASS_INTS];

 /* The regexp is parsed into an array of tokens in postfix form.  Some tokens
   are operators and others are terminal symbols.  Most (but not all) of these
   codes are returned by the lexical analyzer. */
-#ifdef __STDC__

 typedef enum
 {
-  _END = -1,			/* _END is a terminal symbol that matches the
-				   end of input; any value of _END or less in
+  END = -1,			/* END is a terminal symbol that matches the
+				   end of input; any value of END or less in
 				   the parse tree is such a symbol.  Accepting
 				   states of the DFA are those that would have
-				   a transition on _END. */
+				   a transition on END. */

  /* Ordinary character values are terminal symbols that match themselves. */

-  _EMPTY = _NOTCHAR,		/* _EMPTY is a terminal symbol that matches
+  EMPTY = NOTCHAR,		/* EMPTY is a terminal symbol that matches
 				   the empty string. */

-  _BACKREF,			/* _BACKREF is generated by \<digit>; it
+  BACKREF,			/* BACKREF is generated by \<digit>; it
 				   is not completely handled.  If the scanner
 				   detects a transition on backref, it returns
 				   a kind of "semi-success" indicating that
 				   the match will have to be verified with
 				   a backtracking matcher. */

-  _BEGLINE,			/* _BEGLINE is a terminal symbol that matches
+  BEGLINE,			/* BEGLINE is a terminal symbol that matches
 				   the empty string if it is at the beginning
 				   of a line. */

-  _ALLBEGLINE,			/* _ALLBEGLINE is a terminal symbol that
-				   matches the empty string if it is at the
-				   beginning of a line; _ALLBEGLINE applies
-				   to the entire regexp and can only occur
-				   as the first token thereof.  _ALLBEGLINE
-				   never appears in the parse tree; a _BEGLINE
-				   is prepended with _CAT to the entire
-				   regexp instead. */
-
-  _ENDLINE,			/* _ENDLINE is a terminal symbol that matches
+  ENDLINE,			/* ENDLINE is a terminal symbol that matches
 				   the empty string if it is at the end of
 				   a line. */

-  _ALLENDLINE,			/* _ALLENDLINE is to _ENDLINE as _ALLBEGLINE
-				   is to _BEGLINE. */
-
-  _BEGWORD,			/* _BEGWORD is a terminal symbol that matches
+  BEGWORD,			/* BEGWORD is a terminal symbol that matches
 				   the empty string if it is at the beginning
 				   of a word. */

-  _ENDWORD,			/* _ENDWORD is a terminal symbol that matches
+  ENDWORD,			/* ENDWORD is a terminal symbol that matches
 				   the empty string if it is at the end of
 				   a word. */

-  _LIMWORD,			/* _LIMWORD is a terminal symbol that matches
+  LIMWORD,			/* LIMWORD is a terminal symbol that matches
 				   the empty string if it is at the beginning
 				   or the end of a word. */

-  _NOTLIMWORD,			/* _NOTLIMWORD is a terminal symbol that
+  NOTLIMWORD,			/* NOTLIMWORD is a terminal symbol that
 				   matches the empty string if it is not at
 				   the beginning or end of a word. */

-  _QMARK,			/* _QMARK is an operator of one argument that
+  QMARK,			/* QMARK is an operator of one argument that
 				   matches zero or one occurences of its
 				   argument. */

-  _STAR,			/* _STAR is an operator of one argument that
+  STAR,				/* STAR is an operator of one argument that
 				   matches the Kleene closure (zero or more
 				   occurrences) of its argument. */

-  _PLUS,			/* _PLUS is an operator of one argument that
+  PLUS,				/* PLUS is an operator of one argument that
 				   matches the positive closure (one or more
 				   occurrences) of its argument. */

-  _CAT,				/* _CAT is an operator of two arguments that
+  REPMN,			/* REPMN is a lexical token corresponding
+				   to the {m,n} construct.  REPMN never
+				   appears in the compiled token vector. */
+
+  CAT,				/* CAT is an operator of two arguments that
 				   matches the concatenation of its
-				   arguments.  _CAT is never returned by the
+				   arguments.  CAT is never returned by the
 				   lexical analyzer. */

-  _OR,				/* _OR is an operator of two arguments that
+  OR,				/* OR is an operator of two arguments that
 				   matches either of its arguments. */

-  _LPAREN,			/* _LPAREN never appears in the parse tree,
+  ORTOP,			/* OR at the toplevel in the parse tree.
+				   This is used for a boyer-moore heuristic. */
+
+  LPAREN,			/* LPAREN never appears in the parse tree,
 				   it is only a lexeme. */

-  _RPAREN,			/* _RPAREN never appears in the parse tree. */
+  RPAREN,			/* RPAREN never appears in the parse tree. */

-  _SET				/* _SET and (and any value greater) is a
+  CSET				/* CSET (and any value greater) is a
 				   terminal symbol that matches any of a
 				   class of characters. */
-} _token;
+} token;

-#else /* ! __STDC__ */
-
-typedef short _token;
-
-#define _END -1
-#define _EMPTY _NOTCHAR
-#define _BACKREF (_EMPTY + 1)
-#define _BEGLINE (_EMPTY + 2)
-#define _ALLBEGLINE (_EMPTY + 3)
-#define _ENDLINE (_EMPTY + 4)
-#define _ALLENDLINE (_EMPTY + 5)
-#define _BEGWORD (_EMPTY + 6)
-#define _ENDWORD (_EMPTY + 7)
-#define _LIMWORD (_EMPTY + 8)
-#define _NOTLIMWORD (_EMPTY + 9)
-#define _QMARK (_EMPTY + 10)
-#define _STAR (_EMPTY + 11)
-#define _PLUS (_EMPTY + 12)
-#define _CAT (_EMPTY + 13)
-#define _OR (_EMPTY + 14)
-#define _LPAREN (_EMPTY + 15)
-#define _RPAREN (_EMPTY + 16)
-#define _SET (_EMPTY + 17)
-
-#endif /* ! __STDC__ */
-
-/* Sets are stored in an array in the compiled regexp; the index of the
-   array corresponding to a given set token is given by _SET_INDEX(t). */
-#define _SET_INDEX(t) ((t) - _SET)
+/* Sets are stored in an array in the compiled dfa; the index of the
+   array corresponding to a given set token is given by SET_INDEX(t). */
+#define SET_INDEX(t) ((t) - CSET)

 /* Sometimes characters can only be matched depending on the surrounding
   context.  Such context decisions depend on what the previous character
@ -347,36 +147,36 @@ typedef short _token;

   Word-constituent characters are those that satisfy isalnum().

-   The macro _SUCCEEDS_IN_CONTEXT determines whether a a given constraint
+   The macro SUCCEEDS_IN_CONTEXT determines whether a given constraint
   succeeds in a particular context.  Prevn is true if the previous character
   was a newline, currn is true if the lookahead character is a newline.
   Prevl and currl similarly depend upon whether the previous and current
   characters are word-constituent letters. */
-#define _MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
-  ((constraint) & (1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4)))
-#define _MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \
-  ((constraint) & (1 << (((prevl) ? 2 : 0) + ((currl) ? 1 : 0))))
-#define _SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \
-  (_MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn)		     \
-   && _MATCHES_LETTER_CONTEXT(constraint, prevl, currl))
+#define MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
+  ((constraint) & 1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4))
+#define MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \
+  ((constraint) & 1 << (((prevl) ? 2 : 0) + ((currl) ? 1 : 0)))
+#define SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \
+  (MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn)		     \
+   && MATCHES_LETTER_CONTEXT(constraint, prevl, currl))

 /* The following macros give information about what a constraint depends on. */
-#define _PREV_NEWLINE_DEPENDENT(constraint) \
+#define PREV_NEWLINE_DEPENDENT(constraint) \
  (((constraint) & 0xc0) >> 2 != ((constraint) & 0x30))
-#define _PREV_LETTER_DEPENDENT(constraint) \
+#define PREV_LETTER_DEPENDENT(constraint) \
  (((constraint) & 0x0c) >> 2 != ((constraint) & 0x03))

 /* Tokens that match the empty string subject to some constraint actually
   work by applying that constraint to determine what may follow them,
   taking into account what has gone before.  The following values are
   the constraints corresponding to the special tokens previously defined. */
-#define _NO_CONSTRAINT 0xff
-#define _BEGLINE_CONSTRAINT 0xcf
-#define _ENDLINE_CONSTRAINT 0xaf
-#define _BEGWORD_CONSTRAINT 0xf2
-#define _ENDWORD_CONSTRAINT 0xf4
-#define _LIMWORD_CONSTRAINT 0xf6
-#define _NOTLIMWORD_CONSTRAINT 0xf9
+#define NO_CONSTRAINT 0xff
+#define BEGLINE_CONSTRAINT 0xcf
+#define ENDLINE_CONSTRAINT 0xaf
+#define BEGWORD_CONSTRAINT 0xf2
+#define ENDWORD_CONSTRAINT 0xf4
+#define LIMWORD_CONSTRAINT 0xf6
+#define NOTLIMWORD_CONSTRAINT 0xf9

 /* States of the recognizer correspond to sets of positions in the parse
   tree, together with the constraints under which they may be matched.
@ -386,44 +186,48 @@ typedef struct
 {
  unsigned index;		/* Index into the parse array. */
  unsigned constraint;		/* Constraint for matching this position. */
-} _position;
+} position;

 /* Sets of positions are stored as arrays. */
 typedef struct
 {
-  _position *elems;		/* Elements of this position set. */
+  position *elems;		/* Elements of this position set. */
  int nelem;			/* Number of elements in this set. */
-} _position_set;
+} position_set;

-/* A state of the regexp consists of a set of positions, some flags,
+/* A state of the dfa consists of a set of positions, some flags,
   and the token value of the lowest-numbered position of the state that
-   contains an _END token. */
+   contains an END token. */
 typedef struct
 {
  int hash;			/* Hash of the positions of this state. */
-  _position_set elems;		/* Positions this state could match. */
+  position_set elems;		/* Positions this state could match. */
  char newline;			/* True if previous state matched newline. */
  char letter;			/* True if previous state matched a letter. */
  char backref;			/* True if this state matches a \<digit>. */
  unsigned char constraint;	/* Constraint for this state to accept. */
-  int first_end;		/* Token value of the first _END in elems. */
-} _dfa_state;
+  int first_end;		/* Token value of the first END in elems. */
+} dfa_state;

-/* If an r.e. is at most MUST_MAX characters long, we look for a string which
-   must appear in it; whatever's found is dropped into the struct reg. */
-
-#define MUST_MAX	50
+/* Element of a list of strings, at least one of which is known to
+   appear in any R.E. matching the DFA. */
+struct dfamust
+{
+  int exact;
+  char *must;
+  struct dfamust *next;
+};

 /* A compiled regular expression. */
-struct regexp
+struct dfa
 {
  /* Stuff built by the scanner. */
-  _charset *charsets;		/* Array of character sets for _SET tokens. */
-  int cindex;			/* Index for adding new charsets. */
-  int calloc;			/* Number of charsets currently allocated. */
+  charclass *charclasses;	/* Array of character sets for CSET tokens. */
+  int cindex;			/* Index for adding new charclasses. */
+  int calloc;			/* Number of charclasses currently allocated. */

  /* Stuff built by the parser. */
-  _token *tokens;		/* Postfix parse array. */
+  token *tokens;		/* Postfix parse array. */
  int tindex;			/* Index for adding new tokens. */
  int talloc;			/* Number of tokens currently allocated. */
  int depth;			/* Depth required of an evaluation stack
@ -431,15 +235,15 @@ struct regexp
 				   parse tree. */
  int nleaves;			/* Number of leaves on the parse tree. */
  int nregexps;			/* Count of parallel regexps being built
-				   with regparse(). */
+				   with dfaparse(). */

  /* Stuff owned by the state builder. */
-  _dfa_state *states;		/* States of the regexp. */
+  dfa_state *states;		/* States of the dfa. */
  int sindex;			/* Index for adding new states. */
  int salloc;			/* Number of states currently allocated. */

  /* Stuff built by the structure analyzer. */
-  _position_set *follows;	/* Array of follow sets, indexed by position
+  position_set *follows;	/* Array of follow sets, indexed by position
 				   index.  The follow of a position is the set
 				   of positions containing characters that
 				   could conceivably follow a character
@ -469,7 +273,7 @@ struct regexp
  int **fails;			/* Transition tables after failing to accept
 				   on a state that potentially could do so. */
  int *success;			/* Table of acceptance conditions used in
-				   regexecute and computed in build_state. */
+				   dfaexec and computed in build_state. */
  int *newlines;		/* Transitions on newlines.  The entry for a
 				   newline in any transition table is always
 				   -1 so we can count lines without wasting
@ -477,40 +281,41 @@ struct regexp
 				   newline is stored separately and handled
 				   as a special case.  Newline is also used
 				   as a sentinel at the end of the buffer. */
-  char must[MUST_MAX];
-  int mustn;
+  struct dfamust *musts;	/* List of strings, at least one of which
+				   is known to appear in any r.e. matching
+				   the dfa. */
 };

-/* Some macros for user access to regexp internals. */
+/* Some macros for user access to dfa internals. */

 /* ACCEPTING returns true if s could possibly be an accepting state of r. */
 #define ACCEPTING(s, r) ((r).states[s].constraint)

 /* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the
   specified context. */
-#define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, reg) \
-  _SUCCEEDS_IN_CONTEXT((reg).states[state].constraint,		   \
+#define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, dfa) \
+  SUCCEEDS_IN_CONTEXT((dfa).states[state].constraint,		   \
 		       prevn, currn, prevl, currl)

 /* FIRST_MATCHING_REGEXP returns the index number of the first of parallel
   regexps that a given state could accept.  Parallel regexps are numbered
   starting at 1. */
-#define FIRST_MATCHING_REGEXP(state, reg) (-(reg).states[state].first_end)
+#define FIRST_MATCHING_REGEXP(state, dfa) (-(dfa).states[state].first_end)

 /* Entry points. */

 #ifdef __STDC__

-/* Regsyntax() takes two arguments; the first sets the syntax bits described
+/* dfasyntax() takes two arguments; the first sets the syntax bits described
   earlier in this file, and the second sets the case-folding flag. */
-extern void regsyntax(long, int);
+extern void dfasyntax(reg_syntax_t, int);

-/* Compile the given string of the given length into the given struct regexp.
+/* Compile the given string of the given length into the given struct dfa.
   Final argument is a flag specifying whether to build a searching or an
   exact matcher. */
-extern void regcompile(const char *, size_t, struct regexp *, int);
+extern void dfacomp(char *, size_t, struct dfa *, int);

-/* Execute the given struct regexp on the buffer of characters.  The
+/* Execute the given struct dfa on the buffer of characters.  The
   first char * points to the beginning, and the second points to the
   first character after the end of the buffer, which must be a writable
   place so a sentinel end-of-buffer marker can be stored there.  The
@ -522,37 +327,37 @@ extern void regcompile(const char *, size_t, struct regexp *, int);
   order to verify backreferencing; otherwise the flag will be cleared.
   Returns NULL if no match is found, or a pointer to the first
   character after the first & shortest matching string in the buffer. */
-extern char *regexecute(struct regexp *, char *, char *, int, int *, int *);
+extern char *dfaexec(struct dfa *, char *, char *, int, int *, int *);

-/* Free the storage held by the components of a struct regexp. */
-extern void reg_free(struct regexp *);
+/* Free the storage held by the components of a struct dfa. */
+extern void dfafree(struct dfa *);

 /* Entry points for people who know what they're doing. */

-/* Initialize the components of a struct regexp. */
-extern void reginit(struct regexp *);
+/* Initialize the components of a struct dfa. */
+extern void dfainit(struct dfa *);

-/* Incrementally parse a string of given length into a struct regexp. */
-extern void regparse(const char *, size_t, struct regexp *);
+/* Incrementally parse a string of given length into a struct dfa. */
+extern void dfaparse(char *, size_t, struct dfa *);

 /* Analyze a parsed regexp; second argument tells whether to build a searching
   or an exact matcher. */
-extern void reganalyze(struct regexp *, int);
+extern void dfaanalyze(struct dfa *, int);

 /* Compute, for each possible character, the transitions out of a given
   state, storing them in an array of integers. */
-extern void regstate(int, struct regexp *, int []);
+extern void dfastate(int, struct dfa *, int []);

 /* Error handling. */

-/* Regerror() is called by the regexp routines whenever an error occurs.  It
+/* dfaerror() is called by the regexp routines whenever an error occurs.  It
   takes a single argument, a NUL-terminated string describing the error.
-   The default reg_error() prints the error message to stderr and exits.
-   The user can provide a different reg_free() if so desired. */
-extern void reg_error(const char *);
+   The default dfaerror() prints the error message to stderr and exits.
+   The user can provide a different dfaerror() if so desired. */
+extern void dfaerror(const char *);

 #else /* ! __STDC__ */
-extern void regsyntax(), regcompile(), reg_free(), reginit(), regparse();
-extern void reganalyze(), regstate(), reg_error();
-extern char *regexecute();
-#endif
+extern void dfasyntax(), dfacomp(), dfafree(), dfainit(), dfaparse();
+extern void dfaanalyze(), dfastate(), dfaerror();
+extern char *dfaexec();
+#endif /* ! __STDC__ */
--- a/gnu/usr.bin/gawk/eval.c
+++ b/gnu/usr.bin/gawk/eval.c
@ -3,7 +3,7 @@
 */

 /* 
- * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
 * 
 * This file is part of GAWK, the GNU implementation of the
 * AWK Progamming Language.
@ -24,8 +24,8 @@
 */

 #ifndef lint
-static char rcsid[] = "$Id: eval.c,v 1.3 1993/11/13 02:26:39 jtc Exp $";
-#endif /* not lint */
+static char rcsid[] = "$Id: eval.c,v 1.4 1994/02/17 01:22:11 jtc Exp $";
+#endif

 #include "awk.h"

@ -322,7 +322,10 @@ register NODE *volatile tree;
 		break;

 	case Node_K_delete:
-		do_delete(tree->lnode, tree->rnode);
+		if (tree->rnode != NULL)
+			do_delete(tree->lnode, tree->rnode);
+		else
+			assoc_clear(tree->lnode);
 		break;

 	case Node_K_next:
@ -971,18 +974,20 @@ NODE *arg_list;		/* Node_expression_list of calling args. */
 			/* should we free arg->var_value ? */
 			arg->var_array = n->var_array;
 			arg->type = Node_var_array;
+			arg->array_size = n->array_size;
+			arg->table_size = n->table_size;
 		}
-		unref(n->lnode);
+		/* n->lnode overlays the array size, don't unref it if array */
+		if (n->type != Node_var_array)
+			unref(n->lnode);
 		freenode(n);
 		count--;
 	}
 	while (count-- > 0) {
 		n = *sp++;
 		/* if n is an (local) array, all the elements should be freed */
-		if (n->type == Node_var_array) {
+		if (n->type == Node_var_array)
 			assoc_clear(n);
-			free(n->var_array);
-		}
 		unref(n->lnode);
 		freenode(n);
 	}
--- a/gnu/usr.bin/gawk/field.c
+++ b/gnu/usr.bin/gawk/field.c
@ -3,7 +3,7 @@
 */

 /* 
- * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
 * 
 * This file is part of GAWK, the GNU implementation of the
 * AWK Progamming Language.
@ -24,22 +24,24 @@
 */

 #ifndef lint
-static char rcsid[] = "$Id: field.c,v 1.3 1993/11/13 02:26:43 jtc Exp $";
-#endif /* not lint */
+static char rcsid[] = "$Id: field.c,v 1.4 1994/02/17 01:22:13 jtc Exp $";
+#endif

 #include "awk.h"

+typedef void (* Setfunc) P((int, char*, int, NODE *));
+
 static int (*parse_field) P((int, char **, int, NODE *,
-			     Regexp *, void (*)(), NODE *));
+			     Regexp *, Setfunc, NODE *));
 static void rebuild_record P((void));
 static int re_parse_field P((int, char **, int, NODE *,
-			     Regexp *, void (*)(), NODE *));
+			     Regexp *, Setfunc, NODE *));
 static int def_parse_field P((int, char **, int, NODE *,
-			      Regexp *, void (*)(), NODE *));
+			      Regexp *, Setfunc, NODE *));
 static int sc_parse_field P((int, char **, int, NODE *,
-			     Regexp *, void (*)(), NODE *));
+			     Regexp *, Setfunc, NODE *));
 static int fw_parse_field P((int, char **, int, NODE *,
-			     Regexp *, void (*)(), NODE *));
+			     Regexp *, Setfunc, NODE *));
 static void set_element P((int, char *, int, NODE *));
 static void grow_fields_arr P((int num));
 static void set_field P((int num, char *str, int len, NODE *dummy));
@ -230,7 +232,7 @@ char **buf;	/* on input: string to parse; on output: point to start next */
 int len;
 NODE *fs;
 Regexp *rp;
-void (*set) ();	/* routine to set the value of the parsed field */
+Setfunc set;	/* routine to set the value of the parsed field */
 NODE *n;
 {
 	register char *scan = *buf;
@ -248,9 +250,9 @@ NODE *n;
 			scan++;
 	field = scan;
 	while (scan < end
-	       && research(rp, scan, 0, (int)(end - scan), 1) != -1
+	       && research(rp, scan, 0, (end - scan), 1) != -1
 	       && nf < up_to) {
-		if (REEND(rp, scan) == RESTART(rp, scan)) {	/* null match */
+		if (REEND(rp, scan) == RESTART(rp, scan)) {   /* null match */
 			scan++;
 			if (scan == end) {
 				(*set)(++nf, field, (int)(scan - field), n);
@ -286,7 +288,7 @@ char **buf;	/* on input: string to parse; on output: point to start next */
 int len;
 NODE *fs;
 Regexp *rp;
-void (*set) ();	/* routine to set the value of the parsed field */
+Setfunc set;	/* routine to set the value of the parsed field */
 NODE *n;
 {
 	register char *scan = *buf;
@ -340,7 +342,7 @@ char **buf;	/* on input: string to parse; on output: point to start next */
 int len;
 NODE *fs;
 Regexp *rp;
-void (*set) ();	/* routine to set the value of the parsed field */
+Setfunc set;	/* routine to set the value of the parsed field */
 NODE *n;
 {
 	register char *scan = *buf;
@ -393,7 +395,7 @@ char **buf;	/* on input: string to parse; on output: point to start next */
 int len;
 NODE *fs;
 Regexp *rp;
-void (*set) ();	/* routine to set the value of the parsed field */
+Setfunc set;	/* routine to set the value of the parsed field */
 NODE *n;
 {
 	register char *scan = *buf;
@ -518,7 +520,7 @@ NODE *tree;
 	NODE *fs;
 	char *s;
 	int (*parseit)P((int, char **, int, NODE *,
-			 Regexp *, void (*)(), NODE *));
+			 Regexp *, Setfunc, NODE *));
 	Regexp *rp = NULL;

 	t1 = tree_eval(tree->lnode);
--- a/gnu/usr.bin/gawk/getopt.c
+++ b/gnu/usr.bin/gawk/getopt.c
@ -21,8 +21,8 @@
   Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */

 #ifndef lint
-static char rcsid[] = "$Id: getopt.c,v 1.3 1993/11/13 02:26:46 jtc Exp $";
-#endif /* not lint */
+static char rcsid[] = "$Id: getopt.c,v 1.4 1994/02/17 01:22:16 jtc Exp $";
+#endif

 #ifdef HAVE_CONFIG_H
 #if defined (emacs) || defined (CONFIG_BROKETS)
--- a/gnu/usr.bin/gawk/getopt.h
+++ b/gnu/usr.bin/gawk/getopt.h
@ -15,7 +15,7 @@
   along with this program; if not, write to the Free Software
   Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 

-	$Id: getopt.h,v 1.3 1993/11/13 02:26:50 jtc Exp $
+	$Id: getopt.h,v 1.4 1994/02/17 01:22:18 jtc Exp $
 */

 #ifndef _GETOPT_H
@ -79,7 +79,7 @@ extern int optopt;

 struct option
 {
-#if	__STDC__
+#ifdef	__STDC__
  const char *name;
 #else
  char *name;
@ -97,7 +97,7 @@ struct option
 #define required_argument	1
 #define optional_argument	2

-#if __STDC__
+#ifdef __STDC__
 #if defined(__GNU_LIBRARY__)
 /* Many other libraries have conflicting prototypes for getopt, with
   differences in the consts, in stdlib.h.  To avoid compilation
--- a/gnu/usr.bin/gawk/getopt1.c
+++ b/gnu/usr.bin/gawk/getopt1.c
@ -17,8 +17,8 @@
   Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */

 #ifndef lint
-static char rcsid[] = "$Id: getopt1.c,v 1.3 1993/11/13 02:26:52 jtc Exp $";
-#endif /* not lint */
+static char rcsid[] = "$Id: getopt1.c,v 1.4 1994/02/17 01:22:19 jtc Exp $";
+#endif

 #ifdef HAVE_CONFIG_H
 #if defined (emacs) || defined (CONFIG_BROKETS)
--- a/gnu/usr.bin/gawk/io.c
+++ b/gnu/usr.bin/gawk/io.c
@ -3,7 +3,7 @@
 */

 /* 
- * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
 * 
 * This file is part of GAWK, the GNU implementation of the
 * AWK Progamming Language.
@ -24,8 +24,8 @@
 */

 #ifndef lint
-static char rcsid[] = "$Id: io.c,v 1.4 1993/11/13 02:26:54 jtc Exp $";
-#endif /* not lint */
+static char rcsid[] = "$Id: io.c,v 1.5 1994/02/17 01:22:21 jtc Exp $";
+#endif

 #if !defined(VMS) && !defined(VMS_POSIX) && !defined(_MSC_VER)
 #include <sys/param.h>
@ -60,14 +60,14 @@ static int close_redir P((struct redirect *rp));
 static int wait_any P((int interesting));
 #endif
 static IOBUF *gawk_popen P((char *cmd, struct redirect *rp));
-static IOBUF *iop_open P((char *file, char *how));
+static IOBUF *iop_open P((const char *file, const char *how));
 static int gawk_pclose P((struct redirect *rp));
-static int do_pathopen P((char *file));
-static int str2mode P((char *mode));
+static int do_pathopen P((const char *file));
+static int str2mode P((const char *mode));
 static void spec_setup P((IOBUF *iop, int len, int allocate));
-static int specfdopen P((IOBUF *iop, char *name, char *mode));
-static int pidopen P((IOBUF *iop, char *name, char *mode));
-static int useropen P((IOBUF *iop, char *name, char *mode));
+static int specfdopen P((IOBUF *iop, const char *name, const char *mode));
+static int pidopen P((IOBUF *iop, const char *name, const char *mode));
+static int useropen P((IOBUF *iop, const char *name, const char *mode));

 extern FILE	*fdopen();

@ -266,6 +266,9 @@ do_input()
 		if (inrec(iop) == 0)
 			while (interpret(expression_value) && inrec(iop) == 0)
 				;
+		/* recover any space from C based alloca */
+		(void) alloca(0);
+
 		if (exiting)
 			break;
 	}
@ -282,10 +285,10 @@ int *errflg;
 	register char *str;
 	int tflag = 0;
 	int outflag = 0;
-	char *direction = "to";
-	char *mode;
+	const char *direction = "to";
+	const char *mode;
 	int fd;
-	char *what = NULL;
+	const char *what = NULL;

 	switch (tree->type) {
 	case Node_redirect_append:
@ -398,9 +401,13 @@ int *errflg;
 					rp->fp = stdout;
 				else if (fd == fileno(stderr))
 					rp->fp = stderr;
-				else	
-					rp->fp = fdopen(fd, mode);
-				if (isatty(fd))
+				else {
+					rp->fp = fdopen(fd, (char *) mode);
+					/* don't leak file descriptors */
+					if (rp->fp == NULL)
+						close(fd);
+				}
+				if (rp->fp != NULL && isatty(fd))
 					rp->flag |= RED_NOBUF;
 			}
 		}
@ -593,7 +600,7 @@ close_io ()

 static int
 str2mode(mode)
-char *mode;
+const char *mode;
 {
 	int ret;

@ -609,7 +616,9 @@ char *mode;
 	case 'a':
 		ret = O_WRONLY|O_APPEND|O_CREAT;
 		break;
+
 	default:
+		ret = 0;		/* lint */
 		cant_happen();
 	}
 	return ret;
@ -626,10 +635,10 @@ char *mode;

 int
 devopen(name, mode)
-char *name, *mode;
+const char *name, *mode;
 {
 	int openfd = INVALID_HANDLE;
-	char *cp, *ptr;
+	const char *cp, *ptr;
 	int flag = 0;
 	struct stat buf;
 	extern double strtod();
@ -646,7 +655,7 @@ char *name, *mode;

 	if (STREQ(name, "-"))
 		openfd = fileno(stdin);
-	else if (STREQN(name, "/dev/", 5) && stat(name, &buf) == -1) {
+	else if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) {
 		cp = name + 5;
 		
 		if (STREQ(cp, "stdin") && (flag & O_RDONLY) == O_RDONLY)
@ -705,7 +714,7 @@ int allocate;
 static int
 specfdopen(iop, name, mode)
 IOBUF *iop;
-char *name, *mode;
+const char *name, *mode;
 {
 	int fd;
 	IOBUF *tp;
@ -728,7 +737,7 @@ char *name, *mode;
 * to maximize portability.
 */
 #ifndef GETPGRP_NOARG
-#if defined(__svr4__) || defined(BSD4_4) || defined(_POSIX_SOURCE) || defined(_POSIX_JOB_CONTROL)
+#if defined(__svr4__) || defined(BSD4_4) || defined(_POSIX_SOURCE)
 #define GETPGRP_NOARG
 #else
 #if defined(i860) || defined(_AIX) || defined(hpux) || defined(VMS)
@ -752,7 +761,7 @@ char *name, *mode;
 static int
 pidopen(iop, name, mode)
 IOBUF *iop;
-char *name, *mode;
+const char *name, *mode;
 {
 	char tbuf[BUFSIZ];
 	int i;
@ -784,12 +793,12 @@ char *name, *mode;
 static int
 useropen(iop, name, mode)
 IOBUF *iop;
-char *name, *mode;
+const char *name, *mode;
 {
 	char tbuf[BUFSIZ], *cp;
 	int i;
 #if defined(NGROUPS_MAX) && NGROUPS_MAX > 0
-#if defined(atarist)
+#if defined(atarist) || defined(__svr4__)
 	gid_t groupset[NGROUPS_MAX];
 #else
 	int groupset[NGROUPS_MAX];
@ -825,16 +834,16 @@ char *name, *mode;

 static IOBUF *
 iop_open(name, mode)
-char *name, *mode;
+const char *name, *mode;
 {
 	int openfd = INVALID_HANDLE;
 	int flag = 0;
 	struct stat buf;
 	IOBUF *iop;
 	static struct internal {
-		char *name;
+		const char *name;
 		int compare;
-		int (*fp)();
+		int (*fp) P((IOBUF*,const char *,const char *));
 		IOBUF iob;
 	} table[] = {
 		{ "/dev/fd/",		8,	specfdopen },
@ -855,12 +864,12 @@ char *name, *mode;

 	if (STREQ(name, "-"))
 		openfd = fileno(stdin);
-	else if (STREQN(name, "/dev/", 5) && stat(name, &buf) == -1) {
+	else if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) {
 		int i;

 		for (i = 0; i < devcount; i++) {
 			if (STREQN(name, table[i].name, table[i].compare)) {
-				IOBUF *iop = & table[i].iob;
+				iop = & table[i].iob;

 				if (iop->buf != NULL) {
 					spec_setup(iop, 0, 0);
@ -1009,7 +1018,7 @@ gawk_pclose(rp)
 struct redirect *rp;
 {
 	int rval, aval, fd = rp->iop->fd;
-	FILE *kludge = fdopen(fd, "r"); /* pclose needs FILE* w/ right fileno */
+	FILE *kludge = fdopen(fd, (char *) "r"); /* pclose needs FILE* w/ right fileno */

 	rp->iop->fd = dup(fd);	  /* kludge to allow close() + pclose() */
 	rval = iop_close(rp->iop);
@ -1017,7 +1026,7 @@ struct redirect *rp;
 	aval = pclose(kludge);
 	return (rval < 0 ? rval : aval);
 }
-#else	/* VMS */
+#else	/* VMS || OS2 || MSDOS */

 static
 struct {
@ -1067,7 +1076,7 @@ struct redirect *rp;
 	free(pipes[cur].command);
 	return rval;
 }
-#endif	/* VMS */
+#endif	/* VMS || OS2 || MSDOS */

 #endif	/* PIPES_SIMULATED */

@ -1092,7 +1101,7 @@ NODE *tree;
 			rp = redirect(tree->rnode, &redir_error);
 			if (rp == NULL && redir_error) { /* failed redirect */
 				if (! do_unix) {
-					char *s = strerror(redir_error);
+					s = strerror(redir_error);

 					unref(ERRNO_node->var_value);
 					ERRNO_node->var_value =
@ -1107,7 +1116,7 @@ NODE *tree;
 		errcode = 0;
 		cnt = get_a_record(&s, iop, *RS, & errcode);
 		if (! do_unix && errcode != 0) {
-			char *s = strerror(errcode);
+			s = strerror(errcode);

 			unref(ERRNO_node->var_value);
 			ERRNO_node->var_value = make_string(s, strlen(s));
@ -1153,7 +1162,7 @@ NODE *tree;

 int
 pathopen (file)
-char *file;
+const char *file;
 {
 	int fd = do_pathopen(file);

@ -1185,12 +1194,12 @@ char *file;

 static int
 do_pathopen (file)
-char *file;
+const char *file;
 {
-	static char *savepath = DEFPATH;	/* defined in config.h */
+	static const char *savepath = DEFPATH;	/* defined in config.h */
 	static int first = 1;
-	char *awkpath, *cp;
-	char trypath[BUFSIZ];
+	const char *awkpath;
+	char *cp, trypath[BUFSIZ];
 	int fd;

 	if (STREQ(file, "-"))
--- a/gnu/usr.bin/gawk/iop.c
+++ b/gnu/usr.bin/gawk/iop.c
@ -3,7 +3,7 @@
 */

 /* 
- * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
 * 
 * This file is part of GAWK, the GNU implementation of the
 * AWK Progamming Language.
@ -24,8 +24,8 @@
 */

 #ifndef lint
-static char rcsid[] = "$Id: iop.c,v 1.2 1993/08/02 17:29:54 mycroft Exp $";
-#endif /* not lint */
+static char rcsid[] = "$Id: iop.c,v 1.3 1994/02/17 01:22:22 jtc Exp $";
+#endif

 #include "awk.h"

@ -66,7 +66,7 @@ int fd;
 	else if (fstat(fd, &stb) < 0)
 		return 8*512;	/* conservative in case of DECnet access */
 	else
-		return 24*512;
+		return 32*512;

 #else
 	/*
@ -150,17 +150,14 @@ int *errcode;
 	register char *bp = iop->off;
 	char *bufend;
 	char *start = iop->off;			/* beginning of record */
-	int saw_newline;
 	char rs;
-	int eat_whitespace;
+	int saw_newline = 0, eat_whitespace = 0;	/* used iff grRS==0 */

 	if (iop->cnt == EOF)	/* previous read hit EOF */
 		return EOF;

 	if (grRS == 0) {	/* special case:  grRS == "" */
 		rs = '\n';
-		eat_whitespace = 0;
-		saw_newline = 0;
 	} else
 		rs = (char) grRS;

--- a/gnu/usr.bin/gawk/main.c
+++ b/gnu/usr.bin/gawk/main.c
@ -3,7 +3,7 @@
 */

 /* 
- * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
 * 
 * This file is part of GAWK, the GNU implementation of the
 * AWK Progamming Language.
@ -24,7 +24,7 @@
 */

 #ifndef lint
-static char rcsid[] = "$Id: main.c,v 1.3 1993/11/13 02:26:57 jtc Exp $";
+static char rcsid[] = "$Id: main.c,v 1.4 1994/02/17 01:22:23 jtc Exp $";
 #endif

 #include "getopt.h"
@ -141,7 +141,8 @@ char **argv;
 	extern int optind;
 	extern int opterr;
 	extern char *optarg;
-	char *optlist = "+F:f:v:W:";
+	const char *optlist = "+F:f:v:W:m:";
+	int stopped_early = 0;

 #ifdef __EMX__
 	_response(&argc, &argv);
@ -175,7 +176,6 @@ char **argv;
 	Nnull_string->flags = (PERM|STR|STRING|NUM|NUMBER);

 	/* Set up the special variables */
-
 	/*
 	 * Note that this must be done BEFORE arg parsing else -F
 	 * breaks horribly 
@ -227,6 +227,19 @@ char **argv;
 			pre_assign(optarg);
 			break;

+		case 'm':
+			/*
+			 * Research awk extension, accepted for command-line
+			 * compatibility only:
+			 *	-mf=nnn		set # fields, gawk ignores
+			 *	-mr=nnn		set record length, ditto
+			 *
+			 * The argument is validated but otherwise ignored.
+			 * A malformed argument only produces a warning; it
+			 * does not stop option processing.
+			 */
+			if (do_lint)
+				warning("-m[fr] option irrelevant");
+			/* the sub-option letter must be 'f' or 'r', followed by '=' */
+			if ((optarg[0] != 'r' && optarg[0] != 'f')
+			    || optarg[1] != '=')
+				warning("-m option usage: -m[fr]=nnn");
+			break;
+
 		case 'W':       /* gawk specific options */
 			gawk_option(optarg);
 			break;
@ -259,6 +272,14 @@ char **argv;
 			break;
 #endif

+		case 0:
+			/*
+			 * getopt_long found an option that sets a variable
+			 * instead of returning a letter. Do nothing, just
+			 * cycle around for the next one.
+			 */
+			break;
+
 		case '?':
 		default:
 			/*
@ -275,6 +296,7 @@ char **argv;
 			if (! do_posix
 			    && (optopt == 0 || strchr(optlist, optopt) == NULL)) {
 				optind--;
+				stopped_early = 1;
 				goto out;
 			} else if (optopt)
 				/* Use 1003.2 required message format */
@ -302,7 +324,7 @@ out:
 		output_is_tty = 1;
 	/* No -f or --source options, use next arg */
 	if (numfiles == -1) {
-		if (optind > argc - 1)	/* no args left */
+		if (optind > argc - 1 || stopped_early) /* no args left or no program */
 			usage(1);
 		srcfiles[++numfiles].stype = CMDLINE;
 		srcfiles[numfiles].val = argv[optind];
@ -342,16 +364,15 @@ static void
 usage(exitval)
 int exitval;
 {
-	char *opt1 = " -f progfile [--]";
-#if defined(MSDOS) || defined(OS2)
-	char *opt2 = " [--] \"program\"";
+	const char *opt1 = " -f progfile [--]";
+#if defined(MSDOS) || defined(OS2) || defined(VMS)
+	const char *opt2 = " [--] \"program\"";
 #else
-	char *opt2 = " [--] 'program'";
+	const char *opt2 = " [--] 'program'";
 #endif
-	char *regops = " [POSIX or GNU style options]";
+	const char *regops = " [POSIX or GNU style options]";

-	version();
-	fprintf(stderr, "Usage: %s%s%s file ...\n\t%s%s%s file ...\n",
+	fprintf(stderr, "Usage:\t%s%s%s file ...\n\t%s%s%s file ...\n",
 		myname, regops, opt1, myname, regops, opt2);

 	/* GNU long options info. Gack. */
@ -359,12 +380,13 @@ int exitval;
 	fputs("\t-f progfile\t\t--file=progfile\n", stderr);
 	fputs("\t-F fs\t\t\t--field-separator=fs\n", stderr);
 	fputs("\t-v var=val\t\t--assign=var=val\n", stderr);
+	fputs("\t-m[fr]=val\n", stderr);
 	fputs("\t-W compat\t\t--compat\n", stderr);
 	fputs("\t-W copyleft\t\t--copyleft\n", stderr);
 	fputs("\t-W copyright\t\t--copyright\n", stderr);
 	fputs("\t-W help\t\t\t--help\n", stderr);
 	fputs("\t-W lint\t\t\t--lint\n", stderr);
-#if 0
+#ifdef NOSTALGIA
 	fputs("\t-W nostalgia\t\t--nostalgia\n", stderr);
 #endif
 #ifdef DEBUG
@ -399,7 +421,6 @@ GNU General Public License for more details.\n\
 along with this program; if not, write to the Free Software\n\
 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n";

-	version();
 	fputs(blurb_part1, stderr);
 	fputs(blurb_part2, stderr);
 	fputs(blurb_part3, stderr);
@ -411,7 +432,8 @@ cmdline_fs(str)
 char *str;
 {
 	register NODE **tmp;
-	int len = strlen(str);
+	/* int len = strlen(str); *//* don't do that - we want to
+	                               avoid mismatched types */

 	tmp = get_lhs(FS_node, (Func_ptr *) 0);
 	unref(*tmp);
@ -428,7 +450,7 @@ char *str;
 		if (do_unix && ! do_posix)
 			str[0] = '\t';
 	}
-	*tmp = make_str_node(str, len, SCAN);	/* do process escapes */
+	*tmp = make_str_node(str, strlen(str), SCAN); /* do process escapes */
 	set_FS();
 }

@ -460,9 +482,9 @@ char **argv;
 */
 struct varinit {
 	NODE **spec;
-	char *name;
+	const char *name;
 	NODETYPE type;
-	char *strval;
+	const char *strval;
 	AWKNUM numval;
 	Func_ptr assign;
 };
@ -493,9 +515,10 @@ init_vars()
 	register struct varinit *vp;

 	for (vp = varinit; vp->name; vp++) {
-		*(vp->spec) = install(vp->name,
+		*(vp->spec) = install((char *) vp->name,
 		  node(vp->strval == 0 ? make_number(vp->numval)
-				: make_string(vp->strval, strlen(vp->strval)),
+				: make_string((char *) vp->strval,
+					strlen(vp->strval)),
 		       vp->type, (NODE *) NULL));
 		if (vp->assign)
 			(*(vp->assign))();
@ -731,6 +754,8 @@ static void
 version()
 {
 	fprintf(stderr, "%s, patchlevel %d\n", version_string, PATCHLEVEL);
+	/* per GNU coding standards, exit successfully, do nothing else */
+	exit(0);
 }

 /* this mess will improve in 2.16 */
--- a/gnu/usr.bin/gawk/msg.c
+++ b/gnu/usr.bin/gawk/msg.c
@ -3,7 +3,7 @@
 */

 /* 
- * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
 * 
 * This file is part of GAWK, the GNU implementation of the
 * AWK Progamming Language.
@ -24,8 +24,8 @@
 */

 #ifndef lint
-static char rcsid[] = "$Id: msg.c,v 1.2 1993/08/02 17:29:55 mycroft Exp $";
-#endif /* not lint */
+static char rcsid[] = "$Id: msg.c,v 1.3 1994/02/17 01:22:25 jtc Exp $";
+#endif

 #include "awk.h"

@ -35,8 +35,8 @@ char *source = NULL;
 /* VARARGS2 */
 void
 err(s, emsg, argp)
-char *s;
-char *emsg;
+const char *s;
+const char *emsg;
 va_list argp;
 {
 	char *file;
@ -53,8 +53,9 @@ va_list argp;
 	}
 	if (FNR) {
 		file = FILENAME_node->var_value->stptr;
+		(void) putc('(', stderr);
 		if (file)
-			(void) fprintf(stderr, "(FILENAME=%s ", file);
+			(void) fprintf(stderr, "FILENAME=%s ", file);
 		(void) fprintf(stderr, "FNR=%d) ", FNR);
 	}
 	(void) fprintf(stderr, s);
--- a/gnu/usr.bin/gawk/node.c
+++ b/gnu/usr.bin/gawk/node.c
@ -3,7 +3,7 @@
 */

 /* 
- * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
 * 
 * This file is part of GAWK, the GNU implementation of the
 * AWK Progamming Language.
@ -24,8 +24,8 @@
 */

 #ifndef lint
-static char rcsid[] = "$Id: node.c,v 1.3 1993/11/13 02:27:00 jtc Exp $";
-#endif /* not lint */
+static char rcsid[] = "$Id: node.c,v 1.4 1994/02/17 01:22:27 jtc Exp $";
+#endif

 #include "awk.h"

@ -106,7 +106,7 @@ register NODE *n;
 * (more complicated) variations on this theme didn't seem to pay off, but 
 * systematic testing might be in order at some point
 */
-static char *values[] = {
+static const char *values[] = {
 	"0",
 	"1",
 	"2",
@ -141,7 +141,7 @@ register NODE *s;
 		num = (long)s->numbr;
 	if ((AWKNUM) num == s->numbr) {	/* integral value */
 		if (num < NVAL && num >= 0) {
-			sp = values[num];
+			sp = (char *) values[num];
 			s->stlen = 1;
 		} else {
 			(void) sprintf(sp, "%ld", num);
@ -149,7 +149,7 @@ register NODE *s;
 		}
 		s->stfmt = -1;
 	} else {
-		(void) sprintf(sp, CONVFMT, s->numbr);
+		NUMTOSTR(sp, CONVFMT, s->numbr);
 		s->stlen = strlen(sp);
 		s->stfmt = (char)CONVFMTidx;
 	}
--- a/gnu/usr.bin/gawk/patchlevel.h
+++ b/gnu/usr.bin/gawk/patchlevel.h
@ -1,3 +1 @@
-/*	$Id: patchlevel.h,v 1.3 1993/11/13 02:27:02 jtc Exp $ */
-
-#define PATCHLEVEL	3
+#define PATCHLEVEL	4
--- a/gnu/usr.bin/gawk/protos.h
+++ b/gnu/usr.bin/gawk/protos.h
@ -3,7 +3,7 @@
 */

 /* 
- * Copyright (C) 1991, 1992, the Free Software Foundation, Inc.
+ * Copyright (C) 1991, 1992, 1993 the Free Software Foundation, Inc.
 * 
 * This file is part of GAWK, the GNU implementation of the
 * AWK Progamming Language.
@ -22,7 +22,7 @@
 * along with GAWK; see the file COPYING.  If not, write to
 * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
- *	$Id: protos.h,v 1.2 1993/08/02 17:30:01 mycroft Exp $
+ *	$Id: protos.h,v 1.3 1994/02/17 01:22:31 jtc Exp $
 */

 #ifdef __STDC__
@ -53,7 +53,7 @@ extern char *strstr P((const char *s1, const char *s2));
 extern int strlen P((const char *));
 extern long strtol P((const char *, char **, int));
 #if !defined(_MSC_VER) && !defined(__GNU_LIBRARY__)
-extern int strftime P((char *, int, const char *, const struct tm *));
+extern size_t strftime P((char *, size_t, const char *, const struct tm *));
 #endif
 extern time_t time P((time_t *));
 extern aptr_t memset P((aptr_t, int, size_t));
@ -62,10 +62,9 @@ extern aptr_t memmove P((aptr_t, const aptr_t, size_t));
 extern aptr_t memchr P((const aptr_t, int, size_t));
 extern int memcmp P((const aptr_t, const aptr_t, size_t));

-/* extern int fprintf P((FILE *, char *, ...)); */
-extern int fprintf P(());
+extern int fprintf P((FILE *, const char *, ...));
 #if !defined(MSDOS) && !defined(__GNU_LIBRARY__)
-extern int fwrite P((const char *, int, int, FILE *));
+extern size_t fwrite P((const void *, size_t, size_t, FILE *));
 extern int fputs P((const char *, FILE *));
 extern int unlink P((const char *));
 #endif
@ -77,7 +76,7 @@ extern void abort P(());
 extern int isatty P((int));
 extern void exit P((int));
 extern int system P((const char *));
-extern int sscanf P((/* char *, char *, ... */));
+extern int sscanf P((const char *, const char *, ...));
 #ifndef toupper
 extern int toupper P((int));
 #endif
@ -93,8 +92,8 @@ extern int stat P((const char *, struct stat *));
 extern off_t lseek P((int, off_t, int));
 extern int fseek P((FILE *, long, int));
 extern int close P((int));
-extern int creat P(());
-extern int open P(());
+extern int creat P((const char *, mode_t));
+extern int open P((const char *, int, ...));
 extern int pipe P((int *));
 extern int dup P((int));
 extern int dup2 P((int,int));
--- a/gnu/usr.bin/gawk/re.c
+++ b/gnu/usr.bin/gawk/re.c
@ -3,7 +3,7 @@
 */

 /* 
- * Copyright (C) 1991, 1992 the Free Software Foundation, Inc.
+ * Copyright (C) 1991, 1992, 1993 the Free Software Foundation, Inc.
 * 
 * This file is part of GAWK, the GNU implementation of the
 * AWK Progamming Language.
@ -24,8 +24,8 @@
 */

 #ifndef lint
-static char rcsid[] = "$Id: re.c,v 1.3 1993/11/13 02:27:05 jtc Exp $";
-#endif /* not lint */
+static char rcsid[] = "$Id: re.c,v 1.4 1994/02/17 01:22:33 jtc Exp $";
+#endif

 #include "awk.h"

@ -34,12 +34,12 @@ static char rcsid[] = "$Id: re.c,v 1.3 1993/11/13 02:27:05 jtc Exp $";
 Regexp *
 make_regexp(s, len, ignorecase, dfa)
 char *s;
-int len;
+size_t len;
 int ignorecase;
 int dfa;
 {
 	Regexp *rp;
-	char *err;
+	const char *rerr;
 	char *src = s;
 	char *temp;
 	char *end = s + len;
@ -94,7 +94,7 @@ int dfa;
 	*dest = '\0' ;	/* Only necessary if we print dest ? */
 	emalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
 	memset((char *) rp, 0, sizeof(*rp));
-	emalloc(rp->pat.buffer, char *, 16, "make_regexp");
+	emalloc(rp->pat.buffer, unsigned char *, 16, "make_regexp");
 	rp->pat.allocated = 16;
 	emalloc(rp->pat.fastmap, char *, 256, "make_regexp");

@ -103,13 +103,14 @@ int dfa;
 	else
 		rp->pat.translate = NULL;
 	len = dest - temp;
-	if ((err = re_compile_pattern(temp, len, &(rp->pat))) != NULL)
-		fatal("%s: /%s/", err, temp);
+	if ((rerr = re_compile_pattern(temp, len, &(rp->pat))) != NULL)
+		fatal("%s: /%s/", rerr, temp);
 	if (dfa && !ignorecase) {
-		regcompile(temp, len, &(rp->dfareg), 1);
+		dfacomp(temp, len, &(rp->dfareg), 1);
 		rp->dfa = 1;
 	} else
 		rp->dfa = 0;
+
 	free(temp);
 	return rp;
 }
@ -119,24 +120,24 @@ research(rp, str, start, len, need_start)
 Regexp *rp;
 register char *str;
 int start;
-register int len;
+register size_t len;
 int need_start;
 {
 	char *ret = str;

 	if (rp->dfa) {
-		char save1;
-		char save2;
+		char save;
 		int count = 0;
 		int try_backref;

-		save1 = str[start+len];
-		str[start+len] = '\n';
-		save2 = str[start+len+1];
-		ret = regexecute(&(rp->dfareg), str+start, str+start+len+1, 1,
+		/*
+		 * dfa likes to stick a '\n' right after the matched
+		 * text.  So we just save and restore the character.
+		 */
+		save = str[start+len];
+		ret = dfaexec(&(rp->dfareg), str+start, str+start+len, 1,
 					&count, &try_backref);
-		str[start+len] = save1;
-		str[start+len+1] = save2;
+		str[start+len] = save;
 	}
 	if (ret) {
 		if (need_start || rp->dfa == 0)
@ -155,12 +156,12 @@ Regexp *rp;
 	free(rp->pat.buffer);
 	free(rp->pat.fastmap);
 	if (rp->dfa)
-		reg_free(&(rp->dfareg));
+		dfafree(&(rp->dfareg));
 	free(rp);
 }

 void
-reg_error(s)
+dfaerror(s)
 const char *s;
 {
 	fatal(s);
@ -198,7 +199,8 @@ NODE *t;
 		t->re_text = dupnode(t1);
 		free_temp(t1);
 	}
-	t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen, IGNORECASE, t->re_cnt);
+	t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen,
+				IGNORECASE, t->re_cnt);
 	t->re_flags &= ~CASE;
 	t->re_flags |= IGNORECASE;
 	return t->re_reg;
@ -207,6 +209,8 @@ NODE *t;
 void
 resetup()
 {
-	(void) re_set_syntax(RE_SYNTAX_AWK);
-	regsyntax(RE_SYNTAX_AWK, 0);
+	reg_syntax_t syn = RE_SYNTAX_AWK;
+
+	(void) re_set_syntax(syn);
+	dfasyntax(syn, 0);
 }
--- a/gnu/usr.bin/gawk/regex.c
+++ b/gnu/usr.bin/gawk/regex.c
--- a/gnu/usr.bin/gawk/regex.h
+++ b/gnu/usr.bin/gawk/regex.h
@ -1,10 +1,11 @@
-/* Definitions for data structures callers pass the regex library.
+/* Definitions for data structures and routines for the regular
+   expression library, version 0.12.

-   Copyright (C) 1985, 1989-90 Free Software Foundation, Inc.
+   Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 1, or (at your option)
+   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
@ -16,247 +17,492 @@
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

-	$Id: regex.h,v 1.2 1993/08/02 17:30:13 mycroft Exp $
+	$Id: regex.h,v 1.3 1994/02/17 01:22:42 jtc Exp $ 
 */

-#ifndef __REGEXP_LIBRARY
-#define __REGEXP_LIBRARY
+#ifndef __REGEXP_LIBRARY_H__
+#define __REGEXP_LIBRARY_H__

-/* Define number of parens for which we record the beginnings and ends.
-   This affects how much space the `struct re_registers' type takes up.  */
-#ifndef RE_NREGS
-#define RE_NREGS 10
-#endif
+/* POSIX says that <sys/types.h> must be included (by the caller) before
+   <regex.h>.  */

-#define BYTEWIDTH 8
-
-
-/* Maximum number of duplicates an interval can allow.  */
-#ifndef RE_DUP_MAX
-#define RE_DUP_MAX  ((1 << 15) - 1) 
+#ifdef VMS
+/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+   should be there.  */
+#include <stddef.h>
 #endif


-/* This defines the various regexp syntaxes.  */
-extern long obscure_syntax;
+/* The following two types have to be signed and unsigned integer types
+   wide enough to hold the value of a pointer.  For most ANSI compilers
+   ptrdiff_t and size_t would likely be OK.  Still, the size of these two
+   types is 2 for Microsoft C.  Ugh... */
+typedef long s_reg_t;
+typedef unsigned long active_reg_t;

+/* The following bits are used to determine the regexp syntax we
+   recognize.  The set/not-set meanings are chosen so that Emacs syntax
+   remains the value 0.  The bits are given in alphabetical order, and
+   the definitions shifted by one from the previous bit; thus, when we
+   add or remove a bit, only one other definition need change.  */
+typedef unsigned long reg_syntax_t;

-/* The following bits are used in the obscure_syntax variable to choose among
-   alternative regexp syntaxes.  */
+/* If this bit is not set, then \ inside a bracket expression is literal.
+   If set, then such a \ quotes the following character.  */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS (1L)

-/* If this bit is set, plain parentheses serve as grouping, and backslash
-     parentheses are needed for literal searching.
-   If not set, backslash-parentheses are grouping, and plain parentheses
-     are for literal searching.  */
-#define RE_NO_BK_PARENS	1L
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+     literals. 
+   If set, then \+ and \? are operators and + and ? are literals.  */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)

-/* If this bit is set, plain | serves as the `or'-operator, and \| is a 
-     literal.
-   If not set, \| serves as the `or'-operator, and | is a literal.  */
-#define RE_NO_BK_VBAR (1L << 1)
-
-/* If this bit is not set, plain + or ? serves as an operator, and \+, \? are 
-     literals.
-   If set, \+, \? are operators and plain +, ? are literals.  */
-#define RE_BK_PLUS_QM (1L << 2)
-
-/* If this bit is set, | binds tighter than ^ or $.
-   If not set, the contrary.  */
-#define RE_TIGHT_VBAR (1L << 3)
-
-/* If this bit is set, then treat newline as an OR operator.
-   If not set, treat it as a normal character.  */
-#define RE_NEWLINE_OR (1L << 4)
-
-/* If this bit is set, then special characters may act as normal
-   characters in some contexts. Specifically, this applies to:
-	^ -- only special at the beginning, or after ( or |;
-	$ -- only special at the end, or before ) or |;
-	*, +, ? -- only special when not after the beginning, (, or |.
-   If this bit is not set, special characters (such as *, ^, and $)
-   always have their special meaning regardless of the surrounding
-   context.  */
-#define RE_CONTEXT_INDEP_OPS (1L << 5)
-
-/* If this bit is not set, then \ before anything inside [ and ] is taken as 
-     a real \.
-   If set, then such a \ escapes the following character.  This is a
-     special case for awk.  */
-#define RE_AWK_CLASS_HACK (1L << 6)
-
-/* If this bit is set, then \{ and \} or { and } serve as interval operators.
-   If not set, then \{ and \} and { and } are treated as literals.  */
-#define RE_INTERVALS (1L << 7)
-
-/* If this bit is not set, then \{ and \} serve as interval operators and 
-     { and } are literals.
-   If set, then { and } serve as interval operators and \{ and \} are 
-     literals.  */
-#define RE_NO_BK_CURLY_BRACES (1L << 8)
-
-/* If this bit is set, then character classes are supported; they are:
-     [:alpha:],	[:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
+/* If this bit is set, then character classes are supported.  They are:
+     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
-#define RE_CHAR_CLASSES (1L << 9)
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)

-/* If this bit is set, then the dot re doesn't match a null byte.
-   If not set, it does.  */
-#define RE_DOT_NOT_NULL (1L << 10)
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+     expressions, of course).
+   If this bit is not set, then it depends:
+        ^  is an anchor if it is at the beginning of a regular
+           expression or after an open-group or an alternation operator;
+        $  is an anchor if it is at the end of a regular expression, or
+           before a close-group or an alternation operator.  

-/* If this bit is set, then [^...] doesn't match a newline.
-   If not set, it does.  */
-#define RE_HAT_NOT_NEWLINE (1L << 11)
+   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+   POSIX draft 11.2 says that * etc. in leading positions is undefined.
+   We already implemented a previous draft which made those constructs
+   invalid, though, so we haven't changed the code back.  */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)

-/* If this bit is set, back references are recognized.
-   If not set, they aren't.  */
-#define RE_NO_BK_REFS (1L << 12)
+/* If this bit is set, then special characters are always special
+     regardless of where they are in the pattern.
+   If this bit is not set, then special characters are special only in
+     some contexts; otherwise they are ordinary.  Specifically, 
+     * + ? and intervals are only special when not after the beginning,
+     open-group, or alternation operator.  */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)

-/* If this bit is set, back references must refer to a preceding
-   subexpression.  If not set, a back reference to a nonexistent
-   subexpression is treated as literal characters.  */
-#define RE_NO_EMPTY_BK_REF (1L << 13)
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+     immediately after an alternation or begin-group operator.  */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)

-/* If this bit is set, bracket expressions can't be empty.  
-   If it is set, they can be empty.  */
-#define RE_NO_EMPTY_BRACKETS (1L << 14)
+/* If this bit is set, then . matches newline.
+   If not set, then it doesn't.  */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)

-/* If this bit is set, then *, +, ? and { cannot be first in an re or
-   immediately after a |, or a (.  Furthermore, a | cannot be first or
-   last in an re, or immediately follow another | or a (.  Also, a ^
-   cannot appear in a nonleading position and a $ cannot appear in a
-   nontrailing position (outside of bracket expressions, that is).  */
-#define RE_CONTEXTUAL_INVALID_OPS (1L << 15)
+/* If this bit is set, then . doesn't match NUL.
+   If not set, then it does.  */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)

-/* If this bit is set, then +, ? and | aren't recognized as operators.
-   If it's not, they are.  */
-#define RE_LIMITED_OPS (1L << 16)
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+   If not set, they do.  */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)

-/* If this bit is set, then an ending range point has to collate higher
-     or equal to the starting range point.
-   If it's not set, then when the ending range point collates higher
-     than the starting range point, the range is just considered empty.  */
-#define RE_NO_EMPTY_RANGES (1L << 17)
+/* If this bit is set, either \{...\} or {...} defines an
+     interval, depending on RE_NO_BK_BRACES. 
+   If not set, \{, \}, {, and } are literals.  */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)

-/* If this bit is set, then a hyphen (-) can't be an ending range point.
-   If it isn't, then it can.  */
-#define RE_NO_HYPHEN_RANGE_END (1L << 18)
+/* If this bit is set, +, ? and | aren't recognized as operators.
+   If not set, they are.  */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)

+/* If this bit is set, newline is an alternation operator.
+   If not set, newline is literal.  */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)

-/* Define combinations of bits for the standard possibilities.  */
-#define RE_SYNTAX_POSIX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
-			| RE_CONTEXT_INDEP_OPS)
-#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_AWK_CLASS_HACK)
-#define RE_SYNTAX_EGREP (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
-			| RE_CONTEXT_INDEP_OPS | RE_NEWLINE_OR)
-#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+     are literals.
+  If not set, then `\{...\}' defines an interval.  */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+   If not set, \(...\) defines a group, and ( and ) are literals.  */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+   If not set, then \<digit> is a back-reference.  */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal. 
+   If not set, then \| is an alternation operator, and | is literal.  */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating lower
+     than the starting range point, as in [z-a], is invalid.
+   If not set, then when the ending range point collates lower than the
+     starting range point, the range is ignored.  */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+   If not set, then an unmatched ) is invalid.  */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, do not process the GNU regex operators.
+   If not set, then the GNU regex operators are recognized.  */
+#define RE_NO_GNU_OPS (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+   some interfaces).  When a regexp is compiled, the syntax used is
+   stored in the pattern buffer, so changing this does not affect
+   already-compiled regexps.  */
+extern reg_syntax_t re_syntax_options;
+
+/* Define combinations of the above bits for the standard possibilities.
+   (The [[[ comments delimit what gets put into the Texinfo file, so
+   don't delete them!)  */ 
+/* [[[begin syntaxes]]] */
 #define RE_SYNTAX_EMACS 0
-#define RE_SYNTAX_POSIX_BASIC (RE_INTERVALS | RE_BK_PLUS_QM 		\
-			| RE_CHAR_CLASSES | RE_DOT_NOT_NULL 		\
-                        | RE_HAT_NOT_NEWLINE | RE_NO_EMPTY_BK_REF 	\
-                        | RE_NO_EMPTY_BRACKETS | RE_LIMITED_OPS		\
-                        | RE_NO_EMPTY_RANGES | RE_NO_HYPHEN_RANGE_END)	
-                        
-#define RE_SYNTAX_POSIX_EXTENDED (RE_INTERVALS | RE_NO_BK_CURLY_BRACES	   \
-			| RE_NO_BK_VBAR | RE_NO_BK_PARENS 		   \
-                        | RE_HAT_NOT_NEWLINE | RE_CHAR_CLASSES 		   \
-                        | RE_NO_EMPTY_BRACKETS | RE_CONTEXTUAL_INVALID_OPS \
-                        | RE_NO_BK_REFS | RE_NO_EMPTY_RANGES 		   \
-                        | RE_NO_HYPHEN_RANGE_END)
+
+#define RE_SYNTAX_AWK							\
+  (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL			\
+   | RE_NO_BK_PARENS            | RE_NO_BK_REFS				\
+   | RE_NO_BK_VBAR               | RE_NO_EMPTY_RANGES			\
+   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GNU_AWK 						\
+  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
+
+#define RE_SYNTAX_POSIX_AWK 						\
+  (RE_SYNTAX_GNU_AWK | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GREP							\
+  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
+   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
+   | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP							\
+  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
+   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
+   | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP						\
+  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax.  */
+#define _RE_SYNTAX_POSIX_COMMON						\
+  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
+   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC						\
+  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
+   isn't minimal, since other operators, such as \`, aren't disabled.  */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
+  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED					\
+  (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INDEP_OPS  | RE_NO_BK_BRACES				\
+   | RE_NO_BK_PARENS       | RE_NO_BK_VBAR				\
+   | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+   replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added.  */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
+  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
+   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
+   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow.  Some systems
+   (erroneously) define this in other header files, but we want our
+   value, so remove any previous define.  */
+#ifdef RE_DUP_MAX
+#undef RE_DUP_MAX
+#endif
+/* if sizeof(int) == 2, then ((1 << 15) - 1) overflows  */
+#define RE_DUP_MAX  (0x7fff)


-/* This data structure is used to represent a compiled pattern.  */
+/* POSIX `cflags' bits (i.e., information for `regcomp').  */
+
+/* If this bit is set, then use extended regular expression syntax.
+   If not set, then use basic regular expression syntax.  */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+   If not set, then case is significant.  */
+#define REG_ICASE (REG_EXTENDED << 1)
+ 
+/* If this bit is set, then anchors do not match at newline
+     characters in the string.
+   If not set, then anchors do match at newlines.  */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+   If not set, then returns differ between not matching and errors.  */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec).  */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+     the beginning of the string (presumably because it's not the
+     beginning of a line).
+   If not set, then the beginning-of-line operator does match the
+     beginning of the string.  */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line.  */
+#define REG_NOTEOL (1 << 1)
+
+
+/* If any error codes are removed, changed, or added, update the
+   `re_error_msg' table in regex.c.  */
+typedef enum
+{
+  REG_NOERROR = 0,	/* Success.  */
+  REG_NOMATCH,		/* Didn't find a match (for regexec).  */
+
+  /* POSIX regcomp return error codes.  (In the order listed in the
+     standard.)  */
+  REG_BADPAT,		/* Invalid pattern.  */
+  REG_ECOLLATE,		/* Not implemented.  */
+  REG_ECTYPE,		/* Invalid character class name.  */
+  REG_EESCAPE,		/* Trailing backslash.  */
+  REG_ESUBREG,		/* Invalid back reference.  */
+  REG_EBRACK,		/* Unmatched left bracket.  */
+  REG_EPAREN,		/* Parenthesis imbalance.  */ 
+  REG_EBRACE,		/* Unmatched \{.  */
+  REG_BADBR,		/* Invalid contents of \{\}.  */
+  REG_ERANGE,		/* Invalid range end.  */
+  REG_ESPACE,		/* Ran out of memory.  */
+  REG_BADRPT,		/* No preceding re for repetition op.  */
+
+  /* Error codes we've added.  */
+  REG_EEND,		/* Premature end.  */
+  REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
+  REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern.  Before calling
+   the pattern compiler, the fields `buffer', `allocated', `fastmap',
+   `translate', and `no_sub' can be set.  After the pattern has been
+   compiled, the `re_nsub' field is available.  All other fields are
+   private to the regex routines.  */

 struct re_pattern_buffer
-  {
-    char *buffer;	/* Space holding the compiled pattern commands.  */
-    long allocated;	/* Size of space that `buffer' points to. */
-    long used;		/* Length of portion of buffer actually occupied  */
-    char *fastmap;	/* Pointer to fastmap, if any, or zero if none.  */
-			/* re_search uses the fastmap, if there is one,
-			   to skip over totally implausible characters.  */
-    char *translate;	/* Translate table to apply to all characters before 
-		           comparing, or zero for no translation.
-			   The translation is applied to a pattern when it is 
-                           compiled and to data when it is matched.  */
-    char fastmap_accurate;
-			/* Set to zero when a new pattern is stored,
-			   set to one when the fastmap is updated from it.  */
-    char can_be_null;   /* Set to one by compiling fastmap
-			   if this pattern might match the null string.
-			   It does not necessarily match the null string
-			   in that case, but if this is zero, it cannot.
-			   2 as value means can match null string
-			   but at end of range or before a character
-			   listed in the fastmap.  */
-  };
+{
+/* [[[begin pattern_buffer]]] */
+	/* Space that holds the compiled pattern.  It is declared as
+          `unsigned char *' because its elements are
+           sometimes used as array indexes.  */
+  unsigned char *buffer;
+
+	/* Number of bytes to which `buffer' points.  */
+  unsigned long allocated;
+
+	/* Number of bytes actually used in `buffer'.  */
+  unsigned long used;	
+
+        /* Syntax setting with which the pattern was compiled.  */
+  reg_syntax_t syntax;
+
+        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
+           the fastmap, if there is one, to skip over impossible
+           starting points for matches.  */
+  char *fastmap;
+
+        /* Either a translate table to apply to all characters before
+           comparing them, or zero for no translation.  The translation
+           is applied to a pattern when it is compiled and to a string
+           when it is matched.  */
+  char *translate;
+
+	/* Number of subexpressions found by the compiler.  */
+  size_t re_nsub;
+
+        /* Zero if this pattern cannot match the empty string, one else.
+           Well, in truth it's used only in `re_search_2', to see
+           whether or not we should use the fastmap, so we don't set
+           this absolutely perfectly; see `re_compile_fastmap' (the
+           `duplicate' case).  */
+  unsigned can_be_null : 1;
+
+        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+             for `max (RE_NREGS, re_nsub + 1)' groups.
+           If REGS_REALLOCATE, reallocate space if necessary.
+           If REGS_FIXED, use what's there.  */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+  unsigned regs_allocated : 2;
+
+        /* Set to zero when `regex_compile' compiles a pattern; set to one
+           by `re_compile_fastmap' if it updates the fastmap.  */
+  unsigned fastmap_accurate : 1;
+
+        /* If set, `re_match_2' does not return information about
+           subexpressions.  */
+  unsigned no_sub : 1;
+
+        /* If set, a beginning-of-line anchor doesn't match at the
+           beginning of the string.  */ 
+  unsigned not_bol : 1;
+
+        /* Similarly for an end-of-line anchor.  */
+  unsigned not_eol : 1;
+
+        /* If true, an anchor at a newline matches.  */
+  unsigned newline_anchor : 1;
+
+/* [[[end pattern_buffer]]] */
+};
+
+typedef struct re_pattern_buffer regex_t;


-/* search.c (search_buffer) needs this one value.  It is defined both in
-   regex.c and here.  */
+/* search.c (search_buffer) in Emacs needs this one opcode value.  It is
+   defined both in `regex.c' and here.  */
 #define RE_EXACTN_VALUE 1
-
-
-/* Structure to store register contents data in.
-
-   Pass the address of such a structure as an argument to re_match, etc.,
-   if you want this information back.
-
-   For i from 1 to RE_NREGS - 1, start[i] records the starting index in
-   the string of where the ith subexpression matched, and end[i] records
-   one after the ending index.  start[0] and end[0] are analogous, for
-   the entire pattern.  */
-
-struct re_registers
-  {
-    int start[RE_NREGS];
-    int end[RE_NREGS];
-  };
-
-

+/* Type for byte offsets within the string.  POSIX mandates this.  */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in.  See
+   regex.texinfo for a full description of what registers match.  */
+struct re_registers
+{
+  unsigned num_regs;
+  regoff_t *start;
+  regoff_t *end;
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+   `re_match_2' returns information about at least this many registers
+   the first time a `regs' structure is passed.  */
+#ifndef RE_NREGS
+#define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers.  Aside from using different names
+   than `re_registers', POSIX uses an array of structures, instead of a
+   structure of arrays.  */
+typedef struct
+{
+  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
+  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
+} regmatch_t;
+
+/* Declarations for routines.  */
+
+/* To avoid duplicating every routine declaration -- once with a
+   prototype (if we are ANSI), and once without (if we aren't) -- we
+   use the following macro to declare argument types.  This
+   unfortunately clutters up the declarations a bit, but I think it's
+   worth it.  */
+
 #ifdef __STDC__

-extern char *re_compile_pattern (char *, size_t, struct re_pattern_buffer *);
-/* Is this really advertised?  */
-extern void re_compile_fastmap (struct re_pattern_buffer *);
-extern int re_search (struct re_pattern_buffer *, char*, int, int, int,
-		      struct re_registers *);
-extern int re_search_2 (struct re_pattern_buffer *, char *, int,
-			char *, int, int, int,
-			struct re_registers *, int);
-extern int re_match (struct re_pattern_buffer *, char *, int, int,
-		     struct re_registers *);
-extern int re_match_2 (struct re_pattern_buffer *, char *, int,
-		       char *, int, int, struct re_registers *, int);
-extern long re_set_syntax (long syntax);
+#define _RE_ARGS(args) args
+
+#else /* not __STDC__ */
+
+#define _RE_ARGS(args) ()
+
+#endif /* not __STDC__ */
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+   You can also simply assign to the `re_syntax_options' variable.  */
+extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+
+/* Compile the regular expression PATTERN, with length LENGTH
+   and syntax given by the global `re_syntax_options', into the buffer
+   BUFFER.  Return NULL if successful, and an error string if not.  */
+extern const char *re_compile_pattern
+  _RE_ARGS ((const char *pattern, size_t length,
+             struct re_pattern_buffer *buffer));
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+   accelerate searches.  Return 0 if successful and -2 if there was an
+   internal error.  */
+extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+   compiled into BUFFER.  Start searching at position START, for RANGE
+   characters.  Return the starting position of the match, -1 for no
+   match, or -2 for an internal error.  Also return register
+   information in REGS (if REGS is nonzero and BUFFER->no_sub is zero).  */
+extern int re_search
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+            int length, int start, int range, struct re_registers *regs));
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+   STRING2.  Also, stop searching at index START + STOP.  */
+extern int re_search_2
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+             int length1, const char *string2, int length2,
+             int start, int range, struct re_registers *regs, int stop));
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+   in BUFFER matched, starting at position START.  */
+extern int re_match
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+             int length, int start, struct re_registers *regs));
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
+extern int re_match_2 
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+             int length1, const char *string2, int length2,
+             int start, struct re_registers *regs, int stop));
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
+   for recording register information.  STARTS and ENDS must be
+   allocated with malloc, and must each be at least `NUM_REGS * sizeof
+   (regoff_t)' bytes long.
+
+   If NUM_REGS == 0, then subsequent matches should allocate their own
+   register data.
+
+   Unless this function is called, the first search or match using
+   BUFFER will allocate its own register data, without
+   freeing the old data.  */
+extern void re_set_registers
+  _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+             unsigned num_regs, regoff_t *starts, regoff_t *ends));

-#ifndef GAWK
 /* 4.2 bsd compatibility.  */
-extern char *re_comp (char *);
-extern int re_exec (char *);
-#endif
+extern char *re_comp _RE_ARGS ((const char *));
+extern int re_exec _RE_ARGS ((const char *));

-#else /* !__STDC__ */
+/* POSIX compatibility.  */
+extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
+extern int regexec
+  _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
+             regmatch_t pmatch[], int eflags));
+extern size_t regerror
+  _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
+             size_t errbuf_size));
+extern void regfree _RE_ARGS ((regex_t *preg));

-extern char *re_compile_pattern ();
-/* Is this really advertised? */
-extern void re_compile_fastmap ();
-extern int re_search (), re_search_2 ();
-extern int re_match (), re_match_2 ();
-extern long re_set_syntax();
-
-#ifndef GAWK
-/* 4.2 bsd compatibility.  */
-extern char *re_comp ();
-extern int re_exec ();
-#endif
-
-#endif /* __STDC__ */
-
-
-#ifdef SYNTAX_TABLE
-extern char *re_syntax_table;
-#endif
-
-#endif /* !__REGEXP_LIBRARY */
+#endif /* not __REGEXP_LIBRARY_H__ */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
--- a/gnu/usr.bin/gawk/version.c
+++ b/gnu/usr.bin/gawk/version.c
@ -1,5 +1,5 @@
-/*char *version_string = "from: @(#)Gnu Awk (gawk) 2.15";*/
-char *version_string = "$Id: version.c,v 1.2 1993/08/01 18:49:02 mycroft Exp $ 2.15";
+/* DO NOT CHANGE VERSION STRING TO USE A REAL SCCS OR RCS ID */
+char *version_string = "@(#)Gnu Awk (gawk) 2.15";

 /* 1.02		fixed /= += *= etc to return the new Left Hand Side instead
 		of the Right Hand Side */
@ -43,5 +43,6 @@ char *version_string = "$Id: version.c,v 1.2 1993/08/01 18:49:02 mycroft Exp $ 2
 /* 2.14		Mostly bug fixes. */

 /* 2.15		Bug fixes plus intermixing of command-line source and files,
-		GNU long options, ARGIND, ERRNO and Plan 9 style /dev/ files. */
+		GNU long options, ARGIND, ERRNO and Plan 9 style /dev/ files.
+		`delete array'. OS/2 port added. */
 @ -1 +1 @@
 .15.3
 .15.4