Upgrade to Gawk 2.15.4.

This commit is contained in:
jtc 1994-02-17 01:21:51 +00:00
parent 652a63ee06
commit dff3317321
27 changed files with 7395 additions and 4436 deletions

View File

@ -1,3 +1,53 @@
Changes from 2.15.3 to 2.15.4
-----------------------------
Lots of lint fixes, and do_sprintf made mostly ANSI C compatible.
Man page updated and edited.
Copyrights updated.
Arrays now grow dynamically, initially scaling up by an order of magnitude
and then doubling, up to ~ 64K. This should keep gawk's performance
graceful under heavy load.
New `delete array' feature added. Only documented in the man page.
Switched to dfa and regex suites from grep-2.0. These offer the ability to
move to POSIX regexps in the next release.
Disabled GNU regex ops.
Research awk -m option now recognized. It does nothing in gawk, since gawk
has no static limits. Only documented in the man page.
New bionic (faster, better, stronger than before) hashing function.
Bug fix in argument handling. `gawk -X' now notices there was no program.
Additional bug fixes to make --compat and --lint work again.
Many changes for 16-bit cleanliness.
Add explicit alloca(0) in io.c to recover space from C alloca.
Fixed file descriptor leak in io.c.
The --version option now follows the GNU coding standards and exits.
Fixed several prototypes in protos.h.
Several tests updated. On Solaris, warn that the out? tests will fail.
Configuration files for SunOS with cc and Solaris 2.x added.
Improved error messages in awk.y on gawk extensions if do_unix or do_compat.
INSTALL file added.
Fixed Atari Makefile and several VMS specific changes.
Better conversion of numbers to strings on systems with broken sprintfs.
Changes from 2.15.2 to 2.15.3
-----------------------------

View File

@ -3,4 +3,8 @@ Hopefully they will all be fixed in the next major release of gawk.
Please keep in mind that the code is still undergoing significant evolution.
1. Gawk's printf is probably still not POSIX compliant.
1. The interactions with the lexer and yyerror need reworking. It is possible
to get line numbers that are one line off if --compat or --posix is
true and either `next file' or `delete array' is used.
Really the whole lexical analysis stuff needs reworking.

View File

@ -10,7 +10,7 @@ See the installation instructions, below.
Known problems are given in the PROBLEMS file. Work to be done is
described briefly in the FUTURES file. Verified ports are listed in
the PORTS file. Changes in this version are summarized in the CHANGES file.
the PORTS file. Changes in this version are summarized in the NEWS file.
Please read the LIMITATIONS and ACKNOWLEDGMENT files.
Read the file POSIX for a discussion of how the standard says comparisons
@ -28,6 +28,8 @@ INSTALLATION:
Check whether there is a system-specific README file for your system.
A quick overview of the installation process is in the file INSTALLATION.
Makefile.in may need some tailoring. The only changes necessary should
be to change installation targets or to change compiler flags.
The changes to make in Makefile.in are commented and should be obvious.
@ -69,7 +71,7 @@ problem.
PRINTING THE MANUAL
The 'support' directory contains texinfo.tex 2.65, which will be necessary
The 'support' directory contains texinfo.tex 2.115, which will be necessary
for printing the manual, and the texindex.c program from the texinfo
distribution which is also necessary. See the makefile for the steps needed
to get a DVI file from the manual.
@ -93,7 +95,7 @@ INTERNET: david@cs.dal.ca
Arnold Robbins
1736 Reindeer Drive
Atlanta, GA, 30329, USA
Atlanta, GA, 30329-3528, USA
INTERNET: arnold@skeeve.atl.ga.us
UUCP: { gatech, emory, emoryu1 }!skeeve!arnold
@ -115,8 +117,10 @@ VMS:
Atari ST:
Michal Jaegermann
NTOMCZAK@vm.ucs.UAlberta.CA (e-mail only)
michal@gortel.phys.ualberta.ca (e-mail only)
OS/2:
Kai Uwe Rommel
rommel@ars.muc.de (e-mail only)
Darrel Hankerson
hankedr@mail.auburn.edu (e-mail only)

View File

@ -1 +1 @@
2.15.3
2.15.4

View File

@ -3,7 +3,7 @@
*/
/*
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@ -24,12 +24,27 @@
*/
#ifndef lint
static char rcsid[] = "$Id: array.c,v 1.3 1993/11/13 02:26:15 jtc Exp $";
#endif /* not lint */
static char rcsid[] = "$Id: array.c,v 1.4 1994/02/17 01:21:57 jtc Exp $";
#endif
/*
* Tree walks (``for (iggy in foo)'') and array deletions use expensive
* linear searching. So what we do is start out with small arrays and
* grow them as needed, so that our arrays are hopefully small enough,
* most of the time, that they're pretty full and we're not looking at
* wasted space.
*
* The decision is made to grow the array if the average chain length is
* ``too big''. This is defined as the total number of entries in the table
* divided by the size of the array being greater than some constant.
*/
#define AVG_CHAIN_MAX 10 /* don't want to linear search more than this */
#include "awk.h"
static NODE *assoc_find P((NODE *symbol, NODE *subs, int hash1));
static void grow_table P((NODE *symbol));
NODE *
concat_exp(tree)
@ -88,7 +103,7 @@ NODE *symbol;
if (symbol->var_array == 0)
return;
for (i = 0; i < HASHSIZE; i++) {
for (i = 0; i < symbol->array_size; i++) {
for (bucket = symbol->var_array[i]; bucket; bucket = next) {
next = bucket->ahnext;
unref(bucket->ahname);
@ -97,17 +112,25 @@ NODE *symbol;
}
symbol->var_array[i] = 0;
}
free(symbol->var_array);
symbol->var_array = NULL;
symbol->array_size = symbol->table_size = 0;
}
/*
* calculate the hash function of the string in subs
*/
unsigned int
hash(s, len)
register char *s;
hash(s, len, hsize)
register const char *s;
register size_t len;
unsigned long hsize;
{
register unsigned long h = 0, g;
register unsigned long h = 0;
#ifdef this_is_really_slow
register unsigned long g;
while (len--) {
h = (h << 4) + *s++;
@ -117,10 +140,84 @@ register size_t len;
h = h ^ g;
}
}
if (h < HASHSIZE)
return h;
else
return h%HASHSIZE;
#else /* this_is_really_slow */
/*
* This is INCREDIBLY ugly, but fast. We break the string up into 8 byte
* units. On the first time through the loop we get the "leftover bytes"
* (strlen % 8). On every other iteration, we perform 8 HASHC's so we handle
* all 8 bytes. Essentially, this saves us 7 cmp & branch instructions. If
* this routine is heavily used enough, it's worth the ugly coding.
*
* OZ's original sdbm hash, copied from Margo Seltzer's db package.
*
*/
/* Even more speed: */
/* #define HASHC h = *s++ + 65599 * h */
/* Because 65599 = pow(2,6) + pow(2,16) - 1 we multiply by shifts */
#define HASHC htmp = (h << 6); \
h = *s++ + htmp + (htmp << 10) - h
unsigned long htmp;
h = 0;
#if defined(VAXC)
/*
* [This was an implementation of "Duff's Device", but it has been
* redone, separating the switch for extra iterations from the loop.
* This is necessary because the DEC VAX-C compiler is STOOPID.]
*/
switch (len & (8 - 1)) {
case 7: HASHC;
case 6: HASHC;
case 5: HASHC;
case 4: HASHC;
case 3: HASHC;
case 2: HASHC;
case 1: HASHC;
default: break;
}
if (len > (8 - 1)) {
register size_t loop = len >> 3;
do {
HASHC;
HASHC;
HASHC;
HASHC;
HASHC;
HASHC;
HASHC;
HASHC;
} while (--loop);
}
#else /* !VAXC */
/* "Duff's Device" for those who can handle it */
if (len > 0) {
register size_t loop = (len + 8 - 1) >> 3;
switch (len & (8 - 1)) {
case 0:
do { /* All fall throughs */
HASHC;
case 7: HASHC;
case 6: HASHC;
case 5: HASHC;
case 4: HASHC;
case 3: HASHC;
case 2: HASHC;
case 1: HASHC;
} while (--loop);
}
}
#endif /* !VAXC */
#endif /* this_is_really_slow - not */
if (h >= hsize)
h %= hsize;
return h;
}
/*
@ -162,7 +259,7 @@ NODE *symbol, *subs;
if (symbol->var_array == 0)
return 0;
subs = concat_exp(subs); /* concat_exp returns a string node */
hash1 = hash(subs->stptr, subs->stlen);
hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size);
if (assoc_find(symbol, subs, hash1) == NULL) {
free_temp(subs);
return 0;
@ -187,17 +284,16 @@ NODE *symbol, *subs;
register NODE *bucket;
(void) force_string(subs);
hash1 = hash(subs->stptr, subs->stlen);
if (symbol->var_array == 0) { /* this table really should grow
* dynamically */
size_t size;
size = sizeof(NODE *) * HASHSIZE;
emalloc(symbol->var_array, NODE **, size, "assoc_lookup");
memset((char *)symbol->var_array, 0, size);
if (symbol->var_array == 0) {
symbol->type = Node_var_array;
symbol->array_size = symbol->table_size = 0; /* sanity */
grow_table(symbol);
hash1 = hash(subs->stptr, subs->stlen,
(unsigned long) symbol->array_size);
} else {
hash1 = hash(subs->stptr, subs->stlen,
(unsigned long) symbol->array_size);
bucket = assoc_find(symbol, subs, hash1);
if (bucket != NULL) {
free_temp(subs);
@ -209,6 +305,17 @@ NODE *symbol, *subs;
if (do_lint && subs->stlen == 0)
warning("subscript of array `%s' is null string",
symbol->vname);
/* first see if we would need to grow the array, before installing */
symbol->table_size++;
if ((symbol->flags & ARRAYMAXED) == 0
&& symbol->table_size/symbol->array_size > AVG_CHAIN_MAX) {
grow_table(symbol);
/* have to recompute hash value for new size */
hash1 = hash(subs->stptr, subs->stlen,
(unsigned long) symbol->array_size);
}
getnode(bucket);
bucket->type = Node_ahash;
if (subs->flags & TEMP)
@ -244,7 +351,7 @@ NODE *symbol, *tree;
if (symbol->var_array == 0)
return;
subs = concat_exp(tree); /* concat_exp returns string node */
hash1 = hash(subs->stptr, subs->stlen);
hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size);
last = NULL;
for (bucket = symbol->var_array[hash1]; bucket; last = bucket, bucket = bucket->ahnext)
@ -260,6 +367,14 @@ NODE *symbol, *tree;
unref(bucket->ahname);
unref(bucket->ahvalue);
freenode(bucket);
symbol->table_size--;
if (symbol->table_size <= 0) {
memset(symbol->var_array, '\0',
sizeof(NODE *) * symbol->array_size);
symbol->table_size = symbol->array_size = 0;
free(symbol->var_array);
symbol->var_array = NULL;
}
}
void
@ -267,12 +382,12 @@ assoc_scan(symbol, lookat)
NODE *symbol;
struct search *lookat;
{
if (!symbol->var_array) {
if (symbol->var_array == NULL) {
lookat->retval = NULL;
return;
}
lookat->arr_ptr = symbol->var_array;
lookat->arr_end = lookat->arr_ptr + HASHSIZE; /* added */
lookat->arr_end = lookat->arr_ptr + symbol->array_size;
lookat->bucket = symbol->var_array[0];
assoc_next(lookat);
}
@ -295,3 +410,77 @@ struct search *lookat;
}
return;
}
/*
 * grow_table --- grow a hash table
 *
 * Replace symbol->var_array with a larger bucket array and rehash every
 * existing element into it.  The new size is the first entry of sizes[]
 * that is larger than the current symbol->array_size.  If no entry is
 * larger, ARRAYMAXED is set in symbol->flags and the table is left
 * untouched.  If symbol->var_array is NULL, a fresh empty table is
 * installed and symbol->table_size is reset to 0.
 */
static void
grow_table(symbol)
NODE *symbol;
{
	NODE **old, **new, *chain, *next;
	int i, j;
	unsigned long hash1;
	unsigned long oldsize, newsize;
	/*
	 * This is an array of primes. We grow the table by an order of
	 * magnitude each time (not just doubling) so that growing is a
	 * rare operation. We expect, on average, that it won't happen
	 * more than twice. The final size is also chosen to be small
	 * enough so that MS-DOG mallocs can handle it. When things are
	 * very large (> 8K), we just double more or less, instead of
	 * just jumping from 8K to 64K.
	 */
	static long sizes[] = { 13, 127, 1021, 8191, 16381, 32749, 65497 };

	/*
	 * find next biggest hash size
	 *
	 * newsize must start out equal to oldsize: when the table is
	 * already at the last entry of sizes[] nothing in the loop matches,
	 * and the "already at max" test below relies on the two values
	 * still being equal.  Starting newsize at 0 would skip that test,
	 * allocate a zero-length table, and make hash() reduce modulo 0.
	 */
	newsize = oldsize = symbol->array_size;
	for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) {
		if (oldsize < (unsigned long) sizes[i]) {
			newsize = sizes[i];
			break;
		}
	}

	if (newsize == oldsize) {	/* table already at max (!) */
		symbol->flags |= ARRAYMAXED;
		return;
	}

	/* allocate new table, with every bucket empty */
	emalloc(new, NODE **, newsize * sizeof(NODE *), "grow_table");
	memset(new, '\0', newsize * sizeof(NODE *));

	/* brand new hash table, set things up and return */
	if (symbol->var_array == NULL) {
		symbol->table_size = 0;
		goto done;
	}

	/* old hash table there, move stuff to new, free old */
	old = symbol->var_array;
	for (i = 0; (unsigned long) i < oldsize; i++) {
		if (old[i] == NULL)
			continue;

		for (chain = old[i]; chain != NULL; chain = next) {
			/* save the link first; it is overwritten below */
			next = chain->ahnext;
			hash1 = hash(chain->ahname->stptr,
					chain->ahname->stlen, newsize);

			/* remove from old list, add to front of new chain */
			chain->ahnext = new[hash1];
			new[hash1] = chain;
		}
	}
	free(old);

done:
	/*
	 * note that symbol->table_size does not change if an old array,
	 * and is explicitly set to 0 if a new one.
	 */
	symbol->var_array = new;
	symbol->array_size = newsize;
}

View File

@ -1,8 +1,8 @@
.\" $Id: awk.1,v 1.3 1993/11/13 02:26:18 jtc Exp $ -*- nroff -*-
.\" $Id: awk.1,v 1.4 1994/02/17 01:21:59 jtc Exp $ -*- nroff -*-
.ds PX \s-1POSIX\s+1
.ds UX \s-1UNIX\s+1
.ds AN \s-1ANSI\s+1
.TH GAWK 1 "Nov 4 1993" "Free Software Foundation" "Utility Commands"
.TH GAWK 1 "Dec 24 1993" "Free Software Foundation" "Utility Commands"
.SH NAME
gawk \- pattern scanning and processing language
.SH SYNOPSIS
@ -72,6 +72,11 @@ option.
Each
.B \-W
option has a corresponding GNU style long option, as detailed below.
Arguments to GNU style long options are either joined with the option
by an
.B =
sign, with no intervening spaces, or they may be provided in the
next command line argument.
.PP
.I Gawk
accepts the following options.
@ -115,6 +120,26 @@ Multiple
(or
.BR \-\^\-file )
options may be used.
.TP
.PD 0
.BI \-mf= NNN
.TP
.BI \-mr= NNN
Set various memory limits to the value
.IR NNN .
The
.B f
flag sets the maximum number of fields, and the
.B r
flag sets the maximum record size. These two flags and the
.B \-m
option are from the AT&T Bell Labs research version of \*(UX
.IR awk .
They are ignored by
.IR gawk ,
since
.I gawk
has no pre-defined limits.
.TP \w'\fB\-\^\-copyright\fR'u+1n
.PD 0
.B "\-W compat"
@ -159,6 +184,8 @@ the error output.
.B \-\^\-usage
Print a relatively short summary of the available options on
the error output.
Per the GNU Coding Standards, these options cause an immediate,
successful exit.
.TP
.PD 0
.B "\-W lint"
@ -249,6 +276,8 @@ This is useful mainly for knowing if the current copy of
on your system
is up to date with respect to whatever the Free Software Foundation
is distributing.
Per the GNU Coding Standards, these options cause an immediate,
successful exit.
.TP
.B \-\^\-
Signal the end of options. This is useful to allow further arguments to the
@ -256,7 +285,13 @@ AWK program itself to start with a ``\-''.
This is mainly for consistency with the argument parsing convention used
by most other \*(PX programs.
.PP
Any other options are flagged as illegal, but are otherwise ignored.
In compatibility mode,
any other options are flagged as illegal, but are otherwise ignored.
In normal operation, as long as program text has been supplied, unknown
options are passed on to the AWK program in the
.B ARGV
array for processing. This is particularly useful for running AWK
programs via the ``#!'' executable interpreter mechanism.
.SH AWK PROGRAM EXECUTION
.PP
An AWK program consists of a sequence of pattern-action statements
@ -271,23 +306,23 @@ and optional function definitions.
.I Gawk
first reads the program source from the
.IR program-file (s)
if specified, or from the first non-option argument on the command line.
if specified,
from arguments to
.BR "\-W source=" ,
or from the first non-option argument on the command line.
The
.B \-f
option may be used multiple times on the command line.
and
.B "\-W source="
options may be used multiple times on the command line.
.I Gawk
will read the program text as if all the
.IR program-file s
and command line source texts
had been concatenated together. This is useful for building libraries
of AWK functions, without having to include them in each new AWK
program that uses them. To use a library function in a file from a
program typed in on the command line, specify
.B /dev/tty
as one of the
.IR program-file s,
type your program, and end it with a
.B ^D
(control-d).
program that uses them. It also provides the ability to mix library
functions with command line programs.
.PP
The environment variable
.B AWKPATH
@ -303,11 +338,13 @@ option contains a ``/'' character, no path search is performed.
.I Gawk
executes AWK programs in the following order.
First,
all variable assignments specified via the
.B \-v
option are performed.
Next,
.I gawk
compiles the program into an internal form.
Next, all variable assignments specified via the
.B \-v
option are performed. Then,
Then,
.I gawk
executes the code in the
.B BEGIN
@ -360,8 +397,8 @@ block(s) (if any).
AWK variables are dynamic; they come into existence when they are
first used. Their values are either floating-point numbers or strings,
or both,
depending upon how they are used. AWK also has one dimension
arrays; multiply dimensioned arrays may be simulated.
depending upon how they are used. AWK also has one dimensional
arrays; arrays with multiple dimensions may be simulated.
Several pre-defined variables are set as a program
runs; these will be described as needed and summarized below.
.SS Fields
@ -436,6 +473,7 @@ cause the value of
.B $0
to be recomputed, with the fields being separated by the value of
.BR OFS .
References to negative numbered fields cause a fatal error.
.SS Built-in Variables
.PP
AWK's built-in variables are:
@ -483,7 +521,7 @@ If a system error occurs either doing a redirection for
during a read for
.BR getline ,
or during a
.BR close ,
.BR close() ,
then
.B ERRNO
will contain
@ -650,6 +688,9 @@ loop to iterate over all the elements of an array.
An element may be deleted from an array using the
.B delete
statement.
The
.B delete
statement may also be used to delete the entire contents of an array.
.SS Variable Typing And Conversion
.PP
Variables and fields
@ -686,7 +727,7 @@ b = a ""
.PP
the variable
.B b
has a value of \fB"12"\fR and not \fB"12.00"\fR.
has a string value of \fB"12"\fR and not \fB"12.00"\fR.
.PP
.I Gawk
performs comparisons as follows:
@ -815,7 +856,8 @@ the third. Only one of the second and third patterns is evaluated.
.PP
The
.IB pattern1 ", " pattern2
form of an expression is called a range pattern.
form of an expression is called a
.IR "range pattern" .
It matches all input records starting with a line that matches
.IR pattern1 ,
and continuing until a record that matches
@ -988,6 +1030,7 @@ as follows:
\fBbreak\fR
\fBcontinue\fR
\fBdelete \fIarray\^\fB[\^\fIindex\^\fB]\fR
\fBdelete \fIarray\^\fR
\fBexit\fR [ \fIexpression\fR ]
\fB{ \fIstatements \fB}
.fi
@ -1052,10 +1095,20 @@ Prints the current record.
.TP
.BI print " expr-list"
Prints expressions.
Each expression is separated by the value of the
.B OFS
variable. The output record is terminated with the value of the
.B ORS
variable.
.TP
.BI print " expr-list" " >" file
Prints expressions on
.IR file .
Each expression is separated by the value of the
.B OFS
variable. The output record is terminated with the value of the
.B ORS
variable.
.TP
.BI printf " fmt, expr-list"
Format and print.
@ -1084,8 +1137,9 @@ In a similar fashion,
.IB command " | getline"
pipes into
.BR getline .
.BR Getline
will return 0 on end of file, and \-1 on an error.
The
.BR getline
command will return 0 on end of file, and \-1 on an error.
.SS The \fIprintf\fP\^ Statement
.PP
The AWK versions of the
@ -1159,6 +1213,7 @@ The expression should be left-justified within its field.
The field should be padded to this width. If the number has a leading
zero, then the field will be padded with zeros.
Otherwise it is padded with blanks.
This applies even to the non-numeric output formats.
.TP
.BI . prec
A number indicating the maximum width of strings or digits to the right
@ -1235,7 +1290,7 @@ is the value of the
system call.
If there are any additional fields, they are the group IDs returned by
.IR getgroups (2).
(Multiple groups may not be supported on all systems.)
Multiple groups may not be supported on all systems.
.TP
.B /dev/stdin
The standard input.
@ -1366,6 +1421,9 @@ and returns the number of fields. If
is omitted,
.B FS
is used instead.
The array
.I a
is cleared first.
.TP
.BI sprintf( fmt , " expr-list" )
prints
@ -1483,11 +1541,11 @@ the
As in \*(AN C, all following hexadecimal digits are considered part of
the escape sequence.
(This feature should tell us something about language design by committee.)
E.g., "\ex1B" is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
E.g., \fB"\ex1B"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
.TP
.BI \e ddd
The character represented by the 1-, 2-, or 3-digit sequence of octal
digits. E.g. "\e033" is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
digits. E.g. \fB"\e033"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
.TP
.BI \e c
The literal character
@ -1568,7 +1626,15 @@ Concatenate and line number (a variation on a theme):
.ft R
.fi
.SH SEE ALSO
.IR egrep (1)
.IR egrep (1),
.IR getpid (2),
.IR getppid (2),
.IR getpgrp (2),
.IR getuid (2),
.IR geteuid (2),
.IR getgid (2),
.IR getegid (2),
.IR getgroups (2)
.PP
.IR "The AWK Programming Language" ,
Alfred V. Aho, Brian W. Kernighan, Peter J. Weinberger,
@ -1606,7 +1672,7 @@ block was run. Applications came to depend on this ``feature.''
When
.I awk
was changed to match its documentation, this option was added to
accomodate applications that depended upon the old behavior.
accommodate applications that depended upon the old behavior.
(This feature was agreed upon by both the AT&T and GNU developers.)
.PP
The
@ -1616,7 +1682,11 @@ option for implementation specific features is from the \*(PX standard.
When processing arguments,
.I gawk
uses the special option ``\fB\-\^\-\fP'' to signal the end of
arguments, and warns about, but otherwise ignores, undefined options.
arguments.
In compatibility mode, it will warn about, but otherwise ignore,
undefined options.
In normal operation, such arguments are passed on to the AWK program for
it to process.
.PP
The AWK book does not define the return value of
.BR srand() .
@ -1712,6 +1782,11 @@ environment variable is not special.
The use of
.B "next file"
to abandon processing of the current input file.
.TP
\(bu
The use of
.BI delete " array"
to delete the entire contents of an array.
.RE
.PP
The AWK book does not define the return value of the
@ -1739,7 +1814,7 @@ option is ``t'', then
will be set to the tab character.
Since this is a rather ugly special case, it is not the default behavior.
This behavior also does not occur if
.B \-Wposix
.B "\-W posix"
has been specified.
.ig
.PP
@ -1791,7 +1866,7 @@ a = length($0)
This feature is marked as ``deprecated'' in the \*(PX standard, and
.I gawk
will issue a warning about its use if
.B \-Wlint
.B "\-W lint"
is specified on the command line.
.PP
The other feature is the use of the
@ -1807,7 +1882,7 @@ equivalent to the
statement.
.I Gawk
will support this usage if
.B \-Wposix
.B "\-W posix"
has not been specified.
.SH BUGS
The
@ -1850,6 +1925,7 @@ the
and
.B \-e
options of the 2.11 version are no longer recognized.
This fact will not even be documented in the manual page for version 2.16.
.SH AUTHORS
The original version of \*(UX
.I awk
@ -1873,6 +1949,8 @@ compatible with the new version of \*(UX
The initial DOS port was done by Conrad Kwok and Scott Garfinkle.
Scott Deifik is the current DOS maintainer. Pat Rankin did the
port to VMS, and Michal Jaegermann did the port to the Atari ST.
The port to OS/2 was done by Kai Uwe Rommel, with contributions and
help from Darrel Hankerson.
.SH ACKNOWLEDGEMENTS
Brian Kernighan of Bell Labs
provided valuable assistance during testing and debugging.

View File

@ -3,7 +3,7 @@
*/
/*
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@ -22,7 +22,7 @@
* along with GAWK; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* $Id: awk.h,v 1.3 1993/11/13 02:26:21 jtc Exp $
* $Id: awk.h,v 1.4 1994/02/17 01:22:01 jtc Exp $
*/
/* ------------------------------ Includes ------------------------------ */
@ -174,7 +174,7 @@ extern int getpgrp P((void));
typedef struct Regexp {
struct re_pattern_buffer pat;
struct re_registers regs;
struct regexp dfareg;
struct dfa dfareg;
int dfa;
} Regexp;
#define RESTART(rp,s) (rp)->regs.start[0]
@ -198,6 +198,22 @@ extern int _text_read (int, char *, int);
#define ENVSEP ':'
#endif
#define DEFAULT_G_PRECISION 6
/*
 * semi-temporary hack, mostly to gracefully handle VMS
 *
 * NUMTOSTR(str, format, num) writes num into the buffer str using the
 * printf-style format.  Without GFMT_WORKAROUND it is a plain sprintf()
 * call whose result is discarded.  With GFMT_WORKAROUND, the formats
 * "%.6g" and "%g" are routed through sgfmt() instead; every other
 * format still goes to sprintf().
 *
 * In the workaround form the macro expands to a bare if/else statement
 * with no trailing semicolon, so it can only be used as a statement
 * (never as an expression) and the call site supplies the semicolon.
 * `format' is evaluated more than once in that form.
 */
#ifdef GFMT_WORKAROUND
extern void sgfmt P((char *, const char *, int, int, int, double)); /* builtin.c */
/* Partial fix, to handle the most common case. */
#define NUMTOSTR(str, format, num) \
if (strcmp((format), "%.6g") == 0 || strcmp((format), "%g") == 0) \
sgfmt(str, "%*.*g", 0, 1, DEFAULT_G_PRECISION, num); \
else \
(void) sprintf(str, format, num) /* NOTE: no semi-colon! */
#else
#define NUMTOSTR(str, format, num) (void) sprintf(str, format, num)
#endif /* GFMT_WORKAROUND */
/* ------------------ Constants, Structures, Typedefs ------------------ */
#define AWKNUM double
@ -335,6 +351,7 @@ typedef struct exp_node {
union {
struct exp_node *lptr;
char *param_name;
long ll;
} l;
union {
struct exp_node *rptr;
@ -347,6 +364,7 @@ typedef struct exp_node {
union {
char *name;
struct exp_node *extra;
long xl;
} x;
short number;
unsigned char reflags;
@ -392,8 +410,8 @@ typedef struct exp_node {
# define NUM 32 /* numeric value is current */
# define NUMBER 64 /* assigned as number */
# define MAYBE_NUM 128 /* user input: if NUMERIC then
* a NUMBER
*/
* a NUMBER */
# define ARRAYMAXED 256 /* array is at max size */
char *vname; /* variable's name */
} NODE;
@ -426,6 +444,8 @@ typedef struct exp_node {
#define var_value lnode
#define var_array sub.nodep.r.av
#define array_size sub.nodep.l.ll
#define table_size sub.nodep.x.xl
#define condpair lnode
#define triggered sub.nodep.r.r_ent
@ -433,8 +453,6 @@ typedef struct exp_node {
#ifdef DONTDEF
int primes[] = {31, 61, 127, 257, 509, 1021, 2053, 4099, 8191, 16381};
#endif
/* a quick profile suggests that the following is a good value */
#define HASHSIZE 1021
typedef struct for_loop_header {
NODE *init;
@ -628,7 +646,7 @@ extern double _msc51bug;
/* array.c */
extern NODE *concat_exp P((NODE *tree));
extern void assoc_clear P((NODE *symbol));
extern unsigned int hash P((char *s, size_t len));
extern unsigned int hash P((const char *s, size_t len, unsigned long hsize));
extern int in_array P((NODE *symbol, NODE *subs));
extern NODE **assoc_lookup P((NODE *symbol, NODE *subs));
extern void do_delete P((NODE *symbol, NODE *tree));
@ -639,7 +657,7 @@ extern char *tokexpand P((void));
extern char nextc P((void));
extern NODE *node P((NODE *left, NODETYPE op, NODE *right));
extern NODE *install P((char *name, NODE *value));
extern NODE *lookup P((char *name));
extern NODE *lookup P((const char *name));
extern NODE *variable P((char *name, int can_free));
extern int yyparse P((void));
/* builtin.c */
@ -695,8 +713,8 @@ extern struct redirect *redirect P((NODE *tree, int *errflg));
extern NODE *do_close P((NODE *tree));
extern int flush_io P((void));
extern int close_io P((void));
extern int devopen P((char *name, char *mode));
extern int pathopen P((char *file));
extern int devopen P((const char *name, const char *mode));
extern int pathopen P((const char *file));
extern NODE *do_getline P((NODE *tree));
extern void do_nextfile P((void));
/* iop.c */
@ -710,7 +728,7 @@ extern void load_environ P((void));
extern char *arg_assign P((char *arg));
extern SIGTYPE catchsig P((int sig, int code));
/* msg.c */
extern void err P((char *s, char *emsg, va_list argp));
extern void err P((const char *s, const char *emsg, va_list argp));
#if _MSC_VER == 510
extern void msg P((va_list va_alist, ...));
extern void warning P((va_list va_alist, ...));
@ -734,8 +752,9 @@ extern void freenode P((NODE *it));
extern void unref P((NODE *tmp));
extern int parse_escape P((char **string_ptr));
/* re.c */
extern Regexp *make_regexp P((char *s, int len, int ignorecase, int dfa));
extern int research P((Regexp *rp, char *str, int start, int len, int need_start));
extern Regexp *make_regexp P((char *s, size_t len, int ignorecase, int dfa));
extern int research P((Regexp *rp, char *str, int start,
size_t len, int need_start));
extern void refree P((Regexp *rp));
extern void reg_error P((const char *s));
extern Regexp *re_update P((NODE *t));

View File

@ -3,7 +3,7 @@
*/
/*
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@ -21,6 +21,8 @@
* You should have received a copy of the GNU General Public License
* along with GAWK; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* $Id: awk.y,v 1.3 1994/02/17 01:22:02 jtc Exp $
*/
%{
@ -56,9 +58,10 @@ static char *thisline = NULL;
#define YYDEBUG_LEXER_TEXT (lexeme)
static int param_counter;
static char *tokstart = NULL;
static char *token = NULL;
static char *tok = NULL;
static char *tokend;
#define HASHSIZE 1021 /* this constant only used here */
NODE *variables[HASHSIZE];
extern char *source;
@ -291,7 +294,7 @@ regexp
REGEXP '/'
{
NODE *n;
int len;
size_t len;
getnode(n);
n->type = Node_regex;
@ -386,10 +389,19 @@ statement
if ($2 && $2 == lookup("file")) {
if (do_lint)
warning("`next file' is a gawk extension");
else if (do_unix || do_posix)
yyerror("`next file' is a gawk extension");
else if (! io_allowed)
yyerror("`next file' used in BEGIN or END action");
if (do_unix || do_posix) {
/*
* can't use yyerror, since may have overshot
* the source line
*/
errcount++;
msg("`next file' is a gawk extension");
}
if (! io_allowed) {
/* same thing */
errcount++;
msg("`next file' used in BEGIN or END action");
}
type = Node_K_nextfile;
} else {
if (! io_allowed)
@ -406,6 +418,20 @@ statement
{ $$ = node ($3, Node_K_return, (NODE *)NULL); }
| LEX_DELETE NAME '[' expression_list ']' statement_term
{ $$ = node (variable($2,1), Node_K_delete, $4); }
| LEX_DELETE NAME statement_term
{
if (do_lint)
warning("`delete array' is a gawk extension");
if (do_unix || do_posix) {
/*
* can't use yyerror, since may have overshot
* the source line
*/
errcount++;
msg("`delete array' is a gawk extension");
}
$$ = node (variable($2,1), Node_K_delete, (NODE *) NULL);
}
| exp statement_term
{ $$ = $1; }
;
@ -746,7 +772,7 @@ comma : ',' opt_nls { yyerrok; }
%%
struct token {
char *operator; /* text to match */
const char *operator; /* text to match */
NODETYPE value; /* node type */
int class; /* lexical class */
unsigned flags; /* # of args. allowed and compatability */
@ -820,10 +846,11 @@ yyerror(va_alist)
va_dcl
{
va_list args;
char *mesg = NULL;
const char *mesg = NULL;
register char *bp, *cp;
char *scan;
char buf[120];
static char end_of_file_line[] = "(END OF FILE)";
errcount++;
/* Find the current line in the input file */
@ -845,8 +872,8 @@ va_dcl
while (bp < lexend && *bp && *bp != '\n')
bp++;
} else {
thisline = "(END OF FILE)";
bp = thisline + 13;
thisline = end_of_file_line;
bp = thisline + strlen(thisline);
}
msg("%.*s", (int) (bp - thisline), thisline);
bp = buf;
@ -982,7 +1009,7 @@ get_src_buf()
return buf;
}
#define tokadd(x) (*token++ = (x), token == tokend ? tokexpand() : token)
#define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok)
char *
tokexpand()
@ -990,15 +1017,15 @@ tokexpand()
static int toksize = 60;
int tokoffset;
tokoffset = token - tokstart;
tokoffset = tok - tokstart;
toksize *= 2;
if (tokstart)
erealloc(tokstart, char *, toksize, "tokexpand");
else
emalloc(tokstart, char *, toksize, "tokexpand");
tokend = tokstart + toksize;
token = tokstart + tokoffset;
return token;
tok = tokstart + tokoffset;
return tok;
}
#if DEBUG
@ -1053,7 +1080,7 @@ yylex()
int in_brack = 0;
want_regexp = 0;
token = tokstart;
tok = tokstart;
while ((c = nextc()) != 0) {
switch (c) {
case '[':
@ -1094,7 +1121,7 @@ retry:
lexeme = lexptr ? lexptr - 1 : lexptr;
thisline = NULL;
token = tokstart;
tok = tokstart;
yylval.nodetypeval = Node_illegal;
switch (c) {
@ -1115,13 +1142,23 @@ retry:
case '\\':
#ifdef RELAXED_CONTINUATION
if (!do_unix) { /* strip trailing white-space and/or comment */
while ((c = nextc()) == ' ' || c == '\t') continue;
/*
* This code purports to allow comments and/or whitespace
* after the `\' at the end of a line used for continuation.
* Use it at your own risk. We think it's a bad idea, which
* is why it's not on by default.
*/
if (!do_unix) {
/* strip trailing white-space and/or comment */
while ((c = nextc()) == ' ' || c == '\t')
continue;
if (c == '#')
while ((c = nextc()) != '\n') if (!c) break;
while ((c = nextc()) != '\n')
if (c == '\0')
break;
pushback();
}
#endif /*RELAXED_CONTINUATION*/
#endif /* RELAXED_CONTINUATION */
if (nextc() == '\n') {
sourceline++;
goto retry;
@ -1307,7 +1344,7 @@ retry:
tokadd(c);
}
yylval.nodeval = make_str_node(tokstart,
token - tokstart, esc_seen ? SCAN : 0);
tok - tokstart, esc_seen ? SCAN : 0);
yylval.nodeval->flags |= PERM;
return YSTRING;
@ -1443,14 +1480,14 @@ retry:
yyerror("Invalid char '%c' in expression\n", c);
/* it's some type of name-type-thing. Find its length */
token = tokstart;
tok = tokstart;
while (is_identchar(c)) {
tokadd(c);
c = nextc();
}
tokadd('\0');
emalloc(tokkey, char *, token - tokstart, "yylex");
memcpy(tokkey, tokstart, token - tokstart);
emalloc(tokkey, char *, tok - tokstart, "yylex");
memcpy(tokkey, tokstart, tok - tokstart);
pushback();
/* See if it is a special token. */
@ -1653,7 +1690,7 @@ NODE *value;
register int bucket;
len = strlen(name);
bucket = hash(name, len);
bucket = hash(name, len, (unsigned long) HASHSIZE);
getnode(hp);
hp->type = Node_hashnode;
hp->hnext = variables[bucket];
@ -1668,13 +1705,13 @@ NODE *value;
/* find the most recent hash node for name installed by install */
NODE *
lookup(name)
char *name;
const char *name;
{
register NODE *bucket;
register size_t len;
len = strlen(name);
bucket = variables[hash(name, len)];
bucket = variables[hash(name, len, (unsigned long) HASHSIZE)];
while (bucket) {
if (bucket->hlength == len && STREQN(bucket->hname, name, len))
return bucket->hvalue;
@ -1738,7 +1775,7 @@ int freeit;
name = np->param;
len = strlen(name);
save = &(variables[hash(name, len)]);
save = &(variables[hash(name, len, (unsigned long) HASHSIZE)]);
for (bucket = *save; bucket; bucket = bucket->hnext) {
if (len == bucket->hlength && STREQN(bucket->hname, name, len)) {
*save = bucket->hnext;

View File

@ -3,7 +3,7 @@
*/
/*
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@ -24,12 +24,11 @@
*/
#ifndef lint
static char rcsid[] = "$Id: builtin.c,v 1.3 1993/11/13 02:26:27 jtc Exp $";
#endif /* not lint */
static char rcsid[] = "$Id: builtin.c,v 1.4 1994/02/17 01:22:04 jtc Exp $";
#endif
#include "awk.h"
#ifndef SRANDOM_PROTO
extern void srandom P((int seed));
#endif
@ -44,10 +43,6 @@ extern int output_is_tty;
static NODE *sub_common P((NODE *tree, int global));
#ifdef GFMT_WORKAROUND
char *gfmt P((double g, int prec, char *buf));
#endif
#ifdef _CRAY
/* Work around a problem in conversion of doubles to exact integers. */
#include <float.h>
@ -66,20 +61,18 @@ double (*Log)() = log;
#define Ceil(n) ceil(n)
#endif
#if __STDC__
static void
efwrite(void *ptr, size_t size, size_t count, FILE *fp,
char *from, struct redirect *rp,int flush)
#else
static void efwrite P((const void *ptr, size_t size, size_t count, FILE *fp,
const char *from, struct redirect *rp,int flush));
static void
efwrite(ptr, size, count, fp, from, rp, flush)
void *ptr;
const void *ptr;
size_t size, count;
FILE *fp;
char *from;
const char *from;
struct redirect *rp;
int flush;
#endif
{
errno = 0;
if (fwrite(ptr, size, count, fp) != count)
@ -220,22 +213,41 @@ NODE *tree;
return tmp_number((AWKNUM) d);
}
/* %e and %f formats are not properly implemented. Someone should fix them */
/* Actually, this whole thing should be reimplemented. */
/*
* do_sprintf does the sprintf function. It is one of the uglier parts of
* gawk. Thanks to Michal Jaegerman for taming this beast and making it
* compatible with ANSI C.
*/
NODE *
do_sprintf(tree)
NODE *tree;
{
/* copy 'l' bytes from 's' to 'obufout' checking for space in the process */
/* difference of pointers should be of ptrdiff_t type, but let us be kind */
#define bchunk(s,l) if(l) {\
while((l)>ofre) {\
long olen = obufout - obuf;\
erealloc(obuf, char *, osiz*2, "do_sprintf");\
ofre+=osiz;\
osiz*=2;\
obufout = obuf + olen;\
}\
memcpy(obuf+olen,s,(size_t)(l));\
olen+=(l);\
memcpy(obufout,s,(size_t)(l));\
obufout+=(l);\
ofre-=(l);\
}
/*
 * bchunk_one(s): copy one byte from 's' to 'obufout' checking for space
 * in the process.
 *
 * 's' is a pointer expression; the byte stored is *s. The macro works on
 * the enclosing function's locals obuf, obufout, osiz and ofre (it is not
 * self-contained). When no free space is left, the buffer is regrown to
 * osiz*2 bytes via erealloc, ofre is credited with the old osiz and osiz
 * is doubled. The write offset is saved before the erealloc and obufout
 * is rebuilt from it afterwards -- presumably because erealloc may move
 * obuf (erealloc is defined elsewhere; confirm).
 *
 * NOTE(review): ofre is declared size_t in do_sprintf, so the test
 * `ofre <= 0' can only be true when ofre == 0.
 * NOTE(review): this expands to a bare { } block rather than
 * do { } while (0), and 's' is not parenthesized in the expansion; take
 * care when using it as the sole statement of an if/else or when passing
 * a compound expression.
 */
#define bchunk_one(s) {\
if(ofre <= 0) {\
long olen = obufout - obuf;\
erealloc(obuf, char *, osiz*2, "do_sprintf");\
ofre+=osiz;\
osiz*=2;\
obufout = obuf + olen;\
}\
*obufout++ = *s;\
--ofre;\
}
/* Is there space for something L big in the buffer? */
@ -259,15 +271,16 @@ NODE *tree;
NODE *r;
int toofew = 0;
char *obuf;
size_t osiz, ofre, olen;
static char chbuf[] = "0123456789abcdef";
static char sp[] = " ";
char *obuf, *obufout;
size_t osiz, ofre;
char *chbuf;
char *s0, *s1;
int cs1;
int n0;
NODE *sfmt, *arg;
register NODE *carg;
long fw, prec, lj, alt, big;
long fw, prec;
int lj, alt, big;
long *cur;
long val;
#ifdef sun386 /* Can't cast unsigned (int/long) from ptr->value */
@ -281,16 +294,17 @@ NODE *tree;
char *cp;
char *fill;
double tmpval;
char *pr_str;
int ucasehex = 0;
char signchar = 0;
size_t len;
static char sp[] = " ";
static char zero_string[] = "0";
static char lchbuf[] = "0123456789abcdefx";
static char Uchbuf[] = "0123456789ABCDEFX";
emalloc(obuf, char *, 120, "do_sprintf");
obufout = obuf;
osiz = 120;
ofre = osiz - 1;
olen = 0;
sfmt = tree_eval(tree->lnode);
sfmt = force_string(sfmt);
carg = tree->rnode;
@ -311,17 +325,17 @@ NODE *tree;
retry:
--n0;
switch (*s1++) {
switch (cs1 = *s1++) {
case '%':
bchunk("%", 1);
bchunk_one("%");
s0 = s1;
break;
case '0':
if (fill != sp || lj)
goto lose;
if (lj)
goto retry;
if (cur == &fw)
fill = "0"; /* FALL through */
fill = zero_string; /* FALL through */
case '1':
case '2':
case '3':
@ -332,42 +346,58 @@ retry:
case '8':
case '9':
if (cur == 0)
goto lose;
*cur = s1[-1] - '0';
/* goto lose; */
break;
if (prec >= 0) /* this happens only when we have */
/* a negative precision */
*cur = cs1 - '0';
while (n0 > 0 && *s1 >= '0' && *s1 <= '9') {
--n0;
*cur = *cur * 10 + *s1++ - '0';
}
if (prec < 0) { /* negative precision is discarded */
prec = 0;
cur = 0;
}
goto retry;
case '*':
if (cur == 0)
goto lose;
/* goto lose; */
break;
parse_next_arg();
*cur = force_number(arg);
free_temp(arg);
goto retry;
case ' ': /* print ' ' or '-' */
/* 'space' flag is ignored */
/* if '+' already present */
if (signchar != 0)
goto retry;
/* FALL THROUGH */
case '+': /* print '+' or '-' */
signchar = *(s1-1);
signchar = cs1;
goto retry;
case '-':
if (lj || fill != sp)
goto lose;
lj++;
if (cur == &prec) {
prec = -1;
goto retry;
}
fill = sp; /* if left justified then other */
lj++; /* filling is ignored */
goto retry;
case '.':
if (cur != &fw)
goto lose;
break;
cur = &prec;
goto retry;
case '#':
if (alt)
goto lose;
if (cur != &fw)
break;
alt++;
goto retry;
case 'l':
if (big)
goto lose;
break;
big++;
goto retry;
case 'c':
@ -381,44 +411,26 @@ retry:
#endif
cpbuf[0] = uval;
prec = 1;
pr_str = cpbuf;
goto dopr_string;
cp = cpbuf;
goto pr_tail;
}
if (! prec)
if (prec == 0)
prec = 1;
else if (prec > arg->stlen)
prec = arg->stlen;
pr_str = arg->stptr;
goto dopr_string;
cp = arg->stptr;
goto pr_tail;
case 's':
parse_next_arg();
arg = force_string(arg);
if (!prec || prec > arg->stlen)
if (prec == 0 || prec > arg->stlen)
prec = arg->stlen;
pr_str = arg->stptr;
dopr_string:
if (fw > prec && !lj) {
while (fw > prec) {
bchunk(fill, 1);
fw--;
}
}
bchunk(pr_str, (int) prec);
if (fw > prec) {
while (fw > prec) {
bchunk(fill, 1);
fw--;
}
}
s0 = s1;
free_temp(arg);
break;
cp = arg->stptr;
goto pr_tail;
case 'd':
case 'i':
parse_next_arg();
val = (long) force_number(arg);
free_temp(arg);
if (val < 0) {
sgn = 1;
val = -val;
@ -432,30 +444,19 @@ retry:
*--cp = '-';
else if (signchar)
*--cp = signchar;
if (prec != 0) /* ignore '0' flag if */
fill = sp; /* precision given */
if (prec > fw)
fw = prec;
prec = cend - cp;
if (fw > prec && !lj) {
if (fill != sp && (*cp == '-' || signchar)) {
bchunk(cp, 1);
cp++;
prec--;
fw--;
}
while (fw > prec) {
bchunk(fill, 1);
fw--;
}
if (fw > prec && ! lj && fill != sp
&& (*cp == '-' || signchar)) {
bchunk_one(cp);
cp++;
prec--;
fw--;
}
bchunk(cp, (int) prec);
if (fw > prec) {
while (fw > prec) {
bchunk(fill, 1);
fw--;
}
}
s0 = s1;
break;
goto pr_tail;
case 'u':
base = 10;
goto pr_unsigned;
@ -463,140 +464,91 @@ retry:
base = 8;
goto pr_unsigned;
case 'X':
ucasehex = 1;
case 'x':
base = 16;
goto pr_unsigned;
pr_unsigned:
if (cs1 == 'X')
chbuf = Uchbuf;
else
chbuf = lchbuf;
if (prec != 0) /* ignore '0' flag if */
fill = sp; /* precision given */
parse_next_arg();
uval = (unsigned long) force_number(arg);
free_temp(arg);
do {
*--cp = chbuf[uval % base];
if (ucasehex && isalpha(*cp))
*cp = toupper(*cp);
uval /= base;
} while (uval);
if (alt && (base == 8 || base == 16)) {
if (alt) {
if (base == 16) {
if (ucasehex)
*--cp = 'X';
else
*--cp = 'x';
}
*--cp = '0';
*--cp = cs1;
*--cp = '0';
if (fill != sp) {
bchunk(cp, 2);
cp += 2;
fw -= 2;
}
} else if (base == 8)
*--cp = '0';
}
prec = cend - cp;
if (fw > prec && !lj) {
pr_tail:
if (! lj) {
while (fw > prec) {
bchunk(fill, 1);
bchunk_one(fill);
fw--;
}
}
bchunk(cp, (int) prec);
if (fw > prec) {
while (fw > prec) {
bchunk(fill, 1);
fw--;
}
while (fw > prec) {
bchunk_one(fill);
fw--;
}
s0 = s1;
break;
case 'g':
parse_next_arg();
tmpval = force_number(arg);
free_temp(arg);
chksize(fw + prec + 9); /* 9==slop */
cp = cpbuf;
*cp++ = '%';
if (lj)
*cp++ = '-';
if (fill != sp)
*cp++ = '0';
#ifndef GFMT_WORKAROUND
if (cur != &fw) {
(void) strcpy(cp, "*.*g");
(void) sprintf(obuf + olen, cpbuf, (int) fw, (int) prec, (double) tmpval);
} else {
(void) strcpy(cp, "*g");
(void) sprintf(obuf + olen, cpbuf, (int) fw, (double) tmpval);
}
#else /* GFMT_WORKAROUND */
{
char *gptr, gbuf[120];
#define DEFAULT_G_PRECISION 6
if (fw + prec + 9 > sizeof gbuf) { /* 9==slop */
emalloc(gptr, char *, fw+prec+9, "do_sprintf(gfmt)");
} else
gptr = gbuf;
(void) gfmt((double) tmpval, cur != &fw ?
(int) prec : DEFAULT_G_PRECISION, gptr);
*cp++ = '*', *cp++ = 's', *cp = '\0';
(void) sprintf(obuf + olen, cpbuf, (int) fw, gptr);
if (fill != sp && *gptr == ' ') {
char *p = gptr;
do { *p++ = '0'; } while (*p == ' ');
}
if (gptr != gbuf) free(gptr);
}
#endif /* GFMT_WORKAROUND */
len = strlen(obuf + olen);
ofre -= len;
olen += len;
s0 = s1;
break;
case 'f':
parse_next_arg();
tmpval = force_number(arg);
free_temp(arg);
chksize(fw + prec + 9); /* 9==slop */
cp = cpbuf;
*cp++ = '%';
if (lj)
*cp++ = '-';
if (fill != sp)
*cp++ = '0';
if (cur != &fw) {
(void) strcpy(cp, "*.*f");
(void) sprintf(obuf + olen, cpbuf, (int) fw, (int) prec, (double) tmpval);
} else {
(void) strcpy(cp, "*f");
(void) sprintf(obuf + olen, cpbuf, (int) fw, (double) tmpval);
}
len = strlen(obuf + olen);
ofre -= len;
olen += len;
s0 = s1;
break;
case 'e':
case 'f':
case 'g':
case 'E':
case 'G':
parse_next_arg();
tmpval = force_number(arg);
free_temp(arg);
chksize(fw + prec + 9); /* 9==slop */
cp = cpbuf;
*cp++ = '%';
if (lj)
*cp++ = '-';
if (signchar)
*cp++ = signchar;
if (alt)
*cp++ = '#';
if (fill != sp)
*cp++ = '0';
if (cur != &fw) {
(void) strcpy(cp, "*.*e");
(void) sprintf(obuf + olen, cpbuf, (int) fw, (int) prec, (double) tmpval);
} else {
(void) strcpy(cp, "*e");
(void) sprintf(obuf + olen, cpbuf, (int) fw, (double) tmpval);
}
len = strlen(obuf + olen);
cp = strcpy(cp, "*.*") + 3;
*cp++ = cs1;
*cp = '\0';
if (prec <= 0)
prec = DEFAULT_G_PRECISION;
#ifndef GFMT_WORKAROUND
(void) sprintf(obufout, cpbuf,
(int) fw, (int) prec, (double) tmpval);
#else /* GFMT_WORKAROUND */
if (cs1 == 'g' || cs1 == 'G')
(void) sgfmt(obufout, cpbuf, (int) alt,
(int) fw, (int) prec, (double) tmpval);
else
(void) sprintf(obufout, cpbuf,
(int) fw, (int) prec, (double) tmpval);
#endif /* GFMT_WORKAROUND */
len = strlen(obufout);
ofre -= len;
olen += len;
obufout += len;
s0 = s1;
break;
default:
lose:
break;
}
if (toofew)
@ -610,7 +562,7 @@ retry:
warning("too many arguments supplied for format string");
bchunk(s0, s1 - s0);
free_temp(sfmt);
r = make_str_node(obuf, olen, ALREADY_MALLOCED);
r = make_str_node(obuf, obufout - obuf, ALREADY_MALLOCED);
r->flags |= TEMP;
return r;
}
@ -799,7 +751,8 @@ register NODE *tree;
else {
char buf[100];
sprintf(buf, OFMT, t1->numbr);
NUMTOSTR(buf, OFMT, t1->numbr);
free_temp(t1);
t1 = tmp_string(buf, strlen(buf));
}
}
@ -1128,41 +1081,75 @@ NODE *tree;
}
#ifdef GFMT_WORKAROUND
/*
* printf's %g format [can't rely on gcvt()]
* caveat: don't use as argument to *printf()!
*/
char *
gfmt(g, prec, buf)
double g; /* value to format */
int prec; /* indicates desired significant digits, not decimal places */
/*
* printf's %g format [can't rely on gcvt()]
* caveat: don't use as argument to *printf()!
* 'format' string HAS to be of "<flags>*.*g" kind, or we bomb!
*/
void
sgfmt(buf, format, alt, fwidth, prec, g)
char *buf; /* return buffer; assumed big enough to hold result */
const char *format;
int alt; /* use alternate form flag */
int fwidth; /* field width in a format */
int prec; /* indicates desired significant digits, not decimal places */
double g; /* value to format */
{
if (g == 0.0) {
(void) strcpy(buf, "0"); /* easy special case */
} else {
register char *d, *e, *p;
char dform[40];
register char *gpos;
register char *d, *e, *p;
int again = 0;
/* start with 'e' format (it'll provide nice exponent) */
if (prec < 1) prec = 1; /* at least 1 significant digit */
(void) sprintf(buf, "%.*e", prec - 1, g);
if ((e = strchr(buf, 'e')) != 0) { /* find exponent */
int exp = atoi(e+1); /* fetch exponent */
if (exp >= -4 && exp < prec) { /* per K&R2, B1.2 */
/* switch to 'f' format and re-do */
prec -= (exp + 1); /* decimal precision */
(void) sprintf(buf, "%.*f", prec, g);
e = buf + strlen(buf);
}
if ((d = strchr(buf, '.')) != 0) {
/* remove trailing zeroes and decimal point */
for (p = e; p > d && *--p == '0'; ) continue;
if (*p == '.') --p;
if (++p < e) /* copy exponent and NUL */
while ((*p++ = *e++) != '\0') continue;
}
}
strncpy(dform, format, sizeof dform - 1);
dform[sizeof dform - 1] = '\0';
gpos = strrchr(dform, '.');
if (g == 0.0 && alt == 0) { /* easy special case */
*gpos++ = 'd';
*gpos = '\0';
(void) sprintf(buf, dform, fwidth, 0);
return;
}
gpos += 2; /* advance to location of 'g' in the format */
if (prec <= 0) /* negative precision is ignored */
prec = (prec < 0 ? DEFAULT_G_PRECISION : 1);
if (*gpos == 'G')
again = 1;
/* start with 'e' format (it'll provide nice exponent) */
*gpos = 'e';
prec -= 1;
(void) sprintf(buf, dform, fwidth, prec, g);
if ((e = strrchr(buf, 'e')) != NULL) { /* find exponent */
int exp = atoi(e+1); /* fetch exponent */
if (exp >= -4 && exp <= prec) { /* per K&R2, B1.2 */
/* switch to 'f' format and re-do */
*gpos = 'f';
prec -= exp; /* decimal precision */
(void) sprintf(buf, dform, fwidth, prec, g);
e = buf + strlen(buf);
while (*--e == ' ')
continue;
e += 1;
}
else if (again != 0)
*gpos = 'E';
/* if 'alt' in force, then trailing zeros are not removed */
if (alt == 0 && (d = strrchr(buf, '.')) != NULL) {
/* throw away an excess of precision */
for (p = e; p > d && *--p == '0'; )
prec -= 1;
if (d == p)
prec -= 1;
if (prec < 0)
prec = 0;
/* and do that once again */
again = 1;
}
if (again != 0)
(void) sprintf(buf, dform, fwidth, prec, g);
}
return buf;
}
#endif /* GFMT_WORKAROUND */

File diff suppressed because it is too large Load Diff

View File

@ -1,333 +1,133 @@
/* dfa.h - declarations for GNU deterministic regexp compiler
Copyright (C) 1988 Free Software Foundation, Inc.
Written June, 1988 by Mike Haertel
NO WARRANTY
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY
NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW. EXCEPT
WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC,
RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS"
WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY
AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
CORRECTION.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M.
STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY
WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE
LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR
OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR
DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR
A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS
PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
GENERAL PUBLIC LICENSE TO COPY
1. You may copy and distribute verbatim copies of this source file
as you receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy a valid copyright notice "Copyright
(C) 1988 Free Software Foundation, Inc."; and include following the
copyright notice a verbatim copy of the above disclaimer of warranty
and of this License. You may charge a distribution fee for the
physical act of transferring a copy.
2. You may modify your copy or copies of this source file or
any portion of it, and copy and distribute such modifications under
the terms of Paragraph 1 above, provided that you also do the following:
a) cause the modified files to carry prominent notices stating
that you changed the files and the date of any change; and
b) cause the whole of any work that you distribute or publish,
that in whole or in part contains or is a derivative of this
program or any part thereof, to be licensed at no charge to all
third parties on terms identical to those contained in this
License Agreement (except that you may choose to grant more extensive
warranty protection to some or all third parties, at your option).
c) You may charge a distribution fee for the physical act of
transferring a copy, and you may at your option offer warranty
protection in exchange for a fee.
Mere aggregation of another unrelated program with this program (or its
derivative) on a volume of a storage or distribution medium does not bring
the other program under the scope of these terms.
3. You may copy and distribute this program or any portion of it in
compiled, executable or object code form under the terms of Paragraphs
1 and 2 above provided that you do the following:
a) accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of
Paragraphs 1 and 2 above; or,
b) accompany it with a written offer, valid for at least three
years, to give any third party free (except for a nominal
shipping charge) a complete machine-readable copy of the
corresponding source code, to be distributed under the terms of
Paragraphs 1 and 2 above; or,
c) accompany it with the information you received as to where the
corresponding source code may be obtained. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form alone.)
For an executable file, complete source code means all the source code for
all modules it contains; but, as a special exception, it need not include
source code for modules which are standard libraries that accompany the
operating system on which the executable file runs.
4. You may not copy, sublicense, distribute or transfer this program
except as expressly provided under this License Agreement. Any attempt
otherwise to copy, sublicense, distribute or transfer this program is void and
your rights to use the program under this License agreement shall be
automatically terminated. However, parties who have received computer
software programs from you with this License Agreement will not have
their licenses terminated so long as such parties remain in full compliance.
5. If you wish to incorporate parts of this program into other free
programs whose distribution conditions are different, write to the Free
Software Foundation at 675 Mass Ave, Cambridge, MA 02139. We have not yet
worked out a simple rule that can be stated here, but we will often permit
this. We will be guided by the two goals of preserving the free status of
all derivatives our free software and of promoting the sharing and reuse of
software.
In other words, you are welcome to use, share and improve this program.
You are forbidden to forbid anyone else to use, share and improve
what you give them. Help stamp out software-hoarding!
$Id: dfa.h,v 1.3 1993/11/13 02:26:36 jtc Exp $
$Id: dfa.h,v 1.4 1994/02/17 01:22:09 jtc Exp $
*/
#ifdef __STDC__
#ifdef SOMEDAY
#define ISALNUM(c) isalnum(c)
#define ISALPHA(c) isalpha(c)
#define ISUPPER(c) isupper(c)
#else
#define ISALNUM(c) (isascii(c) && isalnum(c))
#define ISALPHA(c) (isascii(c) && isalpha(c))
#define ISUPPER(c) (isascii(c) && isupper(c))
#endif
/* Written June, 1988 by Mike Haertel */
#else /* ! __STDC__ */
#define const
#define ISALNUM(c) (isascii(c) && isalnum(c))
#define ISALPHA(c) (isascii(c) && isalpha(c))
#define ISUPPER(c) (isascii(c) && isupper(c))
#endif /* ! __STDC__ */
/* 1 means plain parentheses serve as grouping, and backslash
parentheses are needed for literal searching.
0 means backslash-parentheses are grouping, and plain parentheses
are for literal searching. */
#ifndef RE_NO_BK_PARENS
#define RE_NO_BK_PARENS 1L
#endif
/* 1 means plain | serves as the "or"-operator, and \| is a literal.
0 means \| serves as the "or"-operator, and | is a literal. */
#ifndef RE_NO_BK_VBAR
#define RE_NO_BK_VBAR (1L << 1)
#endif
/* 0 means plain + or ? serves as an operator, and \+, \? are literals.
1 means \+, \? are operators and plain +, ? are literals. */
#ifndef RE_BK_PLUS_QM
#define RE_BK_PLUS_QM (1L << 2)
#endif
/* 1 means | binds tighter than ^ or $.
0 means the contrary. */
#ifndef RE_TIGHT_VBAR
#define RE_TIGHT_VBAR (1L << 3)
#endif
/* 1 means treat \n as an _OR operator
0 means treat it as a normal character */
#ifndef RE_NEWLINE_OR
#define RE_NEWLINE_OR (1L << 4)
#endif
/* 0 means that special characters (such as *, ^, and $) always have
their special meaning regardless of the surrounding context.
1 means that special characters may act as normal characters in some
contexts. Specifically, this applies to:
^ - only special at the beginning, or after ( or |
$ - only special at the end, or before ) or |
*, +, ? - only special when not after the beginning, (, or | */
#ifndef RE_CONTEXT_INDEP_OPS
#define RE_CONTEXT_INDEP_OPS (1L << 5)
#endif
/* 1 means that \ in a character class escapes the next character (typically
a hyphen). It is also overloaded to mean that a hyphen at the end of the range
is allowable and means that the hyphen is to be taken literally. */
#define RE_AWK_CLASS_HACK (1L << 6)
/* Now define combinations of bits for the standard possibilities. */
#ifdef notdef
#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS)
#define RE_SYNTAX_EGREP (RE_SYNTAX_AWK | RE_NEWLINE_OR)
#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
#define RE_SYNTAX_EMACS 0
#endif
/* The NULL pointer. */
#ifndef NULL
#define NULL 0
#endif
/* FIXME:
2. We should not export so much of the DFA internals.
In addition to clobbering modularity, we eat up valuable
name space. */
/* Number of bits in an unsigned char. */
#ifndef CHARBITS
#define CHARBITS 8
#endif
/* First integer value that is greater than any character code. */
#define _NOTCHAR (1 << CHARBITS)
#define NOTCHAR (1 << CHARBITS)
/* INTBITS need not be exact, just a lower bound. */
#ifndef INTBITS
#define INTBITS (CHARBITS * sizeof (int))
#endif
/* Number of ints required to hold a bit for every character. */
#define _CHARSET_INTS ((_NOTCHAR + INTBITS - 1) / INTBITS)
#define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS)
/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */
typedef int _charset[_CHARSET_INTS];
typedef int charclass[CHARCLASS_INTS];
/* The regexp is parsed into an array of tokens in postfix form. Some tokens
are operators and others are terminal symbols. Most (but not all) of these
codes are returned by the lexical analyzer. */
#ifdef __STDC__
typedef enum
{
_END = -1, /* _END is a terminal symbol that matches the
end of input; any value of _END or less in
END = -1, /* END is a terminal symbol that matches the
end of input; any value of END or less in
the parse tree is such a symbol. Accepting
states of the DFA are those that would have
a transition on _END. */
a transition on END. */
/* Ordinary character values are terminal symbols that match themselves. */
_EMPTY = _NOTCHAR, /* _EMPTY is a terminal symbol that matches
EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches
the empty string. */
_BACKREF, /* _BACKREF is generated by \<digit>; it
BACKREF, /* BACKREF is generated by \<digit>; it
is not completely handled. If the scanner
detects a transition on backref, it returns
a kind of "semi-success" indicating that
the match will have to be verified with
a backtracking matcher. */
_BEGLINE, /* _BEGLINE is a terminal symbol that matches
BEGLINE, /* BEGLINE is a terminal symbol that matches
the empty string if it is at the beginning
of a line. */
_ALLBEGLINE, /* _ALLBEGLINE is a terminal symbol that
matches the empty string if it is at the
beginning of a line; _ALLBEGLINE applies
to the entire regexp and can only occur
as the first token thereof. _ALLBEGLINE
never appears in the parse tree; a _BEGLINE
is prepended with _CAT to the entire
regexp instead. */
_ENDLINE, /* _ENDLINE is a terminal symbol that matches
ENDLINE, /* ENDLINE is a terminal symbol that matches
the empty string if it is at the end of
a line. */
_ALLENDLINE, /* _ALLENDLINE is to _ENDLINE as _ALLBEGLINE
is to _BEGLINE. */
_BEGWORD, /* _BEGWORD is a terminal symbol that matches
BEGWORD, /* BEGWORD is a terminal symbol that matches
the empty string if it is at the beginning
of a word. */
_ENDWORD, /* _ENDWORD is a terminal symbol that matches
ENDWORD, /* ENDWORD is a terminal symbol that matches
the empty string if it is at the end of
a word. */
_LIMWORD, /* _LIMWORD is a terminal symbol that matches
LIMWORD, /* LIMWORD is a terminal symbol that matches
the empty string if it is at the beginning
or the end of a word. */
_NOTLIMWORD, /* _NOTLIMWORD is a terminal symbol that
NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that
matches the empty string if it is not at
the beginning or end of a word. */
_QMARK, /* _QMARK is an operator of one argument that
QMARK, /* QMARK is an operator of one argument that
matches zero or one occurrences of its
argument. */
_STAR, /* _STAR is an operator of one argument that
STAR, /* STAR is an operator of one argument that
matches the Kleene closure (zero or more
occurrences) of its argument. */
_PLUS, /* _PLUS is an operator of one argument that
PLUS, /* PLUS is an operator of one argument that
matches the positive closure (one or more
occurrences) of its argument. */
_CAT, /* _CAT is an operator of two arguments that
REPMN, /* REPMN is a lexical token corresponding
to the {m,n} construct. REPMN never
appears in the compiled token vector. */
CAT, /* CAT is an operator of two arguments that
matches the concatenation of its
arguments. _CAT is never returned by the
arguments. CAT is never returned by the
lexical analyzer. */
_OR, /* _OR is an operator of two arguments that
OR, /* OR is an operator of two arguments that
matches either of its arguments. */
_LPAREN, /* _LPAREN never appears in the parse tree,
ORTOP, /* OR at the toplevel in the parse tree.
This is used for a boyer-moore heuristic. */
LPAREN, /* LPAREN never appears in the parse tree,
it is only a lexeme. */
_RPAREN, /* _RPAREN never appears in the parse tree. */
RPAREN, /* RPAREN never appears in the parse tree. */
_SET /* _SET and (and any value greater) is a
CSET /* CSET and (and any value greater) is a
terminal symbol that matches any of a
class of characters. */
} _token;
} token;
#else /* ! __STDC__ */
typedef short _token;
#define _END -1
#define _EMPTY _NOTCHAR
#define _BACKREF (_EMPTY + 1)
#define _BEGLINE (_EMPTY + 2)
#define _ALLBEGLINE (_EMPTY + 3)
#define _ENDLINE (_EMPTY + 4)
#define _ALLENDLINE (_EMPTY + 5)
#define _BEGWORD (_EMPTY + 6)
#define _ENDWORD (_EMPTY + 7)
#define _LIMWORD (_EMPTY + 8)
#define _NOTLIMWORD (_EMPTY + 9)
#define _QMARK (_EMPTY + 10)
#define _STAR (_EMPTY + 11)
#define _PLUS (_EMPTY + 12)
#define _CAT (_EMPTY + 13)
#define _OR (_EMPTY + 14)
#define _LPAREN (_EMPTY + 15)
#define _RPAREN (_EMPTY + 16)
#define _SET (_EMPTY + 17)
#endif /* ! __STDC__ */
/* Sets are stored in an array in the compiled regexp; the index of the
array corresponding to a given set token is given by _SET_INDEX(t). */
#define _SET_INDEX(t) ((t) - _SET)
/* Sets are stored in an array in the compiled dfa; the index of the
array corresponding to a given set token is given by SET_INDEX(t). */
#define SET_INDEX(t) ((t) - CSET)
/* Sometimes characters can only be matched depending on the surrounding
context. Such context decisions depend on what the previous character
@ -347,36 +147,36 @@ typedef short _token;
Word-constituent characters are those that satisfy isalnum().
The macro _SUCCEEDS_IN_CONTEXT determines whether a given constraint
The macro SUCCEEDS_IN_CONTEXT determines whether a given constraint
succeeds in a particular context. Prevn is true if the previous character
was a newline, currn is true if the lookahead character is a newline.
Prevl and currl similarly depend upon whether the previous and current
characters are word-constituent letters. */
#define _MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
((constraint) & (1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4)))
#define _MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \
((constraint) & (1 << (((prevl) ? 2 : 0) + ((currl) ? 1 : 0))))
#define _SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \
(_MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
&& _MATCHES_LETTER_CONTEXT(constraint, prevl, currl))
#define MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
((constraint) & 1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4))
#define MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \
((constraint) & 1 << (((prevl) ? 2 : 0) + ((currl) ? 1 : 0)))
#define SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \
(MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
&& MATCHES_LETTER_CONTEXT(constraint, prevl, currl))
/* The following macros give information about what a constraint depends on. */
#define _PREV_NEWLINE_DEPENDENT(constraint) \
#define PREV_NEWLINE_DEPENDENT(constraint) \
(((constraint) & 0xc0) >> 2 != ((constraint) & 0x30))
#define _PREV_LETTER_DEPENDENT(constraint) \
#define PREV_LETTER_DEPENDENT(constraint) \
(((constraint) & 0x0c) >> 2 != ((constraint) & 0x03))
/* Tokens that match the empty string subject to some constraint actually
work by applying that constraint to determine what may follow them,
taking into account what has gone before. The following values are
the constraints corresponding to the special tokens previously defined. */
#define _NO_CONSTRAINT 0xff
#define _BEGLINE_CONSTRAINT 0xcf
#define _ENDLINE_CONSTRAINT 0xaf
#define _BEGWORD_CONSTRAINT 0xf2
#define _ENDWORD_CONSTRAINT 0xf4
#define _LIMWORD_CONSTRAINT 0xf6
#define _NOTLIMWORD_CONSTRAINT 0xf9
#define NO_CONSTRAINT 0xff
#define BEGLINE_CONSTRAINT 0xcf
#define ENDLINE_CONSTRAINT 0xaf
#define BEGWORD_CONSTRAINT 0xf2
#define ENDWORD_CONSTRAINT 0xf4
#define LIMWORD_CONSTRAINT 0xf6
#define NOTLIMWORD_CONSTRAINT 0xf9
/* States of the recognizer correspond to sets of positions in the parse
tree, together with the constraints under which they may be matched.
@ -386,44 +186,48 @@ typedef struct
{
unsigned index; /* Index into the parse array. */
unsigned constraint; /* Constraint for matching this position. */
} _position;
} position;
/* Sets of positions are stored as arrays. */
typedef struct
{
_position *elems; /* Elements of this position set. */
position *elems; /* Elements of this position set. */
int nelem; /* Number of elements in this set. */
} _position_set;
} position_set;
/* A state of the regexp consists of a set of positions, some flags,
/* A state of the dfa consists of a set of positions, some flags,
and the token value of the lowest-numbered position of the state that
contains an _END token. */
contains an END token. */
typedef struct
{
int hash; /* Hash of the positions of this state. */
_position_set elems; /* Positions this state could match. */
position_set elems; /* Positions this state could match. */
char newline; /* True if previous state matched newline. */
char letter; /* True if previous state matched a letter. */
char backref; /* True if this state matches a \<digit>. */
unsigned char constraint; /* Constraint for this state to accept. */
int first_end; /* Token value of the first _END in elems. */
} _dfa_state;
int first_end; /* Token value of the first END in elems. */
} dfa_state;
/* If an r.e. is at most MUST_MAX characters long, we look for a string which
must appear in it; whatever's found is dropped into the struct reg. */
#define MUST_MAX 50
/* Element of a list of strings, at least one of which is known to
appear in any R.E. matching the DFA. */
struct dfamust
{
int exact;	/* NOTE(review): presumably nonzero when `must' has to be the
		   whole match rather than a substring -- confirm in dfa.c */
char *must;	/* One candidate string from the list described above. */
struct dfamust *next;	/* Next element of the list (terminator convention
			   not visible here -- presumably NULL; confirm). */
};
/* A compiled regular expression. */
struct regexp
struct dfa
{
/* Stuff built by the scanner. */
_charset *charsets; /* Array of character sets for _SET tokens. */
int cindex; /* Index for adding new charsets. */
int calloc; /* Number of charsets currently allocated. */
charclass *charclasses; /* Array of character sets for CSET tokens. */
int cindex; /* Index for adding new charclasses. */
int calloc; /* Number of charclasses currently allocated. */
/* Stuff built by the parser. */
_token *tokens; /* Postfix parse array. */
token *tokens; /* Postfix parse array. */
int tindex; /* Index for adding new tokens. */
int talloc; /* Number of tokens currently allocated. */
int depth; /* Depth required of an evaluation stack
@ -431,15 +235,15 @@ struct regexp
parse tree. */
int nleaves; /* Number of leaves on the parse tree. */
int nregexps; /* Count of parallel regexps being built
with regparse(). */
with dfaparse(). */
/* Stuff owned by the state builder. */
_dfa_state *states; /* States of the regexp. */
dfa_state *states; /* States of the dfa. */
int sindex; /* Index for adding new states. */
int salloc; /* Number of states currently allocated. */
/* Stuff built by the structure analyzer. */
_position_set *follows; /* Array of follow sets, indexed by position
position_set *follows; /* Array of follow sets, indexed by position
index. The follow of a position is the set
of positions containing characters that
could conceivably follow a character
@ -469,7 +273,7 @@ struct regexp
int **fails; /* Transition tables after failing to accept
on a state that potentially could do so. */
int *success; /* Table of acceptance conditions used in
regexecute and computed in build_state. */
dfaexec and computed in build_state. */
int *newlines; /* Transitions on newlines. The entry for a
newline in any transition table is always
-1 so we can count lines without wasting
@ -477,40 +281,41 @@ struct regexp
newline is stored separately and handled
as a special case. Newline is also used
as a sentinel at the end of the buffer. */
char must[MUST_MAX];
int mustn;
struct dfamust *musts; /* List of strings, at least one of which
is known to appear in any r.e. matching
the dfa. */
};
/* Some macros for user access to regexp internals. */
/* Some macros for user access to dfa internals. */
/* ACCEPTING returns true if s could possibly be an accepting state of r. */
#define ACCEPTING(s, r) ((r).states[s].constraint)
/* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the
specified context. */
#define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, reg) \
_SUCCEEDS_IN_CONTEXT((reg).states[state].constraint, \
#define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, dfa) \
SUCCEEDS_IN_CONTEXT((dfa).states[state].constraint, \
prevn, currn, prevl, currl)
/* FIRST_MATCHING_REGEXP returns the index number of the first of parallel
regexps that a given state could accept. Parallel regexps are numbered
starting at 1. */
#define FIRST_MATCHING_REGEXP(state, reg) (-(reg).states[state].first_end)
#define FIRST_MATCHING_REGEXP(state, dfa) (-(dfa).states[state].first_end)
/* Entry points. */
#ifdef __STDC__
/* Regsyntax() takes two arguments; the first sets the syntax bits described
/* dfasyntax() takes two arguments; the first sets the syntax bits described
earlier in this file, and the second sets the case-folding flag. */
extern void regsyntax(long, int);
extern void dfasyntax(reg_syntax_t, int);
/* Compile the given string of the given length into the given struct regexp.
/* Compile the given string of the given length into the given struct dfa.
Final argument is a flag specifying whether to build a searching or an
exact matcher. */
extern void regcompile(const char *, size_t, struct regexp *, int);
extern void dfacomp(char *, size_t, struct dfa *, int);
/* Execute the given struct regexp on the buffer of characters. The
/* Execute the given struct dfa on the buffer of characters. The
first char * points to the beginning, and the second points to the
first character after the end of the buffer, which must be a writable
place so a sentinel end-of-buffer marker can be stored there. The
@ -522,37 +327,37 @@ extern void regcompile(const char *, size_t, struct regexp *, int);
order to verify backreferencing; otherwise the flag will be cleared.
Returns NULL if no match is found, or a pointer to the first
character after the first & shortest matching string in the buffer. */
extern char *regexecute(struct regexp *, char *, char *, int, int *, int *);
extern char *dfaexec(struct dfa *, char *, char *, int, int *, int *);
/* Free the storage held by the components of a struct regexp. */
extern void reg_free(struct regexp *);
/* Free the storage held by the components of a struct dfa. */
extern void dfafree(struct dfa *);
/* Entry points for people who know what they're doing. */
/* Initialize the components of a struct regexp. */
extern void reginit(struct regexp *);
/* Initialize the components of a struct dfa. */
extern void dfainit(struct dfa *);
/* Incrementally parse a string of given length into a struct regexp. */
extern void regparse(const char *, size_t, struct regexp *);
/* Incrementally parse a string of given length into a struct dfa. */
extern void dfaparse(char *, size_t, struct dfa *);
/* Analyze a parsed regexp; second argument tells whether to build a searching
or an exact matcher. */
extern void reganalyze(struct regexp *, int);
extern void dfaanalyze(struct dfa *, int);
/* Compute, for each possible character, the transitions out of a given
state, storing them in an array of integers. */
extern void regstate(int, struct regexp *, int []);
extern void dfastate(int, struct dfa *, int []);
/* Error handling. */
/* Regerror() is called by the regexp routines whenever an error occurs. It
/* dfaerror() is called by the regexp routines whenever an error occurs. It
takes a single argument, a NUL-terminated string describing the error.
The default reg_error() prints the error message to stderr and exits.
The user can provide a different reg_free() if so desired. */
extern void reg_error(const char *);
The default dfaerror() prints the error message to stderr and exits.
The user can provide a different dfaerror() if so desired. */
extern void dfaerror(const char *);
#else /* ! __STDC__ */
extern void regsyntax(), regcompile(), reg_free(), reginit(), regparse();
extern void reganalyze(), regstate(), reg_error();
extern char *regexecute();
#endif
extern void dfasyntax(), dfacomp(), dfafree(), dfainit(), dfaparse();
extern void dfaanalyze(), dfastate(), dfaerror();
extern char *dfaexec();
#endif /* ! __STDC__ */

View File

@ -3,7 +3,7 @@
*/
/*
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@ -24,8 +24,8 @@
*/
#ifndef lint
static char rcsid[] = "$Id: eval.c,v 1.3 1993/11/13 02:26:39 jtc Exp $";
#endif /* not lint */
static char rcsid[] = "$Id: eval.c,v 1.4 1994/02/17 01:22:11 jtc Exp $";
#endif
#include "awk.h"
@ -322,7 +322,10 @@ register NODE *volatile tree;
break;
case Node_K_delete:
do_delete(tree->lnode, tree->rnode);
if (tree->rnode != NULL)
do_delete(tree->lnode, tree->rnode);
else
assoc_clear(tree->lnode);
break;
case Node_K_next:
@ -971,18 +974,20 @@ NODE *arg_list; /* Node_expression_list of calling args. */
/* should we free arg->var_value ? */
arg->var_array = n->var_array;
arg->type = Node_var_array;
arg->array_size = n->array_size;
arg->table_size = n->table_size;
}
unref(n->lnode);
/* n->lnode overlays the array size, don't unref it if array */
if (n->type != Node_var_array)
unref(n->lnode);
freenode(n);
count--;
}
while (count-- > 0) {
n = *sp++;
/* if n is an (local) array, all the elements should be freed */
if (n->type == Node_var_array) {
if (n->type == Node_var_array)
assoc_clear(n);
free(n->var_array);
}
unref(n->lnode);
freenode(n);
}

View File

@ -3,7 +3,7 @@
*/
/*
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@ -24,22 +24,24 @@
*/
#ifndef lint
static char rcsid[] = "$Id: field.c,v 1.3 1993/11/13 02:26:43 jtc Exp $";
#endif /* not lint */
static char rcsid[] = "$Id: field.c,v 1.4 1994/02/17 01:22:13 jtc Exp $";
#endif
#include "awk.h"
typedef void (* Setfunc) P((int, char*, int, NODE *));
static int (*parse_field) P((int, char **, int, NODE *,
Regexp *, void (*)(), NODE *));
Regexp *, Setfunc, NODE *));
static void rebuild_record P((void));
static int re_parse_field P((int, char **, int, NODE *,
Regexp *, void (*)(), NODE *));
Regexp *, Setfunc, NODE *));
static int def_parse_field P((int, char **, int, NODE *,
Regexp *, void (*)(), NODE *));
Regexp *, Setfunc, NODE *));
static int sc_parse_field P((int, char **, int, NODE *,
Regexp *, void (*)(), NODE *));
Regexp *, Setfunc, NODE *));
static int fw_parse_field P((int, char **, int, NODE *,
Regexp *, void (*)(), NODE *));
Regexp *, Setfunc, NODE *));
static void set_element P((int, char *, int, NODE *));
static void grow_fields_arr P((int num));
static void set_field P((int num, char *str, int len, NODE *dummy));
@ -230,7 +232,7 @@ char **buf; /* on input: string to parse; on output: point to start next */
int len;
NODE *fs;
Regexp *rp;
void (*set) (); /* routine to set the value of the parsed field */
Setfunc set; /* routine to set the value of the parsed field */
NODE *n;
{
register char *scan = *buf;
@ -248,9 +250,9 @@ NODE *n;
scan++;
field = scan;
while (scan < end
&& research(rp, scan, 0, (int)(end - scan), 1) != -1
&& research(rp, scan, 0, (end - scan), 1) != -1
&& nf < up_to) {
if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */
if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */
scan++;
if (scan == end) {
(*set)(++nf, field, (int)(scan - field), n);
@ -286,7 +288,7 @@ char **buf; /* on input: string to parse; on output: point to start next */
int len;
NODE *fs;
Regexp *rp;
void (*set) (); /* routine to set the value of the parsed field */
Setfunc set; /* routine to set the value of the parsed field */
NODE *n;
{
register char *scan = *buf;
@ -340,7 +342,7 @@ char **buf; /* on input: string to parse; on output: point to start next */
int len;
NODE *fs;
Regexp *rp;
void (*set) (); /* routine to set the value of the parsed field */
Setfunc set; /* routine to set the value of the parsed field */
NODE *n;
{
register char *scan = *buf;
@ -393,7 +395,7 @@ char **buf; /* on input: string to parse; on output: point to start next */
int len;
NODE *fs;
Regexp *rp;
void (*set) (); /* routine to set the value of the parsed field */
Setfunc set; /* routine to set the value of the parsed field */
NODE *n;
{
register char *scan = *buf;
@ -518,7 +520,7 @@ NODE *tree;
NODE *fs;
char *s;
int (*parseit)P((int, char **, int, NODE *,
Regexp *, void (*)(), NODE *));
Regexp *, Setfunc, NODE *));
Regexp *rp = NULL;
t1 = tree_eval(tree->lnode);

View File

@ -21,8 +21,8 @@
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
#ifndef lint
static char rcsid[] = "$Id: getopt.c,v 1.3 1993/11/13 02:26:46 jtc Exp $";
#endif /* not lint */
static char rcsid[] = "$Id: getopt.c,v 1.4 1994/02/17 01:22:16 jtc Exp $";
#endif
#ifdef HAVE_CONFIG_H
#if defined (emacs) || defined (CONFIG_BROKETS)

View File

@ -15,7 +15,7 @@
along with this program; if not, write to the Free Software
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
$Id: getopt.h,v 1.3 1993/11/13 02:26:50 jtc Exp $
$Id: getopt.h,v 1.4 1994/02/17 01:22:18 jtc Exp $
*/
#ifndef _GETOPT_H
@ -79,7 +79,7 @@ extern int optopt;
struct option
{
#if __STDC__
#ifdef __STDC__
const char *name;
#else
char *name;
@ -97,7 +97,7 @@ struct option
#define required_argument 1
#define optional_argument 2
#if __STDC__
#ifdef __STDC__
#if defined(__GNU_LIBRARY__)
/* Many other libraries have conflicting prototypes for getopt, with
differences in the consts, in stdlib.h. To avoid compilation

View File

@ -17,8 +17,8 @@
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
#ifndef lint
static char rcsid[] = "$Id: getopt1.c,v 1.3 1993/11/13 02:26:52 jtc Exp $";
#endif /* not lint */
static char rcsid[] = "$Id: getopt1.c,v 1.4 1994/02/17 01:22:19 jtc Exp $";
#endif
#ifdef HAVE_CONFIG_H
#if defined (emacs) || defined (CONFIG_BROKETS)

View File

@ -3,7 +3,7 @@
*/
/*
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@ -24,8 +24,8 @@
*/
#ifndef lint
static char rcsid[] = "$Id: io.c,v 1.4 1993/11/13 02:26:54 jtc Exp $";
#endif /* not lint */
static char rcsid[] = "$Id: io.c,v 1.5 1994/02/17 01:22:21 jtc Exp $";
#endif
#if !defined(VMS) && !defined(VMS_POSIX) && !defined(_MSC_VER)
#include <sys/param.h>
@ -60,14 +60,14 @@ static int close_redir P((struct redirect *rp));
static int wait_any P((int interesting));
#endif
static IOBUF *gawk_popen P((char *cmd, struct redirect *rp));
static IOBUF *iop_open P((char *file, char *how));
static IOBUF *iop_open P((const char *file, const char *how));
static int gawk_pclose P((struct redirect *rp));
static int do_pathopen P((char *file));
static int str2mode P((char *mode));
static int do_pathopen P((const char *file));
static int str2mode P((const char *mode));
static void spec_setup P((IOBUF *iop, int len, int allocate));
static int specfdopen P((IOBUF *iop, char *name, char *mode));
static int pidopen P((IOBUF *iop, char *name, char *mode));
static int useropen P((IOBUF *iop, char *name, char *mode));
static int specfdopen P((IOBUF *iop, const char *name, const char *mode));
static int pidopen P((IOBUF *iop, const char *name, const char *mode));
static int useropen P((IOBUF *iop, const char *name, const char *mode));
extern FILE *fdopen();
@ -266,6 +266,9 @@ do_input()
if (inrec(iop) == 0)
while (interpret(expression_value) && inrec(iop) == 0)
;
/* recover any space from C based alloca */
(void) alloca(0);
if (exiting)
break;
}
@ -282,10 +285,10 @@ int *errflg;
register char *str;
int tflag = 0;
int outflag = 0;
char *direction = "to";
char *mode;
const char *direction = "to";
const char *mode;
int fd;
char *what = NULL;
const char *what = NULL;
switch (tree->type) {
case Node_redirect_append:
@ -398,9 +401,13 @@ int *errflg;
rp->fp = stdout;
else if (fd == fileno(stderr))
rp->fp = stderr;
else
rp->fp = fdopen(fd, mode);
if (isatty(fd))
else {
rp->fp = fdopen(fd, (char *) mode);
/* don't leak file descriptors */
if (rp->fp == NULL)
close(fd);
}
if (rp->fp != NULL && isatty(fd))
rp->flag |= RED_NOBUF;
}
}
@ -593,7 +600,7 @@ close_io ()
static int
str2mode(mode)
char *mode;
const char *mode;
{
int ret;
@ -609,7 +616,9 @@ char *mode;
case 'a':
ret = O_WRONLY|O_APPEND|O_CREAT;
break;
default:
ret = 0; /* lint */
cant_happen();
}
return ret;
@ -626,10 +635,10 @@ char *mode;
int
devopen(name, mode)
char *name, *mode;
const char *name, *mode;
{
int openfd = INVALID_HANDLE;
char *cp, *ptr;
const char *cp, *ptr;
int flag = 0;
struct stat buf;
extern double strtod();
@ -646,7 +655,7 @@ char *name, *mode;
if (STREQ(name, "-"))
openfd = fileno(stdin);
else if (STREQN(name, "/dev/", 5) && stat(name, &buf) == -1) {
else if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) {
cp = name + 5;
if (STREQ(cp, "stdin") && (flag & O_RDONLY) == O_RDONLY)
@ -705,7 +714,7 @@ int allocate;
static int
specfdopen(iop, name, mode)
IOBUF *iop;
char *name, *mode;
const char *name, *mode;
{
int fd;
IOBUF *tp;
@ -728,7 +737,7 @@ char *name, *mode;
* to maximize portability.
*/
#ifndef GETPGRP_NOARG
#if defined(__svr4__) || defined(BSD4_4) || defined(_POSIX_SOURCE) || defined(_POSIX_JOB_CONTROL)
#if defined(__svr4__) || defined(BSD4_4) || defined(_POSIX_SOURCE)
#define GETPGRP_NOARG
#else
#if defined(i860) || defined(_AIX) || defined(hpux) || defined(VMS)
@ -752,7 +761,7 @@ char *name, *mode;
static int
pidopen(iop, name, mode)
IOBUF *iop;
char *name, *mode;
const char *name, *mode;
{
char tbuf[BUFSIZ];
int i;
@ -784,12 +793,12 @@ char *name, *mode;
static int
useropen(iop, name, mode)
IOBUF *iop;
char *name, *mode;
const char *name, *mode;
{
char tbuf[BUFSIZ], *cp;
int i;
#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0
#if defined(atarist)
#if defined(atarist) || defined(__svr4__)
gid_t groupset[NGROUPS_MAX];
#else
int groupset[NGROUPS_MAX];
@ -825,16 +834,16 @@ char *name, *mode;
static IOBUF *
iop_open(name, mode)
char *name, *mode;
const char *name, *mode;
{
int openfd = INVALID_HANDLE;
int flag = 0;
struct stat buf;
IOBUF *iop;
static struct internal {
char *name;
const char *name;
int compare;
int (*fp)();
int (*fp) P((IOBUF*,const char *,const char *));
IOBUF iob;
} table[] = {
{ "/dev/fd/", 8, specfdopen },
@ -855,12 +864,12 @@ char *name, *mode;
if (STREQ(name, "-"))
openfd = fileno(stdin);
else if (STREQN(name, "/dev/", 5) && stat(name, &buf) == -1) {
else if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) {
int i;
for (i = 0; i < devcount; i++) {
if (STREQN(name, table[i].name, table[i].compare)) {
IOBUF *iop = & table[i].iob;
iop = & table[i].iob;
if (iop->buf != NULL) {
spec_setup(iop, 0, 0);
@ -1009,7 +1018,7 @@ gawk_pclose(rp)
struct redirect *rp;
{
int rval, aval, fd = rp->iop->fd;
FILE *kludge = fdopen(fd, "r"); /* pclose needs FILE* w/ right fileno */
FILE *kludge = fdopen(fd, (char *) "r"); /* pclose needs FILE* w/ right fileno */
rp->iop->fd = dup(fd); /* kludge to allow close() + pclose() */
rval = iop_close(rp->iop);
@ -1017,7 +1026,7 @@ struct redirect *rp;
aval = pclose(kludge);
return (rval < 0 ? rval : aval);
}
#else /* VMS */
#else /* VMS || OS2 || MSDOS */
static
struct {
@ -1067,7 +1076,7 @@ struct redirect *rp;
free(pipes[cur].command);
return rval;
}
#endif /* VMS */
#endif /* VMS || OS2 || MSDOS */
#endif /* PIPES_SIMULATED */
@ -1092,7 +1101,7 @@ NODE *tree;
rp = redirect(tree->rnode, &redir_error);
if (rp == NULL && redir_error) { /* failed redirect */
if (! do_unix) {
char *s = strerror(redir_error);
s = strerror(redir_error);
unref(ERRNO_node->var_value);
ERRNO_node->var_value =
@ -1107,7 +1116,7 @@ NODE *tree;
errcode = 0;
cnt = get_a_record(&s, iop, *RS, & errcode);
if (! do_unix && errcode != 0) {
char *s = strerror(errcode);
s = strerror(errcode);
unref(ERRNO_node->var_value);
ERRNO_node->var_value = make_string(s, strlen(s));
@ -1153,7 +1162,7 @@ NODE *tree;
int
pathopen (file)
char *file;
const char *file;
{
int fd = do_pathopen(file);
@ -1185,12 +1194,12 @@ char *file;
static int
do_pathopen (file)
char *file;
const char *file;
{
static char *savepath = DEFPATH; /* defined in config.h */
static const char *savepath = DEFPATH; /* defined in config.h */
static int first = 1;
char *awkpath, *cp;
char trypath[BUFSIZ];
const char *awkpath;
char *cp, trypath[BUFSIZ];
int fd;
if (STREQ(file, "-"))

View File

@ -3,7 +3,7 @@
*/
/*
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@ -24,8 +24,8 @@
*/
#ifndef lint
static char rcsid[] = "$Id: iop.c,v 1.2 1993/08/02 17:29:54 mycroft Exp $";
#endif /* not lint */
static char rcsid[] = "$Id: iop.c,v 1.3 1994/02/17 01:22:22 jtc Exp $";
#endif
#include "awk.h"
@ -66,7 +66,7 @@ int fd;
else if (fstat(fd, &stb) < 0)
return 8*512; /* conservative in case of DECnet access */
else
return 24*512;
return 32*512;
#else
/*
@ -150,17 +150,14 @@ int *errcode;
register char *bp = iop->off;
char *bufend;
char *start = iop->off; /* beginning of record */
int saw_newline;
char rs;
int eat_whitespace;
int saw_newline = 0, eat_whitespace = 0; /* used iff grRS==0 */
if (iop->cnt == EOF) /* previous read hit EOF */
return EOF;
if (grRS == 0) { /* special case: grRS == "" */
rs = '\n';
eat_whitespace = 0;
saw_newline = 0;
} else
rs = (char) grRS;

View File

@ -3,7 +3,7 @@
*/
/*
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@ -24,7 +24,7 @@
*/
#ifndef lint
static char rcsid[] = "$Id: main.c,v 1.3 1993/11/13 02:26:57 jtc Exp $";
static char rcsid[] = "$Id: main.c,v 1.4 1994/02/17 01:22:23 jtc Exp $";
#endif
#include "getopt.h"
@ -141,7 +141,8 @@ char **argv;
extern int optind;
extern int opterr;
extern char *optarg;
char *optlist = "+F:f:v:W:";
const char *optlist = "+F:f:v:W:m:";
int stopped_early = 0;
#ifdef __EMX__
_response(&argc, &argv);
@ -175,7 +176,6 @@ char **argv;
Nnull_string->flags = (PERM|STR|STRING|NUM|NUMBER);
/* Set up the special variables */
/*
* Note that this must be done BEFORE arg parsing else -F
* breaks horribly
@ -227,6 +227,19 @@ char **argv;
pre_assign(optarg);
break;
case 'm':
	/*
	 * Research awk extension.
	 * -mf=nnn set # fields, gawk ignores
	 * -mr=nnn set record length, ditto
	 *
	 * The argument is parsed only far enough to validate its
	 * shape; the value itself is never used because gawk has
	 * no static limits to adjust.
	 */
	if (do_lint)
		warning("-m[fr] option irrelevant");
	/*
	 * Only `f=' and `r=' prefixes are accepted.  The short-circuit
	 * on optarg[0] keeps optarg[1] from being read when the
	 * argument is the empty string.
	 */
	if ((optarg[0] != 'r' && optarg[0] != 'f')
	    || optarg[1] != '=')
		warning("-m option usage: -m[fr]=nnn");
	break;
case 'W': /* gawk specific options */
gawk_option(optarg);
break;
@ -259,6 +272,14 @@ char **argv;
break;
#endif
case 0:
/*
* getopt_long found an option that sets a variable
* instead of returning a letter. Do nothing, just
* cycle around for the next one.
*/
break;
case '?':
default:
/*
@ -275,6 +296,7 @@ char **argv;
if (! do_posix
&& (optopt == 0 || strchr(optlist, optopt) == NULL)) {
optind--;
stopped_early = 1;
goto out;
} else if (optopt)
/* Use 1003.2 required message format */
@ -302,7 +324,7 @@ out:
output_is_tty = 1;
/* No -f or --source options, use next arg */
if (numfiles == -1) {
if (optind > argc - 1) /* no args left */
if (optind > argc - 1 || stopped_early) /* no args left or no program */
usage(1);
srcfiles[++numfiles].stype = CMDLINE;
srcfiles[numfiles].val = argv[optind];
@ -342,16 +364,15 @@ static void
usage(exitval)
int exitval;
{
char *opt1 = " -f progfile [--]";
#if defined(MSDOS) || defined(OS2)
char *opt2 = " [--] \"program\"";
const char *opt1 = " -f progfile [--]";
#if defined(MSDOS) || defined(OS2) || defined(VMS)
const char *opt2 = " [--] \"program\"";
#else
char *opt2 = " [--] 'program'";
const char *opt2 = " [--] 'program'";
#endif
char *regops = " [POSIX or GNU style options]";
const char *regops = " [POSIX or GNU style options]";
version();
fprintf(stderr, "Usage: %s%s%s file ...\n\t%s%s%s file ...\n",
fprintf(stderr, "Usage:\t%s%s%s file ...\n\t%s%s%s file ...\n",
myname, regops, opt1, myname, regops, opt2);
/* GNU long options info. Gack. */
@ -359,12 +380,13 @@ int exitval;
fputs("\t-f progfile\t\t--file=progfile\n", stderr);
fputs("\t-F fs\t\t\t--field-separator=fs\n", stderr);
fputs("\t-v var=val\t\t--assign=var=val\n", stderr);
fputs("\t-m[fr]=val\n", stderr);
fputs("\t-W compat\t\t--compat\n", stderr);
fputs("\t-W copyleft\t\t--copyleft\n", stderr);
fputs("\t-W copyright\t\t--copyright\n", stderr);
fputs("\t-W help\t\t\t--help\n", stderr);
fputs("\t-W lint\t\t\t--lint\n", stderr);
#if 0
#ifdef NOSTALGIA
fputs("\t-W nostalgia\t\t--nostalgia\n", stderr);
#endif
#ifdef DEBUG
@ -399,7 +421,6 @@ GNU General Public License for more details.\n\
along with this program; if not, write to the Free Software\n\
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n";
version();
fputs(blurb_part1, stderr);
fputs(blurb_part2, stderr);
fputs(blurb_part3, stderr);
@ -411,7 +432,8 @@ cmdline_fs(str)
char *str;
{
register NODE **tmp;
int len = strlen(str);
/* int len = strlen(str); *//* don't do that - we want to
avoid mismatched types */
tmp = get_lhs(FS_node, (Func_ptr *) 0);
unref(*tmp);
@ -428,7 +450,7 @@ char *str;
if (do_unix && ! do_posix)
str[0] = '\t';
}
*tmp = make_str_node(str, len, SCAN); /* do process escapes */
*tmp = make_str_node(str, strlen(str), SCAN); /* do process escapes */
set_FS();
}
@ -460,9 +482,9 @@ char **argv;
*/
struct varinit {
NODE **spec;
char *name;
const char *name;
NODETYPE type;
char *strval;
const char *strval;
AWKNUM numval;
Func_ptr assign;
};
@ -493,9 +515,10 @@ init_vars()
register struct varinit *vp;
for (vp = varinit; vp->name; vp++) {
*(vp->spec) = install(vp->name,
*(vp->spec) = install((char *) vp->name,
node(vp->strval == 0 ? make_number(vp->numval)
: make_string(vp->strval, strlen(vp->strval)),
: make_string((char *) vp->strval,
strlen(vp->strval)),
vp->type, (NODE *) NULL));
if (vp->assign)
(*(vp->assign))();
@ -731,6 +754,8 @@ static void
version()
{
/*
 * Print the version banner (version_string and PATCHLEVEL) on stderr
 * and terminate the process with status 0; this function never
 * returns to its caller.
 * NOTE(review): the GNU coding standards describe --version output
 * as going to standard output -- confirm whether stderr is intended.
 */
fprintf(stderr, "%s, patchlevel %d\n", version_string, PATCHLEVEL);
/* per GNU coding standards, exit successfully, do nothing else */
exit(0);
}
/* this mess will improve in 2.16 */

View File

@ -3,7 +3,7 @@
*/
/*
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@ -24,8 +24,8 @@
*/
#ifndef lint
static char rcsid[] = "$Id: msg.c,v 1.2 1993/08/02 17:29:55 mycroft Exp $";
#endif /* not lint */
static char rcsid[] = "$Id: msg.c,v 1.3 1994/02/17 01:22:25 jtc Exp $";
#endif
#include "awk.h"
@ -35,8 +35,8 @@ char *source = NULL;
/* VARARGS2 */
void
err(s, emsg, argp)
char *s;
char *emsg;
const char *s;
const char *emsg;
va_list argp;
{
char *file;
@ -53,8 +53,9 @@ va_list argp;
}
if (FNR) {
file = FILENAME_node->var_value->stptr;
(void) putc('(', stderr);
if (file)
(void) fprintf(stderr, "(FILENAME=%s ", file);
(void) fprintf(stderr, "FILENAME=%s ", file);
(void) fprintf(stderr, "FNR=%d) ", FNR);
}
/*
 * `s' is a plain message prefix, not a format: emit it verbatim so a
 * stray '%' in it can never be interpreted as a conversion.
 */
(void) fputs(s, stderr);

View File

@ -3,7 +3,7 @@
*/
/*
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@ -24,8 +24,8 @@
*/
#ifndef lint
static char rcsid[] = "$Id: node.c,v 1.3 1993/11/13 02:27:00 jtc Exp $";
#endif /* not lint */
static char rcsid[] = "$Id: node.c,v 1.4 1994/02/17 01:22:27 jtc Exp $";
#endif
#include "awk.h"
@ -106,7 +106,7 @@ register NODE *n;
* (more complicated) variations on this theme didn't seem to pay off, but
* systematic testing might be in order at some point
*/
static char *values[] = {
static const char *values[] = {
"0",
"1",
"2",
@ -141,7 +141,7 @@ register NODE *s;
num = (long)s->numbr;
if ((AWKNUM) num == s->numbr) { /* integral value */
if (num < NVAL && num >= 0) {
sp = values[num];
sp = (char *) values[num];
s->stlen = 1;
} else {
(void) sprintf(sp, "%ld", num);
@ -149,7 +149,7 @@ register NODE *s;
}
s->stfmt = -1;
} else {
(void) sprintf(sp, CONVFMT, s->numbr);
NUMTOSTR(sp, CONVFMT, s->numbr);
s->stlen = strlen(sp);
s->stfmt = (char)CONVFMTidx;
}

View File

@ -1,3 +1 @@
/* $Id: patchlevel.h,v 1.3 1993/11/13 02:27:02 jtc Exp $ */
#define PATCHLEVEL 3
#define PATCHLEVEL 4

View File

@ -3,7 +3,7 @@
*/
/*
* Copyright (C) 1991, 1992, the Free Software Foundation, Inc.
* Copyright (C) 1991, 1992, 1993 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@ -22,7 +22,7 @@
* along with GAWK; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* $Id: protos.h,v 1.2 1993/08/02 17:30:01 mycroft Exp $
* $Id: protos.h,v 1.3 1994/02/17 01:22:31 jtc Exp $
*/
#ifdef __STDC__
@ -53,7 +53,7 @@ extern char *strstr P((const char *s1, const char *s2));
extern int strlen P((const char *));
extern long strtol P((const char *, char **, int));
#if !defined(_MSC_VER) && !defined(__GNU_LIBRARY__)
extern int strftime P((char *, int, const char *, const struct tm *));
extern size_t strftime P((char *, size_t, const char *, const struct tm *));
#endif
extern time_t time P((time_t *));
extern aptr_t memset P((aptr_t, int, size_t));
@ -62,10 +62,9 @@ extern aptr_t memmove P((aptr_t, const aptr_t, size_t));
extern aptr_t memchr P((const aptr_t, int, size_t));
extern int memcmp P((const aptr_t, const aptr_t, size_t));
/* extern int fprintf P((FILE *, char *, ...)); */
extern int fprintf P(());
extern int fprintf P((FILE *, const char *, ...));
#if !defined(MSDOS) && !defined(__GNU_LIBRARY__)
extern int fwrite P((const char *, int, int, FILE *));
extern size_t fwrite P((const void *, size_t, size_t, FILE *));
extern int fputs P((const char *, FILE *));
extern int unlink P((const char *));
#endif
@ -77,7 +76,7 @@ extern void abort P(());
extern int isatty P((int));
extern void exit P((int));
extern int system P((const char *));
extern int sscanf P((/* char *, char *, ... */));
extern int sscanf P((const char *, const char *, ...));
#ifndef toupper
extern int toupper P((int));
#endif
@ -93,8 +92,8 @@ extern int stat P((const char *, struct stat *));
extern off_t lseek P((int, off_t, int));
extern int fseek P((FILE *, long, int));
extern int close P((int));
extern int creat P(());
extern int open P(());
extern int creat P((const char *, mode_t));
extern int open P((const char *, int, ...));
extern int pipe P((int *));
extern int dup P((int));
extern int dup2 P((int,int));

View File

@ -3,7 +3,7 @@
*/
/*
* Copyright (C) 1991, 1992 the Free Software Foundation, Inc.
* Copyright (C) 1991, 1992, 1993 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@ -24,8 +24,8 @@
*/
#ifndef lint
static char rcsid[] = "$Id: re.c,v 1.3 1993/11/13 02:27:05 jtc Exp $";
#endif /* not lint */
static char rcsid[] = "$Id: re.c,v 1.4 1994/02/17 01:22:33 jtc Exp $";
#endif
#include "awk.h"
@ -34,12 +34,12 @@ static char rcsid[] = "$Id: re.c,v 1.3 1993/11/13 02:27:05 jtc Exp $";
Regexp *
make_regexp(s, len, ignorecase, dfa)
char *s;
int len;
size_t len;
int ignorecase;
int dfa;
{
Regexp *rp;
char *err;
const char *rerr;
char *src = s;
char *temp;
char *end = s + len;
@ -94,7 +94,7 @@ int dfa;
*dest = '\0' ; /* Only necessary if we print dest ? */
emalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
memset((char *) rp, 0, sizeof(*rp));
emalloc(rp->pat.buffer, char *, 16, "make_regexp");
emalloc(rp->pat.buffer, unsigned char *, 16, "make_regexp");
rp->pat.allocated = 16;
emalloc(rp->pat.fastmap, char *, 256, "make_regexp");
@ -103,13 +103,14 @@ int dfa;
else
rp->pat.translate = NULL;
len = dest - temp;
if ((err = re_compile_pattern(temp, len, &(rp->pat))) != NULL)
fatal("%s: /%s/", err, temp);
if ((rerr = re_compile_pattern(temp, len, &(rp->pat))) != NULL)
fatal("%s: /%s/", rerr, temp);
if (dfa && !ignorecase) {
regcompile(temp, len, &(rp->dfareg), 1);
dfacomp(temp, len, &(rp->dfareg), 1);
rp->dfa = 1;
} else
rp->dfa = 0;
free(temp);
return rp;
}
@ -119,24 +120,24 @@ research(rp, str, start, len, need_start)
Regexp *rp;
register char *str;
int start;
register int len;
register size_t len;
int need_start;
{
char *ret = str;
if (rp->dfa) {
char save1;
char save2;
char save;
int count = 0;
int try_backref;
save1 = str[start+len];
str[start+len] = '\n';
save2 = str[start+len+1];
ret = regexecute(&(rp->dfareg), str+start, str+start+len+1, 1,
/*
* dfa likes to stick a '\n' right after the matched
* text. So we just save and restore the character.
*/
save = str[start+len];
ret = dfaexec(&(rp->dfareg), str+start, str+start+len, 1,
&count, &try_backref);
str[start+len] = save1;
str[start+len+1] = save2;
str[start+len] = save;
}
if (ret) {
if (need_start || rp->dfa == 0)
@ -155,12 +156,12 @@ Regexp *rp;
free(rp->pat.buffer);
free(rp->pat.fastmap);
if (rp->dfa)
reg_free(&(rp->dfareg));
dfafree(&(rp->dfareg));
free(rp);
}
void
reg_error(s)
dfaerror(s)
const char *s;
{
fatal(s);
@ -198,7 +199,8 @@ NODE *t;
t->re_text = dupnode(t1);
free_temp(t1);
}
t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen, IGNORECASE, t->re_cnt);
t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen,
IGNORECASE, t->re_cnt);
t->re_flags &= ~CASE;
t->re_flags |= IGNORECASE;
return t->re_reg;
@ -207,6 +209,8 @@ NODE *t;
void
resetup()
{
(void) re_set_syntax(RE_SYNTAX_AWK);
regsyntax(RE_SYNTAX_AWK, 0);
reg_syntax_t syn = RE_SYNTAX_AWK;
(void) re_set_syntax(syn);
dfasyntax(syn, 0);
}

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,11 @@
/* Definitions for data structures callers pass the regex library.
/* Definitions for data structures and routines for the regular
expression library, version 0.12.
Copyright (C) 1985, 1989-90 Free Software Foundation, Inc.
Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 1, or (at your option)
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
@ -16,247 +17,492 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
$Id: regex.h,v 1.2 1993/08/02 17:30:13 mycroft Exp $
$Id: regex.h,v 1.3 1994/02/17 01:22:42 jtc Exp $
*/
#ifndef __REGEXP_LIBRARY
#define __REGEXP_LIBRARY
#ifndef __REGEXP_LIBRARY_H__
#define __REGEXP_LIBRARY_H__
/* Define number of parens for which we record the beginnings and ends.
This affects how much space the `struct re_registers' type takes up. */
#ifndef RE_NREGS
#define RE_NREGS 10
#endif
/* POSIX says that <sys/types.h> must be included (by the caller) before
<regex.h>. */
#define BYTEWIDTH 8
/* Maximum number of duplicates an interval can allow. */
#ifndef RE_DUP_MAX
#define RE_DUP_MAX ((1 << 15) - 1)
#ifdef VMS
/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
should be there. */
#include <stddef.h>
#endif
/* This defines the various regexp syntaxes. */
extern long obscure_syntax;
/* The following two types have to be signed and unsigned integer types
wide enough to hold the value of a pointer. For most ANSI compilers
ptrdiff_t and size_t would likely be OK. Still, the size of these two
types is 2 for Microsoft C. Ugh... */
typedef long s_reg_t;
typedef unsigned long active_reg_t;
/* The following bits are used to determine the regexp syntax we
recognize. The set/not-set meanings are chosen so that Emacs syntax
remains the value 0. The bits are given in alphabetical order, and
the definitions shifted by one from the previous bit; thus, when we
add or remove a bit, only one other definition need change. */
typedef unsigned long reg_syntax_t;
/* The following bits are used in the obscure_syntax variable to choose among
alternative regexp syntaxes. */
/* If this bit is not set, then \ inside a bracket expression is literal.
If set, then such a \ quotes the following character. */
#define RE_BACKSLASH_ESCAPE_IN_LISTS (1L)
/* If this bit is set, plain parentheses serve as grouping, and backslash
parentheses are needed for literal searching.
If not set, backslash-parentheses are grouping, and plain parentheses
are for literal searching. */
#define RE_NO_BK_PARENS 1L
/* If this bit is not set, then + and ? are operators, and \+ and \? are
literals.
If set, then \+ and \? are operators and + and ? are literals. */
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
/* If this bit is set, plain | serves as the `or'-operator, and \| is a
literal.
If not set, \| serves as the `or'-operator, and | is a literal. */
#define RE_NO_BK_VBAR (1L << 1)
/* If this bit is not set, plain + or ? serves as an operator, and \+, \? are
literals.
If set, \+, \? are operators and plain +, ? are literals. */
#define RE_BK_PLUS_QM (1L << 2)
/* If this bit is set, | binds tighter than ^ or $.
If not set, the contrary. */
#define RE_TIGHT_VBAR (1L << 3)
/* If this bit is set, then treat newline as an OR operator.
If not set, treat it as a normal character. */
#define RE_NEWLINE_OR (1L << 4)
/* If this bit is set, then special characters may act as normal
characters in some contexts. Specifically, this applies to:
^ -- only special at the beginning, or after ( or |;
$ -- only special at the end, or before ) or |;
*, +, ? -- only special when not after the beginning, (, or |.
If this bit is not set, special characters (such as *, ^, and $)
always have their special meaning regardless of the surrounding
context. */
#define RE_CONTEXT_INDEP_OPS (1L << 5)
/* If this bit is not set, then \ before anything inside [ and ] is taken as
a real \.
If set, then such a \ escapes the following character. This is a
special case for awk. */
#define RE_AWK_CLASS_HACK (1L << 6)
/* If this bit is set, then \{ and \} or { and } serve as interval operators.
If not set, then \{ and \} and { and } are treated as literals. */
#define RE_INTERVALS (1L << 7)
/* If this bit is not set, then \{ and \} serve as interval operators and
{ and } are literals.
If set, then { and } serve as interval operators and \{ and \} are
literals. */
#define RE_NO_BK_CURLY_BRACES (1L << 8)
/* If this bit is set, then character classes are supported; they are:
[:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
/* If this bit is set, then character classes are supported. They are:
[:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
[:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
If not set, then character classes are not supported. */
#define RE_CHAR_CLASSES (1L << 9)
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
/* If this bit is set, then the dot re doesn't match a null byte.
If not set, it does. */
#define RE_DOT_NOT_NULL (1L << 10)
/* If this bit is set, then ^ and $ are always anchors (outside bracket
expressions, of course).
If this bit is not set, then it depends:
^ is an anchor if it is at the beginning of a regular
expression or after an open-group or an alternation operator;
$ is an anchor if it is at the end of a regular expression, or
before a close-group or an alternation operator.
/* If this bit is set, then [^...] doesn't match a newline.
If not set, it does. */
#define RE_HAT_NOT_NEWLINE (1L << 11)
This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
POSIX draft 11.2 says that * etc. in leading positions is undefined.
We already implemented a previous draft which made those constructs
invalid, though, so we haven't changed the code back. */
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
/* If this bit is set, back references are recognized.
If not set, they aren't. */
#define RE_NO_BK_REFS (1L << 12)
/* If this bit is set, then special characters are always special
regardless of where they are in the pattern.
If this bit is not set, then special characters are special only in
some contexts; otherwise they are ordinary. Specifically,
* + ? and intervals are only special when not after the beginning,
open-group, or alternation operator. */
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
/* If this bit is set, back references must refer to a preceding
subexpression. If not set, a back reference to a nonexistent
subexpression is treated as literal characters. */
#define RE_NO_EMPTY_BK_REF (1L << 13)
/* If this bit is set, then *, +, ?, and { cannot be first in an re or
immediately after an alternation or begin-group operator. */
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
/* If this bit is set, bracket expressions can't be empty.
If it is not set, they can be empty. */
#define RE_NO_EMPTY_BRACKETS (1L << 14)
/* If this bit is set, then . matches newline.
If not set, then it doesn't. */
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
/* If this bit is set, then *, +, ? and { cannot be first in an re or
immediately after a |, or a (. Furthermore, a | cannot be first or
last in an re, or immediately follow another | or a (. Also, a ^
cannot appear in a nonleading position and a $ cannot appear in a
nontrailing position (outside of bracket expressions, that is). */
#define RE_CONTEXTUAL_INVALID_OPS (1L << 15)
/* If this bit is set, then . doesn't match NUL.
If not set, then it does. */
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
/* If this bit is set, then +, ? and | aren't recognized as operators.
If it's not, they are. */
#define RE_LIMITED_OPS (1L << 16)
/* If this bit is set, nonmatching lists [^...] do not match newline.
If not set, they do. */
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
/* If this bit is set, then an ending range point has to collate higher
or equal to the starting range point.
If it's not set, then when the ending range point collates lower
than the starting range point, the range is just considered empty. */
#define RE_NO_EMPTY_RANGES (1L << 17)
/* If this bit is set, either \{...\} or {...} defines an
interval, depending on RE_NO_BK_BRACES.
If not set, \{, \}, {, and } are literals. */
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
/* If this bit is set, then a hyphen (-) can't be an ending range point.
If it isn't, then it can. */
#define RE_NO_HYPHEN_RANGE_END (1L << 18)
/* If this bit is set, +, ? and | aren't recognized as operators.
If not set, they are. */
#define RE_LIMITED_OPS (RE_INTERVALS << 1)
/* If this bit is set, newline is an alternation operator.
If not set, newline is literal. */
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
/* Define combinations of bits for the standard possibilities. */
#define RE_SYNTAX_POSIX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
| RE_CONTEXT_INDEP_OPS)
#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_AWK_CLASS_HACK)
#define RE_SYNTAX_EGREP (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
| RE_CONTEXT_INDEP_OPS | RE_NEWLINE_OR)
#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
/* If this bit is set, then `{...}' defines an interval, and \{ and \}
are literals.
If not set, then `\{...\}' defines an interval. */
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
/* If this bit is set, (...) defines a group, and \( and \) are literals.
If not set, \(...\) defines a group, and ( and ) are literals. */
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
/* If this bit is set, then \<digit> matches <digit>.
If not set, then \<digit> is a back-reference. */
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
/* If this bit is set, then | is an alternation operator, and \| is literal.
If not set, then \| is an alternation operator, and | is literal. */
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
/* If this bit is set, then an ending range point collating lower
than the starting range point, as in [z-a], is invalid.
If not set, then when the ending range point collates lower than the
starting range point, the range is ignored. */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
/* If this bit is set, then an unmatched ) is ordinary.
If not set, then an unmatched ) is invalid. */
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
/* If this bit is set, do not process the GNU regex operators.
If not set, then the GNU regex operators are recognized. */
#define RE_NO_GNU_OPS (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
/* This global variable defines the particular regexp syntax to use (for
some interfaces). When a regexp is compiled, the syntax used is
stored in the pattern buffer, so changing this does not affect
already-compiled regexps. */
extern reg_syntax_t re_syntax_options;
/* Define combinations of the above bits for the standard possibilities.
(The [[[ comments delimit what gets put into the Texinfo file, so
don't delete them!) */
/* [[[begin syntaxes]]] */
#define RE_SYNTAX_EMACS 0
#define RE_SYNTAX_POSIX_BASIC (RE_INTERVALS | RE_BK_PLUS_QM \
| RE_CHAR_CLASSES | RE_DOT_NOT_NULL \
| RE_HAT_NOT_NEWLINE | RE_NO_EMPTY_BK_REF \
| RE_NO_EMPTY_BRACKETS | RE_LIMITED_OPS \
| RE_NO_EMPTY_RANGES | RE_NO_HYPHEN_RANGE_END)
#define RE_SYNTAX_POSIX_EXTENDED (RE_INTERVALS | RE_NO_BK_CURLY_BRACES \
| RE_NO_BK_VBAR | RE_NO_BK_PARENS \
| RE_HAT_NOT_NEWLINE | RE_CHAR_CLASSES \
| RE_NO_EMPTY_BRACKETS | RE_CONTEXTUAL_INVALID_OPS \
| RE_NO_BK_REFS | RE_NO_EMPTY_RANGES \
| RE_NO_HYPHEN_RANGE_END)
#define RE_SYNTAX_AWK \
(RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
| RE_NO_BK_PARENS | RE_NO_BK_REFS \
| RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
| RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
#define RE_SYNTAX_GNU_AWK \
(RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
#define RE_SYNTAX_POSIX_AWK \
(RE_SYNTAX_GNU_AWK | RE_NO_GNU_OPS)
#define RE_SYNTAX_GREP \
(RE_BK_PLUS_QM | RE_CHAR_CLASSES \
| RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
| RE_NEWLINE_ALT)
#define RE_SYNTAX_EGREP \
(RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
| RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
| RE_NEWLINE_ALT | RE_NO_BK_PARENS \
| RE_NO_BK_VBAR)
#define RE_SYNTAX_POSIX_EGREP \
(RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
/* Syntax bits common to both basic and extended POSIX regex syntax. */
#define _RE_SYNTAX_POSIX_COMMON \
(RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
| RE_INTERVALS | RE_NO_EMPTY_RANGES)
#define RE_SYNTAX_POSIX_BASIC \
(_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
isn't minimal, since other operators, such as \`, aren't disabled. */
#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
(_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
#define RE_SYNTAX_POSIX_EXTENDED \
(_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
| RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
| RE_NO_BK_PARENS | RE_NO_BK_VBAR \
| RE_UNMATCHED_RIGHT_PAREN_ORD)
/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
(_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
| RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
| RE_NO_BK_PARENS | RE_NO_BK_REFS \
| RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* [[[end syntaxes]]] */
/* Maximum number of duplicates an interval can allow. Some systems
(erroneously) define this in other header files, but we want our
value, so remove any previous define. */
#ifdef RE_DUP_MAX
#undef RE_DUP_MAX
#endif
/* if sizeof(int) == 2, then ((1 << 15) - 1) overflows */
#define RE_DUP_MAX (0x7fff)
/* This data structure is used to represent a compiled pattern. */
/* POSIX `cflags' bits (i.e., information for `regcomp'). */
/* If this bit is set, then use extended regular expression syntax.
If not set, then use basic regular expression syntax. */
#define REG_EXTENDED 1
/* If this bit is set, then ignore case when matching.
If not set, then case is significant. */
#define REG_ICASE (REG_EXTENDED << 1)
/* If this bit is set, then anchors do match at newline
characters in the string.
If not set, then anchors do not match at newlines. */
#define REG_NEWLINE (REG_ICASE << 1)
/* If this bit is set, then report only success or fail in regexec.
If not set, then returns differ between not matching and errors. */
#define REG_NOSUB (REG_NEWLINE << 1)
/* POSIX `eflags' bits (i.e., information for regexec). */
/* If this bit is set, then the beginning-of-line operator doesn't match
the beginning of the string (presumably because it's not the
beginning of a line).
If not set, then the beginning-of-line operator does match the
beginning of the string. */
#define REG_NOTBOL 1
/* Like REG_NOTBOL, except for the end-of-line. */
#define REG_NOTEOL (1 << 1)
/* If any error codes are removed, changed, or added, update the
`re_error_msg' table in regex.c. */
typedef enum
{
REG_NOERROR = 0, /* Success. */
REG_NOMATCH, /* Didn't find a match (for regexec). */
/* POSIX regcomp return error codes. (In the order listed in the
standard.) */
REG_BADPAT, /* Invalid pattern. */
REG_ECOLLATE, /* Not implemented. */
REG_ECTYPE, /* Invalid character class name. */
REG_EESCAPE, /* Trailing backslash. */
REG_ESUBREG, /* Invalid back reference. */
REG_EBRACK, /* Unmatched left bracket. */
REG_EPAREN, /* Parenthesis imbalance. */
REG_EBRACE, /* Unmatched \{. */
REG_BADBR, /* Invalid contents of \{\}. */
REG_ERANGE, /* Invalid range end. */
REG_ESPACE, /* Ran out of memory. */
REG_BADRPT, /* No preceding re for repetition op. */
/* Error codes we've added. */
REG_EEND, /* Premature end. */
REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
} reg_errcode_t;
/* This data structure represents a compiled pattern. Before calling
the pattern compiler, the fields `buffer', `allocated', `fastmap',
`translate', and `no_sub' can be set. After the pattern has been
compiled, the `re_nsub' field is available. All other fields are
private to the regex routines. */
struct re_pattern_buffer
{
char *buffer; /* Space holding the compiled pattern commands. */
long allocated; /* Size of space that `buffer' points to. */
long used; /* Length of portion of buffer actually occupied */
char *fastmap; /* Pointer to fastmap, if any, or zero if none. */
/* re_search uses the fastmap, if there is one,
to skip over totally implausible characters. */
char *translate; /* Translate table to apply to all characters before
comparing, or zero for no translation.
The translation is applied to a pattern when it is
compiled and to data when it is matched. */
char fastmap_accurate;
/* Set to zero when a new pattern is stored,
set to one when the fastmap is updated from it. */
char can_be_null; /* Set to one by compiling fastmap
if this pattern might match the null string.
It does not necessarily match the null string
in that case, but if this is zero, it cannot.
2 as value means can match null string
but at end of range or before a character
listed in the fastmap. */
};
{
/* [[[begin pattern_buffer]]] */
/* Space that holds the compiled pattern. It is declared as
`unsigned char *' because its elements are
sometimes used as array indexes. */
unsigned char *buffer;
/* Number of bytes to which `buffer' points. */
unsigned long allocated;
/* Number of bytes actually used in `buffer'. */
unsigned long used;
/* Syntax setting with which the pattern was compiled. */
reg_syntax_t syntax;
/* Pointer to a fastmap, if any, otherwise zero. re_search uses
the fastmap, if there is one, to skip over impossible
starting points for matches. */
char *fastmap;
/* Either a translate table to apply to all characters before
comparing them, or zero for no translation. The translation
is applied to a pattern when it is compiled and to a string
when it is matched. */
char *translate;
/* Number of subexpressions found by the compiler. */
size_t re_nsub;
/* Zero if this pattern cannot match the empty string, one else.
Well, in truth it's used only in `re_search_2', to see
whether or not we should use the fastmap, so we don't set
this absolutely perfectly; see `re_compile_fastmap' (the
`duplicate' case). */
unsigned can_be_null : 1;
/* If REGS_UNALLOCATED, allocate space in the `regs' structure
for `max (RE_NREGS, re_nsub + 1)' groups.
If REGS_REALLOCATE, reallocate space if necessary.
If REGS_FIXED, use what's there. */
#define REGS_UNALLOCATED 0
#define REGS_REALLOCATE 1
#define REGS_FIXED 2
unsigned regs_allocated : 2;
/* Set to zero when `regex_compile' compiles a pattern; set to one
by `re_compile_fastmap' if it updates the fastmap. */
unsigned fastmap_accurate : 1;
/* If set, `re_match_2' does not return information about
subexpressions. */
unsigned no_sub : 1;
/* If set, a beginning-of-line anchor doesn't match at the
beginning of the string. */
unsigned not_bol : 1;
/* Similarly for an end-of-line anchor. */
unsigned not_eol : 1;
/* If true, an anchor at a newline matches. */
unsigned newline_anchor : 1;
/* [[[end pattern_buffer]]] */
};
typedef struct re_pattern_buffer regex_t;
/* search.c (search_buffer) needs this one value. It is defined both in
regex.c and here. */
/* search.c (search_buffer) in Emacs needs this one opcode value. It is
defined both in `regex.c' and here. */
#define RE_EXACTN_VALUE 1
/* Structure to store register contents data in.
Pass the address of such a structure as an argument to re_match, etc.,
if you want this information back.
For i from 1 to RE_NREGS - 1, start[i] records the starting index in
the string of where the ith subexpression matched, and end[i] records
one after the ending index. start[0] and end[0] are analogous, for
the entire pattern. */
struct re_registers
{
int start[RE_NREGS];
int end[RE_NREGS];
};
/* Type for byte offsets within the string. POSIX mandates this. */
typedef int regoff_t;
/* This is the structure we store register match data in. See
regex.texinfo for a full description of what registers match. */
struct re_registers
{
unsigned num_regs;
regoff_t *start;
regoff_t *end;
};
/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
`re_match_2' returns information about at least this many registers
the first time a `regs' structure is passed. */
#ifndef RE_NREGS
#define RE_NREGS 30
#endif
/* POSIX specification for registers. Aside from using different names than
`re_registers', POSIX uses an array of structures, instead of a
structure of arrays. */
typedef struct
{
regoff_t rm_so; /* Byte offset from string's start to substring's start. */
regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
} regmatch_t;
/* Declarations for routines. */
/* To avoid duplicating every routine declaration -- once with a
prototype (if we are ANSI), and once without (if we aren't) -- we
use the following macro to declare argument types. This
unfortunately clutters up the declarations a bit, but I think it's
worth it. */
#ifdef __STDC__
extern char *re_compile_pattern (char *, size_t, struct re_pattern_buffer *);
/* Is this really advertised? */
extern void re_compile_fastmap (struct re_pattern_buffer *);
extern int re_search (struct re_pattern_buffer *, char*, int, int, int,
struct re_registers *);
extern int re_search_2 (struct re_pattern_buffer *, char *, int,
char *, int, int, int,
struct re_registers *, int);
extern int re_match (struct re_pattern_buffer *, char *, int, int,
struct re_registers *);
extern int re_match_2 (struct re_pattern_buffer *, char *, int,
char *, int, int, struct re_registers *, int);
extern long re_set_syntax (long syntax);
#define _RE_ARGS(args) args
#else /* not __STDC__ */
#define _RE_ARGS(args) ()
#endif /* not __STDC__ */
/* Sets the current default syntax to SYNTAX, and return the old syntax.
You can also simply assign to the `re_syntax_options' variable. */
extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
/* Compile the regular expression PATTERN, with length LENGTH
and syntax given by the global `re_syntax_options', into the buffer
BUFFER. Return NULL if successful, and an error string if not. */
extern const char *re_compile_pattern
_RE_ARGS ((const char *pattern, size_t length,
struct re_pattern_buffer *buffer));
/* Compile a fastmap for the compiled pattern in BUFFER; used to
accelerate searches. Return 0 if successful and -2 if there was an
internal error. */
extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
/* Search in the string STRING (with length LENGTH) for the pattern
compiled into BUFFER. Start searching at position START, for RANGE
characters. Return the starting position of the match, -1 for no
match, or -2 for an internal error. Also return register
information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */
extern int re_search
_RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
int length, int start, int range, struct re_registers *regs));
/* Like `re_search', but search in the concatenation of STRING1 and
STRING2. Also, stop searching at index START + STOP. */
extern int re_search_2
_RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
int length1, const char *string2, int length2,
int start, int range, struct re_registers *regs, int stop));
/* Like `re_search', but return how many characters in STRING the regexp
in BUFFER matched, starting at position START. */
extern int re_match
_RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
int length, int start, struct re_registers *regs));
/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
extern int re_match_2
_RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
int length1, const char *string2, int length2,
int start, struct re_registers *regs, int stop));
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
ENDS. Subsequent matches using BUFFER and REGS will use this memory
for recording register information. STARTS and ENDS must be
allocated with malloc, and must each be at least `NUM_REGS * sizeof
(regoff_t)' bytes long.
If NUM_REGS == 0, then subsequent matches should allocate their own
register data.
Unless this function is called, the first search or match using
PATTERN_BUFFER will allocate its own register data, without
freeing the old data. */
extern void re_set_registers
_RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
unsigned num_regs, regoff_t *starts, regoff_t *ends));
#ifndef GAWK
/* 4.2 bsd compatibility. */
extern char *re_comp (char *);
extern int re_exec (char *);
#endif
extern char *re_comp _RE_ARGS ((const char *));
extern int re_exec _RE_ARGS ((const char *));
#else /* !__STDC__ */
/* POSIX compatibility. */
extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
extern int regexec
_RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
regmatch_t pmatch[], int eflags));
extern size_t regerror
_RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
size_t errbuf_size));
extern void regfree _RE_ARGS ((regex_t *preg));
extern char *re_compile_pattern ();
/* Is this really advertised? */
extern void re_compile_fastmap ();
extern int re_search (), re_search_2 ();
extern int re_match (), re_match_2 ();
extern long re_set_syntax();
#ifndef GAWK
/* 4.2 bsd compatibility. */
extern char *re_comp ();
extern int re_exec ();
#endif
#endif /* __STDC__ */
#ifdef SYNTAX_TABLE
extern char *re_syntax_table;
#endif
#endif /* !__REGEXP_LIBRARY */
#endif /* not __REGEXP_LIBRARY_H__ */
/*
Local variables:
make-backup-files: t
version-control: t
trim-versions-without-asking: nil
End:
*/

View File

@ -1,5 +1,5 @@
/*char *version_string = "from: @(#)Gnu Awk (gawk) 2.15";*/
char *version_string = "$Id: version.c,v 1.2 1993/08/01 18:49:02 mycroft Exp $ 2.15";
/* DO NOT CHANGE VERSION STRING TO USE A REAL SCCS OR RCS ID */
char *version_string = "@(#)Gnu Awk (gawk) 2.15";
/* 1.02 fixed /= += *= etc to return the new Left Hand Side instead
of the Right Hand Side */
@ -43,5 +43,6 @@ char *version_string = "$Id: version.c,v 1.2 1993/08/01 18:49:02 mycroft Exp $ 2
/* 2.14 Mostly bug fixes. */
/* 2.15 Bug fixes plus intermixing of command-line source and files,
GNU long options, ARGIND, ERRNO and Plan 9 style /dev/ files. */
GNU long options, ARGIND, ERRNO and Plan 9 style /dev/ files.
`delete array'. OS/2 port added. */