Upgrade to Gawk 2.15.4.
This commit is contained in:
parent
652a63ee06
commit
dff3317321
|
@ -1,3 +1,53 @@
|
|||
Changes from 2.15.3 to 2.15.4
|
||||
-----------------------------
|
||||
|
||||
Lots of lint fixes, and do_sprintf made mostly ANSI C compatible.
|
||||
|
||||
Man page updated and edited.
|
||||
|
||||
Copyrights updated.
|
||||
|
||||
Arrays now grow dynamically, initially scaling up by an order of magnitude
|
||||
and then doubling, up to ~ 64K. This should keep gawk's performance
|
||||
graceful under heavy load.
|
||||
|
||||
New `delete array' feature added. Only documented in the man page.
|
||||
|
||||
Switched to dfa and regex suites from grep-2.0. These offer the ability to
|
||||
move to POSIX regexps in the next release.
|
||||
|
||||
Disabled GNU regex ops.
|
||||
|
||||
Research awk -m option now recognized. It does nothing in gawk, since gawk
|
||||
has no static limits. Only documented in the man page.
|
||||
|
||||
New bionic (faster, better, stronger than before) hashing function.
|
||||
|
||||
Bug fix in argument handling. `gawk -X' now notices there was no program.
|
||||
Additional bug fixes to make --compat and --lint work again.
|
||||
|
||||
Many changes for 16-bit cleanliness.
|
||||
|
||||
Add explicit alloca(0) in io.c to recover space from C alloca.
|
||||
|
||||
Fixed file descriptor leak in io.c.
|
||||
|
||||
The --version option now follows the GNU coding standards and exits.
|
||||
|
||||
Fixed several prototypes in protos.h.
|
||||
|
||||
Several tests updated. On Solaris, warn that the out? tests will fail.
|
||||
|
||||
Configuration files for SunOS with cc and Solaris 2.x added.
|
||||
|
||||
Improved error messages in awk.y on gawk extensions if do_unix or do_compat.
|
||||
|
||||
INSTALL file added.
|
||||
|
||||
Fixed Atari Makefile and several VMS specific changes.
|
||||
|
||||
Better conversion of numbers to strings on systems with broken sprintfs.
|
||||
|
||||
Changes from 2.15.2 to 2.15.3
|
||||
-----------------------------
|
||||
|
||||
|
|
|
@ -3,4 +3,8 @@ Hopefully they will all be fixed in the next major release of gawk.
|
|||
|
||||
Please keep in mind that the code is still undergoing significant evolution.
|
||||
|
||||
1. Gawk's printf is probably still not POSIX compliant.
|
||||
1. The interactions with the lexer and yyerror need reworking. It is possible
|
||||
to get line numbers that are one line off if --compat or --posix is
|
||||
true and either `next file' or `delete array' is used.
|
||||
|
||||
Really the whole lexical analysis stuff needs reworking.
|
||||
|
|
|
@ -10,7 +10,7 @@ See the installation instructions, below.
|
|||
|
||||
Known problems are given in the PROBLEMS file. Work to be done is
|
||||
described briefly in the FUTURES file. Verified ports are listed in
|
||||
the PORTS file. Changes in this version are summarized in the CHANGES file.
|
||||
the PORTS file. Changes in this version are summarized in the NEWS file.
|
||||
Please read the LIMITATIONS and ACKNOWLEDGMENT files.
|
||||
|
||||
Read the file POSIX for a discussion of how the standard says comparisons
|
||||
|
@ -28,6 +28,8 @@ INSTALLATION:
|
|||
|
||||
Check whether there is a system-specific README file for your system.
|
||||
|
||||
A quick overview of the installation process is in the file INSTALLATION.
|
||||
|
||||
Makefile.in may need some tailoring. The only changes necessary should
|
||||
be to change installation targets or to change compiler flags.
|
||||
The changes to make in Makefile.in are commented and should be obvious.
|
||||
|
@ -69,7 +71,7 @@ problem.
|
|||
|
||||
PRINTING THE MANUAL
|
||||
|
||||
The 'support' directory contains texinfo.tex 2.65, which will be necessary
|
||||
The 'support' directory contains texinfo.tex 2.115, which will be necessary
|
||||
for printing the manual, and the texindex.c program from the texinfo
|
||||
distribution which is also necessary. See the makefile for the steps needed
|
||||
to get a DVI file from the manual.
|
||||
|
@ -93,7 +95,7 @@ INTERNET: david@cs.dal.ca
|
|||
|
||||
Arnold Robbins
|
||||
1736 Reindeer Drive
|
||||
Atlanta, GA, 30329, USA
|
||||
Atlanta, GA, 30329-3528, USA
|
||||
|
||||
INTERNET: arnold@skeeve.atl.ga.us
|
||||
UUCP: { gatech, emory, emoryu1 }!skeeve!arnold
|
||||
|
@ -115,8 +117,10 @@ VMS:
|
|||
|
||||
Atari ST:
|
||||
Michal Jaegermann
|
||||
NTOMCZAK@vm.ucs.UAlberta.CA (e-mail only)
|
||||
michal@gortel.phys.ualberta.ca (e-mail only)
|
||||
|
||||
OS/2:
|
||||
Kai Uwe Rommel
|
||||
rommel@ars.muc.de (e-mail only)
|
||||
Darrel Hankerson
|
||||
hankedr@mail.auburn.edu (e-mail only)
|
||||
|
|
|
@ -1 +1 @@
|
|||
2.15.3
|
||||
2.15.4
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GAWK, the GNU implementation of the
|
||||
* AWK Programming Language.
|
||||
|
@ -24,12 +24,27 @@
|
|||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] = "$Id: array.c,v 1.3 1993/11/13 02:26:15 jtc Exp $";
|
||||
#endif /* not lint */
|
||||
static char rcsid[] = "$Id: array.c,v 1.4 1994/02/17 01:21:57 jtc Exp $";
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Tree walks (``for (iggy in foo)'') and array deletions use expensive
|
||||
* linear searching. So what we do is start out with small arrays and
|
||||
* grow them as needed, so that our arrays are hopefully small enough,
|
||||
* most of the time, that they're pretty full and we're not looking at
|
||||
* wasted space.
|
||||
*
|
||||
* The decision is made to grow the array if the average chain length is
|
||||
* ``too big''. This is defined as the total number of entries in the table
|
||||
* divided by the size of the array being greater than some constant.
|
||||
*/
|
||||
|
||||
#define AVG_CHAIN_MAX 10 /* don't want to linear search more than this */
|
||||
|
||||
#include "awk.h"
|
||||
|
||||
static NODE *assoc_find P((NODE *symbol, NODE *subs, int hash1));
|
||||
static void grow_table P((NODE *symbol));
|
||||
|
||||
NODE *
|
||||
concat_exp(tree)
|
||||
|
@ -88,7 +103,7 @@ NODE *symbol;
|
|||
|
||||
if (symbol->var_array == 0)
|
||||
return;
|
||||
for (i = 0; i < HASHSIZE; i++) {
|
||||
for (i = 0; i < symbol->array_size; i++) {
|
||||
for (bucket = symbol->var_array[i]; bucket; bucket = next) {
|
||||
next = bucket->ahnext;
|
||||
unref(bucket->ahname);
|
||||
|
@ -97,17 +112,25 @@ NODE *symbol;
|
|||
}
|
||||
symbol->var_array[i] = 0;
|
||||
}
|
||||
free(symbol->var_array);
|
||||
symbol->var_array = NULL;
|
||||
symbol->array_size = symbol->table_size = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* calculate the hash function of the string in subs
|
||||
*/
|
||||
unsigned int
|
||||
hash(s, len)
|
||||
register char *s;
|
||||
hash(s, len, hsize)
|
||||
register const char *s;
|
||||
register size_t len;
|
||||
unsigned long hsize;
|
||||
{
|
||||
register unsigned long h = 0, g;
|
||||
register unsigned long h = 0;
|
||||
|
||||
#ifdef this_is_really_slow
|
||||
|
||||
register unsigned long g;
|
||||
|
||||
while (len--) {
|
||||
h = (h << 4) + *s++;
|
||||
|
@ -117,10 +140,84 @@ register size_t len;
|
|||
h = h ^ g;
|
||||
}
|
||||
}
|
||||
if (h < HASHSIZE)
|
||||
return h;
|
||||
else
|
||||
return h%HASHSIZE;
|
||||
|
||||
#else /* this_is_really_slow */
|
||||
/*
|
||||
* This is INCREDIBLY ugly, but fast. We break the string up into 8 byte
|
||||
* units. On the first time through the loop we get the "leftover bytes"
|
||||
* (strlen % 8). On every other iteration, we perform 8 HASHC's so we handle
|
||||
* all 8 bytes. Essentially, this saves us 7 cmp & branch instructions. If
|
||||
* this routine is heavily used enough, it's worth the ugly coding.
|
||||
*
|
||||
* OZ's original sdbm hash, copied from Margo Seltzer's db package.
|
||||
*
|
||||
*/
|
||||
|
||||
/* Even more speed: */
|
||||
/* #define HASHC h = *s++ + 65599 * h */
|
||||
/* Because 65599 = pow(2,6) + pow(2,16) - 1 we multiply by shifts */
|
||||
#define HASHC htmp = (h << 6); \
|
||||
h = *s++ + htmp + (htmp << 10) - h
|
||||
|
||||
unsigned long htmp;
|
||||
|
||||
h = 0;
|
||||
|
||||
#if defined(VAXC)
|
||||
/*
|
||||
* [This was an implementation of "Duff's Device", but it has been
|
||||
* redone, separating the switch for extra iterations from the loop.
|
||||
* This is necessary because the DEC VAX-C compiler is STOOPID.]
|
||||
*/
|
||||
switch (len & (8 - 1)) {
|
||||
case 7: HASHC;
|
||||
case 6: HASHC;
|
||||
case 5: HASHC;
|
||||
case 4: HASHC;
|
||||
case 3: HASHC;
|
||||
case 2: HASHC;
|
||||
case 1: HASHC;
|
||||
default: break;
|
||||
}
|
||||
|
||||
if (len > (8 - 1)) {
|
||||
register size_t loop = len >> 3;
|
||||
do {
|
||||
HASHC;
|
||||
HASHC;
|
||||
HASHC;
|
||||
HASHC;
|
||||
HASHC;
|
||||
HASHC;
|
||||
HASHC;
|
||||
HASHC;
|
||||
} while (--loop);
|
||||
}
|
||||
#else /* !VAXC */
|
||||
/* "Duff's Device" for those who can handle it */
|
||||
if (len > 0) {
|
||||
register size_t loop = (len + 8 - 1) >> 3;
|
||||
|
||||
switch (len & (8 - 1)) {
|
||||
case 0:
|
||||
do { /* All fall throughs */
|
||||
HASHC;
|
||||
case 7: HASHC;
|
||||
case 6: HASHC;
|
||||
case 5: HASHC;
|
||||
case 4: HASHC;
|
||||
case 3: HASHC;
|
||||
case 2: HASHC;
|
||||
case 1: HASHC;
|
||||
} while (--loop);
|
||||
}
|
||||
}
|
||||
#endif /* !VAXC */
|
||||
#endif /* this_is_really_slow - not */
|
||||
|
||||
if (h >= hsize)
|
||||
h %= hsize;
|
||||
return h;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -162,7 +259,7 @@ NODE *symbol, *subs;
|
|||
if (symbol->var_array == 0)
|
||||
return 0;
|
||||
subs = concat_exp(subs); /* concat_exp returns a string node */
|
||||
hash1 = hash(subs->stptr, subs->stlen);
|
||||
hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size);
|
||||
if (assoc_find(symbol, subs, hash1) == NULL) {
|
||||
free_temp(subs);
|
||||
return 0;
|
||||
|
@ -187,17 +284,16 @@ NODE *symbol, *subs;
|
|||
register NODE *bucket;
|
||||
|
||||
(void) force_string(subs);
|
||||
hash1 = hash(subs->stptr, subs->stlen);
|
||||
|
||||
if (symbol->var_array == 0) { /* this table really should grow
|
||||
* dynamically */
|
||||
size_t size;
|
||||
|
||||
size = sizeof(NODE *) * HASHSIZE;
|
||||
emalloc(symbol->var_array, NODE **, size, "assoc_lookup");
|
||||
memset((char *)symbol->var_array, 0, size);
|
||||
if (symbol->var_array == 0) {
|
||||
symbol->type = Node_var_array;
|
||||
symbol->array_size = symbol->table_size = 0; /* sanity */
|
||||
grow_table(symbol);
|
||||
hash1 = hash(subs->stptr, subs->stlen,
|
||||
(unsigned long) symbol->array_size);
|
||||
} else {
|
||||
hash1 = hash(subs->stptr, subs->stlen,
|
||||
(unsigned long) symbol->array_size);
|
||||
bucket = assoc_find(symbol, subs, hash1);
|
||||
if (bucket != NULL) {
|
||||
free_temp(subs);
|
||||
|
@ -209,6 +305,17 @@ NODE *symbol, *subs;
|
|||
if (do_lint && subs->stlen == 0)
|
||||
warning("subscript of array `%s' is null string",
|
||||
symbol->vname);
|
||||
|
||||
/* first see if we would need to grow the array, before installing */
|
||||
symbol->table_size++;
|
||||
if ((symbol->flags & ARRAYMAXED) == 0
|
||||
&& symbol->table_size/symbol->array_size > AVG_CHAIN_MAX) {
|
||||
grow_table(symbol);
|
||||
/* have to recompute hash value for new size */
|
||||
hash1 = hash(subs->stptr, subs->stlen,
|
||||
(unsigned long) symbol->array_size);
|
||||
}
|
||||
|
||||
getnode(bucket);
|
||||
bucket->type = Node_ahash;
|
||||
if (subs->flags & TEMP)
|
||||
|
@ -244,7 +351,7 @@ NODE *symbol, *tree;
|
|||
if (symbol->var_array == 0)
|
||||
return;
|
||||
subs = concat_exp(tree); /* concat_exp returns string node */
|
||||
hash1 = hash(subs->stptr, subs->stlen);
|
||||
hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size);
|
||||
|
||||
last = NULL;
|
||||
for (bucket = symbol->var_array[hash1]; bucket; last = bucket, bucket = bucket->ahnext)
|
||||
|
@ -260,6 +367,14 @@ NODE *symbol, *tree;
|
|||
unref(bucket->ahname);
|
||||
unref(bucket->ahvalue);
|
||||
freenode(bucket);
|
||||
symbol->table_size--;
|
||||
if (symbol->table_size <= 0) {
|
||||
memset(symbol->var_array, '\0',
|
||||
sizeof(NODE *) * symbol->array_size);
|
||||
symbol->table_size = symbol->array_size = 0;
|
||||
free(symbol->var_array);
|
||||
symbol->var_array = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -267,12 +382,12 @@ assoc_scan(symbol, lookat)
|
|||
NODE *symbol;
|
||||
struct search *lookat;
|
||||
{
|
||||
if (!symbol->var_array) {
|
||||
if (symbol->var_array == NULL) {
|
||||
lookat->retval = NULL;
|
||||
return;
|
||||
}
|
||||
lookat->arr_ptr = symbol->var_array;
|
||||
lookat->arr_end = lookat->arr_ptr + HASHSIZE; /* added */
|
||||
lookat->arr_end = lookat->arr_ptr + symbol->array_size;
|
||||
lookat->bucket = symbol->var_array[0];
|
||||
assoc_next(lookat);
|
||||
}
|
||||
|
@ -295,3 +410,77 @@ struct search *lookat;
|
|||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* grow_table --- grow a hash table */
|
||||
|
||||
static void
|
||||
grow_table(symbol)
|
||||
NODE *symbol;
|
||||
{
|
||||
NODE **old, **new, *chain, *next;
|
||||
int i, j;
|
||||
unsigned long hash1;
|
||||
unsigned long oldsize, newsize;
|
||||
/*
|
||||
* This is an array of primes. We grow the table by an order of
|
||||
* magnitude each time (not just doubling) so that growing is a
|
||||
* rare operation. We expect, on average, that it won't happen
|
||||
* more than twice. The final size is also chosen to be small
|
||||
* enough so that MS-DOG mallocs can handle it. When things are
|
||||
* very large (> 8K), we just double more or less, instead of
|
||||
* just jumping from 8K to 64K.
|
||||
*/
|
||||
static long sizes[] = { 13, 127, 1021, 8191, 16381, 32749, 65497 };
|
||||
|
||||
/* find next biggest hash size */
|
||||
oldsize = symbol->array_size;
|
||||
newsize = 0;
|
||||
for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) {
|
||||
if (oldsize < sizes[i]) {
|
||||
newsize = sizes[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (newsize == oldsize) { /* table already at max (!) */
|
||||
symbol->flags |= ARRAYMAXED;
|
||||
return;
|
||||
}
|
||||
|
||||
/* allocate new table */
|
||||
emalloc(new, NODE **, newsize * sizeof(NODE *), "grow_table");
|
||||
memset(new, '\0', newsize * sizeof(NODE *));
|
||||
|
||||
/* brand new hash table, set things up and return */
|
||||
if (symbol->var_array == NULL) {
|
||||
symbol->table_size = 0;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* old hash table there, move stuff to new, free old */
|
||||
old = symbol->var_array;
|
||||
for (i = 0; i < oldsize; i++) {
|
||||
if (old[i] == NULL)
|
||||
continue;
|
||||
|
||||
for (chain = old[i]; chain != NULL; chain = next) {
|
||||
next = chain->ahnext;
|
||||
hash1 = hash(chain->ahname->stptr,
|
||||
chain->ahname->stlen, newsize);
|
||||
|
||||
/* remove from old list, add to new */
|
||||
chain->ahnext = new[hash1];
|
||||
new[hash1] = chain;
|
||||
|
||||
}
|
||||
}
|
||||
free(old);
|
||||
|
||||
done:
|
||||
/*
|
||||
* note that symbol->table_size does not change if an old array,
|
||||
* and is explicitly set to 0 if a new one.
|
||||
*/
|
||||
symbol->var_array = new;
|
||||
symbol->array_size = newsize;
|
||||
}
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
.\" $Id: awk.1,v 1.3 1993/11/13 02:26:18 jtc Exp $ -*- nroff -*-
|
||||
.\" $Id: awk.1,v 1.4 1994/02/17 01:21:59 jtc Exp $ -*- nroff -*-
|
||||
.ds PX \s-1POSIX\s+1
|
||||
.ds UX \s-1UNIX\s+1
|
||||
.ds AN \s-1ANSI\s+1
|
||||
.TH GAWK 1 "Nov 4 1993" "Free Software Foundation" "Utility Commands"
|
||||
.TH GAWK 1 "Dec 24 1993" "Free Software Foundation" "Utility Commands"
|
||||
.SH NAME
|
||||
gawk \- pattern scanning and processing language
|
||||
.SH SYNOPSIS
|
||||
|
@ -72,6 +72,11 @@ option.
|
|||
Each
|
||||
.B \-W
|
||||
option has a corresponding GNU style long option, as detailed below.
|
||||
Arguments to GNU style long options are either joined with the option
|
||||
by an
|
||||
.B =
|
||||
sign, with no intervening spaces, or they may be provided in the
|
||||
next command line argument.
|
||||
.PP
|
||||
.I Gawk
|
||||
accepts the following options.
|
||||
|
@ -115,6 +120,26 @@ Multiple
|
|||
(or
|
||||
.BR \-\^\-file )
|
||||
options may be used.
|
||||
.TP
|
||||
.PD 0
|
||||
.BI \-mf= NNN
|
||||
.TP
|
||||
.BI \-mr= NNN
|
||||
Set various memory limits to the value
|
||||
.IR NNN .
|
||||
The
|
||||
.B f
|
||||
flag sets the maximum number of fields, and the
|
||||
.B r
|
||||
flag sets the maximum record size. These two flags and the
|
||||
.B \-m
|
||||
option are from the AT&T Bell Labs research version of \*(UX
|
||||
.IR awk .
|
||||
They are ignored by
|
||||
.IR gawk ,
|
||||
since
|
||||
.I gawk
|
||||
has no pre-defined limits.
|
||||
.TP \w'\fB\-\^\-copyright\fR'u+1n
|
||||
.PD 0
|
||||
.B "\-W compat"
|
||||
|
@ -159,6 +184,8 @@ the error output.
|
|||
.B \-\^\-usage
|
||||
Print a relatively short summary of the available options on
|
||||
the error output.
|
||||
Per the GNU Coding Standards, these options cause an immediate,
|
||||
successful exit.
|
||||
.TP
|
||||
.PD 0
|
||||
.B "\-W lint"
|
||||
|
@ -249,6 +276,8 @@ This is useful mainly for knowing if the current copy of
|
|||
on your system
|
||||
is up to date with respect to whatever the Free Software Foundation
|
||||
is distributing.
|
||||
Per the GNU Coding Standards, these options cause an immediate,
|
||||
successful exit.
|
||||
.TP
|
||||
.B \-\^\-
|
||||
Signal the end of options. This is useful to allow further arguments to the
|
||||
|
@ -256,7 +285,13 @@ AWK program itself to start with a ``\-''.
|
|||
This is mainly for consistency with the argument parsing convention used
|
||||
by most other \*(PX programs.
|
||||
.PP
|
||||
Any other options are flagged as illegal, but are otherwise ignored.
|
||||
In compatibility mode,
|
||||
any other options are flagged as illegal, but are otherwise ignored.
|
||||
In normal operation, as long as program text has been supplied, unknown
|
||||
options are passed on to the AWK program in the
|
||||
.B ARGV
|
||||
array for processing. This is particularly useful for running AWK
|
||||
programs via the ``#!'' executable interpreter mechanism.
|
||||
.SH AWK PROGRAM EXECUTION
|
||||
.PP
|
||||
An AWK program consists of a sequence of pattern-action statements
|
||||
|
@ -271,23 +306,23 @@ and optional function definitions.
|
|||
.I Gawk
|
||||
first reads the program source from the
|
||||
.IR program-file (s)
|
||||
if specified, or from the first non-option argument on the command line.
|
||||
if specified,
|
||||
from arguments to
|
||||
.BR "\-W source=" ,
|
||||
or from the first non-option argument on the command line.
|
||||
The
|
||||
.B \-f
|
||||
option may be used multiple times on the command line.
|
||||
and
|
||||
.B "\-W source="
|
||||
options may be used multiple times on the command line.
|
||||
.I Gawk
|
||||
will read the program text as if all the
|
||||
.IR program-file s
|
||||
and command line source texts
|
||||
had been concatenated together. This is useful for building libraries
|
||||
of AWK functions, without having to include them in each new AWK
|
||||
program that uses them. To use a library function in a file from a
|
||||
program typed in on the command line, specify
|
||||
.B /dev/tty
|
||||
as one of the
|
||||
.IR program-file s,
|
||||
type your program, and end it with a
|
||||
.B ^D
|
||||
(control-d).
|
||||
program that uses them. It also provides the ability to mix library
|
||||
functions with command line programs.
|
||||
.PP
|
||||
The environment variable
|
||||
.B AWKPATH
|
||||
|
@ -303,11 +338,13 @@ option contains a ``/'' character, no path search is performed.
|
|||
.I Gawk
|
||||
executes AWK programs in the following order.
|
||||
First,
|
||||
all variable assignments specified via the
|
||||
.B \-v
|
||||
option are performed.
|
||||
Next,
|
||||
.I gawk
|
||||
compiles the program into an internal form.
|
||||
Next, all variable assignments specified via the
|
||||
.B \-v
|
||||
option are performed. Then,
|
||||
Then,
|
||||
.I gawk
|
||||
executes the code in the
|
||||
.B BEGIN
|
||||
|
@ -360,8 +397,8 @@ block(s) (if any).
|
|||
AWK variables are dynamic; they come into existence when they are
|
||||
first used. Their values are either floating-point numbers or strings,
|
||||
or both,
|
||||
depending upon how they are used. AWK also has one dimension
|
||||
arrays; multiply dimensioned arrays may be simulated.
|
||||
depending upon how they are used. AWK also has one dimensional
|
||||
arrays; arrays with multiple dimensions may be simulated.
|
||||
Several pre-defined variables are set as a program
|
||||
runs; these will be described as needed and summarized below.
|
||||
.SS Fields
|
||||
|
@ -436,6 +473,7 @@ cause the value of
|
|||
.B $0
|
||||
to be recomputed, with the fields being separated by the value of
|
||||
.BR OFS .
|
||||
References to negative numbered fields cause a fatal error.
|
||||
.SS Built-in Variables
|
||||
.PP
|
||||
AWK's built-in variables are:
|
||||
|
@ -483,7 +521,7 @@ If a system error occurs either doing a redirection for
|
|||
during a read for
|
||||
.BR getline ,
|
||||
or during a
|
||||
.BR close ,
|
||||
.BR close() ,
|
||||
then
|
||||
.B ERRNO
|
||||
will contain
|
||||
|
@ -650,6 +688,9 @@ loop to iterate over all the elements of an array.
|
|||
An element may be deleted from an array using the
|
||||
.B delete
|
||||
statement.
|
||||
The
|
||||
.B delete
|
||||
statement may also be used to delete the entire contents of an array.
|
||||
.SS Variable Typing And Conversion
|
||||
.PP
|
||||
Variables and fields
|
||||
|
@ -686,7 +727,7 @@ b = a ""
|
|||
.PP
|
||||
the variable
|
||||
.B b
|
||||
has a value of \fB"12"\fR and not \fB"12.00"\fR.
|
||||
has a string value of \fB"12"\fR and not \fB"12.00"\fR.
|
||||
.PP
|
||||
.I Gawk
|
||||
performs comparisons as follows:
|
||||
|
@ -815,7 +856,8 @@ the third. Only one of the second and third patterns is evaluated.
|
|||
.PP
|
||||
The
|
||||
.IB pattern1 ", " pattern2
|
||||
form of an expression is called a range pattern.
|
||||
form of an expression is called a
|
||||
.IR "range pattern" .
|
||||
It matches all input records starting with a line that matches
|
||||
.IR pattern1 ,
|
||||
and continuing until a record that matches
|
||||
|
@ -988,6 +1030,7 @@ as follows:
|
|||
\fBbreak\fR
|
||||
\fBcontinue\fR
|
||||
\fBdelete \fIarray\^\fB[\^\fIindex\^\fB]\fR
|
||||
\fBdelete \fIarray\^\fR
|
||||
\fBexit\fR [ \fIexpression\fR ]
|
||||
\fB{ \fIstatements \fB}
|
||||
.fi
|
||||
|
@ -1052,10 +1095,20 @@ Prints the current record.
|
|||
.TP
|
||||
.BI print " expr-list"
|
||||
Prints expressions.
|
||||
Each expression is separated by the value of the
|
||||
.B OFS
|
||||
variable. The output record is terminated with the value of the
|
||||
.B ORS
|
||||
variable.
|
||||
.TP
|
||||
.BI print " expr-list" " >" file
|
||||
Prints expressions on
|
||||
.IR file .
|
||||
Each expression is separated by the value of the
|
||||
.B OFS
|
||||
variable. The output record is terminated with the value of the
|
||||
.B ORS
|
||||
variable.
|
||||
.TP
|
||||
.BI printf " fmt, expr-list"
|
||||
Format and print.
|
||||
|
@ -1084,8 +1137,9 @@ In a similar fashion,
|
|||
.IB command " | getline"
|
||||
pipes into
|
||||
.BR getline .
|
||||
.BR Getline
|
||||
will return 0 on end of file, and \-1 on an error.
|
||||
The
|
||||
.BR getline
|
||||
command will return 0 on end of file, and \-1 on an error.
|
||||
.SS The \fIprintf\fP\^ Statement
|
||||
.PP
|
||||
The AWK versions of the
|
||||
|
@ -1159,6 +1213,7 @@ The expression should be left-justified within its field.
|
|||
The field should be padded to this width. If the number has a leading
|
||||
zero, then the field will be padded with zeros.
|
||||
Otherwise it is padded with blanks.
|
||||
This applies even to the non-numeric output formats.
|
||||
.TP
|
||||
.BI . prec
|
||||
A number indicating the maximum width of strings or digits to the right
|
||||
|
@ -1235,7 +1290,7 @@ is the value of the
|
|||
system call.
|
||||
If there are any additional fields, they are the group IDs returned by
|
||||
.IR getgroups (2).
|
||||
(Multiple groups may not be supported on all systems.)
|
||||
Multiple groups may not be supported on all systems.
|
||||
.TP
|
||||
.B /dev/stdin
|
||||
The standard input.
|
||||
|
@ -1366,6 +1421,9 @@ and returns the number of fields. If
|
|||
is omitted,
|
||||
.B FS
|
||||
is used instead.
|
||||
The array
|
||||
.I a
|
||||
is cleared first.
|
||||
.TP
|
||||
.BI sprintf( fmt , " expr-list" )
|
||||
prints
|
||||
|
@ -1483,11 +1541,11 @@ the
|
|||
As in \*(AN C, all following hexadecimal digits are considered part of
|
||||
the escape sequence.
|
||||
(This feature should tell us something about language design by committee.)
|
||||
E.g., "\ex1B" is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
|
||||
E.g., \fB"\ex1B"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
|
||||
.TP
|
||||
.BI \e ddd
|
||||
The character represented by the 1-, 2-, or 3-digit sequence of octal
|
||||
digits. E.g. "\e033" is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
|
||||
digits. E.g. \fB"\e033"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
|
||||
.TP
|
||||
.BI \e c
|
||||
The literal character
|
||||
|
@ -1568,7 +1626,15 @@ Concatenate and line number (a variation on a theme):
|
|||
.ft R
|
||||
.fi
|
||||
.SH SEE ALSO
|
||||
.IR egrep (1)
|
||||
.IR egrep (1),
|
||||
.IR getpid (2),
|
||||
.IR getppid (2),
|
||||
.IR getpgrp (2),
|
||||
.IR getuid (2),
|
||||
.IR geteuid (2),
|
||||
.IR getgid (2),
|
||||
.IR getegid (2),
|
||||
.IR getgroups (2)
|
||||
.PP
|
||||
.IR "The AWK Programming Language" ,
|
||||
Alfred V. Aho, Brian W. Kernighan, Peter J. Weinberger,
|
||||
|
@ -1606,7 +1672,7 @@ block was run. Applications came to depend on this ``feature.''
|
|||
When
|
||||
.I awk
|
||||
was changed to match its documentation, this option was added to
|
||||
accomodate applications that depended upon the old behavior.
|
||||
accommodate applications that depended upon the old behavior.
|
||||
(This feature was agreed upon by both the AT&T and GNU developers.)
|
||||
.PP
|
||||
The
|
||||
|
@ -1616,7 +1682,11 @@ option for implementation specific features is from the \*(PX standard.
|
|||
When processing arguments,
|
||||
.I gawk
|
||||
uses the special option ``\fB\-\^\-\fP'' to signal the end of
|
||||
arguments, and warns about, but otherwise ignores, undefined options.
|
||||
arguments.
|
||||
In compatibility mode, it will warn about, but otherwise ignore,
|
||||
undefined options.
|
||||
In normal operation, such arguments are passed on to the AWK program for
|
||||
it to process.
|
||||
.PP
|
||||
The AWK book does not define the return value of
|
||||
.BR srand() .
|
||||
|
@ -1712,6 +1782,11 @@ environment variable is not special.
|
|||
The use of
|
||||
.B "next file"
|
||||
to abandon processing of the current input file.
|
||||
.TP
|
||||
\(bu
|
||||
The use of
|
||||
.BI delete " array"
|
||||
to delete the entire contents of an array.
|
||||
.RE
|
||||
.PP
|
||||
The AWK book does not define the return value of the
|
||||
|
@ -1739,7 +1814,7 @@ option is ``t'', then
|
|||
will be set to the tab character.
|
||||
Since this is a rather ugly special case, it is not the default behavior.
|
||||
This behavior also does not occur if
|
||||
.B \-Wposix
|
||||
.B "\-W posix"
|
||||
has been specified.
|
||||
.ig
|
||||
.PP
|
||||
|
@ -1791,7 +1866,7 @@ a = length($0)
|
|||
This feature is marked as ``deprecated'' in the \*(PX standard, and
|
||||
.I gawk
|
||||
will issue a warning about its use if
|
||||
.B \-Wlint
|
||||
.B "\-W lint"
|
||||
is specified on the command line.
|
||||
.PP
|
||||
The other feature is the use of the
|
||||
|
@ -1807,7 +1882,7 @@ equivalent to the
|
|||
statement.
|
||||
.I Gawk
|
||||
will support this usage if
|
||||
.B \-Wposix
|
||||
.B "\-W posix"
|
||||
has not been specified.
|
||||
.SH BUGS
|
||||
The
|
||||
|
@ -1850,6 +1925,7 @@ the
|
|||
and
|
||||
.B \-e
|
||||
options of the 2.11 version are no longer recognized.
|
||||
This fact will not even be documented in the manual page for version 2.16.
|
||||
.SH AUTHORS
|
||||
The original version of \*(UX
|
||||
.I awk
|
||||
|
@ -1873,6 +1949,8 @@ compatible with the new version of \*(UX
|
|||
The initial DOS port was done by Conrad Kwok and Scott Garfinkle.
|
||||
Scott Deifik is the current DOS maintainer. Pat Rankin did the
|
||||
port to VMS, and Michal Jaegermann did the port to the Atari ST.
|
||||
The port to OS/2 was done by Kai Uwe Rommel, with contributions and
|
||||
help from Darrel Hankerson.
|
||||
.SH ACKNOWLEDGEMENTS
|
||||
Brian Kernighan of Bell Labs
|
||||
provided valuable assistance during testing and debugging.
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GAWK, the GNU implementation of the
|
||||
* AWK Programming Language.
|
||||
|
@ -22,7 +22,7 @@
|
|||
* along with GAWK; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*
|
||||
* $Id: awk.h,v 1.3 1993/11/13 02:26:21 jtc Exp $
|
||||
* $Id: awk.h,v 1.4 1994/02/17 01:22:01 jtc Exp $
|
||||
*/
|
||||
|
||||
/* ------------------------------ Includes ------------------------------ */
|
||||
|
@ -174,7 +174,7 @@ extern int getpgrp P((void));
|
|||
typedef struct Regexp {
|
||||
struct re_pattern_buffer pat;
|
||||
struct re_registers regs;
|
||||
struct regexp dfareg;
|
||||
struct dfa dfareg;
|
||||
int dfa;
|
||||
} Regexp;
|
||||
#define RESTART(rp,s) (rp)->regs.start[0]
|
||||
|
@ -198,6 +198,22 @@ extern int _text_read (int, char *, int);
|
|||
#define ENVSEP ':'
|
||||
#endif
|
||||
|
||||
#define DEFAULT_G_PRECISION 6
|
||||
|
||||
/* semi-temporary hack, mostly to gracefully handle VMS */
|
||||
#ifdef GFMT_WORKAROUND
|
||||
extern void sgfmt P((char *, const char *, int, int, int, double)); /* builtin.c */
|
||||
|
||||
/* Partial fix, to handle the most common case. */
|
||||
#define NUMTOSTR(str, format, num) \
|
||||
if (strcmp((format), "%.6g") == 0 || strcmp((format), "%g") == 0) \
|
||||
sgfmt(str, "%*.*g", 0, 1, DEFAULT_G_PRECISION, num); \
|
||||
else \
|
||||
(void) sprintf(str, format, num) /* NOTE: no semi-colon! */
|
||||
#else
|
||||
#define NUMTOSTR(str, format, num) (void) sprintf(str, format, num)
|
||||
#endif /* GFMT_WORKAROUND */
|
||||
|
||||
/* ------------------ Constants, Structures, Typedefs ------------------ */
|
||||
#define AWKNUM double
|
||||
|
||||
|
@ -335,6 +351,7 @@ typedef struct exp_node {
|
|||
union {
|
||||
struct exp_node *lptr;
|
||||
char *param_name;
|
||||
long ll;
|
||||
} l;
|
||||
union {
|
||||
struct exp_node *rptr;
|
||||
|
@ -347,6 +364,7 @@ typedef struct exp_node {
|
|||
union {
|
||||
char *name;
|
||||
struct exp_node *extra;
|
||||
long xl;
|
||||
} x;
|
||||
short number;
|
||||
unsigned char reflags;
|
||||
|
@ -392,8 +410,8 @@ typedef struct exp_node {
|
|||
# define NUM 32 /* numeric value is current */
|
||||
# define NUMBER 64 /* assigned as number */
|
||||
# define MAYBE_NUM 128 /* user input: if NUMERIC then
|
||||
* a NUMBER
|
||||
*/
|
||||
* a NUMBER */
|
||||
# define ARRAYMAXED 256 /* array is at max size */
|
||||
char *vname; /* variable's name */
|
||||
} NODE;
|
||||
|
||||
|
@ -426,6 +444,8 @@ typedef struct exp_node {
|
|||
|
||||
#define var_value lnode
|
||||
#define var_array sub.nodep.r.av
|
||||
#define array_size sub.nodep.l.ll
|
||||
#define table_size sub.nodep.x.xl
|
||||
|
||||
#define condpair lnode
|
||||
#define triggered sub.nodep.r.r_ent
|
||||
|
@ -433,8 +453,6 @@ typedef struct exp_node {
|
|||
#ifdef DONTDEF
|
||||
int primes[] = {31, 61, 127, 257, 509, 1021, 2053, 4099, 8191, 16381};
|
||||
#endif
|
||||
/* a quick profile suggests that the following is a good value */
|
||||
#define HASHSIZE 1021
|
||||
|
||||
typedef struct for_loop_header {
|
||||
NODE *init;
|
||||
|
@ -628,7 +646,7 @@ extern double _msc51bug;
|
|||
/* array.c */
|
||||
extern NODE *concat_exp P((NODE *tree));
|
||||
extern void assoc_clear P((NODE *symbol));
|
||||
extern unsigned int hash P((char *s, size_t len));
|
||||
extern unsigned int hash P((const char *s, size_t len, unsigned long hsize));
|
||||
extern int in_array P((NODE *symbol, NODE *subs));
|
||||
extern NODE **assoc_lookup P((NODE *symbol, NODE *subs));
|
||||
extern void do_delete P((NODE *symbol, NODE *tree));
|
||||
|
@ -639,7 +657,7 @@ extern char *tokexpand P((void));
|
|||
extern char nextc P((void));
|
||||
extern NODE *node P((NODE *left, NODETYPE op, NODE *right));
|
||||
extern NODE *install P((char *name, NODE *value));
|
||||
extern NODE *lookup P((char *name));
|
||||
extern NODE *lookup P((const char *name));
|
||||
extern NODE *variable P((char *name, int can_free));
|
||||
extern int yyparse P((void));
|
||||
/* builtin.c */
|
||||
|
@ -695,8 +713,8 @@ extern struct redirect *redirect P((NODE *tree, int *errflg));
|
|||
extern NODE *do_close P((NODE *tree));
|
||||
extern int flush_io P((void));
|
||||
extern int close_io P((void));
|
||||
extern int devopen P((char *name, char *mode));
|
||||
extern int pathopen P((char *file));
|
||||
extern int devopen P((const char *name, const char *mode));
|
||||
extern int pathopen P((const char *file));
|
||||
extern NODE *do_getline P((NODE *tree));
|
||||
extern void do_nextfile P((void));
|
||||
/* iop.c */
|
||||
|
@ -710,7 +728,7 @@ extern void load_environ P((void));
|
|||
extern char *arg_assign P((char *arg));
|
||||
extern SIGTYPE catchsig P((int sig, int code));
|
||||
/* msg.c */
|
||||
extern void err P((char *s, char *emsg, va_list argp));
|
||||
extern void err P((const char *s, const char *emsg, va_list argp));
|
||||
#if _MSC_VER == 510
|
||||
extern void msg P((va_list va_alist, ...));
|
||||
extern void warning P((va_list va_alist, ...));
|
||||
|
@ -734,8 +752,9 @@ extern void freenode P((NODE *it));
|
|||
extern void unref P((NODE *tmp));
|
||||
extern int parse_escape P((char **string_ptr));
|
||||
/* re.c */
|
||||
extern Regexp *make_regexp P((char *s, int len, int ignorecase, int dfa));
|
||||
extern int research P((Regexp *rp, char *str, int start, int len, int need_start));
|
||||
extern Regexp *make_regexp P((char *s, size_t len, int ignorecase, int dfa));
|
||||
extern int research P((Regexp *rp, char *str, int start,
|
||||
size_t len, int need_start));
|
||||
extern void refree P((Regexp *rp));
|
||||
extern void reg_error P((const char *s));
|
||||
extern Regexp *re_update P((NODE *t));
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GAWK, the GNU implementation of the
|
||||
* AWK Programming Language.
|
||||
|
@ -21,6 +21,8 @@
|
|||
* You should have received a copy of the GNU General Public License
|
||||
* along with GAWK; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*
|
||||
* $Id: awk.y,v 1.3 1994/02/17 01:22:02 jtc Exp $
|
||||
*/
|
||||
|
||||
%{
|
||||
|
@ -56,9 +58,10 @@ static char *thisline = NULL;
|
|||
#define YYDEBUG_LEXER_TEXT (lexeme)
|
||||
static int param_counter;
|
||||
static char *tokstart = NULL;
|
||||
static char *token = NULL;
|
||||
static char *tok = NULL;
|
||||
static char *tokend;
|
||||
|
||||
#define HASHSIZE 1021 /* this constant only used here */
|
||||
NODE *variables[HASHSIZE];
|
||||
|
||||
extern char *source;
|
||||
|
@ -291,7 +294,7 @@ regexp
|
|||
REGEXP '/'
|
||||
{
|
||||
NODE *n;
|
||||
int len;
|
||||
size_t len;
|
||||
|
||||
getnode(n);
|
||||
n->type = Node_regex;
|
||||
|
@ -386,10 +389,19 @@ statement
|
|||
if ($2 && $2 == lookup("file")) {
|
||||
if (do_lint)
|
||||
warning("`next file' is a gawk extension");
|
||||
else if (do_unix || do_posix)
|
||||
yyerror("`next file' is a gawk extension");
|
||||
else if (! io_allowed)
|
||||
yyerror("`next file' used in BEGIN or END action");
|
||||
if (do_unix || do_posix) {
|
||||
/*
|
||||
* can't use yyerror, since may have overshot
|
||||
* the source line
|
||||
*/
|
||||
errcount++;
|
||||
msg("`next file' is a gawk extension");
|
||||
}
|
||||
if (! io_allowed) {
|
||||
/* same thing */
|
||||
errcount++;
|
||||
msg("`next file' used in BEGIN or END action");
|
||||
}
|
||||
type = Node_K_nextfile;
|
||||
} else {
|
||||
if (! io_allowed)
|
||||
|
@ -406,6 +418,20 @@ statement
|
|||
{ $$ = node ($3, Node_K_return, (NODE *)NULL); }
|
||||
| LEX_DELETE NAME '[' expression_list ']' statement_term
|
||||
{ $$ = node (variable($2,1), Node_K_delete, $4); }
|
||||
| LEX_DELETE NAME statement_term
|
||||
{
|
||||
if (do_lint)
|
||||
warning("`delete array' is a gawk extension");
|
||||
if (do_unix || do_posix) {
|
||||
/*
|
||||
* can't use yyerror, since may have overshot
|
||||
* the source line
|
||||
*/
|
||||
errcount++;
|
||||
msg("`delete array' is a gawk extension");
|
||||
}
|
||||
$$ = node (variable($2,1), Node_K_delete, (NODE *) NULL);
|
||||
}
|
||||
| exp statement_term
|
||||
{ $$ = $1; }
|
||||
;
|
||||
|
@ -746,7 +772,7 @@ comma : ',' opt_nls { yyerrok; }
|
|||
%%
|
||||
|
||||
struct token {
|
||||
char *operator; /* text to match */
|
||||
const char *operator; /* text to match */
|
||||
NODETYPE value; /* node type */
|
||||
int class; /* lexical class */
|
||||
unsigned flags; /* # of args. allowed and compatability */
|
||||
|
@ -820,10 +846,11 @@ yyerror(va_alist)
|
|||
va_dcl
|
||||
{
|
||||
va_list args;
|
||||
char *mesg = NULL;
|
||||
const char *mesg = NULL;
|
||||
register char *bp, *cp;
|
||||
char *scan;
|
||||
char buf[120];
|
||||
static char end_of_file_line[] = "(END OF FILE)";
|
||||
|
||||
errcount++;
|
||||
/* Find the current line in the input file */
|
||||
|
@ -845,8 +872,8 @@ va_dcl
|
|||
while (bp < lexend && *bp && *bp != '\n')
|
||||
bp++;
|
||||
} else {
|
||||
thisline = "(END OF FILE)";
|
||||
bp = thisline + 13;
|
||||
thisline = end_of_file_line;
|
||||
bp = thisline + strlen(thisline);
|
||||
}
|
||||
msg("%.*s", (int) (bp - thisline), thisline);
|
||||
bp = buf;
|
||||
|
@ -982,7 +1009,7 @@ get_src_buf()
|
|||
return buf;
|
||||
}
|
||||
|
||||
#define tokadd(x) (*token++ = (x), token == tokend ? tokexpand() : token)
|
||||
#define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok)
|
||||
|
||||
char *
|
||||
tokexpand()
|
||||
|
@ -990,15 +1017,15 @@ tokexpand()
|
|||
static int toksize = 60;
|
||||
int tokoffset;
|
||||
|
||||
tokoffset = token - tokstart;
|
||||
tokoffset = tok - tokstart;
|
||||
toksize *= 2;
|
||||
if (tokstart)
|
||||
erealloc(tokstart, char *, toksize, "tokexpand");
|
||||
else
|
||||
emalloc(tokstart, char *, toksize, "tokexpand");
|
||||
tokend = tokstart + toksize;
|
||||
token = tokstart + tokoffset;
|
||||
return token;
|
||||
tok = tokstart + tokoffset;
|
||||
return tok;
|
||||
}
|
||||
|
||||
#if DEBUG
|
||||
|
@ -1053,7 +1080,7 @@ yylex()
|
|||
int in_brack = 0;
|
||||
|
||||
want_regexp = 0;
|
||||
token = tokstart;
|
||||
tok = tokstart;
|
||||
while ((c = nextc()) != 0) {
|
||||
switch (c) {
|
||||
case '[':
|
||||
|
@ -1094,7 +1121,7 @@ retry:
|
|||
|
||||
lexeme = lexptr ? lexptr - 1 : lexptr;
|
||||
thisline = NULL;
|
||||
token = tokstart;
|
||||
tok = tokstart;
|
||||
yylval.nodetypeval = Node_illegal;
|
||||
|
||||
switch (c) {
|
||||
|
@ -1115,13 +1142,23 @@ retry:
|
|||
|
||||
case '\\':
|
||||
#ifdef RELAXED_CONTINUATION
|
||||
if (!do_unix) { /* strip trailing white-space and/or comment */
|
||||
while ((c = nextc()) == ' ' || c == '\t') continue;
|
||||
/*
|
||||
* This code purports to allow comments and/or whitespace
|
||||
* after the `\' at the end of a line used for continuation.
|
||||
* Use it at your own risk. We think it's a bad idea, which
|
||||
* is why it's not on by default.
|
||||
*/
|
||||
if (!do_unix) {
|
||||
/* strip trailing white-space and/or comment */
|
||||
while ((c = nextc()) == ' ' || c == '\t')
|
||||
continue;
|
||||
if (c == '#')
|
||||
while ((c = nextc()) != '\n') if (!c) break;
|
||||
while ((c = nextc()) != '\n')
|
||||
if (c == '\0')
|
||||
break;
|
||||
pushback();
|
||||
}
|
||||
#endif /*RELAXED_CONTINUATION*/
|
||||
#endif /* RELAXED_CONTINUATION */
|
||||
if (nextc() == '\n') {
|
||||
sourceline++;
|
||||
goto retry;
|
||||
|
@ -1307,7 +1344,7 @@ retry:
|
|||
tokadd(c);
|
||||
}
|
||||
yylval.nodeval = make_str_node(tokstart,
|
||||
token - tokstart, esc_seen ? SCAN : 0);
|
||||
tok - tokstart, esc_seen ? SCAN : 0);
|
||||
yylval.nodeval->flags |= PERM;
|
||||
return YSTRING;
|
||||
|
||||
|
@ -1443,14 +1480,14 @@ retry:
|
|||
yyerror("Invalid char '%c' in expression\n", c);
|
||||
|
||||
/* it's some type of name-type-thing. Find its length */
|
||||
token = tokstart;
|
||||
tok = tokstart;
|
||||
while (is_identchar(c)) {
|
||||
tokadd(c);
|
||||
c = nextc();
|
||||
}
|
||||
tokadd('\0');
|
||||
emalloc(tokkey, char *, token - tokstart, "yylex");
|
||||
memcpy(tokkey, tokstart, token - tokstart);
|
||||
emalloc(tokkey, char *, tok - tokstart, "yylex");
|
||||
memcpy(tokkey, tokstart, tok - tokstart);
|
||||
pushback();
|
||||
|
||||
/* See if it is a special token. */
|
||||
|
@ -1653,7 +1690,7 @@ NODE *value;
|
|||
register int bucket;
|
||||
|
||||
len = strlen(name);
|
||||
bucket = hash(name, len);
|
||||
bucket = hash(name, len, (unsigned long) HASHSIZE);
|
||||
getnode(hp);
|
||||
hp->type = Node_hashnode;
|
||||
hp->hnext = variables[bucket];
|
||||
|
@ -1668,13 +1705,13 @@ NODE *value;
|
|||
/* find the most recent hash node for name installed by install */
|
||||
NODE *
|
||||
lookup(name)
|
||||
char *name;
|
||||
const char *name;
|
||||
{
|
||||
register NODE *bucket;
|
||||
register size_t len;
|
||||
|
||||
len = strlen(name);
|
||||
bucket = variables[hash(name, len)];
|
||||
bucket = variables[hash(name, len, (unsigned long) HASHSIZE)];
|
||||
while (bucket) {
|
||||
if (bucket->hlength == len && STREQN(bucket->hname, name, len))
|
||||
return bucket->hvalue;
|
||||
|
@ -1738,7 +1775,7 @@ int freeit;
|
|||
|
||||
name = np->param;
|
||||
len = strlen(name);
|
||||
save = &(variables[hash(name, len)]);
|
||||
save = &(variables[hash(name, len, (unsigned long) HASHSIZE)]);
|
||||
for (bucket = *save; bucket; bucket = bucket->hnext) {
|
||||
if (len == bucket->hlength && STREQN(bucket->hname, name, len)) {
|
||||
*save = bucket->hnext;
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GAWK, the GNU implementation of the
|
||||
* AWK Programming Language.
|
||||
|
@ -24,12 +24,11 @@
|
|||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] = "$Id: builtin.c,v 1.3 1993/11/13 02:26:27 jtc Exp $";
|
||||
#endif /* not lint */
|
||||
static char rcsid[] = "$Id: builtin.c,v 1.4 1994/02/17 01:22:04 jtc Exp $";
|
||||
#endif
|
||||
|
||||
#include "awk.h"
|
||||
|
||||
|
||||
#ifndef SRANDOM_PROTO
|
||||
extern void srandom P((int seed));
|
||||
#endif
|
||||
|
@ -44,10 +43,6 @@ extern int output_is_tty;
|
|||
|
||||
static NODE *sub_common P((NODE *tree, int global));
|
||||
|
||||
#ifdef GFMT_WORKAROUND
|
||||
char *gfmt P((double g, int prec, char *buf));
|
||||
#endif
|
||||
|
||||
#ifdef _CRAY
|
||||
/* Work around a problem in conversion of doubles to exact integers. */
|
||||
#include <float.h>
|
||||
|
@ -66,20 +61,18 @@ double (*Log)() = log;
|
|||
#define Ceil(n) ceil(n)
|
||||
#endif
|
||||
|
||||
#if __STDC__
|
||||
static void
|
||||
efwrite(void *ptr, size_t size, size_t count, FILE *fp,
|
||||
char *from, struct redirect *rp,int flush)
|
||||
#else
|
||||
|
||||
static void efwrite P((const void *ptr, size_t size, size_t count, FILE *fp,
|
||||
const char *from, struct redirect *rp,int flush));
|
||||
|
||||
static void
|
||||
efwrite(ptr, size, count, fp, from, rp, flush)
|
||||
void *ptr;
|
||||
const void *ptr;
|
||||
size_t size, count;
|
||||
FILE *fp;
|
||||
char *from;
|
||||
const char *from;
|
||||
struct redirect *rp;
|
||||
int flush;
|
||||
#endif
|
||||
{
|
||||
errno = 0;
|
||||
if (fwrite(ptr, size, count, fp) != count)
|
||||
|
@ -220,22 +213,41 @@ NODE *tree;
|
|||
return tmp_number((AWKNUM) d);
|
||||
}
|
||||
|
||||
/* %e and %f formats are not properly implemented. Someone should fix them */
|
||||
/* Actually, this whole thing should be reimplemented. */
|
||||
/*
|
||||
* do_sprintf does the sprintf function. It is one of the uglier parts of
|
||||
* gawk. Thanks to Michal Jaegerman for taming this beast and making it
|
||||
* compatible with ANSI C.
|
||||
*/
|
||||
|
||||
NODE *
|
||||
do_sprintf(tree)
|
||||
NODE *tree;
|
||||
{
|
||||
/* copy 'l' bytes from 's' to 'obufout' checking for space in the process */
|
||||
/* difference of pointers should be of ptrdiff_t type, but let us be kind */
|
||||
#define bchunk(s,l) if(l) {\
|
||||
while((l)>ofre) {\
|
||||
long olen = obufout - obuf;\
|
||||
erealloc(obuf, char *, osiz*2, "do_sprintf");\
|
||||
ofre+=osiz;\
|
||||
osiz*=2;\
|
||||
obufout = obuf + olen;\
|
||||
}\
|
||||
memcpy(obuf+olen,s,(size_t)(l));\
|
||||
olen+=(l);\
|
||||
memcpy(obufout,s,(size_t)(l));\
|
||||
obufout+=(l);\
|
||||
ofre-=(l);\
|
||||
}
|
||||
/* copy one byte from 's' to 'obufout' checking for space in the process */
|
||||
#define bchunk_one(s) {\
|
||||
if(ofre <= 0) {\
|
||||
long olen = obufout - obuf;\
|
||||
erealloc(obuf, char *, osiz*2, "do_sprintf");\
|
||||
ofre+=osiz;\
|
||||
osiz*=2;\
|
||||
obufout = obuf + olen;\
|
||||
}\
|
||||
*obufout++ = *s;\
|
||||
--ofre;\
|
||||
}
|
||||
|
||||
/* Is there space for something L big in the buffer? */
|
||||
|
@ -259,15 +271,16 @@ NODE *tree;
|
|||
|
||||
NODE *r;
|
||||
int toofew = 0;
|
||||
char *obuf;
|
||||
size_t osiz, ofre, olen;
|
||||
static char chbuf[] = "0123456789abcdef";
|
||||
static char sp[] = " ";
|
||||
char *obuf, *obufout;
|
||||
size_t osiz, ofre;
|
||||
char *chbuf;
|
||||
char *s0, *s1;
|
||||
int cs1;
|
||||
int n0;
|
||||
NODE *sfmt, *arg;
|
||||
register NODE *carg;
|
||||
long fw, prec, lj, alt, big;
|
||||
long fw, prec;
|
||||
int lj, alt, big;
|
||||
long *cur;
|
||||
long val;
|
||||
#ifdef sun386 /* Can't cast unsigned (int/long) from ptr->value */
|
||||
|
@ -281,16 +294,17 @@ NODE *tree;
|
|||
char *cp;
|
||||
char *fill;
|
||||
double tmpval;
|
||||
char *pr_str;
|
||||
int ucasehex = 0;
|
||||
char signchar = 0;
|
||||
size_t len;
|
||||
|
||||
static char sp[] = " ";
|
||||
static char zero_string[] = "0";
|
||||
static char lchbuf[] = "0123456789abcdefx";
|
||||
static char Uchbuf[] = "0123456789ABCDEFX";
|
||||
|
||||
emalloc(obuf, char *, 120, "do_sprintf");
|
||||
obufout = obuf;
|
||||
osiz = 120;
|
||||
ofre = osiz - 1;
|
||||
olen = 0;
|
||||
sfmt = tree_eval(tree->lnode);
|
||||
sfmt = force_string(sfmt);
|
||||
carg = tree->rnode;
|
||||
|
@ -311,17 +325,17 @@ NODE *tree;
|
|||
|
||||
retry:
|
||||
--n0;
|
||||
switch (*s1++) {
|
||||
switch (cs1 = *s1++) {
|
||||
case '%':
|
||||
bchunk("%", 1);
|
||||
bchunk_one("%");
|
||||
s0 = s1;
|
||||
break;
|
||||
|
||||
case '0':
|
||||
if (fill != sp || lj)
|
||||
goto lose;
|
||||
if (lj)
|
||||
goto retry;
|
||||
if (cur == &fw)
|
||||
fill = "0"; /* FALL through */
|
||||
fill = zero_string; /* FALL through */
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
|
@ -332,42 +346,58 @@ retry:
|
|||
case '8':
|
||||
case '9':
|
||||
if (cur == 0)
|
||||
goto lose;
|
||||
*cur = s1[-1] - '0';
|
||||
/* goto lose; */
|
||||
break;
|
||||
if (prec >= 0) /* this happens only when we have */
|
||||
/* a negative precision */
|
||||
*cur = cs1 - '0';
|
||||
while (n0 > 0 && *s1 >= '0' && *s1 <= '9') {
|
||||
--n0;
|
||||
*cur = *cur * 10 + *s1++ - '0';
|
||||
}
|
||||
if (prec < 0) { /* negative precision is discarded */
|
||||
prec = 0;
|
||||
cur = 0;
|
||||
}
|
||||
goto retry;
|
||||
case '*':
|
||||
if (cur == 0)
|
||||
goto lose;
|
||||
/* goto lose; */
|
||||
break;
|
||||
parse_next_arg();
|
||||
*cur = force_number(arg);
|
||||
free_temp(arg);
|
||||
goto retry;
|
||||
case ' ': /* print ' ' or '-' */
|
||||
/* 'space' flag is ignored */
|
||||
/* if '+' already present */
|
||||
if (signchar != 0)
|
||||
goto retry;
|
||||
/* FALL THROUGH */
|
||||
case '+': /* print '+' or '-' */
|
||||
signchar = *(s1-1);
|
||||
signchar = cs1;
|
||||
goto retry;
|
||||
case '-':
|
||||
if (lj || fill != sp)
|
||||
goto lose;
|
||||
lj++;
|
||||
if (cur == &prec) {
|
||||
prec = -1;
|
||||
goto retry;
|
||||
}
|
||||
fill = sp; /* if left justified then other */
|
||||
lj++; /* filling is ignored */
|
||||
goto retry;
|
||||
case '.':
|
||||
if (cur != &fw)
|
||||
goto lose;
|
||||
break;
|
||||
cur = &prec;
|
||||
goto retry;
|
||||
case '#':
|
||||
if (alt)
|
||||
goto lose;
|
||||
if (cur != &fw)
|
||||
break;
|
||||
alt++;
|
||||
goto retry;
|
||||
case 'l':
|
||||
if (big)
|
||||
goto lose;
|
||||
break;
|
||||
big++;
|
||||
goto retry;
|
||||
case 'c':
|
||||
|
@ -381,44 +411,26 @@ retry:
|
|||
#endif
|
||||
cpbuf[0] = uval;
|
||||
prec = 1;
|
||||
pr_str = cpbuf;
|
||||
goto dopr_string;
|
||||
cp = cpbuf;
|
||||
goto pr_tail;
|
||||
}
|
||||
if (! prec)
|
||||
if (prec == 0)
|
||||
prec = 1;
|
||||
else if (prec > arg->stlen)
|
||||
prec = arg->stlen;
|
||||
pr_str = arg->stptr;
|
||||
goto dopr_string;
|
||||
cp = arg->stptr;
|
||||
goto pr_tail;
|
||||
case 's':
|
||||
parse_next_arg();
|
||||
arg = force_string(arg);
|
||||
if (!prec || prec > arg->stlen)
|
||||
if (prec == 0 || prec > arg->stlen)
|
||||
prec = arg->stlen;
|
||||
pr_str = arg->stptr;
|
||||
|
||||
dopr_string:
|
||||
if (fw > prec && !lj) {
|
||||
while (fw > prec) {
|
||||
bchunk(fill, 1);
|
||||
fw--;
|
||||
}
|
||||
}
|
||||
bchunk(pr_str, (int) prec);
|
||||
if (fw > prec) {
|
||||
while (fw > prec) {
|
||||
bchunk(fill, 1);
|
||||
fw--;
|
||||
}
|
||||
}
|
||||
s0 = s1;
|
||||
free_temp(arg);
|
||||
break;
|
||||
cp = arg->stptr;
|
||||
goto pr_tail;
|
||||
case 'd':
|
||||
case 'i':
|
||||
parse_next_arg();
|
||||
val = (long) force_number(arg);
|
||||
free_temp(arg);
|
||||
if (val < 0) {
|
||||
sgn = 1;
|
||||
val = -val;
|
||||
|
@ -432,30 +444,19 @@ retry:
|
|||
*--cp = '-';
|
||||
else if (signchar)
|
||||
*--cp = signchar;
|
||||
if (prec != 0) /* ignore '0' flag if */
|
||||
fill = sp; /* precision given */
|
||||
if (prec > fw)
|
||||
fw = prec;
|
||||
prec = cend - cp;
|
||||
if (fw > prec && !lj) {
|
||||
if (fill != sp && (*cp == '-' || signchar)) {
|
||||
bchunk(cp, 1);
|
||||
cp++;
|
||||
prec--;
|
||||
fw--;
|
||||
}
|
||||
while (fw > prec) {
|
||||
bchunk(fill, 1);
|
||||
fw--;
|
||||
}
|
||||
if (fw > prec && ! lj && fill != sp
|
||||
&& (*cp == '-' || signchar)) {
|
||||
bchunk_one(cp);
|
||||
cp++;
|
||||
prec--;
|
||||
fw--;
|
||||
}
|
||||
bchunk(cp, (int) prec);
|
||||
if (fw > prec) {
|
||||
while (fw > prec) {
|
||||
bchunk(fill, 1);
|
||||
fw--;
|
||||
}
|
||||
}
|
||||
s0 = s1;
|
||||
break;
|
||||
goto pr_tail;
|
||||
case 'u':
|
||||
base = 10;
|
||||
goto pr_unsigned;
|
||||
|
@ -463,140 +464,91 @@ retry:
|
|||
base = 8;
|
||||
goto pr_unsigned;
|
||||
case 'X':
|
||||
ucasehex = 1;
|
||||
case 'x':
|
||||
base = 16;
|
||||
goto pr_unsigned;
|
||||
pr_unsigned:
|
||||
if (cs1 == 'X')
|
||||
chbuf = Uchbuf;
|
||||
else
|
||||
chbuf = lchbuf;
|
||||
if (prec != 0) /* ignore '0' flag if */
|
||||
fill = sp; /* precision given */
|
||||
parse_next_arg();
|
||||
uval = (unsigned long) force_number(arg);
|
||||
free_temp(arg);
|
||||
do {
|
||||
*--cp = chbuf[uval % base];
|
||||
if (ucasehex && isalpha(*cp))
|
||||
*cp = toupper(*cp);
|
||||
uval /= base;
|
||||
} while (uval);
|
||||
if (alt && (base == 8 || base == 16)) {
|
||||
if (alt) {
|
||||
if (base == 16) {
|
||||
if (ucasehex)
|
||||
*--cp = 'X';
|
||||
else
|
||||
*--cp = 'x';
|
||||
}
|
||||
*--cp = '0';
|
||||
*--cp = cs1;
|
||||
*--cp = '0';
|
||||
if (fill != sp) {
|
||||
bchunk(cp, 2);
|
||||
cp += 2;
|
||||
fw -= 2;
|
||||
}
|
||||
} else if (base == 8)
|
||||
*--cp = '0';
|
||||
}
|
||||
prec = cend - cp;
|
||||
if (fw > prec && !lj) {
|
||||
pr_tail:
|
||||
if (! lj) {
|
||||
while (fw > prec) {
|
||||
bchunk(fill, 1);
|
||||
bchunk_one(fill);
|
||||
fw--;
|
||||
}
|
||||
}
|
||||
bchunk(cp, (int) prec);
|
||||
if (fw > prec) {
|
||||
while (fw > prec) {
|
||||
bchunk(fill, 1);
|
||||
fw--;
|
||||
}
|
||||
while (fw > prec) {
|
||||
bchunk_one(fill);
|
||||
fw--;
|
||||
}
|
||||
s0 = s1;
|
||||
break;
|
||||
case 'g':
|
||||
parse_next_arg();
|
||||
tmpval = force_number(arg);
|
||||
free_temp(arg);
|
||||
chksize(fw + prec + 9); /* 9==slop */
|
||||
|
||||
cp = cpbuf;
|
||||
*cp++ = '%';
|
||||
if (lj)
|
||||
*cp++ = '-';
|
||||
if (fill != sp)
|
||||
*cp++ = '0';
|
||||
#ifndef GFMT_WORKAROUND
|
||||
if (cur != &fw) {
|
||||
(void) strcpy(cp, "*.*g");
|
||||
(void) sprintf(obuf + olen, cpbuf, (int) fw, (int) prec, (double) tmpval);
|
||||
} else {
|
||||
(void) strcpy(cp, "*g");
|
||||
(void) sprintf(obuf + olen, cpbuf, (int) fw, (double) tmpval);
|
||||
}
|
||||
#else /* GFMT_WORKAROUND */
|
||||
{
|
||||
char *gptr, gbuf[120];
|
||||
#define DEFAULT_G_PRECISION 6
|
||||
if (fw + prec + 9 > sizeof gbuf) { /* 9==slop */
|
||||
emalloc(gptr, char *, fw+prec+9, "do_sprintf(gfmt)");
|
||||
} else
|
||||
gptr = gbuf;
|
||||
(void) gfmt((double) tmpval, cur != &fw ?
|
||||
(int) prec : DEFAULT_G_PRECISION, gptr);
|
||||
*cp++ = '*', *cp++ = 's', *cp = '\0';
|
||||
(void) sprintf(obuf + olen, cpbuf, (int) fw, gptr);
|
||||
if (fill != sp && *gptr == ' ') {
|
||||
char *p = gptr;
|
||||
do { *p++ = '0'; } while (*p == ' ');
|
||||
}
|
||||
if (gptr != gbuf) free(gptr);
|
||||
}
|
||||
#endif /* GFMT_WORKAROUND */
|
||||
len = strlen(obuf + olen);
|
||||
ofre -= len;
|
||||
olen += len;
|
||||
s0 = s1;
|
||||
break;
|
||||
|
||||
case 'f':
|
||||
parse_next_arg();
|
||||
tmpval = force_number(arg);
|
||||
free_temp(arg);
|
||||
chksize(fw + prec + 9); /* 9==slop */
|
||||
|
||||
cp = cpbuf;
|
||||
*cp++ = '%';
|
||||
if (lj)
|
||||
*cp++ = '-';
|
||||
if (fill != sp)
|
||||
*cp++ = '0';
|
||||
if (cur != &fw) {
|
||||
(void) strcpy(cp, "*.*f");
|
||||
(void) sprintf(obuf + olen, cpbuf, (int) fw, (int) prec, (double) tmpval);
|
||||
} else {
|
||||
(void) strcpy(cp, "*f");
|
||||
(void) sprintf(obuf + olen, cpbuf, (int) fw, (double) tmpval);
|
||||
}
|
||||
len = strlen(obuf + olen);
|
||||
ofre -= len;
|
||||
olen += len;
|
||||
s0 = s1;
|
||||
break;
|
||||
case 'e':
|
||||
case 'f':
|
||||
case 'g':
|
||||
case 'E':
|
||||
case 'G':
|
||||
parse_next_arg();
|
||||
tmpval = force_number(arg);
|
||||
free_temp(arg);
|
||||
chksize(fw + prec + 9); /* 9==slop */
|
||||
|
||||
cp = cpbuf;
|
||||
*cp++ = '%';
|
||||
if (lj)
|
||||
*cp++ = '-';
|
||||
if (signchar)
|
||||
*cp++ = signchar;
|
||||
if (alt)
|
||||
*cp++ = '#';
|
||||
if (fill != sp)
|
||||
*cp++ = '0';
|
||||
if (cur != &fw) {
|
||||
(void) strcpy(cp, "*.*e");
|
||||
(void) sprintf(obuf + olen, cpbuf, (int) fw, (int) prec, (double) tmpval);
|
||||
} else {
|
||||
(void) strcpy(cp, "*e");
|
||||
(void) sprintf(obuf + olen, cpbuf, (int) fw, (double) tmpval);
|
||||
}
|
||||
len = strlen(obuf + olen);
|
||||
cp = strcpy(cp, "*.*") + 3;
|
||||
*cp++ = cs1;
|
||||
*cp = '\0';
|
||||
if (prec <= 0)
|
||||
prec = DEFAULT_G_PRECISION;
|
||||
#ifndef GFMT_WORKAROUND
|
||||
(void) sprintf(obufout, cpbuf,
|
||||
(int) fw, (int) prec, (double) tmpval);
|
||||
#else /* GFMT_WORKAROUND */
|
||||
if (cs1 == 'g' || cs1 == 'G')
|
||||
(void) sgfmt(obufout, cpbuf, (int) alt,
|
||||
(int) fw, (int) prec, (double) tmpval);
|
||||
else
|
||||
(void) sprintf(obufout, cpbuf,
|
||||
(int) fw, (int) prec, (double) tmpval);
|
||||
#endif /* GFMT_WORKAROUND */
|
||||
len = strlen(obufout);
|
||||
ofre -= len;
|
||||
olen += len;
|
||||
obufout += len;
|
||||
s0 = s1;
|
||||
break;
|
||||
|
||||
default:
|
||||
lose:
|
||||
break;
|
||||
}
|
||||
if (toofew)
|
||||
|
@ -610,7 +562,7 @@ retry:
|
|||
warning("too many arguments supplied for format string");
|
||||
bchunk(s0, s1 - s0);
|
||||
free_temp(sfmt);
|
||||
r = make_str_node(obuf, olen, ALREADY_MALLOCED);
|
||||
r = make_str_node(obuf, obufout - obuf, ALREADY_MALLOCED);
|
||||
r->flags |= TEMP;
|
||||
return r;
|
||||
}
|
||||
|
@ -799,7 +751,8 @@ register NODE *tree;
|
|||
else {
|
||||
char buf[100];
|
||||
|
||||
sprintf(buf, OFMT, t1->numbr);
|
||||
NUMTOSTR(buf, OFMT, t1->numbr);
|
||||
free_temp(t1);
|
||||
t1 = tmp_string(buf, strlen(buf));
|
||||
}
|
||||
}
|
||||
|
@ -1128,41 +1081,75 @@ NODE *tree;
|
|||
}
|
||||
|
||||
#ifdef GFMT_WORKAROUND
|
||||
/*
|
||||
* printf's %g format [can't rely on gcvt()]
|
||||
* caveat: don't use as argument to *printf()!
|
||||
*/
|
||||
char *
|
||||
gfmt(g, prec, buf)
|
||||
double g; /* value to format */
|
||||
int prec; /* indicates desired significant digits, not decimal places */
|
||||
/*
|
||||
* printf's %g format [can't rely on gcvt()]
|
||||
* caveat: don't use as argument to *printf()!
|
||||
* 'format' string HAS to be of "<flags>*.*g" kind, or we bomb!
|
||||
*/
|
||||
void
|
||||
sgfmt(buf, format, alt, fwidth, prec, g)
|
||||
char *buf; /* return buffer; assumed big enough to hold result */
|
||||
const char *format;
|
||||
int alt; /* use alternate form flag */
|
||||
int fwidth; /* field width in a format */
|
||||
int prec; /* indicates desired significant digits, not decimal places */
|
||||
double g; /* value to format */
|
||||
{
|
||||
if (g == 0.0) {
|
||||
(void) strcpy(buf, "0"); /* easy special case */
|
||||
} else {
|
||||
register char *d, *e, *p;
|
||||
char dform[40];
|
||||
register char *gpos;
|
||||
register char *d, *e, *p;
|
||||
int again = 0;
|
||||
|
||||
/* start with 'e' format (it'll provide nice exponent) */
|
||||
if (prec < 1) prec = 1; /* at least 1 significant digit */
|
||||
(void) sprintf(buf, "%.*e", prec - 1, g);
|
||||
if ((e = strchr(buf, 'e')) != 0) { /* find exponent */
|
||||
int exp = atoi(e+1); /* fetch exponent */
|
||||
if (exp >= -4 && exp < prec) { /* per K&R2, B1.2 */
|
||||
/* switch to 'f' format and re-do */
|
||||
prec -= (exp + 1); /* decimal precision */
|
||||
(void) sprintf(buf, "%.*f", prec, g);
|
||||
e = buf + strlen(buf);
|
||||
}
|
||||
if ((d = strchr(buf, '.')) != 0) {
|
||||
/* remove trailing zeroes and decimal point */
|
||||
for (p = e; p > d && *--p == '0'; ) continue;
|
||||
if (*p == '.') --p;
|
||||
if (++p < e) /* copy exponent and NUL */
|
||||
while ((*p++ = *e++) != '\0') continue;
|
||||
}
|
||||
}
|
||||
strncpy(dform, format, sizeof dform - 1);
|
||||
dform[sizeof dform - 1] = '\0';
|
||||
gpos = strrchr(dform, '.');
|
||||
|
||||
if (g == 0.0 && alt == 0) { /* easy special case */
|
||||
*gpos++ = 'd';
|
||||
*gpos = '\0';
|
||||
(void) sprintf(buf, dform, fwidth, 0);
|
||||
return;
|
||||
}
|
||||
gpos += 2; /* advance to location of 'g' in the format */
|
||||
|
||||
if (prec <= 0) /* negative precision is ignored */
|
||||
prec = (prec < 0 ? DEFAULT_G_PRECISION : 1);
|
||||
|
||||
if (*gpos == 'G')
|
||||
again = 1;
|
||||
/* start with 'e' format (it'll provide nice exponent) */
|
||||
*gpos = 'e';
|
||||
prec -= 1;
|
||||
(void) sprintf(buf, dform, fwidth, prec, g);
|
||||
if ((e = strrchr(buf, 'e')) != NULL) { /* find exponent */
|
||||
int exp = atoi(e+1); /* fetch exponent */
|
||||
if (exp >= -4 && exp <= prec) { /* per K&R2, B1.2 */
|
||||
/* switch to 'f' format and re-do */
|
||||
*gpos = 'f';
|
||||
prec -= exp; /* decimal precision */
|
||||
(void) sprintf(buf, dform, fwidth, prec, g);
|
||||
e = buf + strlen(buf);
|
||||
while (*--e == ' ')
|
||||
continue;
|
||||
e += 1;
|
||||
}
|
||||
else if (again != 0)
|
||||
*gpos = 'E';
|
||||
|
||||
/* if 'alt' in force, then trailing zeros are not removed */
|
||||
if (alt == 0 && (d = strrchr(buf, '.')) != NULL) {
|
||||
/* throw away an excess of precision */
|
||||
for (p = e; p > d && *--p == '0'; )
|
||||
prec -= 1;
|
||||
if (d == p)
|
||||
prec -= 1;
|
||||
if (prec < 0)
|
||||
prec = 0;
|
||||
/* and do that once again */
|
||||
again = 1;
|
||||
}
|
||||
if (again != 0)
|
||||
(void) sprintf(buf, dform, fwidth, prec, g);
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
#endif /* GFMT_WORKAROUND */
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,333 +1,133 @@
|
|||
/* dfa.h - declarations for GNU deterministic regexp compiler
|
||||
Copyright (C) 1988 Free Software Foundation, Inc.
|
||||
Written June, 1988 by Mike Haertel
|
||||
|
||||
NO WARRANTY
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY
|
||||
NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW. EXCEPT
|
||||
WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC,
|
||||
RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS"
|
||||
WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
|
||||
BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY
|
||||
AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
|
||||
DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
|
||||
CORRECTION.
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M.
|
||||
STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY
|
||||
WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE
|
||||
LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR
|
||||
OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR
|
||||
DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR
|
||||
A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS
|
||||
PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||
DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
GENERAL PUBLIC LICENSE TO COPY
|
||||
|
||||
1. You may copy and distribute verbatim copies of this source file
|
||||
as you receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy a valid copyright notice "Copyright
|
||||
(C) 1988 Free Software Foundation, Inc."; and include following the
|
||||
copyright notice a verbatim copy of the above disclaimer of warranty
|
||||
and of this License. You may charge a distribution fee for the
|
||||
physical act of transferring a copy.
|
||||
|
||||
2. You may modify your copy or copies of this source file or
|
||||
any portion of it, and copy and distribute such modifications under
|
||||
the terms of Paragraph 1 above, provided that you also do the following:
|
||||
|
||||
a) cause the modified files to carry prominent notices stating
|
||||
that you changed the files and the date of any change; and
|
||||
|
||||
b) cause the whole of any work that you distribute or publish,
|
||||
that in whole or in part contains or is a derivative of this
|
||||
program or any part thereof, to be licensed at no charge to all
|
||||
third parties on terms identical to those contained in this
|
||||
License Agreement (except that you may choose to grant more extensive
|
||||
warranty protection to some or all third parties, at your option).
|
||||
|
||||
c) You may charge a distribution fee for the physical act of
|
||||
transferring a copy, and you may at your option offer warranty
|
||||
protection in exchange for a fee.
|
||||
|
||||
Mere aggregation of another unrelated program with this program (or its
|
||||
derivative) on a volume of a storage or distribution medium does not bring
|
||||
the other program under the scope of these terms.
|
||||
|
||||
3. You may copy and distribute this program or any portion of it in
|
||||
compiled, executable or object code form under the terms of Paragraphs
|
||||
1 and 2 above provided that you do the following:
|
||||
|
||||
a) accompany it with the complete corresponding machine-readable
|
||||
source code, which must be distributed under the terms of
|
||||
Paragraphs 1 and 2 above; or,
|
||||
|
||||
b) accompany it with a written offer, valid for at least three
|
||||
years, to give any third party free (except for a nominal
|
||||
shipping charge) a complete machine-readable copy of the
|
||||
corresponding source code, to be distributed under the terms of
|
||||
Paragraphs 1 and 2 above; or,
|
||||
|
||||
c) accompany it with the information you received as to where the
|
||||
corresponding source code may be obtained. (This alternative is
|
||||
allowed only for noncommercial distribution and only if you
|
||||
received the program in object code or executable form alone.)
|
||||
|
||||
For an executable file, complete source code means all the source code for
|
||||
all modules it contains; but, as a special exception, it need not include
|
||||
source code for modules which are standard libraries that accompany the
|
||||
operating system on which the executable file runs.
|
||||
|
||||
4. You may not copy, sublicense, distribute or transfer this program
|
||||
except as expressly provided under this License Agreement. Any attempt
|
||||
otherwise to copy, sublicense, distribute or transfer this program is void and
|
||||
your rights to use the program under this License agreement shall be
|
||||
automatically terminated. However, parties who have received computer
|
||||
software programs from you with this License Agreement will not have
|
||||
their licenses terminated so long as such parties remain in full compliance.
|
||||
|
||||
5. If you wish to incorporate parts of this program into other free
|
||||
programs whose distribution conditions are different, write to the Free
|
||||
Software Foundation at 675 Mass Ave, Cambridge, MA 02139. We have not yet
|
||||
worked out a simple rule that can be stated here, but we will often permit
|
||||
this. We will be guided by the two goals of preserving the free status of
|
||||
all derivatives our free software and of promoting the sharing and reuse of
|
||||
software.
|
||||
|
||||
|
||||
In other words, you are welcome to use, share and improve this program.
|
||||
You are forbidden to forbid anyone else to use, share and improve
|
||||
what you give them. Help stamp out software-hoarding!
|
||||
|
||||
$Id: dfa.h,v 1.3 1993/11/13 02:26:36 jtc Exp $
|
||||
$Id: dfa.h,v 1.4 1994/02/17 01:22:09 jtc Exp $
|
||||
*/
|
||||
|
||||
#ifdef __STDC__
|
||||
|
||||
#ifdef SOMEDAY
|
||||
#define ISALNUM(c) isalnum(c)
|
||||
#define ISALPHA(c) isalpha(c)
|
||||
#define ISUPPER(c) isupper(c)
|
||||
#else
|
||||
#define ISALNUM(c) (isascii(c) && isalnum(c))
|
||||
#define ISALPHA(c) (isascii(c) && isalpha(c))
|
||||
#define ISUPPER(c) (isascii(c) && isupper(c))
|
||||
#endif
|
||||
/* Written June, 1988 by Mike Haertel */
|
||||
|
||||
#else /* ! __STDC__ */
|
||||
|
||||
#define const
|
||||
|
||||
#define ISALNUM(c) (isascii(c) && isalnum(c))
|
||||
#define ISALPHA(c) (isascii(c) && isalpha(c))
|
||||
#define ISUPPER(c) (isascii(c) && isupper(c))
|
||||
|
||||
#endif /* ! __STDC__ */
|
||||
|
||||
/* 1 means plain parentheses serve as grouping, and backslash
|
||||
parentheses are needed for literal searching.
|
||||
0 means backslash-parentheses are grouping, and plain parentheses
|
||||
are for literal searching. */
|
||||
#ifndef RE_NO_BK_PARENS
|
||||
#define RE_NO_BK_PARENS 1L
|
||||
#endif
|
||||
|
||||
/* 1 means plain | serves as the "or"-operator, and \| is a literal.
|
||||
0 means \| serves as the "or"-operator, and | is a literal. */
|
||||
#ifndef RE_NO_BK_VBAR
|
||||
#define RE_NO_BK_VBAR (1L << 1)
|
||||
#endif
|
||||
|
||||
/* 0 means plain + or ? serves as an operator, and \+, \? are literals.
|
||||
1 means \+, \? are operators and plain +, ? are literals. */
|
||||
#ifndef RE_BK_PLUS_QM
|
||||
#define RE_BK_PLUS_QM (1L << 2)
|
||||
#endif
|
||||
|
||||
/* 1 means | binds tighter than ^ or $.
|
||||
0 means the contrary. */
|
||||
#ifndef RE_TIGHT_VBAR
|
||||
#define RE_TIGHT_VBAR (1L << 3)
|
||||
#endif
|
||||
|
||||
/* 1 means treat \n as an _OR operator
|
||||
0 means treat it as a normal character */
|
||||
#ifndef RE_NEWLINE_OR
|
||||
#define RE_NEWLINE_OR (1L << 4)
|
||||
#endif
|
||||
|
||||
/* 0 means that special characters (such as *, ^, and $) always have
|
||||
their special meaning regardless of the surrounding context.
|
||||
1 means that special characters may act as normal characters in some
|
||||
contexts. Specifically, this applies to:
|
||||
^ - only special at the beginning, or after ( or |
|
||||
$ - only special at the end, or before ) or |
|
||||
*, +, ? - only special when not after the beginning, (, or | */
|
||||
#ifndef RE_CONTEXT_INDEP_OPS
|
||||
#define RE_CONTEXT_INDEP_OPS (1L << 5)
|
||||
#endif
|
||||
|
||||
/* 1 means that \ in a character class escapes the next character (typically
|
||||
a hyphen). It also is overloaded to mean that hyphen at the end of the range
|
||||
is allowable and means that the hyphen is to be taken literally. */
|
||||
#define RE_AWK_CLASS_HACK (1L << 6)
|
||||
|
||||
/* Now define combinations of bits for the standard possibilities. */
|
||||
#ifdef notdef
|
||||
#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS)
|
||||
#define RE_SYNTAX_EGREP (RE_SYNTAX_AWK | RE_NEWLINE_OR)
|
||||
#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
|
||||
#define RE_SYNTAX_EMACS 0
|
||||
#endif
|
||||
|
||||
/* The NULL pointer. */
|
||||
#ifndef NULL
|
||||
#define NULL 0
|
||||
#endif
|
||||
/* FIXME:
|
||||
2. We should not export so much of the DFA internals.
|
||||
In addition to clobbering modularity, we eat up valuable
|
||||
name space. */
|
||||
|
||||
/* Number of bits in an unsigned char. */
|
||||
#ifndef CHARBITS
|
||||
#define CHARBITS 8
|
||||
#endif
|
||||
|
||||
/* First integer value that is greater than any character code. */
|
||||
#define _NOTCHAR (1 << CHARBITS)
|
||||
#define NOTCHAR (1 << CHARBITS)
|
||||
|
||||
/* INTBITS need not be exact, just a lower bound. */
|
||||
#ifndef INTBITS
|
||||
#define INTBITS (CHARBITS * sizeof (int))
|
||||
#endif
|
||||
|
||||
/* Number of ints required to hold a bit for every character. */
|
||||
#define _CHARSET_INTS ((_NOTCHAR + INTBITS - 1) / INTBITS)
|
||||
#define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS)
|
||||
|
||||
/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */
|
||||
typedef int _charset[_CHARSET_INTS];
|
||||
typedef int charclass[CHARCLASS_INTS];
|
||||
|
||||
/* The regexp is parsed into an array of tokens in postfix form. Some tokens
|
||||
are operators and others are terminal symbols. Most (but not all) of these
|
||||
codes are returned by the lexical analyzer. */
|
||||
#ifdef __STDC__
|
||||
|
||||
typedef enum
|
||||
{
|
||||
_END = -1, /* _END is a terminal symbol that matches the
|
||||
end of input; any value of _END or less in
|
||||
END = -1, /* END is a terminal symbol that matches the
|
||||
end of input; any value of END or less in
|
||||
the parse tree is such a symbol. Accepting
|
||||
states of the DFA are those that would have
|
||||
a transition on _END. */
|
||||
a transition on END. */
|
||||
|
||||
/* Ordinary character values are terminal symbols that match themselves. */
|
||||
|
||||
_EMPTY = _NOTCHAR, /* _EMPTY is a terminal symbol that matches
|
||||
EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches
|
||||
the empty string. */
|
||||
|
||||
_BACKREF, /* _BACKREF is generated by \<digit>; it
|
||||
BACKREF, /* BACKREF is generated by \<digit>; it
|
||||
is not completely handled. If the scanner
|
||||
detects a transition on backref, it returns
|
||||
a kind of "semi-success" indicating that
|
||||
the match will have to be verified with
|
||||
a backtracking matcher. */
|
||||
|
||||
_BEGLINE, /* _BEGLINE is a terminal symbol that matches
|
||||
BEGLINE, /* BEGLINE is a terminal symbol that matches
|
||||
the empty string if it is at the beginning
|
||||
of a line. */
|
||||
|
||||
_ALLBEGLINE, /* _ALLBEGLINE is a terminal symbol that
|
||||
matches the empty string if it is at the
|
||||
beginning of a line; _ALLBEGLINE applies
|
||||
to the entire regexp and can only occur
|
||||
as the first token thereof. _ALLBEGLINE
|
||||
never appears in the parse tree; a _BEGLINE
|
||||
is prepended with _CAT to the entire
|
||||
regexp instead. */
|
||||
|
||||
_ENDLINE, /* _ENDLINE is a terminal symbol that matches
|
||||
ENDLINE, /* ENDLINE is a terminal symbol that matches
|
||||
the empty string if it is at the end of
|
||||
a line. */
|
||||
|
||||
_ALLENDLINE, /* _ALLENDLINE is to _ENDLINE as _ALLBEGLINE
|
||||
is to _BEGLINE. */
|
||||
|
||||
_BEGWORD, /* _BEGWORD is a terminal symbol that matches
|
||||
BEGWORD, /* BEGWORD is a terminal symbol that matches
|
||||
the empty string if it is at the beginning
|
||||
of a word. */
|
||||
|
||||
_ENDWORD, /* _ENDWORD is a terminal symbol that matches
|
||||
ENDWORD, /* ENDWORD is a terminal symbol that matches
|
||||
the empty string if it is at the end of
|
||||
a word. */
|
||||
|
||||
_LIMWORD, /* _LIMWORD is a terminal symbol that matches
|
||||
LIMWORD, /* LIMWORD is a terminal symbol that matches
|
||||
the empty string if it is at the beginning
|
||||
or the end of a word. */
|
||||
|
||||
_NOTLIMWORD, /* _NOTLIMWORD is a terminal symbol that
|
||||
NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that
|
||||
matches the empty string if it is not at
|
||||
the beginning or end of a word. */
|
||||
|
||||
_QMARK, /* _QMARK is an operator of one argument that
|
||||
QMARK, /* QMARK is an operator of one argument that
|
||||
matches zero or one occurrences of its
|
||||
argument. */
|
||||
|
||||
_STAR, /* _STAR is an operator of one argument that
|
||||
STAR, /* STAR is an operator of one argument that
|
||||
matches the Kleene closure (zero or more
|
||||
occurrences) of its argument. */
|
||||
|
||||
_PLUS, /* _PLUS is an operator of one argument that
|
||||
PLUS, /* PLUS is an operator of one argument that
|
||||
matches the positive closure (one or more
|
||||
occurrences) of its argument. */
|
||||
|
||||
_CAT, /* _CAT is an operator of two arguments that
|
||||
REPMN, /* REPMN is a lexical token corresponding
|
||||
to the {m,n} construct. REPMN never
|
||||
appears in the compiled token vector. */
|
||||
|
||||
CAT, /* CAT is an operator of two arguments that
|
||||
matches the concatenation of its
|
||||
arguments. _CAT is never returned by the
|
||||
arguments. CAT is never returned by the
|
||||
lexical analyzer. */
|
||||
|
||||
_OR, /* _OR is an operator of two arguments that
|
||||
OR, /* OR is an operator of two arguments that
|
||||
matches either of its arguments. */
|
||||
|
||||
_LPAREN, /* _LPAREN never appears in the parse tree,
|
||||
ORTOP, /* OR at the toplevel in the parse tree.
|
||||
This is used for a boyer-moore heuristic. */
|
||||
|
||||
LPAREN, /* LPAREN never appears in the parse tree,
|
||||
it is only a lexeme. */
|
||||
|
||||
_RPAREN, /* _RPAREN never appears in the parse tree. */
|
||||
RPAREN, /* RPAREN never appears in the parse tree. */
|
||||
|
||||
_SET /* _SET and (and any value greater) is a
|
||||
CSET /* CSET (and any value greater) is a
|
||||
terminal symbol that matches any of a
|
||||
class of characters. */
|
||||
} _token;
|
||||
} token;
|
||||
|
||||
#else /* ! __STDC__ */
|
||||
|
||||
typedef short _token;
|
||||
|
||||
#define _END -1
|
||||
#define _EMPTY _NOTCHAR
|
||||
#define _BACKREF (_EMPTY + 1)
|
||||
#define _BEGLINE (_EMPTY + 2)
|
||||
#define _ALLBEGLINE (_EMPTY + 3)
|
||||
#define _ENDLINE (_EMPTY + 4)
|
||||
#define _ALLENDLINE (_EMPTY + 5)
|
||||
#define _BEGWORD (_EMPTY + 6)
|
||||
#define _ENDWORD (_EMPTY + 7)
|
||||
#define _LIMWORD (_EMPTY + 8)
|
||||
#define _NOTLIMWORD (_EMPTY + 9)
|
||||
#define _QMARK (_EMPTY + 10)
|
||||
#define _STAR (_EMPTY + 11)
|
||||
#define _PLUS (_EMPTY + 12)
|
||||
#define _CAT (_EMPTY + 13)
|
||||
#define _OR (_EMPTY + 14)
|
||||
#define _LPAREN (_EMPTY + 15)
|
||||
#define _RPAREN (_EMPTY + 16)
|
||||
#define _SET (_EMPTY + 17)
|
||||
|
||||
#endif /* ! __STDC__ */
|
||||
|
||||
/* Sets are stored in an array in the compiled regexp; the index of the
|
||||
array corresponding to a given set token is given by _SET_INDEX(t). */
|
||||
#define _SET_INDEX(t) ((t) - _SET)
|
||||
/* Sets are stored in an array in the compiled dfa; the index of the
|
||||
array corresponding to a given set token is given by SET_INDEX(t). */
|
||||
#define SET_INDEX(t) ((t) - CSET)
|
||||
|
||||
/* Sometimes characters can only be matched depending on the surrounding
|
||||
context. Such context decisions depend on what the previous character
|
||||
|
@ -347,36 +147,36 @@ typedef short _token;
|
|||
|
||||
Word-constituent characters are those that satisfy isalnum().
|
||||
|
||||
The macro _SUCCEEDS_IN_CONTEXT determines whether a given constraint
|
||||
The macro SUCCEEDS_IN_CONTEXT determines whether a given constraint
|
||||
succeeds in a particular context. Prevn is true if the previous character
|
||||
was a newline, currn is true if the lookahead character is a newline.
|
||||
Prevl and currl similarly depend upon whether the previous and current
|
||||
characters are word-constituent letters. */
|
||||
#define _MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
|
||||
((constraint) & (1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4)))
|
||||
#define _MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \
|
||||
((constraint) & (1 << (((prevl) ? 2 : 0) + ((currl) ? 1 : 0))))
|
||||
#define _SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \
|
||||
(_MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
|
||||
&& _MATCHES_LETTER_CONTEXT(constraint, prevl, currl))
|
||||
#define MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
|
||||
((constraint) & 1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4))
|
||||
#define MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \
|
||||
((constraint) & 1 << (((prevl) ? 2 : 0) + ((currl) ? 1 : 0)))
|
||||
#define SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \
|
||||
(MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
|
||||
&& MATCHES_LETTER_CONTEXT(constraint, prevl, currl))
|
||||
|
||||
/* The following macros give information about what a constraint depends on. */
|
||||
#define _PREV_NEWLINE_DEPENDENT(constraint) \
|
||||
#define PREV_NEWLINE_DEPENDENT(constraint) \
|
||||
(((constraint) & 0xc0) >> 2 != ((constraint) & 0x30))
|
||||
#define _PREV_LETTER_DEPENDENT(constraint) \
|
||||
#define PREV_LETTER_DEPENDENT(constraint) \
|
||||
(((constraint) & 0x0c) >> 2 != ((constraint) & 0x03))
|
||||
|
||||
/* Tokens that match the empty string subject to some constraint actually
|
||||
work by applying that constraint to determine what may follow them,
|
||||
taking into account what has gone before. The following values are
|
||||
the constraints corresponding to the special tokens previously defined. */
|
||||
#define _NO_CONSTRAINT 0xff
|
||||
#define _BEGLINE_CONSTRAINT 0xcf
|
||||
#define _ENDLINE_CONSTRAINT 0xaf
|
||||
#define _BEGWORD_CONSTRAINT 0xf2
|
||||
#define _ENDWORD_CONSTRAINT 0xf4
|
||||
#define _LIMWORD_CONSTRAINT 0xf6
|
||||
#define _NOTLIMWORD_CONSTRAINT 0xf9
|
||||
#define NO_CONSTRAINT 0xff
|
||||
#define BEGLINE_CONSTRAINT 0xcf
|
||||
#define ENDLINE_CONSTRAINT 0xaf
|
||||
#define BEGWORD_CONSTRAINT 0xf2
|
||||
#define ENDWORD_CONSTRAINT 0xf4
|
||||
#define LIMWORD_CONSTRAINT 0xf6
|
||||
#define NOTLIMWORD_CONSTRAINT 0xf9
|
||||
|
||||
/* States of the recognizer correspond to sets of positions in the parse
|
||||
tree, together with the constraints under which they may be matched.
|
||||
|
@ -386,44 +186,48 @@ typedef struct
|
|||
{
|
||||
unsigned index; /* Index into the parse array. */
|
||||
unsigned constraint; /* Constraint for matching this position. */
|
||||
} _position;
|
||||
} position;
|
||||
|
||||
/* Sets of positions are stored as arrays. */
|
||||
typedef struct
|
||||
{
|
||||
_position *elems; /* Elements of this position set. */
|
||||
position *elems; /* Elements of this position set. */
|
||||
int nelem; /* Number of elements in this set. */
|
||||
} _position_set;
|
||||
} position_set;
|
||||
|
||||
/* A state of the regexp consists of a set of positions, some flags,
|
||||
/* A state of the dfa consists of a set of positions, some flags,
|
||||
and the token value of the lowest-numbered position of the state that
|
||||
contains an _END token. */
|
||||
contains an END token. */
|
||||
typedef struct
|
||||
{
|
||||
int hash; /* Hash of the positions of this state. */
|
||||
_position_set elems; /* Positions this state could match. */
|
||||
position_set elems; /* Positions this state could match. */
|
||||
char newline; /* True if previous state matched newline. */
|
||||
char letter; /* True if previous state matched a letter. */
|
||||
char backref; /* True if this state matches a \<digit>. */
|
||||
unsigned char constraint; /* Constraint for this state to accept. */
|
||||
int first_end; /* Token value of the first _END in elems. */
|
||||
} _dfa_state;
|
||||
int first_end; /* Token value of the first END in elems. */
|
||||
} dfa_state;
|
||||
|
||||
/* If an r.e. is at most MUST_MAX characters long, we look for a string which
|
||||
must appear in it; whatever's found is dropped into the struct reg. */
|
||||
|
||||
#define MUST_MAX 50
|
||||
/* Element of a list of strings, at least one of which is known to
|
||||
appear in any R.E. matching the DFA. */
|
||||
struct dfamust
|
||||
{
|
||||
int exact;
|
||||
char *must;
|
||||
struct dfamust *next;
|
||||
};
|
||||
|
||||
/* A compiled regular expression. */
|
||||
struct regexp
|
||||
struct dfa
|
||||
{
|
||||
/* Stuff built by the scanner. */
|
||||
_charset *charsets; /* Array of character sets for _SET tokens. */
|
||||
int cindex; /* Index for adding new charsets. */
|
||||
int calloc; /* Number of charsets currently allocated. */
|
||||
charclass *charclasses; /* Array of character sets for CSET tokens. */
|
||||
int cindex; /* Index for adding new charclasses. */
|
||||
int calloc; /* Number of charclasses currently allocated. */
|
||||
|
||||
/* Stuff built by the parser. */
|
||||
_token *tokens; /* Postfix parse array. */
|
||||
token *tokens; /* Postfix parse array. */
|
||||
int tindex; /* Index for adding new tokens. */
|
||||
int talloc; /* Number of tokens currently allocated. */
|
||||
int depth; /* Depth required of an evaluation stack
|
||||
|
@ -431,15 +235,15 @@ struct regexp
|
|||
parse tree. */
|
||||
int nleaves; /* Number of leaves on the parse tree. */
|
||||
int nregexps; /* Count of parallel regexps being built
|
||||
with regparse(). */
|
||||
with dfaparse(). */
|
||||
|
||||
/* Stuff owned by the state builder. */
|
||||
_dfa_state *states; /* States of the regexp. */
|
||||
dfa_state *states; /* States of the dfa. */
|
||||
int sindex; /* Index for adding new states. */
|
||||
int salloc; /* Number of states currently allocated. */
|
||||
|
||||
/* Stuff built by the structure analyzer. */
|
||||
_position_set *follows; /* Array of follow sets, indexed by position
|
||||
position_set *follows; /* Array of follow sets, indexed by position
|
||||
index. The follow of a position is the set
|
||||
of positions containing characters that
|
||||
could conceivably follow a character
|
||||
|
@ -469,7 +273,7 @@ struct regexp
|
|||
int **fails; /* Transition tables after failing to accept
|
||||
on a state that potentially could do so. */
|
||||
int *success; /* Table of acceptance conditions used in
|
||||
regexecute and computed in build_state. */
|
||||
dfaexec and computed in build_state. */
|
||||
int *newlines; /* Transitions on newlines. The entry for a
|
||||
newline in any transition table is always
|
||||
-1 so we can count lines without wasting
|
||||
|
@ -477,40 +281,41 @@ struct regexp
|
|||
newline is stored separately and handled
|
||||
as a special case. Newline is also used
|
||||
as a sentinel at the end of the buffer. */
|
||||
char must[MUST_MAX];
|
||||
int mustn;
|
||||
struct dfamust *musts; /* List of strings, at least one of which
|
||||
is known to appear in any r.e. matching
|
||||
the dfa. */
|
||||
};
|
||||
|
||||
/* Some macros for user access to regexp internals. */
|
||||
/* Some macros for user access to dfa internals. */
|
||||
|
||||
/* ACCEPTING returns true if s could possibly be an accepting state of r. */
|
||||
#define ACCEPTING(s, r) ((r).states[s].constraint)
|
||||
|
||||
/* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the
|
||||
specified context. */
|
||||
#define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, reg) \
|
||||
_SUCCEEDS_IN_CONTEXT((reg).states[state].constraint, \
|
||||
#define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, dfa) \
|
||||
SUCCEEDS_IN_CONTEXT((dfa).states[state].constraint, \
|
||||
prevn, currn, prevl, currl)
|
||||
|
||||
/* FIRST_MATCHING_REGEXP returns the index number of the first of parallel
|
||||
regexps that a given state could accept. Parallel regexps are numbered
|
||||
starting at 1. */
|
||||
#define FIRST_MATCHING_REGEXP(state, reg) (-(reg).states[state].first_end)
|
||||
#define FIRST_MATCHING_REGEXP(state, dfa) (-(dfa).states[state].first_end)
|
||||
|
||||
/* Entry points. */
|
||||
|
||||
#ifdef __STDC__
|
||||
|
||||
/* Regsyntax() takes two arguments; the first sets the syntax bits described
|
||||
/* dfasyntax() takes two arguments; the first sets the syntax bits described
|
||||
earlier in this file, and the second sets the case-folding flag. */
|
||||
extern void regsyntax(long, int);
|
||||
extern void dfasyntax(reg_syntax_t, int);
|
||||
|
||||
/* Compile the given string of the given length into the given struct regexp.
|
||||
/* Compile the given string of the given length into the given struct dfa.
|
||||
Final argument is a flag specifying whether to build a searching or an
|
||||
exact matcher. */
|
||||
extern void regcompile(const char *, size_t, struct regexp *, int);
|
||||
extern void dfacomp(char *, size_t, struct dfa *, int);
|
||||
|
||||
/* Execute the given struct regexp on the buffer of characters. The
|
||||
/* Execute the given struct dfa on the buffer of characters. The
|
||||
first char * points to the beginning, and the second points to the
|
||||
first character after the end of the buffer, which must be a writable
|
||||
place so a sentinel end-of-buffer marker can be stored there. The
|
||||
|
@ -522,37 +327,37 @@ extern void regcompile(const char *, size_t, struct regexp *, int);
|
|||
order to verify backreferencing; otherwise the flag will be cleared.
|
||||
Returns NULL if no match is found, or a pointer to the first
|
||||
character after the first & shortest matching string in the buffer. */
|
||||
extern char *regexecute(struct regexp *, char *, char *, int, int *, int *);
|
||||
extern char *dfaexec(struct dfa *, char *, char *, int, int *, int *);
|
||||
|
||||
/* Free the storage held by the components of a struct regexp. */
|
||||
extern void reg_free(struct regexp *);
|
||||
/* Free the storage held by the components of a struct dfa. */
|
||||
extern void dfafree(struct dfa *);
|
||||
|
||||
/* Entry points for people who know what they're doing. */
|
||||
|
||||
/* Initialize the components of a struct regexp. */
|
||||
extern void reginit(struct regexp *);
|
||||
/* Initialize the components of a struct dfa. */
|
||||
extern void dfainit(struct dfa *);
|
||||
|
||||
/* Incrementally parse a string of given length into a struct regexp. */
|
||||
extern void regparse(const char *, size_t, struct regexp *);
|
||||
/* Incrementally parse a string of given length into a struct dfa. */
|
||||
extern void dfaparse(char *, size_t, struct dfa *);
|
||||
|
||||
/* Analyze a parsed regexp; second argument tells whether to build a searching
|
||||
or an exact matcher. */
|
||||
extern void reganalyze(struct regexp *, int);
|
||||
extern void dfaanalyze(struct dfa *, int);
|
||||
|
||||
/* Compute, for each possible character, the transitions out of a given
|
||||
state, storing them in an array of integers. */
|
||||
extern void regstate(int, struct regexp *, int []);
|
||||
extern void dfastate(int, struct dfa *, int []);
|
||||
|
||||
/* Error handling. */
|
||||
|
||||
/* Regerror() is called by the regexp routines whenever an error occurs. It
|
||||
/* dfaerror() is called by the regexp routines whenever an error occurs. It
|
||||
takes a single argument, a NUL-terminated string describing the error.
|
||||
The default reg_error() prints the error message to stderr and exits.
|
||||
The user can provide a different reg_error() if so desired. */
|
||||
extern void reg_error(const char *);
|
||||
The default dfaerror() prints the error message to stderr and exits.
|
||||
The user can provide a different dfaerror() if so desired. */
|
||||
extern void dfaerror(const char *);
|
||||
|
||||
#else /* ! __STDC__ */
|
||||
extern void regsyntax(), regcompile(), reg_free(), reginit(), regparse();
|
||||
extern void reganalyze(), regstate(), reg_error();
|
||||
extern char *regexecute();
|
||||
#endif
|
||||
extern void dfasyntax(), dfacomp(), dfafree(), dfainit(), dfaparse();
|
||||
extern void dfaanalyze(), dfastate(), dfaerror();
|
||||
extern char *dfaexec();
|
||||
#endif /* ! __STDC__ */
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GAWK, the GNU implementation of the
|
||||
* AWK Programming Language.
|
||||
|
@ -24,8 +24,8 @@
|
|||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] = "$Id: eval.c,v 1.3 1993/11/13 02:26:39 jtc Exp $";
|
||||
#endif /* not lint */
|
||||
static char rcsid[] = "$Id: eval.c,v 1.4 1994/02/17 01:22:11 jtc Exp $";
|
||||
#endif
|
||||
|
||||
#include "awk.h"
|
||||
|
||||
|
@ -322,7 +322,10 @@ register NODE *volatile tree;
|
|||
break;
|
||||
|
||||
case Node_K_delete:
|
||||
do_delete(tree->lnode, tree->rnode);
|
||||
if (tree->rnode != NULL)
|
||||
do_delete(tree->lnode, tree->rnode);
|
||||
else
|
||||
assoc_clear(tree->lnode);
|
||||
break;
|
||||
|
||||
case Node_K_next:
|
||||
|
@ -971,18 +974,20 @@ NODE *arg_list; /* Node_expression_list of calling args. */
|
|||
/* should we free arg->var_value ? */
|
||||
arg->var_array = n->var_array;
|
||||
arg->type = Node_var_array;
|
||||
arg->array_size = n->array_size;
|
||||
arg->table_size = n->table_size;
|
||||
}
|
||||
unref(n->lnode);
|
||||
/* n->lnode overlays the array size, don't unref it if array */
|
||||
if (n->type != Node_var_array)
|
||||
unref(n->lnode);
|
||||
freenode(n);
|
||||
count--;
|
||||
}
|
||||
while (count-- > 0) {
|
||||
n = *sp++;
|
||||
/* if n is a (local) array, all the elements should be freed */
|
||||
if (n->type == Node_var_array) {
|
||||
if (n->type == Node_var_array)
|
||||
assoc_clear(n);
|
||||
free(n->var_array);
|
||||
}
|
||||
unref(n->lnode);
|
||||
freenode(n);
|
||||
}
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GAWK, the GNU implementation of the
|
||||
* AWK Programming Language.
|
||||
|
@ -24,22 +24,24 @@
|
|||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] = "$Id: field.c,v 1.3 1993/11/13 02:26:43 jtc Exp $";
|
||||
#endif /* not lint */
|
||||
static char rcsid[] = "$Id: field.c,v 1.4 1994/02/17 01:22:13 jtc Exp $";
|
||||
#endif
|
||||
|
||||
#include "awk.h"
|
||||
|
||||
typedef void (* Setfunc) P((int, char*, int, NODE *));
|
||||
|
||||
static int (*parse_field) P((int, char **, int, NODE *,
|
||||
Regexp *, void (*)(), NODE *));
|
||||
Regexp *, Setfunc, NODE *));
|
||||
static void rebuild_record P((void));
|
||||
static int re_parse_field P((int, char **, int, NODE *,
|
||||
Regexp *, void (*)(), NODE *));
|
||||
Regexp *, Setfunc, NODE *));
|
||||
static int def_parse_field P((int, char **, int, NODE *,
|
||||
Regexp *, void (*)(), NODE *));
|
||||
Regexp *, Setfunc, NODE *));
|
||||
static int sc_parse_field P((int, char **, int, NODE *,
|
||||
Regexp *, void (*)(), NODE *));
|
||||
Regexp *, Setfunc, NODE *));
|
||||
static int fw_parse_field P((int, char **, int, NODE *,
|
||||
Regexp *, void (*)(), NODE *));
|
||||
Regexp *, Setfunc, NODE *));
|
||||
static void set_element P((int, char *, int, NODE *));
|
||||
static void grow_fields_arr P((int num));
|
||||
static void set_field P((int num, char *str, int len, NODE *dummy));
|
||||
|
@ -230,7 +232,7 @@ char **buf; /* on input: string to parse; on output: point to start next */
|
|||
int len;
|
||||
NODE *fs;
|
||||
Regexp *rp;
|
||||
void (*set) (); /* routine to set the value of the parsed field */
|
||||
Setfunc set; /* routine to set the value of the parsed field */
|
||||
NODE *n;
|
||||
{
|
||||
register char *scan = *buf;
|
||||
|
@ -248,9 +250,9 @@ NODE *n;
|
|||
scan++;
|
||||
field = scan;
|
||||
while (scan < end
|
||||
&& research(rp, scan, 0, (int)(end - scan), 1) != -1
|
||||
&& research(rp, scan, 0, (end - scan), 1) != -1
|
||||
&& nf < up_to) {
|
||||
if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */
|
||||
if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */
|
||||
scan++;
|
||||
if (scan == end) {
|
||||
(*set)(++nf, field, (int)(scan - field), n);
|
||||
|
@ -286,7 +288,7 @@ char **buf; /* on input: string to parse; on output: point to start next */
|
|||
int len;
|
||||
NODE *fs;
|
||||
Regexp *rp;
|
||||
void (*set) (); /* routine to set the value of the parsed field */
|
||||
Setfunc set; /* routine to set the value of the parsed field */
|
||||
NODE *n;
|
||||
{
|
||||
register char *scan = *buf;
|
||||
|
@ -340,7 +342,7 @@ char **buf; /* on input: string to parse; on output: point to start next */
|
|||
int len;
|
||||
NODE *fs;
|
||||
Regexp *rp;
|
||||
void (*set) (); /* routine to set the value of the parsed field */
|
||||
Setfunc set; /* routine to set the value of the parsed field */
|
||||
NODE *n;
|
||||
{
|
||||
register char *scan = *buf;
|
||||
|
@ -393,7 +395,7 @@ char **buf; /* on input: string to parse; on output: point to start next */
|
|||
int len;
|
||||
NODE *fs;
|
||||
Regexp *rp;
|
||||
void (*set) (); /* routine to set the value of the parsed field */
|
||||
Setfunc set; /* routine to set the value of the parsed field */
|
||||
NODE *n;
|
||||
{
|
||||
register char *scan = *buf;
|
||||
|
@ -518,7 +520,7 @@ NODE *tree;
|
|||
NODE *fs;
|
||||
char *s;
|
||||
int (*parseit)P((int, char **, int, NODE *,
|
||||
Regexp *, void (*)(), NODE *));
|
||||
Regexp *, Setfunc, NODE *));
|
||||
Regexp *rp = NULL;
|
||||
|
||||
t1 = tree_eval(tree->lnode);
|
||||
|
|
|
@ -21,8 +21,8 @@
|
|||
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] = "$Id: getopt.c,v 1.3 1993/11/13 02:26:46 jtc Exp $";
|
||||
#endif /* not lint */
|
||||
static char rcsid[] = "$Id: getopt.c,v 1.4 1994/02/17 01:22:16 jtc Exp $";
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#if defined (emacs) || defined (CONFIG_BROKETS)
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
along with this program; if not, write to the Free Software
|
||||
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
$Id: getopt.h,v 1.3 1993/11/13 02:26:50 jtc Exp $
|
||||
$Id: getopt.h,v 1.4 1994/02/17 01:22:18 jtc Exp $
|
||||
*/
|
||||
|
||||
#ifndef _GETOPT_H
|
||||
|
@ -79,7 +79,7 @@ extern int optopt;
|
|||
|
||||
struct option
|
||||
{
|
||||
#if __STDC__
|
||||
#ifdef __STDC__
|
||||
const char *name;
|
||||
#else
|
||||
char *name;
|
||||
|
@ -97,7 +97,7 @@ struct option
|
|||
#define required_argument 1
|
||||
#define optional_argument 2
|
||||
|
||||
#if __STDC__
|
||||
#ifdef __STDC__
|
||||
#if defined(__GNU_LIBRARY__)
|
||||
/* Many other libraries have conflicting prototypes for getopt, with
|
||||
differences in the consts, in stdlib.h. To avoid compilation
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] = "$Id: getopt1.c,v 1.3 1993/11/13 02:26:52 jtc Exp $";
|
||||
#endif /* not lint */
|
||||
static char rcsid[] = "$Id: getopt1.c,v 1.4 1994/02/17 01:22:19 jtc Exp $";
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#if defined (emacs) || defined (CONFIG_BROKETS)
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GAWK, the GNU implementation of the
|
||||
* AWK Programming Language.
|
||||
|
@ -24,8 +24,8 @@
|
|||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] = "$Id: io.c,v 1.4 1993/11/13 02:26:54 jtc Exp $";
|
||||
#endif /* not lint */
|
||||
static char rcsid[] = "$Id: io.c,v 1.5 1994/02/17 01:22:21 jtc Exp $";
|
||||
#endif
|
||||
|
||||
#if !defined(VMS) && !defined(VMS_POSIX) && !defined(_MSC_VER)
|
||||
#include <sys/param.h>
|
||||
|
@ -60,14 +60,14 @@ static int close_redir P((struct redirect *rp));
|
|||
static int wait_any P((int interesting));
|
||||
#endif
|
||||
static IOBUF *gawk_popen P((char *cmd, struct redirect *rp));
|
||||
static IOBUF *iop_open P((char *file, char *how));
|
||||
static IOBUF *iop_open P((const char *file, const char *how));
|
||||
static int gawk_pclose P((struct redirect *rp));
|
||||
static int do_pathopen P((char *file));
|
||||
static int str2mode P((char *mode));
|
||||
static int do_pathopen P((const char *file));
|
||||
static int str2mode P((const char *mode));
|
||||
static void spec_setup P((IOBUF *iop, int len, int allocate));
|
||||
static int specfdopen P((IOBUF *iop, char *name, char *mode));
|
||||
static int pidopen P((IOBUF *iop, char *name, char *mode));
|
||||
static int useropen P((IOBUF *iop, char *name, char *mode));
|
||||
static int specfdopen P((IOBUF *iop, const char *name, const char *mode));
|
||||
static int pidopen P((IOBUF *iop, const char *name, const char *mode));
|
||||
static int useropen P((IOBUF *iop, const char *name, const char *mode));
|
||||
|
||||
extern FILE *fdopen();
|
||||
|
||||
|
@ -266,6 +266,9 @@ do_input()
|
|||
if (inrec(iop) == 0)
|
||||
while (interpret(expression_value) && inrec(iop) == 0)
|
||||
;
|
||||
/* recover any space from C based alloca */
|
||||
(void) alloca(0);
|
||||
|
||||
if (exiting)
|
||||
break;
|
||||
}
|
||||
|
@ -282,10 +285,10 @@ int *errflg;
|
|||
register char *str;
|
||||
int tflag = 0;
|
||||
int outflag = 0;
|
||||
char *direction = "to";
|
||||
char *mode;
|
||||
const char *direction = "to";
|
||||
const char *mode;
|
||||
int fd;
|
||||
char *what = NULL;
|
||||
const char *what = NULL;
|
||||
|
||||
switch (tree->type) {
|
||||
case Node_redirect_append:
|
||||
|
@ -398,9 +401,13 @@ int *errflg;
|
|||
rp->fp = stdout;
|
||||
else if (fd == fileno(stderr))
|
||||
rp->fp = stderr;
|
||||
else
|
||||
rp->fp = fdopen(fd, mode);
|
||||
if (isatty(fd))
|
||||
else {
|
||||
rp->fp = fdopen(fd, (char *) mode);
|
||||
/* don't leak file descriptors */
|
||||
if (rp->fp == NULL)
|
||||
close(fd);
|
||||
}
|
||||
if (rp->fp != NULL && isatty(fd))
|
||||
rp->flag |= RED_NOBUF;
|
||||
}
|
||||
}
|
||||
|
@ -593,7 +600,7 @@ close_io ()
|
|||
|
||||
static int
|
||||
str2mode(mode)
|
||||
char *mode;
|
||||
const char *mode;
|
||||
{
|
||||
int ret;
|
||||
|
||||
|
@ -609,7 +616,9 @@ char *mode;
|
|||
case 'a':
|
||||
ret = O_WRONLY|O_APPEND|O_CREAT;
|
||||
break;
|
||||
|
||||
default:
|
||||
ret = 0; /* lint */
|
||||
cant_happen();
|
||||
}
|
||||
return ret;
|
||||
|
@ -626,10 +635,10 @@ char *mode;
|
|||
|
||||
int
|
||||
devopen(name, mode)
|
||||
char *name, *mode;
|
||||
const char *name, *mode;
|
||||
{
|
||||
int openfd = INVALID_HANDLE;
|
||||
char *cp, *ptr;
|
||||
const char *cp, *ptr;
|
||||
int flag = 0;
|
||||
struct stat buf;
|
||||
extern double strtod();
|
||||
|
@ -646,7 +655,7 @@ char *name, *mode;
|
|||
|
||||
if (STREQ(name, "-"))
|
||||
openfd = fileno(stdin);
|
||||
else if (STREQN(name, "/dev/", 5) && stat(name, &buf) == -1) {
|
||||
else if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) {
|
||||
cp = name + 5;
|
||||
|
||||
if (STREQ(cp, "stdin") && (flag & O_RDONLY) == O_RDONLY)
|
||||
|
@ -705,7 +714,7 @@ int allocate;
|
|||
static int
|
||||
specfdopen(iop, name, mode)
|
||||
IOBUF *iop;
|
||||
char *name, *mode;
|
||||
const char *name, *mode;
|
||||
{
|
||||
int fd;
|
||||
IOBUF *tp;
|
||||
|
@ -728,7 +737,7 @@ char *name, *mode;
|
|||
* to maximize portability.
|
||||
*/
|
||||
#ifndef GETPGRP_NOARG
|
||||
#if defined(__svr4__) || defined(BSD4_4) || defined(_POSIX_SOURCE) || defined(_POSIX_JOB_CONTROL)
|
||||
#if defined(__svr4__) || defined(BSD4_4) || defined(_POSIX_SOURCE)
|
||||
#define GETPGRP_NOARG
|
||||
#else
|
||||
#if defined(i860) || defined(_AIX) || defined(hpux) || defined(VMS)
|
||||
|
@ -752,7 +761,7 @@ char *name, *mode;
|
|||
static int
|
||||
pidopen(iop, name, mode)
|
||||
IOBUF *iop;
|
||||
char *name, *mode;
|
||||
const char *name, *mode;
|
||||
{
|
||||
char tbuf[BUFSIZ];
|
||||
int i;
|
||||
|
@ -784,12 +793,12 @@ char *name, *mode;
|
|||
static int
|
||||
useropen(iop, name, mode)
|
||||
IOBUF *iop;
|
||||
char *name, *mode;
|
||||
const char *name, *mode;
|
||||
{
|
||||
char tbuf[BUFSIZ], *cp;
|
||||
int i;
|
||||
#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0
|
||||
#if defined(atarist)
|
||||
#if defined(atarist) || defined(__svr4__)
|
||||
gid_t groupset[NGROUPS_MAX];
|
||||
#else
|
||||
int groupset[NGROUPS_MAX];
|
||||
|
@ -825,16 +834,16 @@ char *name, *mode;
|
|||
|
||||
static IOBUF *
|
||||
iop_open(name, mode)
|
||||
char *name, *mode;
|
||||
const char *name, *mode;
|
||||
{
|
||||
int openfd = INVALID_HANDLE;
|
||||
int flag = 0;
|
||||
struct stat buf;
|
||||
IOBUF *iop;
|
||||
static struct internal {
|
||||
char *name;
|
||||
const char *name;
|
||||
int compare;
|
||||
int (*fp)();
|
||||
int (*fp) P((IOBUF*,const char *,const char *));
|
||||
IOBUF iob;
|
||||
} table[] = {
|
||||
{ "/dev/fd/", 8, specfdopen },
|
||||
|
@ -855,12 +864,12 @@ char *name, *mode;
|
|||
|
||||
if (STREQ(name, "-"))
|
||||
openfd = fileno(stdin);
|
||||
else if (STREQN(name, "/dev/", 5) && stat(name, &buf) == -1) {
|
||||
else if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < devcount; i++) {
|
||||
if (STREQN(name, table[i].name, table[i].compare)) {
|
||||
IOBUF *iop = & table[i].iob;
|
||||
iop = & table[i].iob;
|
||||
|
||||
if (iop->buf != NULL) {
|
||||
spec_setup(iop, 0, 0);
|
||||
|
@ -1009,7 +1018,7 @@ gawk_pclose(rp)
|
|||
struct redirect *rp;
|
||||
{
|
||||
int rval, aval, fd = rp->iop->fd;
|
||||
FILE *kludge = fdopen(fd, "r"); /* pclose needs FILE* w/ right fileno */
|
||||
FILE *kludge = fdopen(fd, (char *) "r"); /* pclose needs FILE* w/ right fileno */
|
||||
|
||||
rp->iop->fd = dup(fd); /* kludge to allow close() + pclose() */
|
||||
rval = iop_close(rp->iop);
|
||||
|
@ -1017,7 +1026,7 @@ struct redirect *rp;
|
|||
aval = pclose(kludge);
|
||||
return (rval < 0 ? rval : aval);
|
||||
}
|
||||
#else /* VMS */
|
||||
#else /* VMS || OS2 || MSDOS */
|
||||
|
||||
static
|
||||
struct {
|
||||
|
@ -1067,7 +1076,7 @@ struct redirect *rp;
|
|||
free(pipes[cur].command);
|
||||
return rval;
|
||||
}
|
||||
#endif /* VMS */
|
||||
#endif /* VMS || OS2 || MSDOS */
|
||||
|
||||
#endif /* PIPES_SIMULATED */
|
||||
|
||||
|
@ -1092,7 +1101,7 @@ NODE *tree;
|
|||
rp = redirect(tree->rnode, &redir_error);
|
||||
if (rp == NULL && redir_error) { /* failed redirect */
|
||||
if (! do_unix) {
|
||||
char *s = strerror(redir_error);
|
||||
s = strerror(redir_error);
|
||||
|
||||
unref(ERRNO_node->var_value);
|
||||
ERRNO_node->var_value =
|
||||
|
@ -1107,7 +1116,7 @@ NODE *tree;
|
|||
errcode = 0;
|
||||
cnt = get_a_record(&s, iop, *RS, & errcode);
|
||||
if (! do_unix && errcode != 0) {
|
||||
char *s = strerror(errcode);
|
||||
s = strerror(errcode);
|
||||
|
||||
unref(ERRNO_node->var_value);
|
||||
ERRNO_node->var_value = make_string(s, strlen(s));
|
||||
|
@ -1153,7 +1162,7 @@ NODE *tree;
|
|||
|
||||
int
|
||||
pathopen (file)
|
||||
char *file;
|
||||
const char *file;
|
||||
{
|
||||
int fd = do_pathopen(file);
|
||||
|
||||
|
@ -1185,12 +1194,12 @@ char *file;
|
|||
|
||||
static int
|
||||
do_pathopen (file)
|
||||
char *file;
|
||||
const char *file;
|
||||
{
|
||||
static char *savepath = DEFPATH; /* defined in config.h */
|
||||
static const char *savepath = DEFPATH; /* defined in config.h */
|
||||
static int first = 1;
|
||||
char *awkpath, *cp;
|
||||
char trypath[BUFSIZ];
|
||||
const char *awkpath;
|
||||
char *cp, trypath[BUFSIZ];
|
||||
int fd;
|
||||
|
||||
if (STREQ(file, "-"))
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GAWK, the GNU implementation of the
|
||||
* AWK Programming Language.
|
||||
|
@ -24,8 +24,8 @@
|
|||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] = "$Id: iop.c,v 1.2 1993/08/02 17:29:54 mycroft Exp $";
|
||||
#endif /* not lint */
|
||||
static char rcsid[] = "$Id: iop.c,v 1.3 1994/02/17 01:22:22 jtc Exp $";
|
||||
#endif
|
||||
|
||||
#include "awk.h"
|
||||
|
||||
|
@ -66,7 +66,7 @@ int fd;
|
|||
else if (fstat(fd, &stb) < 0)
|
||||
return 8*512; /* conservative in case of DECnet access */
|
||||
else
|
||||
return 24*512;
|
||||
return 32*512;
|
||||
|
||||
#else
|
||||
/*
|
||||
|
@ -150,17 +150,14 @@ int *errcode;
|
|||
register char *bp = iop->off;
|
||||
char *bufend;
|
||||
char *start = iop->off; /* beginning of record */
|
||||
int saw_newline;
|
||||
char rs;
|
||||
int eat_whitespace;
|
||||
int saw_newline = 0, eat_whitespace = 0; /* used iff grRS==0 */
|
||||
|
||||
if (iop->cnt == EOF) /* previous read hit EOF */
|
||||
return EOF;
|
||||
|
||||
if (grRS == 0) { /* special case: grRS == "" */
|
||||
rs = '\n';
|
||||
eat_whitespace = 0;
|
||||
saw_newline = 0;
|
||||
} else
|
||||
rs = (char) grRS;
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GAWK, the GNU implementation of the
|
||||
* AWK Programming Language.
|
||||
|
@ -24,7 +24,7 @@
|
|||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] = "$Id: main.c,v 1.3 1993/11/13 02:26:57 jtc Exp $";
|
||||
static char rcsid[] = "$Id: main.c,v 1.4 1994/02/17 01:22:23 jtc Exp $";
|
||||
#endif
|
||||
|
||||
#include "getopt.h"
|
||||
|
@ -141,7 +141,8 @@ char **argv;
|
|||
extern int optind;
|
||||
extern int opterr;
|
||||
extern char *optarg;
|
||||
char *optlist = "+F:f:v:W:";
|
||||
const char *optlist = "+F:f:v:W:m:";
|
||||
int stopped_early = 0;
|
||||
|
||||
#ifdef __EMX__
|
||||
_response(&argc, &argv);
|
||||
|
@ -175,7 +176,6 @@ char **argv;
|
|||
Nnull_string->flags = (PERM|STR|STRING|NUM|NUMBER);
|
||||
|
||||
/* Set up the special variables */
|
||||
|
||||
/*
|
||||
* Note that this must be done BEFORE arg parsing else -F
|
||||
* breaks horribly
|
||||
|
@ -227,6 +227,19 @@ char **argv;
|
|||
pre_assign(optarg);
|
||||
break;
|
||||
|
||||
case 'm':
|
||||
/*
|
||||
* Research awk extension.
|
||||
* -mf=nnn set # fields, gawk ignores
|
||||
* -mr=nnn set record length, ditto
|
||||
*/
|
||||
if (do_lint)
|
||||
warning("-m[fr] option irrelevant");
|
||||
if ((optarg[0] != 'r' && optarg[0] != 'f')
|
||||
|| optarg[1] != '=')
|
||||
warning("-m option usage: -m[fn]=nnn");
|
||||
break;
|
||||
|
||||
case 'W': /* gawk specific options */
|
||||
gawk_option(optarg);
|
||||
break;
|
||||
|
@ -259,6 +272,14 @@ char **argv;
|
|||
break;
|
||||
#endif
|
||||
|
||||
case 0:
|
||||
/*
|
||||
* getopt_long found an option that sets a variable
|
||||
* instead of returning a letter. Do nothing, just
|
||||
* cycle around for the next one.
|
||||
*/
|
||||
break;
|
||||
|
||||
case '?':
|
||||
default:
|
||||
/*
|
||||
|
@ -275,6 +296,7 @@ char **argv;
|
|||
if (! do_posix
|
||||
&& (optopt == 0 || strchr(optlist, optopt) == NULL)) {
|
||||
optind--;
|
||||
stopped_early = 1;
|
||||
goto out;
|
||||
} else if (optopt)
|
||||
/* Use 1003.2 required message format */
|
||||
|
@ -302,7 +324,7 @@ out:
|
|||
output_is_tty = 1;
|
||||
/* No -f or --source options, use next arg */
|
||||
if (numfiles == -1) {
|
||||
if (optind > argc - 1) /* no args left */
|
||||
if (optind > argc - 1 || stopped_early) /* no args left or no program */
|
||||
usage(1);
|
||||
srcfiles[++numfiles].stype = CMDLINE;
|
||||
srcfiles[numfiles].val = argv[optind];
|
||||
|
@ -342,16 +364,15 @@ static void
|
|||
usage(exitval)
|
||||
int exitval;
|
||||
{
|
||||
char *opt1 = " -f progfile [--]";
|
||||
#if defined(MSDOS) || defined(OS2)
|
||||
char *opt2 = " [--] \"program\"";
|
||||
const char *opt1 = " -f progfile [--]";
|
||||
#if defined(MSDOS) || defined(OS2) || defined(VMS)
|
||||
const char *opt2 = " [--] \"program\"";
|
||||
#else
|
||||
char *opt2 = " [--] 'program'";
|
||||
const char *opt2 = " [--] 'program'";
|
||||
#endif
|
||||
char *regops = " [POSIX or GNU style options]";
|
||||
const char *regops = " [POSIX or GNU style options]";
|
||||
|
||||
version();
|
||||
fprintf(stderr, "Usage: %s%s%s file ...\n\t%s%s%s file ...\n",
|
||||
fprintf(stderr, "Usage:\t%s%s%s file ...\n\t%s%s%s file ...\n",
|
||||
myname, regops, opt1, myname, regops, opt2);
|
||||
|
||||
/* GNU long options info. Gack. */
|
||||
|
@ -359,12 +380,13 @@ int exitval;
|
|||
fputs("\t-f progfile\t\t--file=progfile\n", stderr);
|
||||
fputs("\t-F fs\t\t\t--field-separator=fs\n", stderr);
|
||||
fputs("\t-v var=val\t\t--assign=var=val\n", stderr);
|
||||
fputs("\t-m[fr]=val\n", stderr);
|
||||
fputs("\t-W compat\t\t--compat\n", stderr);
|
||||
fputs("\t-W copyleft\t\t--copyleft\n", stderr);
|
||||
fputs("\t-W copyright\t\t--copyright\n", stderr);
|
||||
fputs("\t-W help\t\t\t--help\n", stderr);
|
||||
fputs("\t-W lint\t\t\t--lint\n", stderr);
|
||||
#if 0
|
||||
#ifdef NOSTALGIA
|
||||
fputs("\t-W nostalgia\t\t--nostalgia\n", stderr);
|
||||
#endif
|
||||
#ifdef DEBUG
|
||||
|
@ -399,7 +421,6 @@ GNU General Public License for more details.\n\
|
|||
along with this program; if not, write to the Free Software\n\
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n";
|
||||
|
||||
version();
|
||||
fputs(blurb_part1, stderr);
|
||||
fputs(blurb_part2, stderr);
|
||||
fputs(blurb_part3, stderr);
|
||||
|
@ -411,7 +432,8 @@ cmdline_fs(str)
|
|||
char *str;
|
||||
{
|
||||
register NODE **tmp;
|
||||
int len = strlen(str);
|
||||
/* int len = strlen(str); *//* don't do that - we want to
|
||||
avoid mismatched types */
|
||||
|
||||
tmp = get_lhs(FS_node, (Func_ptr *) 0);
|
||||
unref(*tmp);
|
||||
|
@ -428,7 +450,7 @@ char *str;
|
|||
if (do_unix && ! do_posix)
|
||||
str[0] = '\t';
|
||||
}
|
||||
*tmp = make_str_node(str, len, SCAN); /* do process escapes */
|
||||
*tmp = make_str_node(str, strlen(str), SCAN); /* do process escapes */
|
||||
set_FS();
|
||||
}
|
||||
|
||||
|
@ -460,9 +482,9 @@ char **argv;
|
|||
*/
|
||||
struct varinit {
|
||||
NODE **spec;
|
||||
char *name;
|
||||
const char *name;
|
||||
NODETYPE type;
|
||||
char *strval;
|
||||
const char *strval;
|
||||
AWKNUM numval;
|
||||
Func_ptr assign;
|
||||
};
|
||||
|
@ -493,9 +515,10 @@ init_vars()
|
|||
register struct varinit *vp;
|
||||
|
||||
for (vp = varinit; vp->name; vp++) {
|
||||
*(vp->spec) = install(vp->name,
|
||||
*(vp->spec) = install((char *) vp->name,
|
||||
node(vp->strval == 0 ? make_number(vp->numval)
|
||||
: make_string(vp->strval, strlen(vp->strval)),
|
||||
: make_string((char *) vp->strval,
|
||||
strlen(vp->strval)),
|
||||
vp->type, (NODE *) NULL));
|
||||
if (vp->assign)
|
||||
(*(vp->assign))();
|
||||
|
@ -731,6 +754,8 @@ static void
|
|||
version()
|
||||
{
|
||||
fprintf(stderr, "%s, patchlevel %d\n", version_string, PATCHLEVEL);
|
||||
/* per GNU coding standards, exit successfully, do nothing else */
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/* this mess will improve in 2.16 */
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GAWK, the GNU implementation of the
|
||||
* AWK Programming Language.
|
||||
|
@ -24,8 +24,8 @@
|
|||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] = "$Id: msg.c,v 1.2 1993/08/02 17:29:55 mycroft Exp $";
|
||||
#endif /* not lint */
|
||||
static char rcsid[] = "$Id: msg.c,v 1.3 1994/02/17 01:22:25 jtc Exp $";
|
||||
#endif
|
||||
|
||||
#include "awk.h"
|
||||
|
||||
|
@ -35,8 +35,8 @@ char *source = NULL;
|
|||
/* VARARGS2 */
|
||||
void
|
||||
err(s, emsg, argp)
|
||||
char *s;
|
||||
char *emsg;
|
||||
const char *s;
|
||||
const char *emsg;
|
||||
va_list argp;
|
||||
{
|
||||
char *file;
|
||||
|
@ -53,8 +53,9 @@ va_list argp;
|
|||
}
|
||||
if (FNR) {
|
||||
file = FILENAME_node->var_value->stptr;
|
||||
(void) putc('(', stderr);
|
||||
if (file)
|
||||
(void) fprintf(stderr, "(FILENAME=%s ", file);
|
||||
(void) fprintf(stderr, "FILENAME=%s ", file);
|
||||
(void) fprintf(stderr, "FNR=%d) ", FNR);
|
||||
}
|
||||
(void) fprintf(stderr, s);
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
|
||||
* Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GAWK, the GNU implementation of the
|
||||
* AWK Programming Language.
|
||||
|
@ -24,8 +24,8 @@
|
|||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] = "$Id: node.c,v 1.3 1993/11/13 02:27:00 jtc Exp $";
|
||||
#endif /* not lint */
|
||||
static char rcsid[] = "$Id: node.c,v 1.4 1994/02/17 01:22:27 jtc Exp $";
|
||||
#endif
|
||||
|
||||
#include "awk.h"
|
||||
|
||||
|
@ -106,7 +106,7 @@ register NODE *n;
|
|||
* (more complicated) variations on this theme didn't seem to pay off, but
|
||||
* systematic testing might be in order at some point
|
||||
*/
|
||||
static char *values[] = {
|
||||
static const char *values[] = {
|
||||
"0",
|
||||
"1",
|
||||
"2",
|
||||
|
@ -141,7 +141,7 @@ register NODE *s;
|
|||
num = (long)s->numbr;
|
||||
if ((AWKNUM) num == s->numbr) { /* integral value */
|
||||
if (num < NVAL && num >= 0) {
|
||||
sp = values[num];
|
||||
sp = (char *) values[num];
|
||||
s->stlen = 1;
|
||||
} else {
|
||||
(void) sprintf(sp, "%ld", num);
|
||||
|
@ -149,7 +149,7 @@ register NODE *s;
|
|||
}
|
||||
s->stfmt = -1;
|
||||
} else {
|
||||
(void) sprintf(sp, CONVFMT, s->numbr);
|
||||
NUMTOSTR(sp, CONVFMT, s->numbr);
|
||||
s->stlen = strlen(sp);
|
||||
s->stfmt = (char)CONVFMTidx;
|
||||
}
|
||||
|
|
|
@ -1,3 +1 @@
|
|||
/* $Id: patchlevel.h,v 1.3 1993/11/13 02:27:02 jtc Exp $ */
|
||||
|
||||
#define PATCHLEVEL 3
|
||||
#define PATCHLEVEL 4
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 1991, 1992, the Free Software Foundation, Inc.
|
||||
* Copyright (C) 1991, 1992, 1993 the Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GAWK, the GNU implementation of the
|
||||
* AWK Programming Language.
|
||||
|
@ -22,7 +22,7 @@
|
|||
* along with GAWK; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*
|
||||
* $Id: protos.h,v 1.2 1993/08/02 17:30:01 mycroft Exp $
|
||||
* $Id: protos.h,v 1.3 1994/02/17 01:22:31 jtc Exp $
|
||||
*/
|
||||
|
||||
#ifdef __STDC__
|
||||
|
@ -53,7 +53,7 @@ extern char *strstr P((const char *s1, const char *s2));
|
|||
extern int strlen P((const char *));
|
||||
extern long strtol P((const char *, char **, int));
|
||||
#if !defined(_MSC_VER) && !defined(__GNU_LIBRARY__)
|
||||
extern int strftime P((char *, int, const char *, const struct tm *));
|
||||
extern size_t strftime P((char *, size_t, const char *, const struct tm *));
|
||||
#endif
|
||||
extern time_t time P((time_t *));
|
||||
extern aptr_t memset P((aptr_t, int, size_t));
|
||||
|
@ -62,10 +62,9 @@ extern aptr_t memmove P((aptr_t, const aptr_t, size_t));
|
|||
extern aptr_t memchr P((const aptr_t, int, size_t));
|
||||
extern int memcmp P((const aptr_t, const aptr_t, size_t));
|
||||
|
||||
/* extern int fprintf P((FILE *, char *, ...)); */
|
||||
extern int fprintf P(());
|
||||
extern int fprintf P((FILE *, const char *, ...));
|
||||
#if !defined(MSDOS) && !defined(__GNU_LIBRARY__)
|
||||
extern int fwrite P((const char *, int, int, FILE *));
|
||||
extern size_t fwrite P((const void *, size_t, size_t, FILE *));
|
||||
extern int fputs P((const char *, FILE *));
|
||||
extern int unlink P((const char *));
|
||||
#endif
|
||||
|
@ -77,7 +76,7 @@ extern void abort P(());
|
|||
extern int isatty P((int));
|
||||
extern void exit P((int));
|
||||
extern int system P((const char *));
|
||||
extern int sscanf P((/* char *, char *, ... */));
|
||||
extern int sscanf P((const char *, const char *, ...));
|
||||
#ifndef toupper
|
||||
extern int toupper P((int));
|
||||
#endif
|
||||
|
@ -93,8 +92,8 @@ extern int stat P((const char *, struct stat *));
|
|||
extern off_t lseek P((int, off_t, int));
|
||||
extern int fseek P((FILE *, long, int));
|
||||
extern int close P((int));
|
||||
extern int creat P(());
|
||||
extern int open P(());
|
||||
extern int creat P((const char *, mode_t));
|
||||
extern int open P((const char *, int, ...));
|
||||
extern int pipe P((int *));
|
||||
extern int dup P((int));
|
||||
extern int dup2 P((int,int));
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 1991, 1992 the Free Software Foundation, Inc.
|
||||
* Copyright (C) 1991, 1992, 1993 the Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GAWK, the GNU implementation of the
|
||||
* AWK Programming Language.
|
||||
|
@ -24,8 +24,8 @@
|
|||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] = "$Id: re.c,v 1.3 1993/11/13 02:27:05 jtc Exp $";
|
||||
#endif /* not lint */
|
||||
static char rcsid[] = "$Id: re.c,v 1.4 1994/02/17 01:22:33 jtc Exp $";
|
||||
#endif
|
||||
|
||||
#include "awk.h"
|
||||
|
||||
|
@ -34,12 +34,12 @@ static char rcsid[] = "$Id: re.c,v 1.3 1993/11/13 02:27:05 jtc Exp $";
|
|||
Regexp *
|
||||
make_regexp(s, len, ignorecase, dfa)
|
||||
char *s;
|
||||
int len;
|
||||
size_t len;
|
||||
int ignorecase;
|
||||
int dfa;
|
||||
{
|
||||
Regexp *rp;
|
||||
char *err;
|
||||
const char *rerr;
|
||||
char *src = s;
|
||||
char *temp;
|
||||
char *end = s + len;
|
||||
|
@ -94,7 +94,7 @@ int dfa;
|
|||
*dest = '\0' ; /* Only necessary if we print dest ? */
|
||||
emalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
|
||||
memset((char *) rp, 0, sizeof(*rp));
|
||||
emalloc(rp->pat.buffer, char *, 16, "make_regexp");
|
||||
emalloc(rp->pat.buffer, unsigned char *, 16, "make_regexp");
|
||||
rp->pat.allocated = 16;
|
||||
emalloc(rp->pat.fastmap, char *, 256, "make_regexp");
|
||||
|
||||
|
@ -103,13 +103,14 @@ int dfa;
|
|||
else
|
||||
rp->pat.translate = NULL;
|
||||
len = dest - temp;
|
||||
if ((err = re_compile_pattern(temp, len, &(rp->pat))) != NULL)
|
||||
fatal("%s: /%s/", err, temp);
|
||||
if ((rerr = re_compile_pattern(temp, len, &(rp->pat))) != NULL)
|
||||
fatal("%s: /%s/", rerr, temp);
|
||||
if (dfa && !ignorecase) {
|
||||
regcompile(temp, len, &(rp->dfareg), 1);
|
||||
dfacomp(temp, len, &(rp->dfareg), 1);
|
||||
rp->dfa = 1;
|
||||
} else
|
||||
rp->dfa = 0;
|
||||
|
||||
free(temp);
|
||||
return rp;
|
||||
}
|
||||
|
@ -119,24 +120,24 @@ research(rp, str, start, len, need_start)
|
|||
Regexp *rp;
|
||||
register char *str;
|
||||
int start;
|
||||
register int len;
|
||||
register size_t len;
|
||||
int need_start;
|
||||
{
|
||||
char *ret = str;
|
||||
|
||||
if (rp->dfa) {
|
||||
char save1;
|
||||
char save2;
|
||||
char save;
|
||||
int count = 0;
|
||||
int try_backref;
|
||||
|
||||
save1 = str[start+len];
|
||||
str[start+len] = '\n';
|
||||
save2 = str[start+len+1];
|
||||
ret = regexecute(&(rp->dfareg), str+start, str+start+len+1, 1,
|
||||
/*
|
||||
* dfa likes to stick a '\n' right after the matched
|
||||
* text. So we just save and restore the character.
|
||||
*/
|
||||
save = str[start+len];
|
||||
ret = dfaexec(&(rp->dfareg), str+start, str+start+len, 1,
|
||||
&count, &try_backref);
|
||||
str[start+len] = save1;
|
||||
str[start+len+1] = save2;
|
||||
str[start+len] = save;
|
||||
}
|
||||
if (ret) {
|
||||
if (need_start || rp->dfa == 0)
|
||||
|
@ -155,12 +156,12 @@ Regexp *rp;
|
|||
free(rp->pat.buffer);
|
||||
free(rp->pat.fastmap);
|
||||
if (rp->dfa)
|
||||
reg_free(&(rp->dfareg));
|
||||
dfafree(&(rp->dfareg));
|
||||
free(rp);
|
||||
}
|
||||
|
||||
void
|
||||
reg_error(s)
|
||||
dfaerror(s)
|
||||
const char *s;
|
||||
{
|
||||
fatal(s);
|
||||
|
@ -198,7 +199,8 @@ NODE *t;
|
|||
t->re_text = dupnode(t1);
|
||||
free_temp(t1);
|
||||
}
|
||||
t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen, IGNORECASE, t->re_cnt);
|
||||
t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen,
|
||||
IGNORECASE, t->re_cnt);
|
||||
t->re_flags &= ~CASE;
|
||||
t->re_flags |= IGNORECASE;
|
||||
return t->re_reg;
|
||||
|
@ -207,6 +209,8 @@ NODE *t;
|
|||
void
|
||||
resetup()
|
||||
{
|
||||
(void) re_set_syntax(RE_SYNTAX_AWK);
|
||||
regsyntax(RE_SYNTAX_AWK, 0);
|
||||
reg_syntax_t syn = RE_SYNTAX_AWK;
|
||||
|
||||
(void) re_set_syntax(syn);
|
||||
dfasyntax(syn, 0);
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,10 +1,11 @@
|
|||
/* Definitions for data structures callers pass the regex library.
|
||||
/* Definitions for data structures and routines for the regular
|
||||
expression library, version 0.12.
|
||||
|
||||
Copyright (C) 1985, 1989-90 Free Software Foundation, Inc.
|
||||
Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 1, or (at your option)
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
|
@ -16,247 +17,492 @@
|
|||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
$Id: regex.h,v 1.2 1993/08/02 17:30:13 mycroft Exp $
|
||||
$Id: regex.h,v 1.3 1994/02/17 01:22:42 jtc Exp $
|
||||
*/
|
||||
|
||||
#ifndef __REGEXP_LIBRARY
|
||||
#define __REGEXP_LIBRARY
|
||||
#ifndef __REGEXP_LIBRARY_H__
|
||||
#define __REGEXP_LIBRARY_H__
|
||||
|
||||
/* Define number of parens for which we record the beginnings and ends.
|
||||
This affects how much space the `struct re_registers' type takes up. */
|
||||
#ifndef RE_NREGS
|
||||
#define RE_NREGS 10
|
||||
#endif
|
||||
/* POSIX says that <sys/types.h> must be included (by the caller) before
|
||||
<regex.h>. */
|
||||
|
||||
#define BYTEWIDTH 8
|
||||
|
||||
|
||||
/* Maximum number of duplicates an interval can allow. */
|
||||
#ifndef RE_DUP_MAX
|
||||
#define RE_DUP_MAX ((1 << 15) - 1)
|
||||
#ifdef VMS
|
||||
/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
|
||||
should be there. */
|
||||
#include <stddef.h>
|
||||
#endif
|
||||
|
||||
|
||||
/* This defines the various regexp syntaxes. */
|
||||
extern long obscure_syntax;
|
||||
/* The following two types have to be signed and unsigned integer type
|
||||
wide enough to hold a value of a pointer. For most ANSI compilers
|
||||
ptrdiff_t and size_t should likely be OK. Still, the size of these two
|
||||
types is 2 for Microsoft C. Ugh... */
|
||||
typedef long s_reg_t;
|
||||
typedef unsigned long active_reg_t;
|
||||
|
||||
/* The following bits are used to determine the regexp syntax we
|
||||
recognize. The set/not-set meanings are chosen so that Emacs syntax
|
||||
remains the value 0. The bits are given in alphabetical order, and
|
||||
the definitions shifted by one from the previous bit; thus, when we
|
||||
add or remove a bit, only one other definition need change. */
|
||||
typedef unsigned long reg_syntax_t;
|
||||
|
||||
/* The following bits are used in the obscure_syntax variable to choose among
|
||||
alternative regexp syntaxes. */
|
||||
/* If this bit is not set, then \ inside a bracket expression is literal.
|
||||
If set, then such a \ quotes the following character. */
|
||||
#define RE_BACKSLASH_ESCAPE_IN_LISTS (1L)
|
||||
|
||||
/* If this bit is set, plain parentheses serve as grouping, and backslash
|
||||
parentheses are needed for literal searching.
|
||||
If not set, backslash-parentheses are grouping, and plain parentheses
|
||||
are for literal searching. */
|
||||
#define RE_NO_BK_PARENS 1L
|
||||
/* If this bit is not set, then + and ? are operators, and \+ and \? are
|
||||
literals.
|
||||
If set, then \+ and \? are operators and + and ? are literals. */
|
||||
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
|
||||
|
||||
/* If this bit is set, plain | serves as the `or'-operator, and \| is a
|
||||
literal.
|
||||
If not set, \| serves as the `or'-operator, and | is a literal. */
|
||||
#define RE_NO_BK_VBAR (1L << 1)
|
||||
|
||||
/* If this bit is not set, plain + or ? serves as an operator, and \+, \? are
|
||||
literals.
|
||||
If set, \+, \? are operators and plain +, ? are literals. */
|
||||
#define RE_BK_PLUS_QM (1L << 2)
|
||||
|
||||
/* If this bit is set, | binds tighter than ^ or $.
|
||||
If not set, the contrary. */
|
||||
#define RE_TIGHT_VBAR (1L << 3)
|
||||
|
||||
/* If this bit is set, then treat newline as an OR operator.
|
||||
If not set, treat it as a normal character. */
|
||||
#define RE_NEWLINE_OR (1L << 4)
|
||||
|
||||
/* If this bit is set, then special characters may act as normal
|
||||
characters in some contexts. Specifically, this applies to:
|
||||
^ -- only special at the beginning, or after ( or |;
|
||||
$ -- only special at the end, or before ) or |;
|
||||
*, +, ? -- only special when not after the beginning, (, or |.
|
||||
If this bit is not set, special characters (such as *, ^, and $)
|
||||
always have their special meaning regardless of the surrounding
|
||||
context. */
|
||||
#define RE_CONTEXT_INDEP_OPS (1L << 5)
|
||||
|
||||
/* If this bit is not set, then \ before anything inside [ and ] is taken as
|
||||
a real \.
|
||||
If set, then such a \ escapes the following character. This is a
|
||||
special case for awk. */
|
||||
#define RE_AWK_CLASS_HACK (1L << 6)
|
||||
|
||||
/* If this bit is set, then \{ and \} or { and } serve as interval operators.
|
||||
If not set, then \{ and \} and { and } are treated as literals. */
|
||||
#define RE_INTERVALS (1L << 7)
|
||||
|
||||
/* If this bit is not set, then \{ and \} serve as interval operators and
|
||||
{ and } are literals.
|
||||
If set, then { and } serve as interval operators and \{ and \} are
|
||||
literals. */
|
||||
#define RE_NO_BK_CURLY_BRACES (1L << 8)
|
||||
|
||||
/* If this bit is set, then character classes are supported; they are:
|
||||
[:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
|
||||
/* If this bit is set, then character classes are supported. They are:
|
||||
[:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
|
||||
[:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
|
||||
If not set, then character classes are not supported. */
|
||||
#define RE_CHAR_CLASSES (1L << 9)
|
||||
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
|
||||
|
||||
/* If this bit is set, then the dot re doesn't match a null byte.
|
||||
If not set, it does. */
|
||||
#define RE_DOT_NOT_NULL (1L << 10)
|
||||
/* If this bit is set, then ^ and $ are always anchors (outside bracket
|
||||
expressions, of course).
|
||||
If this bit is not set, then it depends:
|
||||
^ is an anchor if it is at the beginning of a regular
|
||||
expression or after an open-group or an alternation operator;
|
||||
$ is an anchor if it is at the end of a regular expression, or
|
||||
before a close-group or an alternation operator.
|
||||
|
||||
/* If this bit is set, then [^...] doesn't match a newline.
|
||||
If not set, it does. */
|
||||
#define RE_HAT_NOT_NEWLINE (1L << 11)
|
||||
This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
|
||||
POSIX draft 11.2 says that * etc. in leading positions is undefined.
|
||||
We already implemented a previous draft which made those constructs
|
||||
invalid, though, so we haven't changed the code back. */
|
||||
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
|
||||
|
||||
/* If this bit is set, back references are recognized.
|
||||
If not set, they aren't. */
|
||||
#define RE_NO_BK_REFS (1L << 12)
|
||||
/* If this bit is set, then special characters are always special
|
||||
regardless of where they are in the pattern.
|
||||
If this bit is not set, then special characters are special only in
|
||||
some contexts; otherwise they are ordinary. Specifically,
|
||||
* + ? and intervals are only special when not after the beginning,
|
||||
open-group, or alternation operator. */
|
||||
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
|
||||
|
||||
/* If this bit is set, back references must refer to a preceding
|
||||
subexpression. If not set, a back reference to a nonexistent
|
||||
subexpression is treated as literal characters. */
|
||||
#define RE_NO_EMPTY_BK_REF (1L << 13)
|
||||
/* If this bit is set, then *, +, ?, and { cannot be first in an re or
|
||||
immediately after an alternation or begin-group operator. */
|
||||
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
|
||||
|
||||
/* If this bit is set, bracket expressions can't be empty.
|
||||
If it is not set, they can be empty. */
|
||||
#define RE_NO_EMPTY_BRACKETS (1L << 14)
|
||||
/* If this bit is set, then . matches newline.
|
||||
If not set, then it doesn't. */
|
||||
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
|
||||
|
||||
/* If this bit is set, then *, +, ? and { cannot be first in an re or
|
||||
immediately after a |, or a (. Furthermore, a | cannot be first or
|
||||
last in an re, or immediately follow another | or a (. Also, a ^
|
||||
cannot appear in a nonleading position and a $ cannot appear in a
|
||||
nontrailing position (outside of bracket expressions, that is). */
|
||||
#define RE_CONTEXTUAL_INVALID_OPS (1L << 15)
|
||||
/* If this bit is set, then . doesn't match NUL.
|
||||
If not set, then it does. */
|
||||
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
|
||||
|
||||
/* If this bit is set, then +, ? and | aren't recognized as operators.
|
||||
If it's not, they are. */
|
||||
#define RE_LIMITED_OPS (1L << 16)
|
||||
/* If this bit is set, nonmatching lists [^...] do not match newline.
|
||||
If not set, they do. */
|
||||
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
|
||||
|
||||
/* If this bit is set, then an ending range point has to collate higher
|
||||
or equal to the starting range point.
|
||||
If it's not set, then when the ending range point collates lower
|
||||
than the starting range point, the range is just considered empty. */
|
||||
#define RE_NO_EMPTY_RANGES (1L << 17)
|
||||
/* If this bit is set, either \{...\} or {...} defines an
|
||||
interval, depending on RE_NO_BK_BRACES.
|
||||
If not set, \{, \}, {, and } are literals. */
|
||||
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
|
||||
|
||||
/* If this bit is set, then a hyphen (-) can't be an ending range point.
|
||||
If it isn't, then it can. */
|
||||
#define RE_NO_HYPHEN_RANGE_END (1L << 18)
|
||||
/* If this bit is set, +, ? and | aren't recognized as operators.
|
||||
If not set, they are. */
|
||||
#define RE_LIMITED_OPS (RE_INTERVALS << 1)
|
||||
|
||||
/* If this bit is set, newline is an alternation operator.
|
||||
If not set, newline is literal. */
|
||||
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
|
||||
|
||||
/* Define combinations of bits for the standard possibilities. */
|
||||
#define RE_SYNTAX_POSIX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
|
||||
| RE_CONTEXT_INDEP_OPS)
|
||||
#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_AWK_CLASS_HACK)
|
||||
#define RE_SYNTAX_EGREP (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
|
||||
| RE_CONTEXT_INDEP_OPS | RE_NEWLINE_OR)
|
||||
#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
|
||||
/* If this bit is set, then `{...}' defines an interval, and \{ and \}
|
||||
are literals.
|
||||
If not set, then `\{...\}' defines an interval. */
|
||||
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
|
||||
|
||||
/* If this bit is set, (...) defines a group, and \( and \) are literals.
|
||||
If not set, \(...\) defines a group, and ( and ) are literals. */
|
||||
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
|
||||
|
||||
/* If this bit is set, then \<digit> matches <digit>.
|
||||
If not set, then \<digit> is a back-reference. */
|
||||
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
|
||||
|
||||
/* If this bit is set, then | is an alternation operator, and \| is literal.
|
||||
If not set, then \| is an alternation operator, and | is literal. */
|
||||
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
|
||||
|
||||
/* If this bit is set, then an ending range point collating lower
|
||||
than the starting range point, as in [z-a], is invalid.
|
||||
If not set, then when the ending range point collates lower than the
|
||||
starting range point, the range is ignored. */
|
||||
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
|
||||
|
||||
/* If this bit is set, then an unmatched ) is ordinary.
|
||||
If not set, then an unmatched ) is invalid. */
|
||||
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
|
||||
|
||||
/* If this bit is set, do not process the GNU regex operators.
|
||||
If not set, then the GNU regex operators are recognized. */
|
||||
#define RE_NO_GNU_OPS (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
|
||||
|
||||
/* This global variable defines the particular regexp syntax to use (for
|
||||
some interfaces). When a regexp is compiled, the syntax used is
|
||||
stored in the pattern buffer, so changing this does not affect
|
||||
already-compiled regexps. */
|
||||
extern reg_syntax_t re_syntax_options;
|
||||
|
||||
/* Define combinations of the above bits for the standard possibilities.
|
||||
(The [[[ comments delimit what gets put into the Texinfo file, so
|
||||
don't delete them!) */
|
||||
/* [[[begin syntaxes]]] */
|
||||
#define RE_SYNTAX_EMACS 0
|
||||
#define RE_SYNTAX_POSIX_BASIC (RE_INTERVALS | RE_BK_PLUS_QM \
|
||||
| RE_CHAR_CLASSES | RE_DOT_NOT_NULL \
|
||||
| RE_HAT_NOT_NEWLINE | RE_NO_EMPTY_BK_REF \
|
||||
| RE_NO_EMPTY_BRACKETS | RE_LIMITED_OPS \
|
||||
| RE_NO_EMPTY_RANGES | RE_NO_HYPHEN_RANGE_END)
|
||||
|
||||
#define RE_SYNTAX_POSIX_EXTENDED (RE_INTERVALS | RE_NO_BK_CURLY_BRACES \
|
||||
| RE_NO_BK_VBAR | RE_NO_BK_PARENS \
|
||||
| RE_HAT_NOT_NEWLINE | RE_CHAR_CLASSES \
|
||||
| RE_NO_EMPTY_BRACKETS | RE_CONTEXTUAL_INVALID_OPS \
|
||||
| RE_NO_BK_REFS | RE_NO_EMPTY_RANGES \
|
||||
| RE_NO_HYPHEN_RANGE_END)
|
||||
|
||||
#define RE_SYNTAX_AWK \
|
||||
(RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
|
||||
| RE_NO_BK_PARENS | RE_NO_BK_REFS \
|
||||
| RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
|
||||
| RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
|
||||
|
||||
#define RE_SYNTAX_GNU_AWK \
|
||||
(RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
|
||||
|
||||
#define RE_SYNTAX_POSIX_AWK \
|
||||
(RE_SYNTAX_GNU_AWK | RE_NO_GNU_OPS)
|
||||
|
||||
#define RE_SYNTAX_GREP \
|
||||
(RE_BK_PLUS_QM | RE_CHAR_CLASSES \
|
||||
| RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
|
||||
| RE_NEWLINE_ALT)
|
||||
|
||||
#define RE_SYNTAX_EGREP \
|
||||
(RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
|
||||
| RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
|
||||
| RE_NEWLINE_ALT | RE_NO_BK_PARENS \
|
||||
| RE_NO_BK_VBAR)
|
||||
|
||||
#define RE_SYNTAX_POSIX_EGREP \
|
||||
(RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
|
||||
|
||||
/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
|
||||
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
|
||||
|
||||
#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
|
||||
|
||||
/* Syntax bits common to both basic and extended POSIX regex syntax. */
|
||||
#define _RE_SYNTAX_POSIX_COMMON \
|
||||
(RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
|
||||
| RE_INTERVALS | RE_NO_EMPTY_RANGES)
|
||||
|
||||
#define RE_SYNTAX_POSIX_BASIC \
|
||||
(_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
|
||||
|
||||
/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
|
||||
RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
|
||||
isn't minimal, since other operators, such as \`, aren't disabled. */
|
||||
#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
|
||||
(_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
|
||||
|
||||
#define RE_SYNTAX_POSIX_EXTENDED \
|
||||
(_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
|
||||
| RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
|
||||
| RE_NO_BK_PARENS | RE_NO_BK_VBAR \
|
||||
| RE_UNMATCHED_RIGHT_PAREN_ORD)
|
||||
|
||||
/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
|
||||
replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
|
||||
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
|
||||
(_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
|
||||
| RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
|
||||
| RE_NO_BK_PARENS | RE_NO_BK_REFS \
|
||||
| RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
|
||||
/* [[[end syntaxes]]] */
|
||||
|
||||
/* Maximum number of duplicates an interval can allow. Some systems
|
||||
(erroneously) define this in other header files, but we want our
|
||||
value, so remove any previous define. */
|
||||
#ifdef RE_DUP_MAX
|
||||
#undef RE_DUP_MAX
|
||||
#endif
|
||||
/* if sizeof(int) == 2, then ((1 << 15) - 1) overflows */
|
||||
#define RE_DUP_MAX (0x7fff)
|
||||
|
||||
|
||||
/* This data structure is used to represent a compiled pattern. */
|
||||
/* POSIX `cflags' bits (i.e., information for `regcomp'). */
|
||||
|
||||
/* If this bit is set, then use extended regular expression syntax.
|
||||
If not set, then use basic regular expression syntax. */
|
||||
#define REG_EXTENDED 1
|
||||
|
||||
/* If this bit is set, then ignore case when matching.
|
||||
If not set, then case is significant. */
|
||||
#define REG_ICASE (REG_EXTENDED << 1)
|
||||
|
||||
/* If this bit is set, then anchors do not match at newline
|
||||
characters in the string.
|
||||
If not set, then anchors do match at newlines. */
|
||||
#define REG_NEWLINE (REG_ICASE << 1)
|
||||
|
||||
/* If this bit is set, then report only success or fail in regexec.
|
||||
If not set, then returns differ between not matching and errors. */
|
||||
#define REG_NOSUB (REG_NEWLINE << 1)
|
||||
|
||||
|
||||
/* POSIX `eflags' bits (i.e., information for regexec). */
|
||||
|
||||
/* If this bit is set, then the beginning-of-line operator doesn't match
|
||||
the beginning of the string (presumably because it's not the
|
||||
beginning of a line).
|
||||
If not set, then the beginning-of-line operator does match the
|
||||
beginning of the string. */
|
||||
#define REG_NOTBOL 1
|
||||
|
||||
/* Like REG_NOTBOL, except for the end-of-line. */
|
||||
#define REG_NOTEOL (1 << 1)
|
||||
|
||||
|
||||
/* If any error codes are removed, changed, or added, update the
|
||||
`re_error_msg' table in regex.c. */
|
||||
typedef enum
|
||||
{
|
||||
REG_NOERROR = 0, /* Success. */
|
||||
REG_NOMATCH, /* Didn't find a match (for regexec). */
|
||||
|
||||
/* POSIX regcomp return error codes. (In the order listed in the
|
||||
standard.) */
|
||||
REG_BADPAT, /* Invalid pattern. */
|
||||
REG_ECOLLATE, /* Not implemented. */
|
||||
REG_ECTYPE, /* Invalid character class name. */
|
||||
REG_EESCAPE, /* Trailing backslash. */
|
||||
REG_ESUBREG, /* Invalid back reference. */
|
||||
REG_EBRACK, /* Unmatched left bracket. */
|
||||
REG_EPAREN, /* Parenthesis imbalance. */
|
||||
REG_EBRACE, /* Unmatched \{. */
|
||||
REG_BADBR, /* Invalid contents of \{\}. */
|
||||
REG_ERANGE, /* Invalid range end. */
|
||||
REG_ESPACE, /* Ran out of memory. */
|
||||
REG_BADRPT, /* No preceding re for repetition op. */
|
||||
|
||||
/* Error codes we've added. */
|
||||
REG_EEND, /* Premature end. */
|
||||
REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
|
||||
REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
|
||||
} reg_errcode_t;
|
||||
|
||||
/* This data structure represents a compiled pattern. Before calling
|
||||
the pattern compiler, the fields `buffer', `allocated', `fastmap',
|
||||
`translate', and `no_sub' can be set. After the pattern has been
|
||||
compiled, the `re_nsub' field is available. All other fields are
|
||||
private to the regex routines. */
|
||||
|
||||
struct re_pattern_buffer
|
||||
{
|
||||
char *buffer; /* Space holding the compiled pattern commands. */
|
||||
long allocated; /* Size of space that `buffer' points to. */
|
||||
long used; /* Length of portion of buffer actually occupied */
|
||||
char *fastmap; /* Pointer to fastmap, if any, or zero if none. */
|
||||
/* re_search uses the fastmap, if there is one,
|
||||
to skip over totally implausible characters. */
|
||||
char *translate; /* Translate table to apply to all characters before
|
||||
comparing, or zero for no translation.
|
||||
The translation is applied to a pattern when it is
|
||||
compiled and to data when it is matched. */
|
||||
char fastmap_accurate;
|
||||
/* Set to zero when a new pattern is stored,
|
||||
set to one when the fastmap is updated from it. */
|
||||
char can_be_null; /* Set to one by compiling fastmap
|
||||
if this pattern might match the null string.
|
||||
It does not necessarily match the null string
|
||||
in that case, but if this is zero, it cannot.
|
||||
2 as value means can match null string
|
||||
but at end of range or before a character
|
||||
listed in the fastmap. */
|
||||
};
|
||||
{
|
||||
/* [[[begin pattern_buffer]]] */
|
||||
/* Space that holds the compiled pattern. It is declared as
|
||||
`unsigned char *' because its elements are
|
||||
sometimes used as array indexes. */
|
||||
unsigned char *buffer;
|
||||
|
||||
/* Number of bytes to which `buffer' points. */
|
||||
unsigned long allocated;
|
||||
|
||||
/* Number of bytes actually used in `buffer'. */
|
||||
unsigned long used;
|
||||
|
||||
/* Syntax setting with which the pattern was compiled. */
|
||||
reg_syntax_t syntax;
|
||||
|
||||
/* Pointer to a fastmap, if any, otherwise zero. re_search uses
|
||||
the fastmap, if there is one, to skip over impossible
|
||||
starting points for matches. */
|
||||
char *fastmap;
|
||||
|
||||
/* Either a translate table to apply to all characters before
|
||||
comparing them, or zero for no translation. The translation
|
||||
is applied to a pattern when it is compiled and to a string
|
||||
when it is matched. */
|
||||
char *translate;
|
||||
|
||||
/* Number of subexpressions found by the compiler. */
|
||||
size_t re_nsub;
|
||||
|
||||
/* Zero if this pattern cannot match the empty string, one else.
|
||||
Well, in truth it's used only in `re_search_2', to see
|
||||
whether or not we should use the fastmap, so we don't set
|
||||
this absolutely perfectly; see `re_compile_fastmap' (the
|
||||
`duplicate' case). */
|
||||
unsigned can_be_null : 1;
|
||||
|
||||
/* If REGS_UNALLOCATED, allocate space in the `regs' structure
|
||||
for `max (RE_NREGS, re_nsub + 1)' groups.
|
||||
If REGS_REALLOCATE, reallocate space if necessary.
|
||||
If REGS_FIXED, use what's there. */
|
||||
#define REGS_UNALLOCATED 0
|
||||
#define REGS_REALLOCATE 1
|
||||
#define REGS_FIXED 2
|
||||
unsigned regs_allocated : 2;
|
||||
|
||||
/* Set to zero when `regex_compile' compiles a pattern; set to one
|
||||
by `re_compile_fastmap' if it updates the fastmap. */
|
||||
unsigned fastmap_accurate : 1;
|
||||
|
||||
/* If set, `re_match_2' does not return information about
|
||||
subexpressions. */
|
||||
unsigned no_sub : 1;
|
||||
|
||||
/* If set, a beginning-of-line anchor doesn't match at the
|
||||
beginning of the string. */
|
||||
unsigned not_bol : 1;
|
||||
|
||||
/* Similarly for an end-of-line anchor. */
|
||||
unsigned not_eol : 1;
|
||||
|
||||
/* If true, an anchor at a newline matches. */
|
||||
unsigned newline_anchor : 1;
|
||||
|
||||
/* [[[end pattern_buffer]]] */
|
||||
};
|
||||
|
||||
typedef struct re_pattern_buffer regex_t;
|
||||
|
||||
|
||||
/* search.c (search_buffer) needs this one value. It is defined both in
|
||||
regex.c and here. */
|
||||
/* search.c (search_buffer) in Emacs needs this one opcode value. It is
|
||||
defined both in `regex.c' and here. */
|
||||
#define RE_EXACTN_VALUE 1
|
||||
|
||||
|
||||
/* Structure to store register contents data in.
|
||||
|
||||
Pass the address of such a structure as an argument to re_match, etc.,
|
||||
if you want this information back.
|
||||
|
||||
For i from 1 to RE_NREGS - 1, start[i] records the starting index in
|
||||
the string of where the ith subexpression matched, and end[i] records
|
||||
one after the ending index. start[0] and end[0] are analogous, for
|
||||
the entire pattern. */
|
||||
|
||||
struct re_registers
|
||||
{
|
||||
int start[RE_NREGS];
|
||||
int end[RE_NREGS];
|
||||
};
|
||||
|
||||
|
||||
|
||||
/* Type for byte offsets within the string. POSIX mandates this. */
|
||||
typedef int regoff_t;
|
||||
|
||||
|
||||
/* This is the structure we store register match data in. See
|
||||
regex.texinfo for a full description of what registers match. */
|
||||
struct re_registers
|
||||
{
|
||||
unsigned num_regs;
|
||||
regoff_t *start;
|
||||
regoff_t *end;
|
||||
};
|
||||
|
||||
|
||||
/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
|
||||
`re_match_2' returns information about at least this many registers
|
||||
the first time a `regs' structure is passed. */
|
||||
#ifndef RE_NREGS
|
||||
#define RE_NREGS 30
|
||||
#endif
|
||||
|
||||
|
||||
/* POSIX specification for registers. Aside from the different names than
|
||||
`re_registers', POSIX uses an array of structures, instead of a
|
||||
structure of arrays. */
|
||||
typedef struct
|
||||
{
|
||||
regoff_t rm_so; /* Byte offset from string's start to substring's start. */
|
||||
regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
|
||||
} regmatch_t;
|
||||
|
||||
/* Declarations for routines. */
|
||||
|
||||
/* To avoid duplicating every routine declaration -- once with a
|
||||
prototype (if we are ANSI), and once without (if we aren't) -- we
|
||||
use the following macro to declare argument types. This
|
||||
unfortunately clutters up the declarations a bit, but I think it's
|
||||
worth it. */
|
||||
|
||||
#ifdef __STDC__
|
||||
|
||||
extern char *re_compile_pattern (char *, size_t, struct re_pattern_buffer *);
|
||||
/* Is this really advertised? */
|
||||
extern void re_compile_fastmap (struct re_pattern_buffer *);
|
||||
extern int re_search (struct re_pattern_buffer *, char*, int, int, int,
|
||||
struct re_registers *);
|
||||
extern int re_search_2 (struct re_pattern_buffer *, char *, int,
|
||||
char *, int, int, int,
|
||||
struct re_registers *, int);
|
||||
extern int re_match (struct re_pattern_buffer *, char *, int, int,
|
||||
struct re_registers *);
|
||||
extern int re_match_2 (struct re_pattern_buffer *, char *, int,
|
||||
char *, int, int, struct re_registers *, int);
|
||||
extern long re_set_syntax (long syntax);
|
||||
#define _RE_ARGS(args) args
|
||||
|
||||
#else /* not __STDC__ */
|
||||
|
||||
#define _RE_ARGS(args) ()
|
||||
|
||||
#endif /* not __STDC__ */
|
||||
|
||||
/* Sets the current default syntax to SYNTAX, and return the old syntax.
|
||||
You can also simply assign to the `re_syntax_options' variable. */
|
||||
extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
|
||||
|
||||
/* Compile the regular expression PATTERN, with length LENGTH
|
||||
and syntax given by the global `re_syntax_options', into the buffer
|
||||
BUFFER. Return NULL if successful, and an error string if not. */
|
||||
extern const char *re_compile_pattern
|
||||
_RE_ARGS ((const char *pattern, size_t length,
|
||||
struct re_pattern_buffer *buffer));
|
||||
|
||||
|
||||
/* Compile a fastmap for the compiled pattern in BUFFER; used to
|
||||
accelerate searches. Return 0 if successful and -2 if there was an
|
||||
internal error. */
|
||||
extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
|
||||
|
||||
|
||||
/* Search in the string STRING (with length LENGTH) for the pattern
|
||||
compiled into BUFFER. Start searching at position START, for RANGE
|
||||
characters. Return the starting position of the match, -1 for no
|
||||
match, or -2 for an internal error. Also return register
|
||||
information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */
|
||||
extern int re_search
|
||||
_RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
|
||||
int length, int start, int range, struct re_registers *regs));
|
||||
|
||||
|
||||
/* Like `re_search', but search in the concatenation of STRING1 and
|
||||
STRING2. Also, stop searching at index START + STOP. */
|
||||
extern int re_search_2
|
||||
_RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
|
||||
int length1, const char *string2, int length2,
|
||||
int start, int range, struct re_registers *regs, int stop));
|
||||
|
||||
|
||||
/* Like `re_search', but return how many characters in STRING the regexp
|
||||
in BUFFER matched, starting at position START. */
|
||||
extern int re_match
|
||||
_RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
|
||||
int length, int start, struct re_registers *regs));
|
||||
|
||||
|
||||
/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
|
||||
extern int re_match_2
|
||||
_RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
|
||||
int length1, const char *string2, int length2,
|
||||
int start, struct re_registers *regs, int stop));
|
||||
|
||||
|
||||
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
|
||||
ENDS. Subsequent matches using BUFFER and REGS will use this memory
|
||||
for recording register information. STARTS and ENDS must be
|
||||
allocated with malloc, and must each be at least `NUM_REGS * sizeof
|
||||
(regoff_t)' bytes long.
|
||||
|
||||
If NUM_REGS == 0, then subsequent matches should allocate their own
|
||||
register data.
|
||||
|
||||
Unless this function is called, the first search or match using
|
||||
BUFFER will allocate its own register data, without
|
||||
freeing the old data. */
|
||||
extern void re_set_registers
|
||||
_RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
|
||||
unsigned num_regs, regoff_t *starts, regoff_t *ends));
|
||||
|
||||
#ifndef GAWK
|
||||
/* 4.2 bsd compatibility. */
|
||||
extern char *re_comp (char *);
|
||||
extern int re_exec (char *);
|
||||
#endif
|
||||
extern char *re_comp _RE_ARGS ((const char *));
|
||||
extern int re_exec _RE_ARGS ((const char *));
|
||||
|
||||
#else /* !__STDC__ */
|
||||
/* POSIX compatibility. */
|
||||
extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
|
||||
extern int regexec
|
||||
_RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
|
||||
regmatch_t pmatch[], int eflags));
|
||||
extern size_t regerror
|
||||
_RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
|
||||
size_t errbuf_size));
|
||||
extern void regfree _RE_ARGS ((regex_t *preg));
|
||||
|
||||
extern char *re_compile_pattern ();
|
||||
/* Is this really advertised? */
|
||||
extern void re_compile_fastmap ();
|
||||
extern int re_search (), re_search_2 ();
|
||||
extern int re_match (), re_match_2 ();
|
||||
extern long re_set_syntax();
|
||||
|
||||
#ifndef GAWK
|
||||
/* 4.2 bsd compatibility. */
|
||||
extern char *re_comp ();
|
||||
extern int re_exec ();
|
||||
#endif
|
||||
|
||||
#endif /* __STDC__ */
|
||||
|
||||
|
||||
#ifdef SYNTAX_TABLE
|
||||
extern char *re_syntax_table;
|
||||
#endif
|
||||
|
||||
#endif /* !__REGEXP_LIBRARY */
|
||||
#endif /* not __REGEXP_LIBRARY_H__ */
|
||||
|
||||
/*
|
||||
Local variables:
|
||||
make-backup-files: t
|
||||
version-control: t
|
||||
trim-versions-without-asking: nil
|
||||
End:
|
||||
*/
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*char *version_string = "from: @(#)Gnu Awk (gawk) 2.15";*/
|
||||
char *version_string = "$Id: version.c,v 1.2 1993/08/01 18:49:02 mycroft Exp $ 2.15";
|
||||
/* DO NOT CHANGE VERSION STRING TO USE A REAL SCCS OR RCS ID */
|
||||
char *version_string = "@(#)Gnu Awk (gawk) 2.15";
|
||||
|
||||
/* 1.02 fixed /= += *= etc to return the new Left Hand Side instead
|
||||
of the Right Hand Side */
|
||||
|
@ -43,5 +43,6 @@ char *version_string = "$Id: version.c,v 1.2 1993/08/01 18:49:02 mycroft Exp $ 2
|
|||
/* 2.14 Mostly bug fixes. */
|
||||
|
||||
/* 2.15 Bug fixes plus intermixing of command-line source and files,
|
||||
GNU long options, ARGIND, ERRNO and Plan 9 style /dev/ files. */
|
||||
GNU long options, ARGIND, ERRNO and Plan 9 style /dev/ files.
|
||||
`delete array'. OS/2 port added. */
|
||||
|
||||
|
|
Loading…
Reference in New Issue