import latest nawk
This commit is contained in:
parent
a562507d8f
commit
35f471cb95
|
@ -0,0 +1,968 @@
|
|||
/****************************************************************
|
||||
Copyright (C) Lucent Technologies 1997
|
||||
All Rights Reserved
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and
|
||||
its documentation for any purpose and without fee is hereby
|
||||
granted, provided that the above copyright notice appear in all
|
||||
copies and that both that the copyright notice and this
|
||||
permission notice and warranty disclaimer appear in supporting
|
||||
documentation, and that the name Lucent Technologies or any of
|
||||
its entities not be used in advertising or publicity pertaining
|
||||
to distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
|
||||
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
|
||||
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
||||
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
This file lists all bug fixes, changes, etc., made since the AWK book
|
||||
was sent to the printers in August, 1987.
|
||||
|
||||
May 23, 2010:
|
||||
fixed long-standing overflow bug in run.c; many thanks to
|
||||
nelson beebe for spotting it and providing the fix.
|
||||
|
||||
fixed bug that didn't parse -vd=1 properly; thanks to santiago
|
||||
vila for spotting it.
|
||||
|
||||
Feb 8, 2010:
|
||||
i give up. replaced isblank with isspace in b.c; there are
|
||||
no consistent header files.
|
||||
|
||||
Nov 26, 2009:
|
||||
fixed a long-standing issue with when FS takes effect. a
|
||||
change to FS is now noticed immediately for subsequent splits.
|
||||
|
||||
changed the name getline() to awkgetline() to avoid yet another
|
||||
name conflict somewhere.
|
||||
|
||||
Feb 11, 2009:
|
||||
temporarily for now defined HAS_ISBLANK, since that seems to
|
||||
be the best way through the thicket. isblank arrived in C99,
|
||||
but seems to be arriving at different systems at different
|
||||
times.
|
||||
|
||||
Oct 8, 2008:
|
||||
fixed typo in b.c that set tmpvec wrongly. no one had ever
|
||||
run into the problem, apparently. thanks to alistair crooks.
|
||||
|
||||
Oct 23, 2007:
|
||||
minor fix in lib.c: increase inputFS to 100, change malloc
|
||||
for fields to n+1.
|
||||
|
||||
fixed memory fault caused by out of order test in setsval.
|
||||
|
||||
thanks to david o'brien, freebsd, for both fixes.
|
||||
|
||||
May 1, 2007:
|
||||
fiddle in makefile to fix for BSD make; thanks to igor sobrado.
|
||||
|
||||
Mar 31, 2007:
|
||||
fixed some null pointer refs calling adjbuf.
|
||||
|
||||
Feb 21, 2007:
|
||||
fixed a bug in matching the null RE in sub and gsub. thanks to al aho
|
||||
who actually did the fix (in b.c), and to wolfgang seeberg for finding
|
||||
it and providing a very compact test case.
|
||||
|
||||
fixed quotation in b.c; thanks to Hal Pratt and the Princeton Dante
|
||||
Project.
|
||||
|
||||
removed some no-effect asserts in run.c.
|
||||
|
||||
fiddled maketab.c to not complain about bison-generated values.
|
||||
|
||||
removed the obsolete -V argument; fixed --version to print the
|
||||
version and exit.
|
||||
|
||||
fixed wording and an outright error in the usage message; thanks to igor
|
||||
sobrado and jason mcintyre.
|
||||
|
||||
fixed a bug in -d that caused core dump if no program followed.
|
||||
|
||||
Jan 1, 2007:
|
||||
dropped mac.code from makefile; there are few non-MacOSX
|
||||
mac's these days.
|
||||
|
||||
Jan 17, 2006:
|
||||
system() not flagged as unsafe in the unadvertised -safe option.
|
||||
found it while enhancing tests before shipping the ;login: article.
|
||||
practice what you preach.
|
||||
|
||||
removed the 9-years-obsolete -mr and -mf flags.
|
||||
|
||||
added -version and --version options.
|
||||
|
||||
core dump on linux with BEGIN {nextfile}, now fixed.
|
||||
|
||||
removed some #ifdef's in run.c and lex.c that appear to no
|
||||
longer be necessary.
|
||||
|
||||
Apr 24, 2005:
|
||||
modified lib.c so that values of $0 et al are preserved in the END
|
||||
block, apparently as required by posix. thanks to havard eidnes
|
||||
for the report and code.
|
||||
|
||||
Jan 14, 2005:
|
||||
fixed infinite loop in parsing, originally found by brian tsang.
|
||||
thanks to arnold robbins for a suggestion that started me
|
||||
rethinking it.
|
||||
|
||||
Dec 31, 2004:
|
||||
prevent overflow of -f array in main, head off potential error in
|
||||
call of SYNTAX(), test malloc return in lib.c, all with thanks to
|
||||
todd miller.
|
||||
|
||||
Dec 22, 2004:
|
||||
cranked up size of NCHARS; coverity thinks it can be overrun with
|
||||
smaller size, and i think that's right. added some assertions to b.c
|
||||
to catch places where it might overrun. the RE code is still fragile.
|
||||
|
||||
Dec 5, 2004:
|
||||
fixed a couple of overflow problems with ridiculous field numbers:
|
||||
e.g., print $(2^32-1). thanks to ruslan ermilov, giorgos keramidas
|
||||
and david o'brien at freebsd.org for patches. this really should
|
||||
be re-done from scratch.
|
||||
|
||||
Nov 21, 2004:
|
||||
fixed another 25-year-old RE bug, in split. it's another failure
|
||||
to (re-)initialize. thanks to steve fisher for spotting this and
|
||||
providing a good test case.
|
||||
|
||||
Nov 22, 2003:
|
||||
fixed a bug in regular expressions that dates (so help me) from 1977;
|
||||
it's been there from the beginning. an anchored longest match that
|
||||
was longer than the number of states triggered a failure to initialize
|
||||
the machine properly. many thanks to moinak ghosh for not only finding
|
||||
this one but for providing a fix, in some of the most mysterious
|
||||
code known to man.
|
||||
|
||||
fixed a storage leak in call() that appears to have been there since
|
||||
1983 or so -- a function without an explicit return that assigns a
|
||||
string to a parameter leaked a Cell. thanks to moinak ghosh for
|
||||
spotting this very subtle one.
|
||||
|
||||
Jul 31, 2003:
|
||||
fixed, thanks to andrey chernov and ruslan ermilov, a bug in lex.c
|
||||
that mis-handled the character 255 in input. (it was being compared
|
||||
to EOF with a signed comparison.)
|
||||
|
||||
Jul 29, 2003:
|
||||
fixed (i think) the long-standing botch that included the beginning of
|
||||
line state ^ for RE's in the set of valid characters; this led to a
|
||||
variety of odd problems, including failure to properly match certain
|
||||
regular expressions in non-US locales. thanks to ruslan for keeping
|
||||
at this one.
|
||||
|
||||
Jul 28, 2003:
|
||||
n-th try at getting internationalization right, with thanks to volker
|
||||
kiefel, arnold robbins and ruslan ermilov for advice, though they
|
||||
should not be blamed for the outcome. according to posix, "." is the
|
||||
radix character in programs and command line arguments regardless of
|
||||
the locale; otherwise, the locale should prevail for input and output
|
||||
of numbers. so it's intended to work that way.
|
||||
|
||||
i have rescinded the attempt to use strcoll in expanding shorthands in
|
||||
regular expressions (cclenter). its properties are much too
|
||||
surprising; for example [a-c] matches aAbBc in locale en_US but abBcC
|
||||
in locale fr_CA. i can see how this might arise by implementation
|
||||
but i cannot explain it to a human user. (this behavior can be seen
|
||||
in gawk as well; we're leaning on the same library.)
|
||||
|
||||
the issue appears to be that strcoll is meant for sorting, where
|
||||
merging upper and lower case may make sense (though note that unix
|
||||
sort does not do this by default either). it is not appropriate
|
||||
for regular expressions, where the goal is to match specific
|
||||
patterns of characters. in any case, the notations [:lower:], etc.,
|
||||
are available in awk, and they are more likely to work correctly in
|
||||
most locales.
|
||||
|
||||
a moratorium is hereby declared on internationalization changes.
|
||||
i apologize to friends and colleagues in other parts of the world.
|
||||
i would truly like to get this "right", but i don't know what
|
||||
that is, and i do not want to keep making changes until it's clear.
|
||||
|
||||
Jul 4, 2003:
|
||||
fixed bug that permitted non-terminated RE, as in "awk /x".
|
||||
|
||||
Jun 1, 2003:
|
||||
subtle change to split: if source is empty, number of elems
|
||||
is always 0 and the array is not set.
|
||||
|
||||
Mar 21, 2003:
|
||||
added some parens to isblank, in another attempt to make things
|
||||
internationally portable.
|
||||
|
||||
Mar 14, 2003:
|
||||
the internationalization changes, somewhat modified, are now
|
||||
reinstated. in theory awk will now do character comparisons
|
||||
and case conversions in national language, but "." will always
|
||||
be the decimal point separator on input and output regardless
|
||||
of national language. isblank(){} has an #ifndef.
|
||||
|
||||
this no longer compiles on windows: LC_MESSAGES isn't defined
|
||||
in vc6++.
|
||||
|
||||
fixed subtle behavior in field and record splitting: if FS is
|
||||
a single character and RS is not empty, \n is NOT a separator.
|
||||
this tortuous reading is found in the awk book; behavior now
|
||||
matches gawk and mawk.
|
||||
|
||||
Dec 13, 2002:
|
||||
for the moment, the internationalization changes of nov 29 are
|
||||
rolled back -- programs like x = 1.2 don't work in some locales,
|
||||
because the parser is expecting x = 1,2. until i understand this
|
||||
better, this will have to wait.
|
||||
|
||||
Nov 29, 2002:
|
||||
modified b.c (with tiny changes in main and run) to support
|
||||
locales, using strcoll and iswhatever tests for posix character
|
||||
classes. thanks to ruslan ermilov (ru@freebsd.org) for code.
|
||||
the function isblank doesn't seem to have propagated to any
|
||||
header file near me, so it's there explicitly. not properly
|
||||
tested on non-ascii character sets by me.
|
||||
|
||||
Jun 28, 2002:
|
||||
modified run/format() and tran/getsval() to do a slightly better
|
||||
job on using OFMT for output from print and CONVFMT for other
|
||||
number->string conversions, as promised by posix and done by
|
||||
gawk and mawk. there are still places where it doesn't work
|
||||
right if CONVFMT is changed; by then the STR attribute of the
|
||||
variable has been irrevocably set. thanks to arnold robbins for
|
||||
code and examples.
|
||||
|
||||
fixed subtle bug in format that could get core dump. thanks to
|
||||
Jaromir Dolecek <jdolecek@NetBSD.org> for finding and fixing.
|
||||
minor cleanup in run.c / format() at the same time.
|
||||
|
||||
added some tests for null pointers to debugging printf's, which
|
||||
were never intended for external consumption. thanks to dave
|
||||
kerns (dkerns@lucent.com) for pointing this out.
|
||||
|
||||
GNU compatibility: an empty regexp matches anything (thanks to
|
||||
dag-erling smorgrav, des@ofug.org). subject to reversion if
|
||||
this does more harm than good.
|
||||
|
||||
pervasive small changes to make things more const-correct, as
|
||||
reported by gcc's -Wwrite-strings. as it says in the gcc manual,
|
||||
this may be more nuisance than useful. provoked by a suggestion
|
||||
and code from arnaud desitter, arnaud@nimbus.geog.ox.ac.uk
|
||||
|
||||
minor documentation changes to note that this now compiles out
|
||||
of the box on Mac OS X.
|
||||
|
||||
Feb 10, 2002:
|
||||
changed types in posix chars structure to quiet solaris cc.
|
||||
|
||||
Jan 1, 2002:
|
||||
fflush() or fflush("") flushes all files and pipes.
|
||||
|
||||
length(arrayname) returns number of elements; thanks to
|
||||
arnold robbins for suggestion.
|
||||
|
||||
added a makefile.win to make it easier to build on windows.
|
||||
based on dan allen's buildwin.bat.
|
||||
|
||||
Nov 16, 2001:
|
||||
added support for posix character class names like [:digit:],
|
||||
which are not exactly shorter than [0-9] and perhaps no more
|
||||
portable. thanks to dag-erling smorgrav for code.
|
||||
|
||||
Feb 16, 2001:
|
||||
removed -m option; no longer needed, and it was actually
|
||||
broken (noted thanks to volker kiefel).
|
||||
|
||||
Feb 10, 2001:
|
||||
fixed an appalling bug in gettok: any sequence of digits, +,-, E, e,
|
||||
and period was accepted as a valid number if it started with a period.
|
||||
this would never have happened with the lex version.
|
||||
|
||||
other 1-character botches, now fixed, include a bare $ and a
|
||||
bare " at the end of the input.
|
||||
|
||||
Feb 7, 2001:
|
||||
more (const char *) casts in b.c and tran.c to silence warnings.
|
||||
|
||||
Nov 15, 2000:
|
||||
fixed a bug introduced in august 1997 that caused expressions
|
||||
like $f[1] to be syntax errors. thanks to arnold robbins for
|
||||
noticing this and providing a fix.
|
||||
|
||||
Oct 30, 2000:
|
||||
fixed some nextfile bugs: not handling all cases. thanks to
|
||||
arnold robbins for pointing this out. new regressions added.
|
||||
|
||||
close() is now a function. it returns whatever the library
|
||||
fclose returns, and -1 for closing a file or pipe that wasn't
|
||||
opened.
|
||||
|
||||
Sep 24, 2000:
|
||||
permit \n explicitly in character classes; won't work right
|
||||
if it comes in as "[\n]" but ok as /[\n]/, because of multiple
|
||||
processing of \'s. thanks to arnold robbins.
|
||||
|
||||
July 5, 2000:
|
||||
minor fiddles in tran.c to keep compilers happy about uschar.
|
||||
thanks to norman wilson.
|
||||
|
||||
May 25, 2000:
|
||||
yet another attempt at making 8-bit input work, with another
|
||||
band-aid in b.c (member()), and some (uschar) casts to head
|
||||
off potential errors in subscripts (like isdigit). also
|
||||
changed HAT to NCHARS-2. thanks again to santiago vila.
|
||||
|
||||
changed maketab.c to ignore apparently out of range definitions
|
||||
instead of halting; new freeBSD generates one. thanks to
|
||||
jon snader <jsnader@ix.netcom.com> for pointing out the problem.
|
||||
|
||||
May 2, 2000:
|
||||
fixed an 8-bit problem in b.c by making several char*'s into
|
||||
unsigned char*'s. not clear i have them all yet. thanks to
|
||||
Santiago Vila <sanvila@unex.es> for the bug report.
|
||||
|
||||
Apr 21, 2000:
|
||||
finally found and fixed a memory leak in function call; it's
|
||||
been there since functions were added ~1983. thanks to
|
||||
jon bentley for the test case that found it.
|
||||
|
||||
added test in envinit to catch environment "variables" with
|
||||
names beginning with '='; thanks to Berend Hasselman.
|
||||
|
||||
Jul 28, 1999:
|
||||
added test in defn() to catch function foo(foo), which
|
||||
otherwise recurses until core dump. thanks to arnold
|
||||
robbins for noticing this.
|
||||
|
||||
Jun 20, 1999:
|
||||
added *bp in gettok in lex.c; appears possible to exit function
|
||||
without terminating the string. thanks to russ cox.
|
||||
|
||||
Jun 2, 1999:
|
||||
added function stdinit() to run to initialize files[] array,
|
||||
in case stdin, etc., are not constants; some compilers care.
|
||||
|
||||
May 10, 1999:
|
||||
replaced the ERROR ... FATAL, etc., macros with functions
|
||||
based on vprintf, to avoid problems caused by overrunning
|
||||
fixed-size errbuf array. thanks to ralph corderoy for the
|
||||
impetus, and for pointing out a string termination bug in
|
||||
qstring as well.
|
||||
|
||||
Apr 21, 1999:
|
||||
fixed bug that caused occasional core dumps with commandline
|
||||
variable with value ending in \. (thanks to nelson beebe for
|
||||
the test case.)
|
||||
|
||||
Apr 16, 1999:
|
||||
with code kindly provided by Bruce Lilly, awk now parses
|
||||
/=/ and similar constructs more sensibly in more places.
|
||||
Bruce also provided some helpful test cases.
|
||||
|
||||
Apr 5, 1999:
|
||||
changed true/false to True/False in run.c to make it
|
||||
easier to compile with C++. Added some casts on malloc
|
||||
and realloc to be honest about casts; ditto. changed
|
||||
ltype int to long in struct rrow to reduce some 64-bit
|
||||
complaints; other changes scattered throughout for the
|
||||
same purpose. thanks to Nelson Beebe for these portability
|
||||
improvements.
|
||||
|
||||
removed some horrible pointer-int casting in b.c and elsewhere
|
||||
by adding ptoi and itonp to localize the casts, which are
|
||||
all benign. fixed one incipient bug that showed up on sgi
|
||||
in 64-bit mode.
|
||||
|
||||
reset lineno for new source file; include filename in error
|
||||
message. also fixed line number error in continuation lines.
|
||||
(thanks to Nelson Beebe for both of these.)
|
||||
|
||||
Mar 24, 1999:
|
||||
Nelson Beebe notes that irix 5.3 yacc dies with a bogus
|
||||
error; use a newer version or switch to bison, since sgi
|
||||
is unlikely to fix it.
|
||||
|
||||
Mar 5, 1999:
|
||||
changed isnumber to is_number to avoid the problem caused by
|
||||
versions of ctype.h that include the name isnumber.
|
||||
|
||||
distribution now includes a script for building on a Mac,
|
||||
thanks to Dan Allen.
|
||||
|
||||
Feb 20, 1999:
|
||||
fixed memory leaks in run.c (call) and tran.c (setfval).
|
||||
thanks to Stephen Nutt for finding these and providing the fixes.
|
||||
|
||||
Jan 13, 1999:
|
||||
replaced srand argument by (unsigned int) in run.c;
|
||||
avoids problem on Mac and potentially on Unix & Windows.
|
||||
thanks to Dan Allen.
|
||||
|
||||
added a few (int) casts to silence useless compiler warnings.
|
||||
e.g., errorflag= in run.c jump().
|
||||
|
||||
added proctab.c to the bundle output; one less thing
|
||||
to have to compile out of the box.
|
||||
|
||||
added calls to _popen and _pclose to the win95 stub for
|
||||
pipes (thanks to Steve Adams for this helpful suggestion).
|
||||
seems to work, though properties are not well understood
|
||||
by me, and it appears that under some circumstances the
|
||||
pipe output is truncated. Be careful.
|
||||
|
||||
Oct 19, 1998:
|
||||
fixed a couple of bugs in getrec: could fail to update $0
|
||||
after a getline var; because inputFS wasn't initialized,
|
||||
could split $0 on every character, a misleading diversion.
|
||||
|
||||
fixed caching bug in makedfa: LRU was actually removing
|
||||
least often used.
|
||||
|
||||
thanks to ross ridge for finding these, and for providing
|
||||
great bug reports.
|
||||
|
||||
May 12, 1998:
|
||||
fixed potential bug in readrec: might fail to update record
|
||||
pointer after growing. thanks to dan levy for spotting this
|
||||
and suggesting the fix.
|
||||
|
||||
Mar 12, 1998:
|
||||
added -V to print version number and die.
|
||||
|
||||
Feb 11, 1998:
|
||||
subtle silent bug in lex.c: if the program ended with a number
|
||||
longer than 1 digit, part of the input would be pushed back and
|
||||
parsed again because token buffer wasn't terminated right.
|
||||
example: awk 'length($0) > 10'. blush. at least i found it
|
||||
myself.
|
||||
|
||||
Aug 31, 1997:
|
||||
s/adelete/awkdelete/: SGI uses this in malloc.h.
|
||||
thanks to nelson beebe for pointing this one out.
|
||||
|
||||
Aug 21, 1997:
|
||||
fixed some bugs in sub and gsub when replacement includes \\.
|
||||
this is a dark, horrible corner, but at least now i believe that
|
||||
the behavior is the same as gawk and the intended posix standard.
|
||||
thanks to arnold robbins for advice here.
|
||||
|
||||
Aug 9, 1997:
|
||||
somewhat regretfully, replaced the ancient lex-based lexical
|
||||
analyzer with one written in C. it's longer, generates less code,
|
||||
and is more portable; the old one depended too much on mysterious
|
||||
properties of lex that were not preserved in other environments.
|
||||
in theory these recognize the same language.
|
||||
|
||||
now using strtod to test whether a string is a number, instead of
|
||||
the convoluted original function. should be more portable and
|
||||
reliable if strtod is implemented right.
|
||||
|
||||
removed now-pointless optimization in makefile that tries to avoid
|
||||
recompilation when awkgram.y is changed but symbols are not.
|
||||
|
||||
removed most fixed-size arrays, though a handful remain, some
|
||||
of which are unchecked. you have been warned.
|
||||
|
||||
Aug 4, 1997:
|
||||
with some trepidation, replaced the ancient code that managed
|
||||
fields and $0 in fixed-size arrays with arrays that grow on
|
||||
demand. there is still some tension between trying to make this
|
||||
run fast and making it clean; not sure it's right yet.
|
||||
|
||||
the ill-conceived -mr and -mf arguments are now useful only
|
||||
for debugging. previous dynamic string code removed.
|
||||
|
||||
numerous other minor cleanups along the way.
|
||||
|
||||
Jul 30, 1997:
|
||||
using code provided by dan levy (to whom profuse thanks), replaced
|
||||
fixed-size arrays and awkward kludges by a fairly uniform mechanism
|
||||
to grow arrays as needed for printf, sub, gsub, etc.
|
||||
|
||||
Jul 23, 1997:
|
||||
falling off the end of a function returns "" and 0, not 0.
|
||||
thanks to arnold robbins.
|
||||
|
||||
Jun 17, 1997:
|
||||
replaced several fixed-size arrays by dynamically-created ones
|
||||
in run.c; added overflow tests to some previously unchecked cases.
|
||||
getline, toupper, tolower.
|
||||
|
||||
getline code is still broken in that recursive calls may wind
|
||||
up using the same space. [fixed later]
|
||||
|
||||
increased RECSIZE to 8192 to push problems further over the horizon.
|
||||
|
||||
added \r to \n as input line separator for programs, not data.
|
||||
damn CRLFs.
|
||||
|
||||
modified format() to permit explicit printf("%c", 0) to include
|
||||
a null byte in output. thanks to ken stailey for the fix.
|
||||
|
||||
added a "-safe" argument that disables file output (print >,
|
||||
print >>), process creation (cmd|getline, print |, system), and
|
||||
access to the environment (ENVIRON). this is a first approximation
|
||||
to a "safe" version of awk, but don't rely on it too much. thanks
|
||||
to joan feigenbaum and matt blaze for the inspiration long ago.
|
||||
|
||||
Jul 8, 1996:
|
||||
fixed long-standing bug in sub, gsub(/a/, "\\\\&"); thanks to
|
||||
ralph corderoy.
|
||||
|
||||
Jun 29, 1996:
|
||||
fixed awful bug in new field splitting; didn't get all the places
|
||||
where input was done.
|
||||
|
||||
Jun 28, 1996:
|
||||
changed field-splitting to conform to posix definition: fields are
|
||||
split using the value of FS at the time of input; it used to be
|
||||
the value when the field or NF was first referred to, a much less
|
||||
predictable definition. thanks to arnold robbins for encouragement
|
||||
to do the right thing.
|
||||
|
||||
May 28, 1996:
|
||||
fixed appalling but apparently unimportant bug in parsing octal
|
||||
numbers in reg exprs.
|
||||
|
||||
explicit hex in reg exprs now limited to 2 chars: \xa, \xaa.
|
||||
|
||||
May 27, 1996:
|
||||
cleaned up some declarations so gcc -Wall is now almost silent.
|
||||
|
||||
makefile now includes backup copies of ytab.c and lexyy.c in case
|
||||
one makes before looking; it also avoids recreating lexyy.c unless
|
||||
really needed.
|
||||
|
||||
s/aprintf/awkprintf/, s/asprintf/awksprintf/ to avoid some name clashes
|
||||
with unwisely-written header files.
|
||||
|
||||
thanks to jeffrey friedl for several of these.
|
||||
|
||||
May 26, 1996:
|
||||
an attempt to rationalize the (unsigned) char issue. almost all
|
||||
instances of unsigned char have been removed; the handful of places
|
||||
in b.c where chars are used as table indices have been hand-crafted.
|
||||
added some latin-1 tests to the regression, but i'm not confident;
|
||||
none of my compilers seem to care much. thanks to nelson beebe for
|
||||
pointing out some others that do care.
|
||||
|
||||
May 2, 1996:
|
||||
removed all register declarations.
|
||||
|
||||
enhanced split(), as in gawk, etc: split(s, a, "") splits s into
|
||||
a[1]...a[length(s)] with each character a single element.
|
||||
|
||||
made the same changes for field-splitting if FS is "".
|
||||
|
||||
added nextfile, as in gawk: causes immediate advance to next
|
||||
input file. (thanks to arnold robbins for inspiration and code).
|
||||
|
||||
small fixes to regexpr code: can now handle []], [[], and
|
||||
variants; [] is now a syntax error, rather than matching
|
||||
everything; [z-a] is now empty, not z. far from complete
|
||||
or correct, however. (thanks to jeffrey friedl for pointing out
|
||||
some awful behaviors.)
|
||||
|
||||
Apr 29, 1996:
|
||||
replaced uchar by uschar everywhere; apparently some compilers
|
||||
usurp this name and this causes conflicts.
|
||||
|
||||
fixed call to time in run.c (bltin); arg is time_t *.
|
||||
|
||||
replaced horrible pointer/long punning in b.c by a legitimate
|
||||
union. should be safer on 64-bit machines and cleaner everywhere.
|
||||
(thanks to nelson beebe for pointing out some of these problems.)
|
||||
|
||||
replaced nested comments by #if 0...#endif in run.c, lib.c.
|
||||
|
||||
removed getsval, setsval, execute macros from run.c and lib.c.
|
||||
machines are 100x faster than they were when these macros were
|
||||
first used.
|
||||
|
||||
revised filenames: awk.g.y => awkgram.y, awk.lx.l => awklex.l,
|
||||
y.tab.[ch] => ytab.[ch], lex.yy.c => lexyy.c, all in the aid of
|
||||
portability to nameless systems.
|
||||
|
||||
"make bundle" now includes yacc and lex output files for recipients
|
||||
who don't have yacc or lex.
|
||||
|
||||
Aug 15, 1995:
|
||||
initialized Cells in setsymtab more carefully; some fields
|
||||
were not set. (thanks to purify, all of whose complaints i
|
||||
think i now understand.)
|
||||
|
||||
fixed at least one error in gsub that looked at -1-th element
|
||||
of an array when substituting for a null match (e.g., $).
|
||||
|
||||
delete arrayname is now legal; it clears the elements but leaves
|
||||
the array, which may not be the right behavior.
|
||||
|
||||
modified makefile: my current make can't cope with the test used
|
||||
to avoid unnecessary yacc invocations.
|
||||
|
||||
Jul 17, 1995:
|
||||
added dynamically growing strings to awk.lx.l and b.c
|
||||
to permit regular expressions to be much bigger.
|
||||
the state arrays can still overflow.
|
||||
|
||||
Aug 24, 1994:
|
||||
detect duplicate arguments in function definitions (mdm).
|
||||
|
||||
May 11, 1994:
|
||||
trivial fix to printf to limit string size in sub().
|
||||
|
||||
Apr 22, 1994:
|
||||
fixed yet another subtle self-assignment problem:
|
||||
$1 = $2; $1 = $1 clobbered $1.
|
||||
|
||||
Regression tests now use private echo, to avoid quoting problems.
|
||||
|
||||
Feb 2, 1994:
|
||||
changed error() to print line number as %d, not %g.
|
||||
|
||||
Jul 23, 1993:
|
||||
cosmetic changes: increased sizes of some arrays,
|
||||
reworded some error messages.
|
||||
|
||||
added CONVFMT as in posix (just replaced OFMT in getsval)
|
||||
|
||||
FILENAME is now "" until the first thing that causes a file
|
||||
to be opened.
|
||||
|
||||
Nov 28, 1992:
|
||||
deleted yyunput and yyoutput from proto.h;
|
||||
different versions of lex give these different declarations.
|
||||
|
||||
May 31, 1992:
|
||||
added -mr N and -mf N options: more record and fields.
|
||||
these really ought to adjust automatically.
|
||||
|
||||
cleaned up some error messages; "out of space" now means
|
||||
malloc returned NULL in all cases.
|
||||
|
||||
changed rehash so that if it runs out, it just returns;
|
||||
things will continue to run slow, but maybe a bit longer.
|
||||
|
||||
Apr 24, 1992:
|
||||
remove redundant close of stdin when using -f -.
|
||||
|
||||
got rid of core dump with -d; awk -d just prints date.
|
||||
|
||||
Apr 12, 1992:
|
||||
added explicit check for /dev/std(in,out,err) in redirection.
|
||||
unlike gawk, no /dev/fd/n yet.
|
||||
|
||||
added fflush(file/pipe) builtin. hard to test satisfactorily.
|
||||
not posix.
|
||||
|
||||
Feb 20, 1992:
|
||||
recompile after abortive changes; should be unchanged.
|
||||
|
||||
Dec 2, 1991:
|
||||
die-casting time: converted to ansi C, installed that.
|
||||
|
||||
Nov 30, 1991:
|
||||
fixed storage leak in freefa, failing to recover [N]CCL.
|
||||
thanks to Bill Jones (jones@cs.usask.ca)
|
||||
|
||||
Nov 19, 1991:
|
||||
use RAND_MAX instead of literal in builtin().
|
||||
|
||||
Nov 12, 1991:
|
||||
cranked up some fixed-size arrays in b.c, and added a test for
|
||||
overflow in penter. thanks to mark larsen.
|
||||
|
||||
Sep 24, 1991:
|
||||
increased buffer in gsub. a very crude fix to a general problem.
|
||||
and again on Sep 26.
|
||||
|
||||
Aug 18, 1991:
|
||||
enforce variable name syntax for commandline variables: has to
|
||||
start with letter or _.
|
||||
|
||||
Jul 27, 1991:
|
||||
allow newline after ; in for statements.
|
||||
|
||||
Jul 21, 1991:
|
||||
fixed so that in self-assignment like $1=$1, side effects
|
||||
like recomputing $0 take place. (this is getting subtle.)
|
||||
|
||||
Jun 30, 1991:
|
||||
better test for detecting too-long output record.
|
||||
|
||||
Jun 2, 1991:
|
||||
better defense against very long printf strings.
|
||||
made break and continue illegal outside of loops.
|
||||
|
||||
May 13, 1991:
|
||||
removed extra arg on gettemp, tempfree. minor error message rewording.
|
||||
|
||||
May 6, 1991:
|
||||
fixed silly bug in hex parsing in hexstr().
|
||||
removed an apparently unnecessary test in isnumber().
|
||||
warn about weird printf conversions.
|
||||
fixed unchecked array overwrite in relex().
|
||||
|
||||
changed for (i in array) to access elements in sorted order.
|
||||
then unchanged it -- it really does run slower in too many cases.
|
||||
left the code in place, commented out.
|
||||
|
||||
Feb 10, 1991:
|
||||
check error status on all writes, to avoid banging on full disks.
|
||||
|
||||
Jan 28, 1991:
|
||||
awk -f - reads the program from stdin.
|
||||
|
||||
Jan 11, 1991:
|
||||
failed to set numeric state on $0 in cmd|getline context in run.c.
|
||||
|
||||
Nov 2, 1990:
|
||||
fixed sleazy test for integrality in getsval; use modf.
|
||||
|
||||
Oct 29, 1990:
|
||||
fixed sleazy buggy code in lib.c that looked (incorrectly) for
|
||||
too long input lines.
|
||||
|
||||
Oct 14, 1990:
|
||||
fixed the bug on p. 198 in which it couldn't deduce that an
|
||||
argument was an array in some contexts. replaced the error
|
||||
message in intest() by code that damn well makes it an array.
|
||||
|
||||
Oct 8, 1990:
|
||||
fixed horrible bug: types and values were not preserved in
|
||||
some kinds of self-assignment. (in assign().)
|
||||
|
||||
Aug 24, 1990:
|
||||
changed NCHARS to 256 to handle 8-bit characters in strings
|
||||
presented to match(), etc.
|
||||
|
||||
Jun 26, 1990:
|
||||
changed struct rrow (awk.h) to use long instead of int for lval,
|
||||
since cfoll() stores a pointer in it. now works better when int's
|
||||
are smaller than pointers!
|
||||
|
||||
May 6, 1990:
|
||||
AVA fixed the grammar so that ! is uniformly of the same precedence as
|
||||
unary + and -. This renders illegal some constructs like !x=y, which
|
||||
now has to be parenthesized as !(x=y), and makes others work properly:
|
||||
!x+y is (!x)+y, and x!y is x !y, not two pattern-action statements.
|
||||
(These problems were pointed out by Bob Lenk of Posix.)
|
||||
|
||||
Added \x to regular expressions (already in strings).
|
||||
Limited octal to octal digits; \8 and \9 are not octal.
|
||||
Centralized the code for parsing escapes in regular expressions.
|
||||
Added a bunch of tests to T.re and T.sub to verify some of this.
|
||||
|
||||
Feb 9, 1990:
|
||||
fixed null pointer dereference bug in main.c: -F[nothing]. sigh.
|
||||
|
||||
restored srand behavior: it returns the current seed.
|
||||
|
||||
Jan 18, 1990:
|
||||
srand now returns previous seed value (0 to start).
|
||||
|
||||
Jan 5, 1990:
|
||||
fix potential problem in tran.c -- something was freed,
|
||||
then used in freesymtab.
|
||||
|
||||
Oct 18, 1989:
|
||||
another try to get the max number of open files set with
|
||||
relatively machine-independent code.
|
||||
|
||||
small fix to input() in case of multiple reads after EOF.
|
||||
|
||||
Oct 11, 1989:
|
||||
FILENAME is now defined in the BEGIN block -- too many old
|
||||
programs broke.
|
||||
|
||||
"-" means stdin in getline as well as on the commandline.
|
||||
|
||||
added a bunch of casts to the code to tell the truth about
|
||||
char * vs. unsigned char *, a right royal pain. added a
|
||||
setlocale call to the front of main, though probably no one
|
||||
has it usefully implemented yet.
|
||||
|
||||
Aug 24, 1989:
|
||||
removed redundant relational tests against nullnode if parse
|
||||
tree already had a relational at that point.
|
||||
|
||||
Aug 11, 1989:
|
||||
fixed bug: commandline variable assignment has to look like
|
||||
var=something. (consider the man page for =, in file =.1)
|
||||
|
||||
changed number of arguments to functions to static arrays
|
||||
to avoid repeated malloc calls.
|
||||
|
||||
Aug 2, 1989:
|
||||
restored -F (space) separator
|
||||
|
||||
Jul 30, 1989:
|
||||
added -v x=1 y=2 ... for immediate commandline variable assignment;
|
||||
done before the BEGIN block for sure. they have to precede the
|
||||
program if the program is on the commandline.
|
||||
Modified Aug 2 to require a separate -v for each assignment.
|
||||
|
||||
Jul 10, 1989:
|
||||
fixed ref-thru-zero bug in environment code in tran.c
|
||||
|
||||
Jun 23, 1989:
|
||||
add newline to usage message.
|
||||
|
||||
Jun 14, 1989:
|
||||
added some missing ansi printf conversion letters: %i %X %E %G.
|
||||
no sensible meaning for h or L, so they may not do what one expects.
|
||||
|
||||
made %* conversions work.
|
||||
|
||||
changed x^y so that if y is a positive integer, it's done
|
||||
by explicit multiplication, thus achieving maximum accuracy.
|
||||
(this should be done by pow() but it seems not to be locally.)
|
||||
done to x ^= y as well.
|
||||
|
||||
Jun 4, 1989:
|
||||
ENVIRON array contains environment: if shell variable V=thing,
|
||||
ENVIRON["V"] is "thing"
|
||||
|
||||
multiple -f arguments permitted. error reporting is naive.
|
||||
(they were permitted before, but only the last was used.)
|
||||
|
||||
fixed a really stupid botch in the debugging macro dprintf
|
||||
|
||||
fixed order of evaluation of commandline assignments to match
|
||||
what the book claims: an argument of the form x=e is evaluated
|
||||
at the time it would have been opened if it were a filename (p 63).
|
||||
this invalidates the suggested answer to ex 4-1 (p 195).
|
||||
|
||||
removed some code that permitted -F (space) fieldseparator,
|
||||
since it didn't quite work right anyway. (restored aug 2)
|
||||
|
||||
Apr 27, 1989:
|
||||
Line number now accumulated correctly for comment lines.
|
||||
|
||||
Apr 26, 1989:
|
||||
Debugging output now includes a version date,
|
||||
if one compiles it into the source each time.
|
||||
|
||||
Apr 9, 1989:
|
||||
Changed grammar to prohibit constants as 3rd arg of sub and gsub;
|
||||
prevents class of overwriting-a-constant errors. (Last one?)
|
||||
This invalidates the "banana" example on page 43 of the book.
|
||||
|
||||
Added \a ("alert"), \v (vertical tab), \xhhh (hexadecimal),
|
||||
as in ANSI, for strings. Rescinded the sloppiness that permitted
|
||||
non-octal digits in \ooo. Warning: not all compilers and libraries
|
||||
will be able to deal with \x correctly.
|
||||
|
||||
Jan 9, 1989:
|
||||
Fixed bug that caused tempcell list to contain a duplicate.
|
||||
The fix is kludgy.
|
||||
|
||||
Dec 17, 1988:
|
||||
Catches some more commandline errors in main.
|
||||
Removed redundant decl of modf in run.c (confuses some compilers).
|
||||
Warning: there's no single declaration of malloc, etc., in awk.h
|
||||
that seems to satisfy all compilers.
|
||||
|
||||
Dec 7, 1988:
|
||||
Added a bit of code to error printing to avoid printing nulls.
|
||||
(Not clear that it actually would.)
|
||||
|
||||
Nov 27, 1988:
|
||||
With fear and trembling, modified the grammar to permit
|
||||
multiple pattern-action statements on one line without
|
||||
an explicit separator. By definition, this capitulation
|
||||
to the ghost of ancient implementations remains undefined
|
||||
and thus subject to change without notice or apology.
|
||||
DO NOT COUNT ON IT.
|
||||
|
||||
Oct 30, 1988:
|
||||
Fixed bug in call() that failed to recover storage.
|
||||
|
||||
A warning is now generated if there are more arguments
|
||||
in the call than in the definition (in lieu of fixing
|
||||
another storage leak).
|
||||
|
||||
Oct 20, 1988:
|
||||
Fixed %c: if expr is numeric, use numeric value;
|
||||
otherwise print 1st char of string value. still
|
||||
doesn't work if the value is 0 -- won't print \0.
|
||||
|
||||
Added a few more checks for running out of malloc.
|
||||
|
||||
Oct 12, 1988:
|
||||
Fixed bug in call() that freed local arrays twice.
|
||||
|
||||
Fixed to handle deletion of non-existent array right;
|
||||
complains about attempt to delete non-array element.
|
||||
|
||||
Sep 30, 1988:
|
||||
Now guarantees to evaluate all arguments of built-in
|
||||
functions, as in C; the appearance is that arguments
|
||||
are evaluated before the function is called. Places
|
||||
affected are sub (gsub was ok), substr, printf, and
|
||||
all the built-in arithmetic functions in bltin().
|
||||
A warning is generated if a bltin() is called with
|
||||
the wrong number of arguments.
|
||||
|
||||
This requires changing makeprof on p167 of the book.
|
||||
|
||||
Aug 23, 1988:
|
||||
setting FILENAME in BEGIN caused core dump, apparently
|
||||
because it was freeing space not allocated by malloc.
|
||||
|
||||
July 24, 1988:
|
||||
fixed egregious error in toupper/tolower functions.
|
||||
still subject to rescinding, however.
|
||||
|
||||
July 2, 1988:
|
||||
flush stdout before opening file or pipe
|
||||
|
||||
July 2, 1988:
|
||||
performance bug in b.c/cgoto(): not freeing some sets of states.
|
||||
partial fix only right now, and the number of states increased
|
||||
to make it less obvious.
|
||||
|
||||
June 1, 1988:
|
||||
check error status on close
|
||||
|
||||
May 28, 1988:
|
||||
srand returns seed value it's using.
|
||||
see 1/18/90
|
||||
|
||||
May 22, 1988:
|
||||
Removed limit on depth of function calls.
|
||||
|
||||
May 10, 1988:
|
||||
Fixed lib.c to permit _ in commandline variable names.
|
||||
|
||||
Mar 25, 1988:
|
||||
main.c fixed to recognize -- as terminator of command-
|
||||
line options. Illegal options flagged.
|
||||
Error reporting slightly cleaned up.
|
||||
|
||||
Dec 2, 1987:
|
||||
Newer C compilers apply a strict scope rule to extern
|
||||
declarations within functions. Two extern declarations in
|
||||
lib.c and tran.c have been moved to obviate this problem.
|
||||
|
||||
Oct xx, 1987:
|
||||
Reluctantly added toupper and tolower functions.
|
||||
Subject to rescinding without notice.
|
||||
|
||||
Sep 17, 1987:
|
||||
Error-message printer had printf(s) instead of
|
||||
printf("%s",s); got core dumps when the message
|
||||
included a %.
|
||||
|
||||
Sep 12, 1987:
|
||||
Very long printf strings caused core dump;
|
||||
fixed aprintf, asprintf, format to catch them.
|
||||
Can still get a core dump in printf itself.
|
||||
|
||||
|
|
@ -0,0 +1,94 @@
|
|||
/****************************************************************
|
||||
Copyright (C) Lucent Technologies 1997
|
||||
All Rights Reserved
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and
|
||||
its documentation for any purpose and without fee is hereby
|
||||
granted, provided that the above copyright notice appear in all
|
||||
copies and that both that the copyright notice and this
|
||||
permission notice and warranty disclaimer appear in supporting
|
||||
documentation, and that the name Lucent Technologies or any of
|
||||
its entities not be used in advertising or publicity pertaining
|
||||
to distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
|
||||
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
|
||||
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
||||
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
This is the version of awk described in "The AWK Programming Language",
|
||||
by Al Aho, Brian Kernighan, and Peter Weinberger
|
||||
(Addison-Wesley, 1988, ISBN 0-201-07981-X).
|
||||
|
||||
Changes, mostly bug fixes and occasional enhancements, are listed
|
||||
in FIXES. If you distribute this code further, please please please
|
||||
distribute FIXES with it. If you find errors, please report them
|
||||
to bwk@bell-labs.com. Thanks.
|
||||
|
||||
The program itself is created by
|
||||
make
|
||||
which should produce a sequence of messages roughly like this:
|
||||
|
||||
yacc -d awkgram.y
|
||||
|
||||
conflicts: 43 shift/reduce, 85 reduce/reduce
|
||||
mv y.tab.c ytab.c
|
||||
mv y.tab.h ytab.h
|
||||
cc -c ytab.c
|
||||
cc -c b.c
|
||||
cc -c main.c
|
||||
cc -c parse.c
|
||||
cc maketab.c -o maketab
|
||||
./maketab >proctab.c
|
||||
cc -c proctab.c
|
||||
cc -c tran.c
|
||||
cc -c lib.c
|
||||
cc -c run.c
|
||||
cc -c lex.c
|
||||
cc ytab.o b.o main.o parse.o proctab.o tran.o lib.o run.o lex.o -lm
|
||||
|
||||
This produces an executable a.out; you will eventually want to
|
||||
move this to some place like /usr/bin/awk.
|
||||
|
||||
If your system does not have yacc or bison (the GNU
|
||||
equivalent), you must compile the pieces manually. We have
|
||||
included yacc output in ytab.c and ytab.h, and backup copies in
|
||||
case you overwrite them. We have also included a copy of
|
||||
proctab.c so you do not need to run maketab.
|
||||
|
||||
NOTE: This version uses ANSI C, as you should also. We have
|
||||
compiled this without any changes using gcc -Wall and/or local C
|
||||
compilers on a variety of systems, but new systems or compilers
|
||||
may raise some new complaint; reports of difficulties are
|
||||
welcome.
|
||||
|
||||
This also compiles with Visual C++ on all flavors of Windows,
|
||||
*if* you provide versions of popen and pclose. The file
|
||||
missing95.c contains versions that can be used to get started
|
||||
with, though the underlying support has mysterious properties,
|
||||
the symptom of which can be truncated pipe output. Beware. The
|
||||
file makefile.win gives hints on how to proceed; if you run
|
||||
vcvars32.bat, it will set up necessary paths and parameters so
|
||||
you can subsequently run nmake -f makefile.win. Beware also that
|
||||
when running on Windows under command.com, various quoting
|
||||
conventions are different from Unix systems: single quotes won't
|
||||
work around arguments, and various characters like % are
|
||||
interpreted within double quotes.
|
||||
|
||||
This compiles without change on Macintosh OS X using gcc and
|
||||
the standard developer tools.
|
||||
|
||||
This is also said to compile on Macintosh OS 9 systems, using the
|
||||
file "buildmac" provided by Dan Allen (danallen@microsoft.com),
|
||||
to whom many thanks.
|
||||
|
||||
The version of malloc that comes with some systems is sometimes
|
||||
astonishingly slow. If awk seems slow, you might try fixing that.
|
||||
More generally, turning on optimization can significantly improve
|
||||
awk's speed, perhaps by 1/3 for highest levels.
|
|
@ -0,0 +1,529 @@
|
|||
.de EX
|
||||
.nf
|
||||
.ft CW
|
||||
..
|
||||
.de EE
|
||||
.br
|
||||
.fi
|
||||
.ft 1
|
||||
..
|
||||
awk
|
||||
.TH AWK 1
|
||||
.CT 1 files prog_other
|
||||
.SH NAME
|
||||
awk \- pattern-directed scanning and processing language
|
||||
.SH SYNOPSIS
|
||||
.B awk
|
||||
[
|
||||
.BI \-F
|
||||
.I fs
|
||||
]
|
||||
[
|
||||
.BI \-v
|
||||
.I var=value
|
||||
]
|
||||
[
|
||||
.I 'prog'
|
||||
|
|
||||
.BI \-f
|
||||
.I progfile
|
||||
]
|
||||
[
|
||||
.I file ...
|
||||
]
|
||||
.SH DESCRIPTION
|
||||
.I Awk
|
||||
scans each input
|
||||
.I file
|
||||
for lines that match any of a set of patterns specified literally in
|
||||
.IR prog
|
||||
or in one or more files
|
||||
specified as
|
||||
.B \-f
|
||||
.IR progfile .
|
||||
With each pattern
|
||||
there can be an associated action that will be performed
|
||||
when a line of a
|
||||
.I file
|
||||
matches the pattern.
|
||||
Each line is matched against the
|
||||
pattern portion of every pattern-action statement;
|
||||
the associated action is performed for each matched pattern.
|
||||
The file name
|
||||
.B \-
|
||||
means the standard input.
|
||||
Any
|
||||
.IR file
|
||||
of the form
|
||||
.I var=value
|
||||
is treated as an assignment, not a filename,
|
||||
and is executed at the time it would have been opened if it were a filename.
|
||||
The option
|
||||
.B \-v
|
||||
followed by
|
||||
.I var=value
|
||||
is an assignment to be done before
|
||||
.I prog
|
||||
is executed;
|
||||
any number of
|
||||
.B \-v
|
||||
options may be present.
|
||||
The
|
||||
.B \-F
|
||||
.IR fs
|
||||
option defines the input field separator to be the regular expression
|
||||
.IR fs .
|
||||
.PP
|
||||
An input line is normally made up of fields separated by white space,
|
||||
or by regular expression
|
||||
.BR FS .
|
||||
The fields are denoted
|
||||
.BR $1 ,
|
||||
.BR $2 ,
|
||||
\&..., while
|
||||
.B $0
|
||||
refers to the entire line.
|
||||
If
|
||||
.BR FS
|
||||
is null, the input line is split into one field per character.
|
||||
.PP
|
||||
A pattern-action statement has the form
|
||||
.IP
|
||||
.IB pattern " { " action " }
|
||||
.PP
|
||||
A missing
|
||||
.BI { " action " }
|
||||
means print the line;
|
||||
a missing pattern always matches.
|
||||
Pattern-action statements are separated by newlines or semicolons.
|
||||
.PP
|
||||
An action is a sequence of statements.
|
||||
A statement can be one of the following:
|
||||
.PP
|
||||
.EX
|
||||
.ta \w'\f(CWdelete array[expression]'u
|
||||
.RS
|
||||
.nf
|
||||
.ft CW
|
||||
if(\fI expression \fP)\fI statement \fP\fR[ \fPelse\fI statement \fP\fR]\fP
|
||||
while(\fI expression \fP)\fI statement\fP
|
||||
for(\fI expression \fP;\fI expression \fP;\fI expression \fP)\fI statement\fP
|
||||
for(\fI var \fPin\fI array \fP)\fI statement\fP
|
||||
do\fI statement \fPwhile(\fI expression \fP)
|
||||
break
|
||||
continue
|
||||
{\fR [\fP\fI statement ... \fP\fR] \fP}
|
||||
\fIexpression\fP #\fR commonly\fP\fI var = expression\fP
|
||||
print\fR [ \fP\fIexpression-list \fP\fR] \fP\fR[ \fP>\fI expression \fP\fR]\fP
|
||||
printf\fI format \fP\fR[ \fP,\fI expression-list \fP\fR] \fP\fR[ \fP>\fI expression \fP\fR]\fP
|
||||
return\fR [ \fP\fIexpression \fP\fR]\fP
|
||||
next #\fR skip remaining patterns on this input line\fP
|
||||
nextfile #\fR skip rest of this file, open next, start at top\fP
|
||||
delete\fI array\fP[\fI expression \fP] #\fR delete an array element\fP
|
||||
delete\fI array\fP #\fR delete all elements of array\fP
|
||||
exit\fR [ \fP\fIexpression \fP\fR]\fP #\fR exit immediately; status is \fP\fIexpression\fP
|
||||
.fi
|
||||
.RE
|
||||
.EE
|
||||
.DT
|
||||
.PP
|
||||
Statements are terminated by
|
||||
semicolons, newlines or right braces.
|
||||
An empty
|
||||
.I expression-list
|
||||
stands for
|
||||
.BR $0 .
|
||||
String constants are quoted \&\f(CW"\ "\fR,
|
||||
with the usual C escapes recognized within.
|
||||
Expressions take on string or numeric values as appropriate,
|
||||
and are built using the operators
|
||||
.B + \- * / % ^
|
||||
(exponentiation), and concatenation (indicated by white space).
|
||||
The operators
|
||||
.B
|
||||
! ++ \-\- += \-= *= /= %= ^= > >= < <= == != ?:
|
||||
are also available in expressions.
|
||||
Variables may be scalars, array elements
|
||||
(denoted
|
||||
.IB x [ i ] )
|
||||
or fields.
|
||||
Variables are initialized to the null string.
|
||||
Array subscripts may be any string,
|
||||
not necessarily numeric;
|
||||
this allows for a form of associative memory.
|
||||
Multiple subscripts such as
|
||||
.B [i,j,k]
|
||||
are permitted; the constituents are concatenated,
|
||||
separated by the value of
|
||||
.BR SUBSEP .
|
||||
.PP
|
||||
The
|
||||
.B print
|
||||
statement prints its arguments on the standard output
|
||||
(or on a file if
|
||||
.BI > file
|
||||
or
|
||||
.BI >> file
|
||||
is present or on a pipe if
|
||||
.BI | cmd
|
||||
is present), separated by the current output field separator,
|
||||
and terminated by the output record separator.
|
||||
.I file
|
||||
and
|
||||
.I cmd
|
||||
may be literal names or parenthesized expressions;
|
||||
identical string values in different statements denote
|
||||
the same open file.
|
||||
The
|
||||
.B printf
|
||||
statement formats its expression list according to the format
|
||||
(see
|
||||
.IR printf (3)) .
|
||||
The built-in function
|
||||
.BI close( expr )
|
||||
closes the file or pipe
|
||||
.IR expr .
|
||||
The built-in function
|
||||
.BI fflush( expr )
|
||||
flushes any buffered output for the file or pipe
|
||||
.IR expr .
|
||||
.PP
|
||||
The mathematical functions
|
||||
.BR exp ,
|
||||
.BR log ,
|
||||
.BR sqrt ,
|
||||
.BR sin ,
|
||||
.BR cos ,
|
||||
and
|
||||
.BR atan2
|
||||
are built in.
|
||||
Other built-in functions:
|
||||
.TF length
|
||||
.TP
|
||||
.B length
|
||||
the length of its argument
|
||||
taken as a string,
|
||||
or of
|
||||
.B $0
|
||||
if no argument.
|
||||
.TP
|
||||
.B rand
|
||||
random number on [0,1)
|
||||
.TP
|
||||
.B srand
|
||||
sets seed for
|
||||
.B rand
|
||||
and returns the previous seed.
|
||||
.TP
|
||||
.B int
|
||||
truncates to an integer value
|
||||
.TP
|
||||
.BI substr( s , " m" , " n\fB)
|
||||
the
|
||||
.IR n -character
|
||||
substring of
|
||||
.I s
|
||||
that begins at position
|
||||
.IR m
|
||||
counted from 1.
|
||||
.TP
|
||||
.BI index( s , " t" )
|
||||
the position in
|
||||
.I s
|
||||
where the string
|
||||
.I t
|
||||
occurs, or 0 if it does not.
|
||||
.TP
|
||||
.BI match( s , " r" )
|
||||
the position in
|
||||
.I s
|
||||
where the regular expression
|
||||
.I r
|
||||
occurs, or 0 if it does not.
|
||||
The variables
|
||||
.B RSTART
|
||||
and
|
||||
.B RLENGTH
|
||||
are set to the position and length of the matched string.
|
||||
.TP
|
||||
.BI split( s , " a" , " fs\fB)
|
||||
splits the string
|
||||
.I s
|
||||
into array elements
|
||||
.IB a [1] ,
|
||||
.IB a [2] ,
|
||||
\&...,
|
||||
.IB a [ n ] ,
|
||||
and returns
|
||||
.IR n .
|
||||
The separation is done with the regular expression
|
||||
.I fs
|
||||
or with the field separator
|
||||
.B FS
|
||||
if
|
||||
.I fs
|
||||
is not given.
|
||||
An empty string as field separator splits the string
|
||||
into one array element per character.
|
||||
.TP
|
||||
.BI sub( r , " t" , " s\fB)
|
||||
substitutes
|
||||
.I t
|
||||
for the first occurrence of the regular expression
|
||||
.I r
|
||||
in the string
|
||||
.IR s .
|
||||
If
|
||||
.I s
|
||||
is not given,
|
||||
.B $0
|
||||
is used.
|
||||
.TP
|
||||
.B gsub
|
||||
same as
|
||||
.B sub
|
||||
except that all occurrences of the regular expression
|
||||
are replaced;
|
||||
.B sub
|
||||
and
|
||||
.B gsub
|
||||
return the number of replacements.
|
||||
.TP
|
||||
.BI sprintf( fmt , " expr" , " ...\fB )
|
||||
the string resulting from formatting
|
||||
.I expr ...
|
||||
according to the
|
||||
.IR printf (3)
|
||||
format
|
||||
.I fmt
|
||||
.TP
|
||||
.BI system( cmd )
|
||||
executes
|
||||
.I cmd
|
||||
and returns its exit status
|
||||
.TP
|
||||
.BI tolower( str )
|
||||
returns a copy of
|
||||
.I str
|
||||
with all upper-case characters translated to their
|
||||
corresponding lower-case equivalents.
|
||||
.TP
|
||||
.BI toupper( str )
|
||||
returns a copy of
|
||||
.I str
|
||||
with all lower-case characters translated to their
|
||||
corresponding upper-case equivalents.
|
||||
.PD
|
||||
.PP
|
||||
The ``function''
|
||||
.B getline
|
||||
sets
|
||||
.B $0
|
||||
to the next input record from the current input file;
|
||||
.B getline
|
||||
.BI < file
|
||||
sets
|
||||
.B $0
|
||||
to the next record from
|
||||
.IR file .
|
||||
.B getline
|
||||
.I x
|
||||
sets variable
|
||||
.I x
|
||||
instead.
|
||||
Finally,
|
||||
.IB cmd " | getline
|
||||
pipes the output of
|
||||
.I cmd
|
||||
into
|
||||
.BR getline ;
|
||||
each call of
|
||||
.B getline
|
||||
returns the next line of output from
|
||||
.IR cmd .
|
||||
In all cases,
|
||||
.B getline
|
||||
returns 1 for a successful input,
|
||||
0 for end of file, and \-1 for an error.
|
||||
.PP
|
||||
Patterns are arbitrary Boolean combinations
|
||||
(with
|
||||
.BR "! || &&" )
|
||||
of regular expressions and
|
||||
relational expressions.
|
||||
Regular expressions are as in
|
||||
.IR egrep ;
|
||||
see
|
||||
.IR grep (1).
|
||||
Isolated regular expressions
|
||||
in a pattern apply to the entire line.
|
||||
Regular expressions may also occur in
|
||||
relational expressions, using the operators
|
||||
.BR ~
|
||||
and
|
||||
.BR !~ .
|
||||
.BI / re /
|
||||
is a constant regular expression;
|
||||
any string (constant or variable) may be used
|
||||
as a regular expression, except in the position of an isolated regular expression
|
||||
in a pattern.
|
||||
.PP
|
||||
A pattern may consist of two patterns separated by a comma;
|
||||
in this case, the action is performed for all lines
|
||||
from an occurrence of the first pattern
|
||||
through an occurrence of the second.
|
||||
.PP
|
||||
A relational expression is one of the following:
|
||||
.IP
|
||||
.I expression matchop regular-expression
|
||||
.br
|
||||
.I expression relop expression
|
||||
.br
|
||||
.IB expression " in " array-name
|
||||
.br
|
||||
.BI ( expr , expr,... ") in " array-name
|
||||
.PP
|
||||
where a relop is any of the six relational operators in C,
|
||||
and a matchop is either
|
||||
.B ~
|
||||
(matches)
|
||||
or
|
||||
.B !~
|
||||
(does not match).
|
||||
A conditional is an arithmetic expression,
|
||||
a relational expression,
|
||||
or a Boolean combination
|
||||
of these.
|
||||
.PP
|
||||
The special patterns
|
||||
.B BEGIN
|
||||
and
|
||||
.B END
|
||||
may be used to capture control before the first input line is read
|
||||
and after the last.
|
||||
.B BEGIN
|
||||
and
|
||||
.B END
|
||||
do not combine with other patterns.
|
||||
.PP
|
||||
Variable names with special meanings:
|
||||
.TF FILENAME
|
||||
.TP
|
||||
.B CONVFMT
|
||||
conversion format used when converting numbers
|
||||
(default
|
||||
.BR "%.6g" )
|
||||
.TP
|
||||
.B FS
|
||||
regular expression used to separate fields; also settable
|
||||
by option
|
||||
.BI \-F fs.
|
||||
.TP
|
||||
.BR NF
|
||||
number of fields in the current record
|
||||
.TP
|
||||
.B NR
|
||||
ordinal number of the current record
|
||||
.TP
|
||||
.B FNR
|
||||
ordinal number of the current record in the current file
|
||||
.TP
|
||||
.B FILENAME
|
||||
the name of the current input file
|
||||
.TP
|
||||
.B RS
|
||||
input record separator (default newline)
|
||||
.TP
|
||||
.B OFS
|
||||
output field separator (default blank)
|
||||
.TP
|
||||
.B ORS
|
||||
output record separator (default newline)
|
||||
.TP
|
||||
.B OFMT
|
||||
output format for numbers (default
|
||||
.BR "%.6g" )
|
||||
.TP
|
||||
.B SUBSEP
|
||||
separates multiple subscripts (default 034)
|
||||
.TP
|
||||
.B ARGC
|
||||
argument count, assignable
|
||||
.TP
|
||||
.B ARGV
|
||||
argument array, assignable;
|
||||
non-null members are taken as filenames
|
||||
.TP
|
||||
.B ENVIRON
|
||||
array of environment variables; subscripts are names.
|
||||
.PD
|
||||
.PP
|
||||
Functions may be defined (at the position of a pattern-action statement) thus:
|
||||
.IP
|
||||
.B
|
||||
function foo(a, b, c) { ...; return x }
|
||||
.PP
|
||||
Parameters are passed by value if scalar and by reference if array name;
|
||||
functions may be called recursively.
|
||||
Parameters are local to the function; all other variables are global.
|
||||
Thus local variables may be created by providing excess parameters in
|
||||
the function definition.
|
||||
.SH EXAMPLES
|
||||
.TP
|
||||
.EX
|
||||
length($0) > 72
|
||||
.EE
|
||||
Print lines longer than 72 characters.
|
||||
.TP
|
||||
.EX
|
||||
{ print $2, $1 }
|
||||
.EE
|
||||
Print first two fields in opposite order.
|
||||
.PP
|
||||
.EX
|
||||
BEGIN { FS = ",[ \et]*|[ \et]+" }
|
||||
{ print $2, $1 }
|
||||
.EE
|
||||
.ns
|
||||
.IP
|
||||
Same, with input fields separated by comma and/or blanks and tabs.
|
||||
.PP
|
||||
.EX
|
||||
.nf
|
||||
{ s += $1 }
|
||||
END { print "sum is", s, " average is", s/NR }
|
||||
.fi
|
||||
.EE
|
||||
.ns
|
||||
.IP
|
||||
Add up first column, print sum and average.
|
||||
.TP
|
||||
.EX
|
||||
/start/, /stop/
|
||||
.EE
|
||||
Print all lines between start/stop pairs.
|
||||
.PP
|
||||
.EX
|
||||
.nf
|
||||
BEGIN { # Simulate echo(1)
|
||||
for (i = 1; i < ARGC; i++) printf "%s ", ARGV[i]
|
||||
printf "\en"
|
||||
exit }
|
||||
.fi
|
||||
.EE
|
||||
.SH SEE ALSO
|
||||
.IR lex (1),
|
||||
.IR sed (1)
|
||||
.br
|
||||
A. V. Aho, B. W. Kernighan, P. J. Weinberger,
|
||||
.I
|
||||
The AWK Programming Language,
|
||||
Addison-Wesley, 1988. ISBN 0-201-07981-X
|
||||
.SH BUGS
|
||||
There are no explicit conversions between numbers and strings.
|
||||
To force an expression to be treated as a number add 0 to it;
|
||||
to force it to be treated as a string concatenate
|
||||
\&\f(CW""\fP to it.
|
||||
.br
|
||||
The scope rules for variables in functions are a botch;
|
||||
the syntax is worse.
|
|
@ -0,0 +1,233 @@
|
|||
/****************************************************************
|
||||
Copyright (C) Lucent Technologies 1997
|
||||
All Rights Reserved
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and
|
||||
its documentation for any purpose and without fee is hereby
|
||||
granted, provided that the above copyright notice appear in all
|
||||
copies and that both that the copyright notice and this
|
||||
permission notice and warranty disclaimer appear in supporting
|
||||
documentation, and that the name Lucent Technologies or any of
|
||||
its entities not be used in advertising or publicity pertaining
|
||||
to distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
|
||||
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
|
||||
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
||||
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
typedef double Awkfloat;
|
||||
|
||||
/* unsigned char is more trouble than it's worth */
|
||||
|
||||
typedef unsigned char uschar;
|
||||
|
||||
/*
 * xfree(a): release the block that a points to (when a is non-NULL) and
 * reset a to NULL.  a must be a modifiable pointer lvalue; it is evaluated
 * more than once, so do not pass an expression with side effects.
 * The do/while(0) wrapper makes "xfree(p);" a single statement, so it is
 * safe as the body of an unbraced if that is followed by else.
 */
#define xfree(a) do { if ((a) != NULL) { free((void *) (a)); (a) = NULL; } } while (0)
|
||||
|
||||
#define NN(p) ((p) ? (p) : "(null)") /* guaranteed non-null for dprintf
|
||||
*/
|
||||
#define DEBUG
|
||||
#ifdef DEBUG
|
||||
/* uses have to be doubly parenthesized */
|
||||
/*
 * dprintf((fmt, args...)): debugging trace; calls printf with the
 * parenthesized argument list only when dbg is nonzero.  The argument must
 * carry its own parentheses because it is pasted directly after "printf".
 * Wrapped in do/while(0) so that an else following an unbraced
 * "if (c) dprintf((...));" binds to the caller's if, not to the macro's.
 */
# define dprintf(x) do { if (dbg) printf x; } while (0)
|
||||
#else
|
||||
# define dprintf(x)
|
||||
#endif
|
||||
|
||||
extern int compile_time; /* 1 if compiling, 0 if running */
|
||||
extern int safe; /* 0 => unsafe, 1 => safe */
|
||||
|
||||
#define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. */
|
||||
extern int recsize; /* size of current record, orig RECSIZE */
|
||||
|
||||
extern char **FS;
|
||||
extern char **RS;
|
||||
extern char **ORS;
|
||||
extern char **OFS;
|
||||
extern char **OFMT;
|
||||
extern Awkfloat *NR;
|
||||
extern Awkfloat *FNR;
|
||||
extern Awkfloat *NF;
|
||||
extern char **FILENAME;
|
||||
extern char **SUBSEP;
|
||||
extern Awkfloat *RSTART;
|
||||
extern Awkfloat *RLENGTH;
|
||||
|
||||
extern char *record; /* points to $0 */
|
||||
extern int lineno; /* line number in awk program */
|
||||
extern int errorflag; /* 1 if error has occurred */
|
||||
extern int donefld; /* 1 if record broken into fields */
|
||||
extern int donerec; /* 1 if record is valid (no fld has changed) */
|
||||
extern char inputFS[]; /* FS at time of input, for field splitting */
|
||||
|
||||
extern int dbg;
|
||||
|
||||
extern char *patbeg; /* beginning of pattern matched */
|
||||
extern int patlen; /* length of pattern matched. set in b.c */
|
||||
|
||||
/* Cell: all information about a variable or constant */
|
||||
|
||||
typedef struct Cell {
|
||||
uschar ctype; /* OCELL, OBOOL, OJUMP, etc. (isjump() tests for OJUMP) */
|
||||
uschar csub; /* CCON, CTEMP, CFLD, etc.; also compared with BTRUE and the J* jump codes by istrue(), isexit(), ... */
|
||||
char *nval; /* name, for variables only */
|
||||
char *sval; /* string value; freeable() reports it as freeable only when STR is set and DONTFREE is clear in tval */
|
||||
Awkfloat fval; /* value as number */
|
||||
int tval; /* type info: bitwise OR of STR|NUM|ARR|FCN|FLD|REC|CON|DONTFREE */
|
||||
struct Cell *cnext; /* ptr to next if chained */
|
||||
} Cell;
|
||||
|
||||
typedef struct Array { /* symbol table array */
|
||||
int nelem; /* elements in table right now */
|
||||
int size; /* size of tab, i.e. how many Cell pointers tab holds */
|
||||
Cell **tab; /* hash table pointers; presumably each slot heads a chain linked through Cell.cnext -- TODO confirm in tran.c */
|
||||
} Array;
|
||||
|
||||
#define NSYMTAB 50 /* initial size of a symbol table */
|
||||
extern Array *symtab;
|
||||
|
||||
extern Cell *nrloc; /* NR */
|
||||
extern Cell *fnrloc; /* FNR */
|
||||
extern Cell *nfloc; /* NF */
|
||||
extern Cell *rstartloc; /* RSTART */
|
||||
extern Cell *rlengthloc; /* RLENGTH */
|
||||
|
||||
/* Cell.tval values: */
|
||||
#define NUM 01 /* number value is valid */
|
||||
#define STR 02 /* string value is valid */
|
||||
#define DONTFREE 04 /* string space is not freeable */
|
||||
#define CON 010 /* this is a constant */
|
||||
#define ARR 020 /* this is an array */
|
||||
#define FCN 040 /* this is a function name */
|
||||
#define FLD 0100 /* this is a field $1, $2, ... */
|
||||
#define REC 0200 /* this is $0 */
|
||||
|
||||
|
||||
/* function types */
|
||||
#define FLENGTH 1
|
||||
#define FSQRT 2
|
||||
#define FEXP 3
|
||||
#define FLOG 4
|
||||
#define FINT 5
|
||||
#define FSYSTEM 6
|
||||
#define FRAND 7
|
||||
#define FSRAND 8
|
||||
#define FSIN 9
|
||||
#define FCOS 10
|
||||
#define FATAN 11
|
||||
#define FTOUPPER 12
|
||||
#define FTOLOWER 13
|
||||
#define FFLUSH 14
|
||||
|
||||
/* Node: parse tree is made of nodes, with Cell's at bottom */
|
||||
|
||||
typedef struct Node {
|
||||
int ntype; /* node type: NVALUE, NSTAT, or NEXPR (tested by isvalue(), isexpr()) */
|
||||
struct Node *nnext; /* link to another Node; presumably the next node in a list -- TODO confirm in parse.c */
|
||||
int lineno; /* presumably the awk-program line this node came from -- TODO confirm */
|
||||
int nobj; /* compared with ARG by isargument(); presumably a token code -- TODO confirm */
|
||||
struct Node *narg[1]; /* variable: actual size set by calling malloc */
|
||||
} Node;
|
||||
|
||||
#define NIL ((Node *) 0)
|
||||
|
||||
extern Node *winner;
|
||||
extern Node *nullstat;
|
||||
extern Node *nullnode;
|
||||
|
||||
/* ctypes */
|
||||
#define OCELL 1
|
||||
#define OBOOL 2
|
||||
#define OJUMP 3
|
||||
|
||||
/* Cell subtypes: csub */
|
||||
#define CFREE 7
|
||||
#define CCOPY 6
|
||||
#define CCON 5
|
||||
#define CTEMP 4
|
||||
#define CNAME 3
|
||||
#define CVAR 2
|
||||
#define CFLD 1
|
||||
#define CUNK 0
|
||||
|
||||
/* bool subtypes */
|
||||
#define BTRUE 11
|
||||
#define BFALSE 12
|
||||
|
||||
/* jump subtypes */
|
||||
#define JEXIT 21
|
||||
#define JNEXT 22
|
||||
#define JBREAK 23
|
||||
#define JCONT 24
|
||||
#define JRET 25
|
||||
#define JNEXTFILE 26
|
||||
|
||||
/* node types */
|
||||
#define NVALUE 1
|
||||
#define NSTAT 2
|
||||
#define NEXPR 3
|
||||
|
||||
|
||||
extern int pairstack[], paircnt;
|
||||
|
||||
/*
 * notlegal(n): true when n does not lie strictly between FIRSTTOKEN and
 * LASTTOKEN, or when its proctab slot (index n-FIRSTTOKEN) is nullproc.
 * n is parenthesized at every use so that an expression argument such as
 * "k & 15" is compared and indexed as a whole; n is evaluated up to three
 * times, so it must not have side effects.
 */
#define notlegal(n) ((n) <= FIRSTTOKEN || (n) >= LASTTOKEN || proctab[(n)-FIRSTTOKEN] == nullproc)
|
||||
#define isvalue(n) ((n)->ntype == NVALUE)
|
||||
#define isexpr(n) ((n)->ntype == NEXPR)
|
||||
#define isjump(n) ((n)->ctype == OJUMP)
|
||||
#define isexit(n) ((n)->csub == JEXIT)
|
||||
#define isbreak(n) ((n)->csub == JBREAK)
|
||||
#define iscont(n) ((n)->csub == JCONT)
|
||||
#define isnext(n) ((n)->csub == JNEXT || (n)->csub == JNEXTFILE)
|
||||
#define isret(n) ((n)->csub == JRET)
|
||||
#define isrec(n) ((n)->tval & REC)
|
||||
#define isfld(n) ((n)->tval & FLD)
|
||||
#define isstr(n) ((n)->tval & STR)
|
||||
#define isnum(n) ((n)->tval & NUM)
|
||||
#define isarr(n) ((n)->tval & ARR)
|
||||
#define isfcn(n) ((n)->tval & FCN)
|
||||
#define istrue(n) ((n)->csub == BTRUE)
|
||||
#define istemp(n) ((n)->csub == CTEMP)
|
||||
#define isargument(n) ((n)->nobj == ARG)
|
||||
/* #define freeable(p) (!((p)->tval & DONTFREE)) */
|
||||
#define freeable(p) ( ((p)->tval & (STR|DONTFREE)) == STR )
|
||||
|
||||
/* structures used by regular expression matching machinery, mostly b.c: */
|
||||
|
||||
#define NCHARS (256+3) /* 256 handles 8-bit chars; 128 does 7-bit */
|
||||
/* watch out in match(), etc. */
|
||||
#define NSTATES 32
|
||||
|
||||
typedef struct rrow {
|
||||
long ltype; /* long avoids pointer warnings on 64-bit */
|
||||
union {
|
||||
int i;
|
||||
Node *np;
|
||||
uschar *up;
|
||||
} lval; /* because Al stores a pointer in it! */
|
||||
int *lfollow;
|
||||
} rrow;
|
||||
|
||||
typedef struct fa {
|
||||
uschar gototab[NSTATES][NCHARS];
|
||||
uschar out[NSTATES];
|
||||
uschar *restr;
|
||||
int *posns[NSTATES];
|
||||
int anchor;
|
||||
int use;
|
||||
int initstat;
|
||||
int curstat;
|
||||
int accept;
|
||||
int reset;
|
||||
struct rrow re[1]; /* variable: actual size set by calling malloc */
|
||||
} fa;
|
||||
|
||||
|
||||
#include "proto.h"
|
|
@ -0,0 +1,486 @@
|
|||
/****************************************************************
|
||||
Copyright (C) Lucent Technologies 1997
|
||||
All Rights Reserved
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and
|
||||
its documentation for any purpose and without fee is hereby
|
||||
granted, provided that the above copyright notice appear in all
|
||||
copies and that both that the copyright notice and this
|
||||
permission notice and warranty disclaimer appear in supporting
|
||||
documentation, and that the name Lucent Technologies or any of
|
||||
its entities not be used in advertising or publicity pertaining
|
||||
to distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
|
||||
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
|
||||
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
||||
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
%{
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "awk.h"
|
||||
|
||||
void checkdup(Node *list, Cell *item);
|
||||
int yywrap(void) { return(1); }
|
||||
|
||||
Node *beginloc = 0;
|
||||
Node *endloc = 0;
|
||||
int infunc = 0; /* = 1 if in arglist or body of func */
|
||||
int inloop = 0; /* = 1 if in while, for, do */
|
||||
char *curfname = 0; /* current function name */
|
||||
Node *arglist = 0; /* list of args for current function */
|
||||
%}
|
||||
|
||||
%union {
|
||||
Node *p;
|
||||
Cell *cp;
|
||||
int i;
|
||||
char *s;
|
||||
}
|
||||
|
||||
%token <i> FIRSTTOKEN /* must be first */
|
||||
%token <p> PROGRAM PASTAT PASTAT2 XBEGIN XEND
|
||||
%token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
|
||||
%token <i> ARRAY
|
||||
%token <i> MATCH NOTMATCH MATCHOP
|
||||
%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE
|
||||
%token <i> AND BOR APPEND EQ GE GT LE LT NE IN
|
||||
%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
|
||||
%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
|
||||
%token <i> ADD MINUS MULT DIVIDE MOD
|
||||
%token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
|
||||
%token <i> PRINT PRINTF SPRINTF
|
||||
%token <p> ELSE INTEST CONDEXPR
|
||||
%token <i> POSTINCR PREINCR POSTDECR PREDECR
|
||||
%token <cp> VAR IVAR VARNF CALL NUMBER STRING
|
||||
%token <s> REGEXPR
|
||||
|
||||
%type <p> pas pattern ppattern plist pplist patlist prarg term re
|
||||
%type <p> pa_pat pa_stat pa_stats
|
||||
%type <s> reg_expr
|
||||
%type <p> simple_stmt opt_simple_stmt stmt stmtlist
|
||||
%type <p> var varname funcname varlist
|
||||
%type <p> for if else while
|
||||
%type <i> do st
|
||||
%type <i> pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
|
||||
%type <i> subop print
|
||||
|
||||
%right ASGNOP
|
||||
%right '?'
|
||||
%right ':'
|
||||
%left BOR
|
||||
%left AND
|
||||
%left GETLINE
|
||||
%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
|
||||
%left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
|
||||
%left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
|
||||
%left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
|
||||
%left REGEXPR VAR VARNF IVAR WHILE '('
|
||||
%left CAT
|
||||
%left '+' '-'
|
||||
%left '*' '/' '%'
|
||||
%left NOT UMINUS
|
||||
%right POWER
|
||||
%right DECR INCR
|
||||
%left INDIRECT
|
||||
%token LASTTOKEN /* must be last */
|
||||
|
||||
%%
|
||||
|
||||
program:
|
||||
pas { if (errorflag==0)
|
||||
winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
|
||||
| error { yyclearin; bracecheck(); SYNTAX("bailing out"); }
|
||||
;
|
||||
|
||||
and:
|
||||
AND | and NL
|
||||
;
|
||||
|
||||
bor:
|
||||
BOR | bor NL
|
||||
;
|
||||
|
||||
comma:
|
||||
',' | comma NL
|
||||
;
|
||||
|
||||
do:
|
||||
DO | do NL
|
||||
;
|
||||
|
||||
else:
|
||||
ELSE | else NL
|
||||
;
|
||||
|
||||
for:
|
||||
FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
|
||||
{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
|
||||
| FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
|
||||
{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
|
||||
| FOR '(' varname IN varname rparen {inloop++;} stmt
|
||||
{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
|
||||
;
|
||||
|
||||
funcname:
|
||||
VAR { setfname($1); }
|
||||
| CALL { setfname($1); }
|
||||
;
|
||||
|
||||
if:
|
||||
IF '(' pattern rparen { $$ = notnull($3); }
|
||||
;
|
||||
|
||||
lbrace:
|
||||
'{' | lbrace NL
|
||||
;
|
||||
|
||||
nl:
|
||||
NL | nl NL
|
||||
;
|
||||
|
||||
opt_nl:
|
||||
/* empty */ { $$ = 0; }
|
||||
| nl
|
||||
;
|
||||
|
||||
opt_pst:
|
||||
/* empty */ { $$ = 0; }
|
||||
| pst
|
||||
;
|
||||
|
||||
|
||||
opt_simple_stmt:
|
||||
/* empty */ { $$ = 0; }
|
||||
| simple_stmt
|
||||
;
|
||||
|
||||
pas:
|
||||
opt_pst { $$ = 0; }
|
||||
| opt_pst pa_stats opt_pst { $$ = $2; }
|
||||
;
|
||||
|
||||
pa_pat:
|
||||
pattern { $$ = notnull($1); }
|
||||
;
|
||||
|
||||
pa_stat:
|
||||
pa_pat { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
|
||||
| pa_pat lbrace stmtlist '}' { $$ = stat2(PASTAT, $1, $3); }
|
||||
| pa_pat ',' pa_pat { $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
|
||||
| pa_pat ',' pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $3, $5); }
|
||||
| lbrace stmtlist '}' { $$ = stat2(PASTAT, NIL, $2); }
|
||||
| XBEGIN lbrace stmtlist '}'
|
||||
{ beginloc = linkum(beginloc, $3); $$ = 0; }
|
||||
| XEND lbrace stmtlist '}'
|
||||
{ endloc = linkum(endloc, $3); $$ = 0; }
|
||||
| FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
|
||||
{ infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
|
||||
;
|
||||
|
||||
pa_stats:
|
||||
pa_stat
|
||||
| pa_stats opt_pst pa_stat { $$ = linkum($1, $3); }
|
||||
;
|
||||
|
||||
patlist:
|
||||
pattern
|
||||
| patlist comma pattern { $$ = linkum($1, $3); }
|
||||
;
|
||||
|
||||
ppattern:
|
||||
var ASGNOP ppattern { $$ = op2($2, $1, $3); }
|
||||
| ppattern '?' ppattern ':' ppattern %prec '?'
|
||||
{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
|
||||
| ppattern bor ppattern %prec BOR
|
||||
{ $$ = op2(BOR, notnull($1), notnull($3)); }
|
||||
| ppattern and ppattern %prec AND
|
||||
{ $$ = op2(AND, notnull($1), notnull($3)); }
|
||||
| ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
|
||||
| ppattern MATCHOP ppattern
|
||||
{ if (constnode($3))
|
||||
$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
|
||||
else
|
||||
$$ = op3($2, (Node *)1, $1, $3); }
|
||||
| ppattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
|
||||
| '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
|
||||
| ppattern term %prec CAT { $$ = op2(CAT, $1, $2); }
|
||||
| re
|
||||
| term
|
||||
;
|
||||
|
||||
pattern:
|
||||
var ASGNOP pattern { $$ = op2($2, $1, $3); }
|
||||
| pattern '?' pattern ':' pattern %prec '?'
|
||||
{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
|
||||
| pattern bor pattern %prec BOR
|
||||
{ $$ = op2(BOR, notnull($1), notnull($3)); }
|
||||
| pattern and pattern %prec AND
|
||||
{ $$ = op2(AND, notnull($1), notnull($3)); }
|
||||
| pattern EQ pattern { $$ = op2($2, $1, $3); }
|
||||
| pattern GE pattern { $$ = op2($2, $1, $3); }
|
||||
| pattern GT pattern { $$ = op2($2, $1, $3); }
|
||||
| pattern LE pattern { $$ = op2($2, $1, $3); }
|
||||
| pattern LT pattern { $$ = op2($2, $1, $3); }
|
||||
| pattern NE pattern { $$ = op2($2, $1, $3); }
|
||||
| pattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
|
||||
| pattern MATCHOP pattern
|
||||
{ if (constnode($3))
|
||||
$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
|
||||
else
|
||||
$$ = op3($2, (Node *)1, $1, $3); }
|
||||
| pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
|
||||
| '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
|
||||
| pattern '|' GETLINE var {
|
||||
if (safe) SYNTAX("cmd | getline is unsafe");
|
||||
else $$ = op3(GETLINE, $4, itonp($2), $1); }
|
||||
| pattern '|' GETLINE {
|
||||
if (safe) SYNTAX("cmd | getline is unsafe");
|
||||
else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
|
||||
| pattern term %prec CAT { $$ = op2(CAT, $1, $2); }
|
||||
| re
|
||||
| term
|
||||
;
|
||||
|
||||
plist:
|
||||
pattern comma pattern { $$ = linkum($1, $3); }
|
||||
| plist comma pattern { $$ = linkum($1, $3); }
|
||||
;
|
||||
|
||||
pplist:
|
||||
ppattern
|
||||
| pplist comma ppattern { $$ = linkum($1, $3); }
|
||||
;
|
||||
|
||||
prarg:
|
||||
/* empty */ { $$ = rectonode(); }
|
||||
| pplist
|
||||
| '(' plist ')' { $$ = $2; }
|
||||
;
|
||||
|
||||
print:
|
||||
PRINT | PRINTF
|
||||
;
|
||||
|
||||
pst:
|
||||
NL | ';' | pst NL | pst ';'
|
||||
;
|
||||
|
||||
rbrace:
|
||||
'}' | rbrace NL
|
||||
;
|
||||
|
||||
re:
|
||||
reg_expr
|
||||
{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
|
||||
| NOT re { $$ = op1(NOT, notnull($2)); }
|
||||
;
|
||||
|
||||
reg_expr:
|
||||
'/' {startreg();} REGEXPR '/' { $$ = $3; }
|
||||
;
|
||||
|
||||
rparen:
|
||||
')' | rparen NL
|
||||
;
|
||||
|
||||
simple_stmt:
|
||||
print prarg '|' term {
|
||||
if (safe) SYNTAX("print | is unsafe");
|
||||
else $$ = stat3($1, $2, itonp($3), $4); }
|
||||
| print prarg APPEND term {
|
||||
if (safe) SYNTAX("print >> is unsafe");
|
||||
else $$ = stat3($1, $2, itonp($3), $4); }
|
||||
| print prarg GT term {
|
||||
if (safe) SYNTAX("print > is unsafe");
|
||||
else $$ = stat3($1, $2, itonp($3), $4); }
|
||||
| print prarg { $$ = stat3($1, $2, NIL, NIL); }
|
||||
| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
|
||||
| DELETE varname { $$ = stat2(DELETE, makearr($2), 0); }
|
||||
| pattern { $$ = exptostat($1); }
|
||||
| error { yyclearin; SYNTAX("illegal statement"); }
|
||||
;
|
||||
|
||||
st:
|
||||
nl
|
||||
| ';' opt_nl
|
||||
;
|
||||
|
||||
stmt:
|
||||
BREAK st { if (!inloop) SYNTAX("break illegal outside of loops");
|
||||
$$ = stat1(BREAK, NIL); }
|
||||
| CONTINUE st { if (!inloop) SYNTAX("continue illegal outside of loops");
|
||||
$$ = stat1(CONTINUE, NIL); }
|
||||
| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
|
||||
{ $$ = stat2(DO, $3, notnull($7)); }
|
||||
| EXIT pattern st { $$ = stat1(EXIT, $2); }
|
||||
| EXIT st { $$ = stat1(EXIT, NIL); }
|
||||
| for
|
||||
| if stmt else stmt { $$ = stat3(IF, $1, $2, $4); }
|
||||
| if stmt { $$ = stat3(IF, $1, $2, NIL); }
|
||||
| lbrace stmtlist rbrace { $$ = $2; }
|
||||
| NEXT st { if (infunc)
|
||||
SYNTAX("next is illegal inside a function");
|
||||
$$ = stat1(NEXT, NIL); }
|
||||
| NEXTFILE st { if (infunc)
|
||||
SYNTAX("nextfile is illegal inside a function");
|
||||
$$ = stat1(NEXTFILE, NIL); }
|
||||
| RETURN pattern st { $$ = stat1(RETURN, $2); }
|
||||
| RETURN st { $$ = stat1(RETURN, NIL); }
|
||||
| simple_stmt st
|
||||
| while {inloop++;} stmt { --inloop; $$ = stat2(WHILE, $1, $3); }
|
||||
| ';' opt_nl { $$ = 0; }
|
||||
;
|
||||
|
||||
stmtlist:
|
||||
stmt
|
||||
| stmtlist stmt { $$ = linkum($1, $2); }
|
||||
;
|
||||
|
||||
subop:
|
||||
SUB | GSUB
|
||||
;
|
||||
|
||||
term:
|
||||
term '/' ASGNOP term { $$ = op2(DIVEQ, $1, $4); }
|
||||
| term '+' term { $$ = op2(ADD, $1, $3); }
|
||||
| term '-' term { $$ = op2(MINUS, $1, $3); }
|
||||
| term '*' term { $$ = op2(MULT, $1, $3); }
|
||||
| term '/' term { $$ = op2(DIVIDE, $1, $3); }
|
||||
| term '%' term { $$ = op2(MOD, $1, $3); }
|
||||
| term POWER term { $$ = op2(POWER, $1, $3); }
|
||||
| '-' term %prec UMINUS { $$ = op1(UMINUS, $2); }
|
||||
| '+' term %prec UMINUS { $$ = $2; }
|
||||
| NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); }
|
||||
| BLTIN '(' ')' { $$ = op2(BLTIN, itonp($1), rectonode()); }
|
||||
| BLTIN '(' patlist ')' { $$ = op2(BLTIN, itonp($1), $3); }
|
||||
| BLTIN { $$ = op2(BLTIN, itonp($1), rectonode()); }
|
||||
| CALL '(' ')' { $$ = op2(CALL, celltonode($1,CVAR), NIL); }
|
||||
| CALL '(' patlist ')' { $$ = op2(CALL, celltonode($1,CVAR), $3); }
|
||||
| CLOSE term { $$ = op1(CLOSE, $2); }
|
||||
| DECR var { $$ = op1(PREDECR, $2); }
|
||||
| INCR var { $$ = op1(PREINCR, $2); }
|
||||
| var DECR { $$ = op1(POSTDECR, $1); }
|
||||
| var INCR { $$ = op1(POSTINCR, $1); }
|
||||
| GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); }
|
||||
| GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); }
|
||||
| GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }
|
||||
| GETLINE { $$ = op3(GETLINE, NIL, NIL, NIL); }
|
||||
| INDEX '(' pattern comma pattern ')'
|
||||
{ $$ = op2(INDEX, $3, $5); }
|
||||
| INDEX '(' pattern comma reg_expr ')'
|
||||
{ SYNTAX("index() doesn't permit regular expressions");
|
||||
$$ = op2(INDEX, $3, (Node*)$5); }
|
||||
| '(' pattern ')' { $$ = $2; }
|
||||
| MATCHFCN '(' pattern comma reg_expr ')'
|
||||
{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
|
||||
| MATCHFCN '(' pattern comma pattern ')'
|
||||
{ if (constnode($5))
|
||||
$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
|
||||
else
|
||||
$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
|
||||
| NUMBER { $$ = celltonode($1, CCON); }
|
||||
| SPLIT '(' pattern comma varname comma pattern ')' /* string */
|
||||
{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
|
||||
| SPLIT '(' pattern comma varname comma reg_expr ')' /* const /regexp/ */
|
||||
{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
|
||||
| SPLIT '(' pattern comma varname ')'
|
||||
{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */
|
||||
| SPRINTF '(' patlist ')' { $$ = op1($1, $3); }
|
||||
| STRING { $$ = celltonode($1, CCON); }
|
||||
| subop '(' reg_expr comma pattern ')'
|
||||
{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
|
||||
| subop '(' pattern comma pattern ')'
|
||||
{ if (constnode($3))
|
||||
$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
|
||||
else
|
||||
$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
|
||||
| subop '(' reg_expr comma pattern comma var ')'
|
||||
{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
|
||||
| subop '(' pattern comma pattern comma var ')'
|
||||
{ if (constnode($3))
|
||||
$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
|
||||
else
|
||||
$$ = op4($1, (Node *)1, $3, $5, $7); }
|
||||
| SUBSTR '(' pattern comma pattern comma pattern ')'
|
||||
{ $$ = op3(SUBSTR, $3, $5, $7); }
|
||||
| SUBSTR '(' pattern comma pattern ')'
|
||||
{ $$ = op3(SUBSTR, $3, $5, NIL); }
|
||||
| var
|
||||
;
|
||||
|
||||
var:
|
||||
varname
|
||||
| varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); }
|
||||
| IVAR { $$ = op1(INDIRECT, celltonode($1, CVAR)); }
|
||||
| INDIRECT term { $$ = op1(INDIRECT, $2); }
|
||||
;
|
||||
|
||||
varlist:
|
||||
/* nothing */ { arglist = $$ = 0; }
|
||||
| VAR { arglist = $$ = celltonode($1,CVAR); }
|
||||
| varlist comma VAR {
|
||||
checkdup($1, $3);
|
||||
arglist = $$ = linkum($1,celltonode($3,CVAR)); }
|
||||
;
|
||||
|
||||
varname:
|
||||
VAR { $$ = celltonode($1, CVAR); }
|
||||
| ARG { $$ = op1(ARG, itonp($1)); }
|
||||
| VARNF { $$ = op1(VARNF, (Node *) $1); }
|
||||
;
|
||||
|
||||
|
||||
while:
|
||||
WHILE '(' pattern rparen { $$ = notnull($3); }
|
||||
;
|
||||
|
||||
%%
|
||||
|
||||
/*
 * setfname: remember the name of the function whose definition is starting.
 * Reports a syntax error if the symbol is already an array or a function,
 * then records the name in curfname either way.
 */
void setfname(Cell *p)
{
	char *name = p->nval;

	if (isarr(p)) {
		SYNTAX("%s is an array, not a function", name);
	} else if (isfcn(p)) {
		SYNTAX("you can't define function %s more than once", name);
	}
	curfname = name;
}
|
||||
|
||||
/*
 * constnode: return 1 if p is a value node whose cell is a constant (CCON),
 * otherwise 0.
 */
int constnode(Node *p)
{
	Cell *cell;

	if (!isvalue(p))
		return 0;
	cell = (Cell *) p->narg[0];
	return cell->csub == CCON;
}
|
||||
|
||||
/*
 * strnode: return the string value stored in the cell hanging off p.
 * No check is made here that narg[0] really holds a Cell; the grammar calls
 * this only after constnode(p) has succeeded.
 */
char *strnode(Node *p)
{
	Cell *cell = (Cell *) p->narg[0];

	return cell->sval;
}
|
||||
|
||||
/*
 * notnull: make n usable as a truth value.
 * Comparison and logical operators are returned untouched; anything else is
 * wrapped as (n != nullnode).
 */
Node *notnull(Node *n)
{
	int op = n->nobj;
	int is_boolean_op =
		op == LE || op == LT || op == EQ || op == NE || op == GT || op == GE ||
		op == BOR || op == AND || op == NOT;

	if (is_boolean_op)
		return n;
	return op2(NE, n, nullnode);
}
|
||||
|
||||
/*
 * checkdup: check if name already in list.
 * Walks the argument list vl and reports a syntax error, once, if a cell
 * with the same name as cp is already present.
 */
void checkdup(Node *vl, Cell *cp)
{
	char *name = cp->nval;
	Node *arg = vl;

	while (arg) {
		Cell *seen = (Cell *) arg->narg[0];

		if (strcmp(name, seen->nval) == 0) {
			SYNTAX("duplicate argument %s", name);
			return;		/* one complaint is enough */
		}
		arg = arg->nnext;
	}
}
|
|
@ -0,0 +1,954 @@
|
|||
/****************************************************************
|
||||
Copyright (C) Lucent Technologies 1997
|
||||
All Rights Reserved
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and
|
||||
its documentation for any purpose and without fee is hereby
|
||||
granted, provided that the above copyright notice appear in all
|
||||
copies and that both that the copyright notice and this
|
||||
permission notice and warranty disclaimer appear in supporting
|
||||
documentation, and that the name Lucent Technologies or any of
|
||||
its entities not be used in advertising or publicity pertaining
|
||||
to distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
|
||||
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
|
||||
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
||||
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
/* lasciate ogne speranza, voi ch'intrate. */
|
||||
|
||||
#define DEBUG
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "awk.h"
|
||||
#include "ytab.h"
|
||||
|
||||
#define HAT (NCHARS+2) /* matches ^ in regular expr */
|
||||
/* NOTE(review): the header defines NCHARS as (256+3), so it is not 2**n */
|
||||
#define MAXLIN 22
|
||||
|
||||
#define type(v) (v)->nobj /* badly overloaded here */
|
||||
#define info(v) (v)->ntype /* badly overloaded here */
|
||||
#define left(v) (v)->narg[0]
|
||||
#define right(v) (v)->narg[1]
|
||||
#define parent(v) (v)->nnext
|
||||
|
||||
#define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
|
||||
#define ELEAF case EMPTYRE: /* empty string in regexp */
|
||||
#define UNARY case STAR: case PLUS: case QUEST:
|
||||
|
||||
/* encoding in tree Nodes:
|
||||
leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
|
||||
left is index, right contains value or pointer to value
|
||||
unary (STAR, PLUS, QUEST): left is child, right is null
|
||||
binary (CAT, OR): left and right are children
|
||||
parent contains pointer to parent
|
||||
*/
|
||||
|
||||
|
||||
int *setvec;
|
||||
int *tmpset;
|
||||
int maxsetvec = 0;
|
||||
|
||||
int rtok; /* next token in current re */
|
||||
int rlxval;
|
||||
static uschar *rlxstr;
|
||||
static uschar *prestr; /* current position in current re */
|
||||
static uschar *lastre; /* origin of last re */
|
||||
|
||||
static int setcnt;
|
||||
static int poscnt;
|
||||
|
||||
char *patbeg;
|
||||
int patlen;
|
||||
|
||||
#define NFA 20 /* cache this many dynamic fa's */
|
||||
fa *fatab[NFA];
|
||||
int nfatab = 0; /* entries in fatab */
|
||||
|
||||
fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
|
||||
{
|
||||
int i, use, nuse;
|
||||
fa *pfa;
|
||||
static int now = 1;
|
||||
|
||||
if (setvec == 0) { /* first time through any RE */
|
||||
maxsetvec = MAXLIN;
|
||||
setvec = (int *) malloc(maxsetvec * sizeof(int));
|
||||
tmpset = (int *) malloc(maxsetvec * sizeof(int));
|
||||
if (setvec == 0 || tmpset == 0)
|
||||
overflo("out of space initializing makedfa");
|
||||
}
|
||||
|
||||
if (compile_time) /* a constant for sure */
|
||||
return mkdfa(s, anchor);
|
||||
for (i = 0; i < nfatab; i++) /* is it there already? */
|
||||
if (fatab[i]->anchor == anchor
|
||||
&& strcmp((const char *) fatab[i]->restr, s) == 0) {
|
||||
fatab[i]->use = now++;
|
||||
return fatab[i];
|
||||
}
|
||||
pfa = mkdfa(s, anchor);
|
||||
if (nfatab < NFA) { /* room for another */
|
||||
fatab[nfatab] = pfa;
|
||||
fatab[nfatab]->use = now++;
|
||||
nfatab++;
|
||||
return pfa;
|
||||
}
|
||||
use = fatab[0]->use; /* replace least-recently used */
|
||||
nuse = 0;
|
||||
for (i = 1; i < nfatab; i++)
|
||||
if (fatab[i]->use < use) {
|
||||
use = fatab[i]->use;
|
||||
nuse = i;
|
||||
}
|
||||
freefa(fatab[nuse]);
|
||||
fatab[nuse] = pfa;
|
||||
pfa->use = now++;
|
||||
return pfa;
|
||||
}
|
||||
|
||||
fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
|
||||
/* anchor = 1 for anchored matches, else 0 */
|
||||
{
|
||||
Node *p, *p1;
|
||||
fa *f;
|
||||
|
||||
p = reparse(s);
|
||||
p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
|
||||
/* put ALL STAR in front of reg. exp. */
|
||||
p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
|
||||
/* put FINAL after reg. exp. */
|
||||
|
||||
poscnt = 0;
|
||||
penter(p1); /* enter parent pointers and leaf indices */
|
||||
if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
|
||||
overflo("out of space for fa");
|
||||
f->accept = poscnt-1; /* penter has computed number of positions in re */
|
||||
cfoll(f, p1); /* set up follow sets */
|
||||
freetr(p1);
|
||||
if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
|
||||
overflo("out of space in makedfa");
|
||||
if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
|
||||
overflo("out of space in makedfa");
|
||||
*f->posns[1] = 0;
|
||||
f->initstat = makeinit(f, anchor);
|
||||
f->anchor = anchor;
|
||||
f->restr = (uschar *) tostring(s);
|
||||
return f;
|
||||
}
|
||||
|
||||
/*
 * makeinit: (re)build state 2, the start state, from the follow set of
 * position 0 and return the state reached from it on the HAT pseudo-input.
 * For an anchored automaton the leading count is reduced by one ("leave out
 * position 0") and the list is also copied into posns[0].
 */
int makeinit(fa *f, int anchor)
{
	int i, k;
	int *from, *start;

	f->curstat = 2;
	f->out[2] = 0;
	f->reset = 0;

	from = f->re[0].lfollow;
	k = from[0];			/* number of positions that follow */
	xfree(f->posns[2]);
	start = (int *) calloc(1, (k+1)*sizeof(int));
	f->posns[2] = start;
	if (start == NULL)
		overflo("out of space in makeinit");
	for (i = 0; i <= k; i++)	/* count plus k members */
		start[i] = from[i];
	if (start[1] == f->accept)
		f->out[2] = 1;
	memset(f->gototab[2], 0, sizeof f->gototab[2]);

	/* posns[] is re-read from f below rather than reusing start, since cgoto works on f */
	f->curstat = cgoto(f, 2, HAT);
	if (anchor) {
		f->posns[2][0] = k-1;	/* leave out position 0 */
		for (i = 0; i < k; i++)
			f->posns[0][i] = f->posns[2][i];

		f->out[0] = f->out[2];
		if (f->curstat != 2)
			f->posns[f->curstat][0]--;
	}
	return f->curstat;
}
|
||||
|
||||
/*
 * penter: set up parent pointers and leaf indices.
 * Leaves are numbered in visiting order from the global poscnt, and each
 * child's parent link is pointed back at p.
 */
void penter(Node *p)
{
	Node *lhs, *rhs;

	switch (type(p)) {
	ELEAF
	LEAF
		info(p) = poscnt++;
		break;
	UNARY
		lhs = left(p);
		penter(lhs);
		parent(lhs) = p;
		break;
	case CAT:
	case OR:
		lhs = left(p);
		rhs = right(p);
		penter(lhs);
		penter(rhs);
		parent(lhs) = p;
		parent(rhs) = p;
		break;
	default:	/* can't happen */
		FATAL("can't happen: unknown type %d in penter", type(p));
		break;
	}
}
|
||||
|
||||
/*
 * freetr: free parse tree.
 * Subtrees are released first, then the node itself.  Only Node storage is
 * freed here; whatever a leaf's right slot refers to is left alone.
 */
void freetr(Node *p)
{
	switch (type(p)) {
	ELEAF
	LEAF
		break;			/* nothing below a leaf */
	UNARY
		freetr(left(p));
		break;
	case CAT:
	case OR:
		freetr(left(p));
		freetr(right(p));
		break;
	default:	/* can't happen */
		FATAL("can't happen: unknown type %d in freetr", type(p));
		return;
	}
	xfree(p);
}
|
||||
|
||||
/* in the parsing of regular expressions, metacharacters like . have */
|
||||
/* to be seen literally; \056 is not a metacharacter. */
|
||||
|
||||
/*
 * hexstr: evaluate the hex digits at *pp and return their value.
 * Only one 8-bit byte is picked up (at most 2 digits).  *pp is advanced
 * past the digits consumed; with no hex digit present the result is 0 and
 * *pp does not move.
 */
int hexstr(char **pp)
{
	unsigned char *p = (unsigned char *) *pp;
	int n = 0;
	int i;

	for (i = 0; i < 2 && isxdigit(*p); i++, p++) {
		if (isdigit(*p))
			n = 16 * n + *p - '0';
		else if (*p >= 'a' && *p <= 'f')
			n = 16 * n + *p - 'a' + 10;
		else if (*p >= 'A' && *p <= 'F')
			n = 16 * n + *p - 'A' + 10;
	}
	*pp = (char *) p;
	return n;
}

#define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */

/*
 * quoted: pick up next thing after a \\ and increment *pp.
 *
 * On entry *pp points just past a backslash.  Returns the character the
 * escape stands for:
 *	\t \n \f \r \b \\	the usual control characters
 *	\xh \xhh		one byte given in hex (0 if no digit follows)
 *	\d \dd \ddd		a value given in octal
 *	anything else		that character itself
 *
 * The input is read as unsigned bytes, so an escaped byte above 0x7f comes
 * back as a value in 128..255 whatever the signedness of plain char.
 *
 * A backslash that is the last character of the string yields a literal
 * backslash, and *pp is left on the terminating NUL so that callers cannot
 * run off the end of the buffer.
 */
int quoted(char **pp)
{
	unsigned char *p = (unsigned char *) *pp;
	int c = *p;

	if (c == '\0')		/* lone trailing backslash: do not step over the NUL */
		return '\\';
	p++;

	switch (c) {
	case 't':
		c = '\t';
		break;
	case 'n':
		c = '\n';
		break;
	case 'f':
		c = '\f';
		break;
	case 'r':
		c = '\r';
		break;
	case 'b':
		c = '\b';
		break;
	case 'x': {		/* hexadecimal goo follows */
		char *q = (char *) p;

		c = hexstr(&q);	/* yields 0 if number is invalid */
		p = (unsigned char *) q;
		break;
	}
	default:
		if (isoctdigit(c)) {	/* \d \dd \ddd */
			int n = c - '0';

			if (isoctdigit(*p)) {
				n = 8 * n + *p++ - '0';
				if (isoctdigit(*p))
					n = 8 * n + *p++ - '0';
			}
			c = n;
		}
		/* otherwise (including \\) the character stands for itself */
		break;
	}
	*pp = (char *) p;
	return c;
}
|
||||
|
||||
char *cclenter(const char *argp) /* add a character class */
|
||||
{
|
||||
int i, c, c2;
|
||||
uschar *p = (uschar *) argp;
|
||||
uschar *op, *bp;
|
||||
static uschar *buf = 0;
|
||||
static int bufsz = 100;
|
||||
|
||||
op = p;
|
||||
if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
|
||||
FATAL("out of space for character class [%.10s...] 1", p);
|
||||
bp = buf;
|
||||
for (i = 0; (c = *p++) != 0; ) {
|
||||
if (c == '\\') {
|
||||
c = quoted((char **) &p);
|
||||
} else if (c == '-' && i > 0 && bp[-1] != 0) {
|
||||
if (*p != 0) {
|
||||
c = bp[-1];
|
||||
c2 = *p++;
|
||||
if (c2 == '\\')
|
||||
c2 = quoted((char **) &p);
|
||||
if (c > c2) { /* empty; ignore */
|
||||
bp--;
|
||||
i--;
|
||||
continue;
|
||||
}
|
||||
while (c < c2) {
|
||||
if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
|
||||
FATAL("out of space for character class [%.10s...] 2", p);
|
||||
*bp++ = ++c;
|
||||
i++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
|
||||
FATAL("out of space for character class [%.10s...] 3", p);
|
||||
*bp++ = c;
|
||||
i++;
|
||||
}
|
||||
*bp = 0;
|
||||
dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
|
||||
xfree(op);
|
||||
return (char *) tostring((char *) buf);
|
||||
}
|
||||
|
||||
/*
 * overflo: report a resource failure in the regular-expression code.
 * s says where it happened; at most 30 characters of it are printed.
 */
void overflo(const char *s)
{
	FATAL("regular expression too big: %.30s...", s);
}
|
||||
|
||||
/*
 * cfoll: enter follow set of each leaf of vertex v into lfollow[leaf].
 *
 * For a leaf, its type and value are copied into the leaf's row of f->re,
 * the shared setvec is cleared, follow() marks the positions that can come
 * next, and the marked positions are stored (count first, then members from
 * the highest index down) in a freshly allocated array.  Interior nodes
 * simply recurse into their children.
 */
void cfoll(fa *f, Node *v)
{
	rrow *row;
	int *set;
	int i, n;

	switch (type(v)) {
	ELEAF
	LEAF
		row = &f->re[info(v)];
		row->ltype = type(v);
		row->lval.np = right(v);
		while (f->accept >= maxsetvec) {	/* guessing here! */
			maxsetvec *= 4;
			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
			if (setvec == 0 || tmpset == 0)
				overflo("out of space in cfoll()");
		}
		for (i = 0; i <= f->accept; i++)
			setvec[i] = 0;
		setcnt = 0;
		follow(v);	/* computes setvec and setcnt */
		set = (int *) calloc(1, (setcnt+1)*sizeof(int));
		if (set == NULL)
			overflo("out of space building follow set");
		row->lfollow = set;
		set[0] = setcnt;
		n = 0;
		for (i = f->accept; i >= 0; i--) {
			if (setvec[i] == 1)
				set[++n] = i;
		}
		break;
	UNARY
		cfoll(f, left(v));
		break;
	case CAT:
	case OR:
		cfoll(f, left(v));
		cfoll(f, right(v));
		break;
	default:	/* can't happen */
		FATAL("can't happen: unknown type %d in cfoll", type(v));
	}
}
|
||||
|
||||
/*
 * first: collects initially active leaves of p into setvec.
 * Returns 0 if p matches empty string, 1 otherwise (-1 after the
 * "can't happen" diagnostic).  setvec/tmpset are grown on demand; setcnt is
 * bumped for every leaf newly marked.
 */
int first(Node *p)
{
	int lp, lres, rres;

	switch (type(p)) {
	ELEAF
	LEAF
		lp = info(p);	/* look for high-water mark of subscripts */
		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
			maxsetvec *= 4;
			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
			if (setvec == 0 || tmpset == 0)
				overflo("out of space in first()");
		}
		if (type(p) == EMPTYRE) {
			setvec[lp] = 0;
			return 0;
		}
		if (setvec[lp] != 1) {
			setvec[lp] = 1;
			setcnt++;
		}
		if (type(p) == CCL && (*(char *) right(p)) == '\0')
			return 0;		/* empty CCL */
		return 1;
	case PLUS:
		return first(left(p)) != 0;
	case STAR:
	case QUEST:
		first(left(p));
		return 0;
	case CAT:
		/* the right side is looked at only when the left can be empty */
		return first(left(p)) != 0 || first(right(p)) != 0;
	case OR:
		/* both sides are always visited, right one first */
		rres = first(right(p));
		lres = first(left(p));
		return lres != 0 && rres != 0;
	}
	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
	return -1;
}
|
||||
|
||||
/*
 * follow: collects leaves that can follow v into setvec.
 *
 * Climbs from v towards the root.  Under STAR/PLUS the node can be followed
 * by itself, so its own first set is added.  Under CAT a left child is
 * followed by the first set of its right sibling, and the climb goes on only
 * if that sibling can match the empty string.  The climb stops at a FINAL
 * node or at a parent of any other type.
 */
void follow(Node *v)
{
	while (type(v) != FINAL) {
		Node *up = parent(v);

		switch (type(up)) {
		case STAR:
		case PLUS:
			first(v);
			break;
		case OR:
		case QUEST:
			break;
		case CAT:
			/* v is left child of up and the right sibling cannot be empty: done */
			if (v == left(up) && first(right(up)) != 0)
				return;
			break;
		default:
			return;
		}
		v = up;
	}
}
|
||||
|
||||
int member(int c, const char *sarg)	/* does c occur in sarg? */
{
	const unsigned char *cur;

	/* bytes are compared as unsigned values, so c in 128..255 can match */
	for (cur = (const unsigned char *) sarg; *cur != 0; cur++) {
		if (*cur == c)
			return 1;
	}
	return 0;
}
|
||||
|
||||
int match(fa *f, const char *p0)	/* shortest match ? */
{
	uschar *cp = (uschar *) p0;
	int state, next;

	if (f->reset)
		state = makeinit(f, 0);
	else
		state = f->initstat;
	if (f->out[state])
		return 1;
	for (;;) {
		/* assert(*cp < NCHARS); */
		/* take the cached transition if there is one, otherwise have cgoto() compute it */
		next = f->gototab[state][*cp];
		state = (next != 0) ? next : cgoto(f, state, *cp);
		if (f->out[state])
			return 1;
		/* the terminating NUL is fed to the automaton too, then the scan stops */
		if (*cp++ == 0)
			break;
	}
	return 0;
}
|
||||
|
||||
int pmatch(fa *f, const char *p0)	/* longest match, for sub */
{
	uschar *start = (uschar *) p0;	/* candidate start of the match */
	uschar *scan;			/* current position within this attempt */
	int state, next;
	int idx, npos;
	int stuck;			/* set when this attempt hit state 1 without a match */

	/* state = f->reset ? makeinit(f,1) : f->initstat; */
	if (f->reset)
		f->initstat = state = makeinit(f, 1);
	else
		state = f->initstat;
	patbeg = (char *) start;
	patlen = -1;
	for (;;) {
		stuck = 0;
		scan = start;
		for (;;) {
			if (f->out[state])		/* final state */
				patlen = scan - start;
			/* assert(*scan < NCHARS); */
			next = f->gototab[state][*scan];
			state = (next != 0) ? next : cgoto(f, state, *scan);
			if (state == 1) {	/* no transition */
				if (patlen >= 0) {
					patbeg = (char *) start;
					return 1;
				}
				stuck = 1;	/* no match */
				break;
			}
			if (*scan++ == 0)
				break;
		}
		if (!stuck) {
			if (f->out[state])
				patlen = scan - start - 1;	/* don't count $ */
			if (patlen >= 0) {
				patbeg = (char *) start;
				return 1;
			}
		}
		/* restart from state 2 for the next start position */
		state = 2;
		if (f->reset) {
			/* drop states 2..curstat and rebuild state 2 as a copy of state 0 */
			for (idx = 2; idx <= f->curstat; idx++)
				xfree(f->posns[idx]);
			npos = *f->posns[0];
			if ((f->posns[2] = (int *) calloc(1, (npos+1)*sizeof(int))) == NULL)
				overflo("out of space in pmatch");
			for (idx = 0; idx <= npos; idx++)
				(f->posns[2])[idx] = (f->posns[0])[idx];
			f->initstat = f->curstat = 2;
			f->out[2] = f->out[0];
			for (idx = 0; idx < NCHARS; idx++)
				f->gototab[2][idx] = 0;
		}
		if (*start++ == 0)
			break;
	}
	return 0;
}
|
||||
|
||||
int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
{
	uschar *start = (uschar *) p0;	/* candidate start of the match */
	uschar *scan;			/* current position within this attempt */
	int state, next;
	int idx, npos;
	int stuck;			/* set when this attempt hit state 1 without a match */

	/* state = f->reset ? makeinit(f,1) : f->initstat; */
	if (f->reset)
		f->initstat = state = makeinit(f, 1);
	else
		state = f->initstat;
	patlen = -1;
	for ( ; *start != 0; start++) {
		stuck = 0;
		scan = start;
		for (;;) {
			if (f->out[state])		/* final state */
				patlen = scan - start;
			/* assert(*scan < NCHARS); */
			next = f->gototab[state][*scan];
			state = (next != 0) ? next : cgoto(f, state, *scan);
			if (state == 1) {	/* no transition */
				if (patlen > 0) {
					patbeg = (char *) start;
					return 1;
				}
				stuck = 1;	/* no nonempty match */
				break;
			}
			if (*scan++ == 0)
				break;
		}
		if (!stuck) {
			if (f->out[state])
				patlen = scan - start - 1;	/* don't count $ */
			if (patlen > 0) {
				patbeg = (char *) start;
				return 1;
			}
		}
		/* restart from state 2 for the next start position */
		state = 2;
		if (f->reset) {
			/* drop states 2..curstat and rebuild state 2 as a copy of state 0 */
			for (idx = 2; idx <= f->curstat; idx++)
				xfree(f->posns[idx]);
			npos = *f->posns[0];
			if ((f->posns[2] = (int *) calloc(1, (npos+1)*sizeof(int))) == NULL)
				overflo("out of state space");
			for (idx = 0; idx <= npos; idx++)
				(f->posns[2])[idx] = (f->posns[0])[idx];
			f->initstat = f->curstat = 2;
			f->out[2] = f->out[0];
			for (idx = 0; idx < NCHARS; idx++)
				f->gototab[2][idx] = 0;
		}
	}
	return 0;
}
|
||||
|
||||
Node *reparse(const char *p) /* parses regular expression pointed to by p */
|
||||
{ /* uses relex() to scan regular expression */
|
||||
Node *np;
|
||||
|
||||
dprintf( ("reparse <%s>\n", p) );
|
||||
lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
|
||||
rtok = relex();
|
||||
/* GNU compatibility: an empty regexp matches anything */
|
||||
if (rtok == '\0') {
|
||||
/* FATAL("empty regular expression"); previous */
|
||||
return(op2(EMPTYRE, NIL, NIL));
|
||||
}
|
||||
np = regexp();
|
||||
if (rtok != '\0')
|
||||
FATAL("syntax error in regular expression %s at %s", lastre, prestr);
|
||||
return(np);
|
||||
}
|
||||
|
||||
Node *regexp(void)	/* top-level parse of reg expr */
{
	Node *head, *sequence;

	/* one primary, extended by concatenation, then by alternation */
	head = primary();
	sequence = concat(head);
	return alt(sequence);
}
|
||||
|
||||
Node *primary(void)
|
||||
{
|
||||
Node *np;
|
||||
|
||||
switch (rtok) {
|
||||
case CHAR:
|
||||
np = op2(CHAR, NIL, itonp(rlxval));
|
||||
rtok = relex();
|
||||
return (unary(np));
|
||||
case ALL:
|
||||
rtok = relex();
|
||||
return (unary(op2(ALL, NIL, NIL)));
|
||||
case EMPTYRE:
|
||||
rtok = relex();
|
||||
return (unary(op2(ALL, NIL, NIL)));
|
||||
case DOT:
|
||||
rtok = relex();
|
||||
return (unary(op2(DOT, NIL, NIL)));
|
||||
case CCL:
|
||||
np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
|
||||
rtok = relex();
|
||||
return (unary(np));
|
||||
case NCCL:
|
||||
np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
|
||||
rtok = relex();
|
||||
return (unary(np));
|
||||
case '^':
|
||||
rtok = relex();
|
||||
return (unary(op2(CHAR, NIL, itonp(HAT))));
|
||||
case '$':
|
||||
rtok = relex();
|
||||
return (unary(op2(CHAR, NIL, NIL)));
|
||||
case '(':
|
||||
rtok = relex();
|
||||
if (rtok == ')') { /* special pleading for () */
|
||||
rtok = relex();
|
||||
return unary(op2(CCL, NIL, (Node *) tostring("")));
|
||||
}
|
||||
np = regexp();
|
||||
if (rtok == ')') {
|
||||
rtok = relex();
|
||||
return (unary(np));
|
||||
}
|
||||
else
|
||||
FATAL("syntax error in regular expression %s at %s", lastre, prestr);
|
||||
default:
|
||||
FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
|
||||
}
|
||||
return 0; /*NOTREACHED*/
|
||||
}
|
||||
|
||||
Node *concat(Node *np)	/* extends np with CAT nodes while the next token can start a primary */
{
	for (;;) {
		switch (rtok) {
		case CHAR: case DOT: case ALL: case EMPTYRE:
		case CCL: case NCCL: case '$': case '(':
			np = op2(CAT, np, primary());
			continue;	/* look at the token after that primary */
		}
		return np;
	}
}
|
||||
|
||||
Node *alt(Node *np)	/* extends np with OR nodes, one per alternation token */
{
	while (rtok == OR) {
		rtok = relex();
		/* the right operand is a complete concatenation */
		np = op2(OR, np, concat(primary()));
	}
	return np;
}
|
||||
|
||||
Node *unary(Node *np)	/* wraps np in a node for each postfix STAR, PLUS or QUEST token */
{
	int op;

	while (rtok == STAR || rtok == PLUS || rtok == QUEST) {
		op = rtok;	/* the node type is the token itself */
		rtok = relex();
		np = op2(op, np, NIL);
	}
	return np;
}
|
||||
|
||||
/*
|
||||
* Character class definitions conformant to the POSIX locale as
|
||||
* defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
|
||||
* and operating character sets are both ASCII (ISO646) or supersets
|
||||
* thereof.
|
||||
*
|
||||
* Note that to avoid overflowing the temporary buffer used in
|
||||
* relex(), the expanded character class (prior to range expansion)
|
||||
* must be less than twice the size of their full name.
|
||||
*/
|
||||
|
||||
/* Because isblank doesn't show up in any of the header files on any
|
||||
* system i use, it's defined here. if some other locale has a richer
|
||||
* definition of "blank", define HAS_ISBLANK and provide your own
|
||||
* version.
|
||||
* the parentheses here are an attempt to find a path through the maze
|
||||
* of macro definition and/or function and/or version provided. thanks
|
||||
* to nelson beebe for the suggestion; let's see if it works everywhere.
|
||||
*/
|
||||
|
||||
/* #define HAS_ISBLANK */
#ifndef HAS_ISBLANK

/* Local stand-in for isblank(): nonzero only for space and horizontal tab. */
int (xisblank)(int c)
{
	return c==' ' || c=='\t';
}

#endif

/*
 * Table of the named character classes accepted inside brackets.
 * cc_name is the class name, cc_namelen its length, and cc_func the
 * predicate that decides membership.  The entry with a NULL name ends
 * the table.
 */
struct charclass {
	const char *cc_name;
	int cc_namelen;
	int (*cc_func)(int);
} charclasses[] = {
	{ "alnum",	5,	isalnum },
	{ "alpha",	5,	isalpha },
	/*
	 * "blank" means space and tab only.  It used to be mapped to
	 * isspace, which also accepts newline, CR, VT and FF.
	 */
#ifndef HAS_ISBLANK
	{ "blank",	5,	xisblank },
#else
	{ "blank",	5,	isblank },
#endif
	{ "cntrl",	5,	iscntrl },
	{ "digit",	5,	isdigit },
	{ "graph",	5,	isgraph },
	{ "lower",	5,	islower },
	{ "print",	5,	isprint },
	{ "punct",	5,	ispunct },
	{ "space",	5,	isspace },
	{ "upper",	5,	isupper },
	{ "xdigit",	6,	isxdigit },
	{ NULL,		0,	NULL },
};
|
||||
|
||||
|
||||
int relex(void) /* lexical analyzer for reparse */
|
||||
{
|
||||
int c, n;
|
||||
int cflag;
|
||||
static uschar *buf = 0;
|
||||
static int bufsz = 100;
|
||||
uschar *bp;
|
||||
struct charclass *cc;
|
||||
int i;
|
||||
|
||||
switch (c = *prestr++) {
|
||||
case '|': return OR;
|
||||
case '*': return STAR;
|
||||
case '+': return PLUS;
|
||||
case '?': return QUEST;
|
||||
case '.': return DOT;
|
||||
case '\0': prestr--; return '\0';
|
||||
case '^':
|
||||
case '$':
|
||||
case '(':
|
||||
case ')':
|
||||
return c;
|
||||
case '\\':
|
||||
rlxval = quoted((char **) &prestr);
|
||||
return CHAR;
|
||||
default:
|
||||
rlxval = c;
|
||||
return CHAR;
|
||||
case '[':
|
||||
if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
|
||||
FATAL("out of space in reg expr %.10s..", lastre);
|
||||
bp = buf;
|
||||
if (*prestr == '^') {
|
||||
cflag = 1;
|
||||
prestr++;
|
||||
}
|
||||
else
|
||||
cflag = 0;
|
||||
n = 2 * strlen((const char *) prestr)+1;
|
||||
if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
|
||||
FATAL("out of space for reg expr %.10s...", lastre);
|
||||
for (; ; ) {
|
||||
if ((c = *prestr++) == '\\') {
|
||||
*bp++ = '\\';
|
||||
if ((c = *prestr++) == '\0')
|
||||
FATAL("nonterminated character class %.20s...", lastre);
|
||||
*bp++ = c;
|
||||
/* } else if (c == '\n') { */
|
||||
/* FATAL("newline in character class %.20s...", lastre); */
|
||||
} else if (c == '[' && *prestr == ':') {
|
||||
/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
|
||||
for (cc = charclasses; cc->cc_name; cc++)
|
||||
if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
|
||||
break;
|
||||
if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
|
||||
prestr[2 + cc->cc_namelen] == ']') {
|
||||
prestr += cc->cc_namelen + 3;
|
||||
for (i = 0; i < NCHARS; i++) {
|
||||
if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
|
||||
FATAL("out of space for reg expr %.10s...", lastre);
|
||||
if (cc->cc_func(i)) {
|
||||
*bp++ = i;
|
||||
n++;
|
||||
}
|
||||
}
|
||||
} else
|
||||
*bp++ = c;
|
||||
} else if (c == '\0') {
|
||||
FATAL("nonterminated character class %.20s", lastre);
|
||||
} else if (bp == buf) { /* 1st char is special */
|
||||
*bp++ = c;
|
||||
} else if (c == ']') {
|
||||
*bp++ = 0;
|
||||
rlxstr = (uschar *) tostring((char *) buf);
|
||||
if (cflag == 0)
|
||||
return CCL;
|
||||
else
|
||||
return NCCL;
|
||||
} else
|
||||
*bp++ = c;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int cgoto(fa *f, int s, int c)	/* finds or creates the state reached from s on c; caches it in gototab */
{
	int i, j, ltype, hit, same;
	int *posn, *fol;

	assert(c == HAT || c < NCHARS);
	while (f->accept >= maxsetvec) {	/* guessing here! */
		maxsetvec *= 4;
		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
		if (setvec == 0 || tmpset == 0)
			overflo("out of space in cgoto()");
	}
	for (i = 0; i <= f->accept; i++)
		setvec[i] = 0;
	setcnt = 0;

	/* compute positions of gototab[s,c] into setvec */
	posn = f->posns[s];
	for (i = 1; i <= *posn; i++) {
		ltype = f->re[posn[i]].ltype;
		/* does this leaf accept c?  any other type (FINAL included) never does */
		switch (ltype) {
		case CHAR:
			hit = (c == ptoi(f->re[posn[i]].lval.np));
			break;
		case DOT:
			hit = (c != 0 && c != HAT);
			break;
		case ALL:
		case EMPTYRE:
			hit = (c != 0);
			break;
		case CCL:
			hit = member(c, (char *) f->re[posn[i]].lval.up);
			break;
		case NCCL:
			hit = (!member(c, (char *) f->re[posn[i]].lval.up) && c != 0 && c != HAT);
			break;
		default:
			hit = 0;
			break;
		}
		if (!hit)
			continue;
		/* add every position in this leaf's follow list to the set */
		fol = f->re[posn[i]].lfollow;
		for (j = 1; j <= *fol; j++) {
			if (fol[j] >= maxsetvec) {
				maxsetvec *= 4;
				setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
				tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
				if (setvec == 0 || tmpset == 0)
					overflo("cgoto overflow");
			}
			if (setvec[fol[j]] == 0) {
				setcnt++;
				setvec[fol[j]] = 1;
			}
		}
	}

	/* determine if setvec is a previous state */
	/* tmpset[0] is the count, followed by the positions from highest to lowest */
	tmpset[0] = setcnt;
	j = 1;
	for (i = f->accept; i >= 0; i--) {
		if (setvec[i])
			tmpset[j++] = i;
	}
	/* tmpset == previous state? */
	for (i = 1; i <= f->curstat; i++) {
		posn = f->posns[i];
		same = (tmpset[0] == posn[0]);
		for (j = 1; same && j <= tmpset[0]; j++) {
			if (tmpset[j] != posn[j])
				same = 0;
		}
		if (same) {
			/* setvec is state i */
			f->gototab[s][c] = i;
			return i;
		}
	}

	/* add tmpset to current set of states */
	if (f->curstat >= NSTATES-1) {
		/* table full: free states 2 and up, reuse slot 2, and raise the reset flag */
		f->curstat = 2;
		f->reset = 1;
		for (i = 2; i < NSTATES; i++)
			xfree(f->posns[i]);
	} else {
		++(f->curstat);
	}
	for (i = 0; i < NCHARS; i++)
		f->gototab[f->curstat][i] = 0;
	xfree(f->posns[f->curstat]);
	posn = (int *) calloc(1, (setcnt+1)*sizeof(int));
	if (posn == NULL)
		overflo("out of space in cgoto");

	f->posns[f->curstat] = posn;
	f->gototab[s][c] = f->curstat;
	for (i = 0; i <= setcnt; i++)
		posn[i] = tmpset[i];
	/* the new state is final when it holds the accept position */
	f->out[f->curstat] = setvec[f->accept] ? 1 : 0;
	return f->curstat;
}
|
||||
|
||||
|
||||
void freefa(fa *f)	/* free a finite automaton */
{
	int i;

	if (f == NULL)
		return;
	/* position sets of every state in use */
	i = 0;
	while (i <= f->curstat) {
		xfree(f->posns[i]);
		i++;
	}
	/* per-leaf data: the follow list, plus the value held by CCL and NCCL leaves */
	i = 0;
	while (i <= f->accept) {
		xfree(f->re[i].lfollow);
		switch (f->re[i].ltype) {
		case CCL:
		case NCCL:
			xfree((f->re[i].lval.np));
			break;
		default:
			break;
		}
		i++;
	}
	xfree(f->restr);
	xfree(f);
}
|
|
@ -0,0 +1,582 @@
|
|||
/****************************************************************
|
||||
Copyright (C) Lucent Technologies 1997
|
||||
All Rights Reserved
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and
|
||||
its documentation for any purpose and without fee is hereby
|
||||
granted, provided that the above copyright notice appear in all
|
||||
copies and that both that the copyright notice and this
|
||||
permission notice and warranty disclaimer appear in supporting
|
||||
documentation, and that the name Lucent Technologies or any of
|
||||
its entities not be used in advertising or publicity pertaining
|
||||
to distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
|
||||
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
|
||||
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
||||
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "awk.h"
|
||||
#include "ytab.h"
|
||||
|
||||
extern YYSTYPE yylval;
|
||||
extern int infunc;
|
||||
|
||||
int lineno = 1;
|
||||
int bracecnt = 0;
|
||||
int brackcnt = 0;
|
||||
int parencnt = 0;
|
||||
|
||||
typedef struct Keyword {
|
||||
const char *word;
|
||||
int sub;
|
||||
int type;
|
||||
} Keyword;
|
||||
|
||||
Keyword keywords[] ={ /* keep sorted: binary searched */
|
||||
{ "BEGIN", XBEGIN, XBEGIN },
|
||||
{ "END", XEND, XEND },
|
||||
{ "NF", VARNF, VARNF },
|
||||
{ "atan2", FATAN, BLTIN },
|
||||
{ "break", BREAK, BREAK },
|
||||
{ "close", CLOSE, CLOSE },
|
||||
{ "continue", CONTINUE, CONTINUE },
|
||||
{ "cos", FCOS, BLTIN },
|
||||
{ "delete", DELETE, DELETE },
|
||||
{ "do", DO, DO },
|
||||
{ "else", ELSE, ELSE },
|
||||
{ "exit", EXIT, EXIT },
|
||||
{ "exp", FEXP, BLTIN },
|
||||
{ "fflush", FFLUSH, BLTIN },
|
||||
{ "for", FOR, FOR },
|
||||
{ "func", FUNC, FUNC },
|
||||
{ "function", FUNC, FUNC },
|
||||
{ "getline", GETLINE, GETLINE },
|
||||
{ "gsub", GSUB, GSUB },
|
||||
{ "if", IF, IF },
|
||||
{ "in", IN, IN },
|
||||
{ "index", INDEX, INDEX },
|
||||
{ "int", FINT, BLTIN },
|
||||
{ "length", FLENGTH, BLTIN },
|
||||
{ "log", FLOG, BLTIN },
|
||||
{ "match", MATCHFCN, MATCHFCN },
|
||||
{ "next", NEXT, NEXT },
|
||||
{ "nextfile", NEXTFILE, NEXTFILE },
|
||||
{ "print", PRINT, PRINT },
|
||||
{ "printf", PRINTF, PRINTF },
|
||||
{ "rand", FRAND, BLTIN },
|
||||
{ "return", RETURN, RETURN },
|
||||
{ "sin", FSIN, BLTIN },
|
||||
{ "split", SPLIT, SPLIT },
|
||||
{ "sprintf", SPRINTF, SPRINTF },
|
||||
{ "sqrt", FSQRT, BLTIN },
|
||||
{ "srand", FSRAND, BLTIN },
|
||||
{ "sub", SUB, SUB },
|
||||
{ "substr", SUBSTR, SUBSTR },
|
||||
{ "system", FSYSTEM, BLTIN },
|
||||
{ "tolower", FTOLOWER, BLTIN },
|
||||
{ "toupper", FTOUPPER, BLTIN },
|
||||
{ "while", WHILE, WHILE },
|
||||
};
|
||||
|
||||
#define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); }
|
||||
|
||||
int peek(void)	/* returns the next input character and pushes it straight back */
{
	int next;

	next = input();
	unput(next);
	return next;
}
|
||||
|
||||
/*
 * Reads the next token into *pbuf, growing the buffer when needed.
 * *pbuf and *psz are updated on the paths that can grow it.
 * Returns:
 *	0	at end of input
 *	'a'	for a name (letters, digits, '_'); text is in *pbuf
 *	'0'	for a number; text is in *pbuf
 *	otherwise the single character read, which is also in *pbuf
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/*
			 * Keep room for this character and for the NUL stored
			 * after the loop.  The old test (bp-buf >= sz) grew
			 * the buffer one character too late, so end of input
			 * right after a token that filled the buffer put the
			 * NUL one byte past it.
			 */
			if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
				FATAL( "out of space for name %.10s...", buf );
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be . */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* same reservation as above: this character plus the NUL */
			if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
				FATAL( "out of space for number %.10s...", buf );
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			buf[1] = 0;	/* return one character as token */
			retc = buf[0];	/* character is its own type */
			unputstr(rem+1); /* put rest back for later */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* truncate buf after number part */
			retc = '0';	/* type is number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return retc;
}
|
||||
|
||||
int word(char *);
|
||||
int string(void);
|
||||
int regexpr(void);
|
||||
int sc = 0; /* 1 => return a } right now */
|
||||
int reg = 0; /* 1 => return a REGEXPR now */
|
||||
|
||||
int yylex(void)
|
||||
{
|
||||
int c;
|
||||
static char *buf = 0;
|
||||
static int bufsize = 5; /* BUG: setting this small causes core dump! */
|
||||
|
||||
if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
|
||||
FATAL( "out of space in yylex" );
|
||||
if (sc) {
|
||||
sc = 0;
|
||||
RET('}');
|
||||
}
|
||||
if (reg) {
|
||||
reg = 0;
|
||||
return regexpr();
|
||||
}
|
||||
for (;;) {
|
||||
c = gettok(&buf, &bufsize);
|
||||
if (c == 0)
|
||||
return 0;
|
||||
if (isalpha(c) || c == '_')
|
||||
return word(buf);
|
||||
if (isdigit(c)) {
|
||||
yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
|
||||
/* should this also have STR set? */
|
||||
RET(NUMBER);
|
||||
}
|
||||
|
||||
yylval.i = c;
|
||||
switch (c) {
|
||||
case '\n': /* {EOL} */
|
||||
RET(NL);
|
||||
case '\r': /* assume \n is coming */
|
||||
case ' ': /* {WS}+ */
|
||||
case '\t':
|
||||
break;
|
||||
case '#': /* #.* strip comments */
|
||||
while ((c = input()) != '\n' && c != 0)
|
||||
;
|
||||
unput(c);
|
||||
break;
|
||||
case ';':
|
||||
RET(';');
|
||||
case '\\':
|
||||
if (peek() == '\n') {
|
||||
input();
|
||||
} else if (peek() == '\r') {
|
||||
input(); input(); /* \n */
|
||||
lineno++;
|
||||
} else {
|
||||
RET(c);
|
||||
}
|
||||
break;
|
||||
case '&':
|
||||
if (peek() == '&') {
|
||||
input(); RET(AND);
|
||||
} else
|
||||
RET('&');
|
||||
case '|':
|
||||
if (peek() == '|') {
|
||||
input(); RET(BOR);
|
||||
} else
|
||||
RET('|');
|
||||
case '!':
|
||||
if (peek() == '=') {
|
||||
input(); yylval.i = NE; RET(NE);
|
||||
} else if (peek() == '~') {
|
||||
input(); yylval.i = NOTMATCH; RET(MATCHOP);
|
||||
} else
|
||||
RET(NOT);
|
||||
case '~':
|
||||
yylval.i = MATCH;
|
||||
RET(MATCHOP);
|
||||
case '<':
|
||||
if (peek() == '=') {
|
||||
input(); yylval.i = LE; RET(LE);
|
||||
} else {
|
||||
yylval.i = LT; RET(LT);
|
||||
}
|
||||
case '=':
|
||||
if (peek() == '=') {
|
||||
input(); yylval.i = EQ; RET(EQ);
|
||||
} else {
|
||||
yylval.i = ASSIGN; RET(ASGNOP);
|
||||
}
|
||||
case '>':
|
||||
if (peek() == '=') {
|
||||
input(); yylval.i = GE; RET(GE);
|
||||
} else if (peek() == '>') {
|
||||
input(); yylval.i = APPEND; RET(APPEND);
|
||||
} else {
|
||||
yylval.i = GT; RET(GT);
|
||||
}
|
||||
case '+':
|
||||
if (peek() == '+') {
|
||||
input(); yylval.i = INCR; RET(INCR);
|
||||
} else if (peek() == '=') {
|
||||
input(); yylval.i = ADDEQ; RET(ASGNOP);
|
||||
} else
|
||||
RET('+');
|
||||
case '-':
|
||||
if (peek() == '-') {
|
||||
input(); yylval.i = DECR; RET(DECR);
|
||||
} else if (peek() == '=') {
|
||||
input(); yylval.i = SUBEQ; RET(ASGNOP);
|
||||
} else
|
||||
RET('-');
|
||||
case '*':
|
||||
if (peek() == '=') { /* *= */
|
||||
input(); yylval.i = MULTEQ; RET(ASGNOP);
|
||||
} else if (peek() == '*') { /* ** or **= */
|
||||
input(); /* eat 2nd * */
|
||||
if (peek() == '=') {
|
||||
input(); yylval.i = POWEQ; RET(ASGNOP);
|
||||
} else {
|
||||
RET(POWER);
|
||||
}
|
||||
} else
|
||||
RET('*');
|
||||
case '/':
|
||||
RET('/');
|
||||
case '%':
|
||||
if (peek() == '=') {
|
||||
input(); yylval.i = MODEQ; RET(ASGNOP);
|
||||
} else
|
||||
RET('%');
|
||||
case '^':
|
||||
if (peek() == '=') {
|
||||
input(); yylval.i = POWEQ; RET(ASGNOP);
|
||||
} else
|
||||
RET(POWER);
|
||||
|
||||
case '$':
|
||||
/* BUG: awkward, if not wrong */
|
||||
c = gettok(&buf, &bufsize);
|
||||
if (isalpha(c)) {
|
||||
if (strcmp(buf, "NF") == 0) { /* very special */
|
||||
unputstr("(NF)");
|
||||
RET(INDIRECT);
|
||||
}
|
||||
c = peek();
|
||||
if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
|
||||
unputstr(buf);
|
||||
RET(INDIRECT);
|
||||
}
|
||||
yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
|
||||
RET(IVAR);
|
||||
} else if (c == 0) { /* */
|
||||
SYNTAX( "unexpected end of input after $" );
|
||||
RET(';');
|
||||
} else {
|
||||
unputstr(buf);
|
||||
RET(INDIRECT);
|
||||
}
|
||||
|
||||
case '}':
|
||||
if (--bracecnt < 0)
|
||||
SYNTAX( "extra }" );
|
||||
sc = 1;
|
||||
RET(';');
|
||||
case ']':
|
||||
if (--brackcnt < 0)
|
||||
SYNTAX( "extra ]" );
|
||||
RET(']');
|
||||
case ')':
|
||||
if (--parencnt < 0)
|
||||
SYNTAX( "extra )" );
|
||||
RET(')');
|
||||
case '{':
|
||||
bracecnt++;
|
||||
RET('{');
|
||||
case '[':
|
||||
brackcnt++;
|
||||
RET('[');
|
||||
case '(':
|
||||
parencnt++;
|
||||
RET('(');
|
||||
|
||||
case '"':
|
||||
return string(); /* BUG: should be like tran.c ? */
|
||||
|
||||
default:
|
||||
RET(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int string(void)
|
||||
{
|
||||
int c, n;
|
||||
char *s, *bp;
|
||||
static char *buf = 0;
|
||||
static int bufsz = 500;
|
||||
|
||||
if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
|
||||
FATAL("out of space for strings");
|
||||
for (bp = buf; (c = input()) != '"'; ) {
|
||||
if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
|
||||
FATAL("out of space for string %.10s...", buf);
|
||||
switch (c) {
|
||||
case '\n':
|
||||
case '\r':
|
||||
case 0:
|
||||
SYNTAX( "non-terminated string %.10s...", buf );
|
||||
lineno++;
|
||||
if (c == 0) /* hopeless */
|
||||
FATAL( "giving up" );
|
||||
break;
|
||||
case '\\':
|
||||
c = input();
|
||||
switch (c) {
|
||||
case '"': *bp++ = '"'; break;
|
||||
case 'n': *bp++ = '\n'; break;
|
||||
case 't': *bp++ = '\t'; break;
|
||||
case 'f': *bp++ = '\f'; break;
|
||||
case 'r': *bp++ = '\r'; break;
|
||||
case 'b': *bp++ = '\b'; break;
|
||||
case 'v': *bp++ = '\v'; break;
|
||||
case 'a': *bp++ = '\007'; break;
|
||||
case '\\': *bp++ = '\\'; break;
|
||||
|
||||
case '0': case '1': case '2': /* octal: \d \dd \ddd */
|
||||
case '3': case '4': case '5': case '6': case '7':
|
||||
n = c - '0';
|
||||
if ((c = peek()) >= '0' && c < '8') {
|
||||
n = 8 * n + input() - '0';
|
||||
if ((c = peek()) >= '0' && c < '8')
|
||||
n = 8 * n + input() - '0';
|
||||
}
|
||||
*bp++ = n;
|
||||
break;
|
||||
|
||||
case 'x': /* hex \x0-9a-fA-F + */
|
||||
{ char xbuf[100], *px;
|
||||
for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
|
||||
if (isdigit(c)
|
||||
|| (c >= 'a' && c <= 'f')
|
||||
|| (c >= 'A' && c <= 'F'))
|
||||
*px++ = c;
|
||||
else
|
||||
break;
|
||||
}
|
||||
*px = 0;
|
||||
unput(c);
|
||||
sscanf(xbuf, "%x", &n);
|
||||
*bp++ = n;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
*bp++ = c;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
*bp++ = c;
|
||||
break;
|
||||
}
|
||||
}
|
||||
*bp = 0;
|
||||
s = tostring(buf);
|
||||
*bp++ = ' '; *bp++ = 0;
|
||||
yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
|
||||
RET(STRING);
|
||||
}
|
||||
|
||||
|
||||
int binsearch(char *w, Keyword *kp, int n)	/* index of w in the sorted table kp[0..n-1], or -1 */
{
	int lo, hi, probe, cmp;

	for (lo = 0, hi = n - 1; lo <= hi; ) {
		probe = (lo + hi) / 2;
		cmp = strcmp(w, kp[probe].word);
		if (cmp == 0)
			return probe;
		if (cmp < 0)
			hi = probe - 1;	/* w sorts before the probe */
		else
			lo = probe + 1;	/* w sorts after the probe */
	}
	return -1;
}
|
||||
|
||||
int word(char *w)
|
||||
{
|
||||
Keyword *kp;
|
||||
int c, n;
|
||||
|
||||
n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
|
||||
/* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */
|
||||
kp = keywords + n;
|
||||
if (n != -1) { /* found in table */
|
||||
yylval.i = kp->sub;
|
||||
switch (kp->type) { /* special handling */
|
||||
case BLTIN:
|
||||
if (kp->sub == FSYSTEM && safe)
|
||||
SYNTAX( "system is unsafe" );
|
||||
RET(kp->type);
|
||||
case FUNC:
|
||||
if (infunc)
|
||||
SYNTAX( "illegal nested function" );
|
||||
RET(kp->type);
|
||||
case RETURN:
|
||||
if (!infunc)
|
||||
SYNTAX( "return not in function" );
|
||||
RET(kp->type);
|
||||
case VARNF:
|
||||
yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
|
||||
RET(VARNF);
|
||||
default:
|
||||
RET(kp->type);
|
||||
}
|
||||
}
|
||||
c = peek(); /* look for '(' */
|
||||
if (c != '(' && infunc && (n=isarg(w)) >= 0) {
|
||||
yylval.i = n;
|
||||
RET(ARG);
|
||||
} else {
|
||||
yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
|
||||
if (c == '(') {
|
||||
RET(CALL);
|
||||
} else {
|
||||
RET(VAR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void startreg(void) /* next call to yylex will return a regular expression */
|
||||
{
|
||||
reg = 1;
|
||||
}
|
||||
|
||||
int regexpr(void)
|
||||
{
|
||||
int c;
|
||||
static char *buf = 0;
|
||||
static int bufsz = 500;
|
||||
char *bp;
|
||||
|
||||
if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
|
||||
FATAL("out of space for rex expr");
|
||||
bp = buf;
|
||||
for ( ; (c = input()) != '/' && c != 0; ) {
|
||||
if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
|
||||
FATAL("out of space for reg expr %.10s...", buf);
|
||||
if (c == '\n') {
|
||||
SYNTAX( "newline in regular expression %.10s...", buf );
|
||||
unput('\n');
|
||||
break;
|
||||
} else if (c == '\\') {
|
||||
*bp++ = '\\';
|
||||
*bp++ = input();
|
||||
} else {
|
||||
*bp++ = c;
|
||||
}
|
||||
}
|
||||
*bp = 0;
|
||||
if (c == 0)
|
||||
SYNTAX("non-terminated regular expression %.10s...", buf);
|
||||
yylval.s = tostring(buf);
|
||||
unput('/');
|
||||
RET(REGEXPR);
|
||||
}
|
||||
|
||||
/* low-level lexical stuff, sort of inherited from lex */
|
||||
|
||||
char ebuf[300];
|
||||
char *ep = ebuf;
|
||||
char yysbuf[100]; /* pushback buffer */
|
||||
char *yysptr = yysbuf;
|
||||
FILE *yyin = 0;
|
||||
|
||||
int input(void) /* get next lexical input character */
|
||||
{
|
||||
int c;
|
||||
extern char *lexprog;
|
||||
|
||||
if (yysptr > yysbuf)
|
||||
c = (uschar)*--yysptr;
|
||||
else if (lexprog != NULL) { /* awk '...' */
|
||||
if ((c = (uschar)*lexprog) != 0)
|
||||
lexprog++;
|
||||
} else /* awk -f ... */
|
||||
c = pgetc();
|
||||
if (c == '\n')
|
||||
lineno++;
|
||||
else if (c == EOF)
|
||||
c = 0;
|
||||
if (ep >= ebuf + sizeof ebuf)
|
||||
ep = ebuf;
|
||||
return *ep++ = c;
|
||||
}
|
||||
|
||||
void unput(int c) /* put lexical character back on input */
|
||||
{
|
||||
if (c == '\n')
|
||||
lineno--;
|
||||
if (yysptr >= yysbuf + sizeof(yysbuf))
|
||||
FATAL("pushed back too much: %.20s...", yysbuf);
|
||||
*yysptr++ = c;
|
||||
if (--ep < ebuf)
|
||||
ep = ebuf + sizeof(ebuf) - 1;
|
||||
}
|
||||
|
||||
void unputstr(const char *s)	/* put a string back on input */
{
	int pos = strlen(s);

	/* last character first, so input() hands them back in order */
	while (--pos >= 0)
		unput(s[pos]);
}
|
|
@ -0,0 +1,697 @@
|
|||
/****************************************************************
|
||||
Copyright (C) Lucent Technologies 1997
|
||||
All Rights Reserved
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and
|
||||
its documentation for any purpose and without fee is hereby
|
||||
granted, provided that the above copyright notice appear in all
|
||||
copies and that both that the copyright notice and this
|
||||
permission notice and warranty disclaimer appear in supporting
|
||||
documentation, and that the name Lucent Technologies or any of
|
||||
its entities not be used in advertising or publicity pertaining
|
||||
to distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
|
||||
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
|
||||
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
||||
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
#define DEBUG
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include "awk.h"
|
||||
#include "ytab.h"
|
||||
|
||||
/* state shared by the record reader and the field splitter in this file */
FILE *infile = NULL; /* input stream being read; NULL means getrec() must open the next one */
|
||||
char *file = ""; /* name of the current input file, as fetched from ARGV by getrec() */
|
||||
char *record; /* buffer for $0, allocated by recinit() */
|
||||
int recsize = RECSIZE; /* size handed to recinit() and adjbuf() for record */
|
||||
char *fields; /* single buffer holding the \0-separated text of the fields */
|
||||
int fieldssize = RECSIZE; /* record length the fields buffer is currently sized for */
|
||||
|
||||
Cell **fldtab; /* pointers to Cells */
|
||||
char inputFS[100] = " "; /* copy of FS taken by readrec()/fldbld() for field splitting */
|
||||
|
||||
#define MAXFLD 2
|
||||
int nfields = MAXFLD; /* last allocated slot for $i */
|
||||
|
||||
int donefld; /* 1 = implies rec broken into fields */
|
||||
int donerec; /* 1 = record is valid (no flds have changed) */
|
||||
|
||||
int lastfld = 0; /* last used field */
|
||||
int argno = 1; /* current input argument number */
|
||||
extern Awkfloat *ARGC;
|
||||
|
||||
/* templates copied into freshly allocated cells: dollar0 by recinit(), dollar1 by makefields() */
static Cell dollar0 = { OCELL, CFLD, NULL, "", 0.0, REC|STR|DONTFREE };
|
||||
static Cell dollar1 = { OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE };
|
||||
|
||||
void recinit(unsigned int n)
|
||||
{
|
||||
if ( (record = (char *) malloc(n)) == NULL
|
||||
|| (fields = (char *) malloc(n+1)) == NULL
|
||||
|| (fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *))) == NULL
|
||||
|| (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL )
|
||||
FATAL("out of space for $0 and fields");
|
||||
*fldtab[0] = dollar0;
|
||||
fldtab[0]->sval = record;
|
||||
fldtab[0]->nval = tostring("0");
|
||||
makefields(1, nfields);
|
||||
}
|
||||
|
||||
void makefields(int n1, int n2) /* create $n1..$n2 inclusive */
|
||||
{
|
||||
char temp[50];
|
||||
int i;
|
||||
|
||||
for (i = n1; i <= n2; i++) {
|
||||
fldtab[i] = (Cell *) malloc(sizeof (struct Cell));
|
||||
if (fldtab[i] == NULL)
|
||||
FATAL("out of space in makefields %d", i);
|
||||
*fldtab[i] = dollar1;
|
||||
sprintf(temp, "%d", i);
|
||||
fldtab[i]->nval = tostring(temp);
|
||||
}
|
||||
}
|
||||
|
||||
void initgetrec(void)
|
||||
{
|
||||
int i;
|
||||
char *p;
|
||||
|
||||
for (i = 1; i < *ARGC; i++) {
|
||||
if (!isclvar(p = getargv(i))) { /* find 1st real filename */
|
||||
setsval(lookup("FILENAME", symtab), getargv(i));
|
||||
return;
|
||||
}
|
||||
setclvar(p); /* a commandline assignment before filename */
|
||||
argno++;
|
||||
}
|
||||
infile = stdin; /* no filenames, so use stdin */
|
||||
}
|
||||
|
||||
static int firsttime = 1;
|
||||
|
||||
int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */
|
||||
{ /* note: cares whether buf == record */
|
||||
int c;
|
||||
char *buf = *pbuf;
|
||||
uschar saveb0;
|
||||
int bufsize = *pbufsize, savebufsize = bufsize;
|
||||
|
||||
if (firsttime) {
|
||||
firsttime = 0;
|
||||
initgetrec();
|
||||
}
|
||||
dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
|
||||
*RS, *FS, *ARGC, *FILENAME) );
|
||||
if (isrecord) {
|
||||
donefld = 0;
|
||||
donerec = 1;
|
||||
}
|
||||
saveb0 = buf[0];
|
||||
buf[0] = 0;
|
||||
while (argno < *ARGC || infile == stdin) {
|
||||
dprintf( ("argno=%d, file=|%s|\n", argno, file) );
|
||||
if (infile == NULL) { /* have to open a new file */
|
||||
file = getargv(argno);
|
||||
if (*file == '\0') { /* it's been zapped */
|
||||
argno++;
|
||||
continue;
|
||||
}
|
||||
if (isclvar(file)) { /* a var=value arg */
|
||||
setclvar(file);
|
||||
argno++;
|
||||
continue;
|
||||
}
|
||||
*FILENAME = file;
|
||||
dprintf( ("opening file %s\n", file) );
|
||||
if (*file == '-' && *(file+1) == '\0')
|
||||
infile = stdin;
|
||||
else if ((infile = fopen(file, "r")) == NULL)
|
||||
FATAL("can't open file %s", file);
|
||||
setfval(fnrloc, 0.0);
|
||||
}
|
||||
c = readrec(&buf, &bufsize, infile);
|
||||
if (c != 0 || buf[0] != '\0') { /* normal record */
|
||||
if (isrecord) {
|
||||
if (freeable(fldtab[0]))
|
||||
xfree(fldtab[0]->sval);
|
||||
fldtab[0]->sval = buf; /* buf == record */
|
||||
fldtab[0]->tval = REC | STR | DONTFREE;
|
||||
if (is_number(fldtab[0]->sval)) {
|
||||
fldtab[0]->fval = atof(fldtab[0]->sval);
|
||||
fldtab[0]->tval |= NUM;
|
||||
}
|
||||
}
|
||||
setfval(nrloc, nrloc->fval+1);
|
||||
setfval(fnrloc, fnrloc->fval+1);
|
||||
*pbuf = buf;
|
||||
*pbufsize = bufsize;
|
||||
return 1;
|
||||
}
|
||||
/* EOF arrived on this file; set up next */
|
||||
if (infile != stdin)
|
||||
fclose(infile);
|
||||
infile = NULL;
|
||||
argno++;
|
||||
}
|
||||
buf[0] = saveb0;
|
||||
*pbuf = buf;
|
||||
*pbufsize = savebufsize;
|
||||
return 0; /* true end of file */
|
||||
}
|
||||
|
||||
void nextfile(void)
|
||||
{
|
||||
if (infile != NULL && infile != stdin)
|
||||
fclose(infile);
|
||||
infile = NULL;
|
||||
argno++;
|
||||
}
|
||||
|
||||
int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */
|
||||
{
|
||||
int sep, c;
|
||||
char *rr, *buf = *pbuf;
|
||||
int bufsize = *pbufsize;
|
||||
|
||||
if (strlen(*FS) >= sizeof(inputFS))
|
||||
FATAL("field separator %.10s... is too long", *FS);
|
||||
strcpy(inputFS, *FS); /* for subsequent field splitting */
|
||||
if ((sep = **RS) == 0) {
|
||||
sep = '\n';
|
||||
while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
|
||||
;
|
||||
if (c != EOF)
|
||||
ungetc(c, inf);
|
||||
}
|
||||
for (rr = buf; ; ) {
|
||||
for (; (c=getc(inf)) != sep && c != EOF; ) {
|
||||
if (rr-buf+1 > bufsize)
|
||||
if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1"))
|
||||
FATAL("input record `%.30s...' too long", buf);
|
||||
*rr++ = c;
|
||||
}
|
||||
if (**RS == sep || c == EOF)
|
||||
break;
|
||||
if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
|
||||
break;
|
||||
if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2"))
|
||||
FATAL("input record `%.30s...' too long", buf);
|
||||
*rr++ = '\n';
|
||||
*rr++ = c;
|
||||
}
|
||||
if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
|
||||
FATAL("input record `%.30s...' too long", buf);
|
||||
*rr = 0;
|
||||
dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
|
||||
*pbuf = buf;
|
||||
*pbufsize = bufsize;
|
||||
return c == EOF && rr == buf ? 0 : 1;
|
||||
}
|
||||
|
||||
char *getargv(int n) /* get ARGV[n] */
|
||||
{
|
||||
Cell *x;
|
||||
char *s, temp[50];
|
||||
extern Array *ARGVtab;
|
||||
|
||||
sprintf(temp, "%d", n);
|
||||
x = setsymtab(temp, "", 0.0, STR, ARGVtab);
|
||||
s = getsval(x);
|
||||
dprintf( ("getargv(%d) returns |%s|\n", n, s) );
|
||||
return s;
|
||||
}
|
||||
|
||||
void setclvar(char *s) /* set var=value from s */
|
||||
{
|
||||
char *p;
|
||||
Cell *q;
|
||||
|
||||
for (p=s; *p != '='; p++)
|
||||
;
|
||||
*p++ = 0;
|
||||
p = qstring(p, '\0');
|
||||
q = setsymtab(s, p, 0.0, STR, symtab);
|
||||
setsval(q, p);
|
||||
if (is_number(q->sval)) {
|
||||
q->fval = atof(q->sval);
|
||||
q->tval |= NUM;
|
||||
}
|
||||
dprintf( ("command line set %s to |%s|\n", s, p) );
|
||||
}
|
||||
|
||||
|
||||
void fldbld(void) /* create fields from current record */
|
||||
{
|
||||
/* this relies on having fields[] the same length as $0 */
|
||||
/* the fields are all stored in this one array with \0's */
|
||||
char *r, *fr, sep;
|
||||
Cell *p;
|
||||
int i, j, n;
|
||||
|
||||
if (donefld)
|
||||
return;
|
||||
if (!isstr(fldtab[0]))
|
||||
getsval(fldtab[0]);
|
||||
r = fldtab[0]->sval;
|
||||
n = strlen(r);
|
||||
if (n > fieldssize) {
|
||||
xfree(fields);
|
||||
if ((fields = (char *) malloc(n+1)) == NULL)
|
||||
FATAL("out of space for fields in fldbld %d", n);
|
||||
fieldssize = n;
|
||||
}
|
||||
fr = fields;
|
||||
i = 0; /* number of fields accumulated here */
|
||||
strcpy(inputFS, *FS);
|
||||
if (strlen(inputFS) > 1) { /* it's a regular expression */
|
||||
i = refldbld(r, inputFS);
|
||||
} else if ((sep = *inputFS) == ' ') { /* default whitespace */
|
||||
for (i = 0; ; ) {
|
||||
while (*r == ' ' || *r == '\t' || *r == '\n')
|
||||
r++;
|
||||
if (*r == 0)
|
||||
break;
|
||||
i++;
|
||||
if (i > nfields)
|
||||
growfldtab(i);
|
||||
if (freeable(fldtab[i]))
|
||||
xfree(fldtab[i]->sval);
|
||||
fldtab[i]->sval = fr;
|
||||
fldtab[i]->tval = FLD | STR | DONTFREE;
|
||||
do
|
||||
*fr++ = *r++;
|
||||
while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
|
||||
*fr++ = 0;
|
||||
}
|
||||
*fr = 0;
|
||||
} else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
|
||||
for (i = 0; *r != 0; r++) {
|
||||
char buf[2];
|
||||
i++;
|
||||
if (i > nfields)
|
||||
growfldtab(i);
|
||||
if (freeable(fldtab[i]))
|
||||
xfree(fldtab[i]->sval);
|
||||
buf[0] = *r;
|
||||
buf[1] = 0;
|
||||
fldtab[i]->sval = tostring(buf);
|
||||
fldtab[i]->tval = FLD | STR;
|
||||
}
|
||||
*fr = 0;
|
||||
} else if (*r != 0) { /* if 0, it's a null field */
|
||||
/* subtlecase : if length(FS) == 1 && length(RS > 0)
|
||||
* \n is NOT a field separator (cf awk book 61,84).
|
||||
* this variable is tested in the inner while loop.
|
||||
*/
|
||||
int rtest = '\n'; /* normal case */
|
||||
if (strlen(*RS) > 0)
|
||||
rtest = '\0';
|
||||
for (;;) {
|
||||
i++;
|
||||
if (i > nfields)
|
||||
growfldtab(i);
|
||||
if (freeable(fldtab[i]))
|
||||
xfree(fldtab[i]->sval);
|
||||
fldtab[i]->sval = fr;
|
||||
fldtab[i]->tval = FLD | STR | DONTFREE;
|
||||
while (*r != sep && *r != rtest && *r != '\0') /* \n is always a separator */
|
||||
*fr++ = *r++;
|
||||
*fr++ = 0;
|
||||
if (*r++ == 0)
|
||||
break;
|
||||
}
|
||||
*fr = 0;
|
||||
}
|
||||
if (i > nfields)
|
||||
FATAL("record `%.30s...' has too many fields; can't happen", r);
|
||||
cleanfld(i+1, lastfld); /* clean out junk from previous record */
|
||||
lastfld = i;
|
||||
donefld = 1;
|
||||
for (j = 1; j <= lastfld; j++) {
|
||||
p = fldtab[j];
|
||||
if(is_number(p->sval)) {
|
||||
p->fval = atof(p->sval);
|
||||
p->tval |= NUM;
|
||||
}
|
||||
}
|
||||
setfval(nfloc, (Awkfloat) lastfld);
|
||||
if (dbg) {
|
||||
for (j = 0; j <= lastfld; j++) {
|
||||
p = fldtab[j];
|
||||
printf("field %d (%s): |%s|\n", j, p->nval, p->sval);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cleanfld(int n1, int n2) /* clean out fields n1 .. n2 inclusive */
|
||||
{ /* nvals remain intact */
|
||||
Cell *p;
|
||||
int i;
|
||||
|
||||
for (i = n1; i <= n2; i++) {
|
||||
p = fldtab[i];
|
||||
if (freeable(p))
|
||||
xfree(p->sval);
|
||||
p->sval = "";
|
||||
p->tval = FLD | STR | DONTFREE;
|
||||
}
|
||||
}
|
||||
|
||||
void newfld(int n) /* add field n after end of existing lastfld */
|
||||
{
|
||||
if (n > nfields)
|
||||
growfldtab(n);
|
||||
cleanfld(lastfld+1, n);
|
||||
lastfld = n;
|
||||
setfval(nfloc, (Awkfloat) n);
|
||||
}
|
||||
|
||||
Cell *fieldadr(int n) /* get nth field */
|
||||
{
|
||||
if (n < 0)
|
||||
FATAL("trying to access out of range field %d", n);
|
||||
if (n > nfields) /* fields after NF are empty */
|
||||
growfldtab(n); /* but does not increase NF */
|
||||
return(fldtab[n]);
|
||||
}
|
||||
|
||||
void growfldtab(int n) /* make new fields up to at least $n */
|
||||
{
|
||||
int nf = 2 * nfields;
|
||||
size_t s;
|
||||
|
||||
if (n > nf)
|
||||
nf = n;
|
||||
s = (nf+1) * (sizeof (struct Cell *)); /* freebsd: how much do we need? */
|
||||
if (s / sizeof(struct Cell *) - 1 == nf) /* didn't overflow */
|
||||
fldtab = (Cell **) realloc(fldtab, s);
|
||||
else /* overflow sizeof int */
|
||||
xfree(fldtab); /* make it null */
|
||||
if (fldtab == NULL)
|
||||
FATAL("out of space creating %d fields", nf);
|
||||
makefields(nfields+1, nf);
|
||||
nfields = nf;
|
||||
}
|
||||
|
||||
int refldbld(const char *rec, const char *fs) /* build fields from reg expr in FS */
|
||||
{
|
||||
/* this relies on having fields[] the same length as $0 */
|
||||
/* the fields are all stored in this one array with \0's */
|
||||
char *fr;
|
||||
int i, tempstat, n;
|
||||
fa *pfa;
|
||||
|
||||
n = strlen(rec);
|
||||
if (n > fieldssize) {
|
||||
xfree(fields);
|
||||
if ((fields = (char *) malloc(n+1)) == NULL)
|
||||
FATAL("out of space for fields in refldbld %d", n);
|
||||
fieldssize = n;
|
||||
}
|
||||
fr = fields;
|
||||
*fr = '\0';
|
||||
if (*rec == '\0')
|
||||
return 0;
|
||||
pfa = makedfa(fs, 1);
|
||||
dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
|
||||
tempstat = pfa->initstat;
|
||||
for (i = 1; ; i++) {
|
||||
if (i > nfields)
|
||||
growfldtab(i);
|
||||
if (freeable(fldtab[i]))
|
||||
xfree(fldtab[i]->sval);
|
||||
fldtab[i]->tval = FLD | STR | DONTFREE;
|
||||
fldtab[i]->sval = fr;
|
||||
dprintf( ("refldbld: i=%d\n", i) );
|
||||
if (nematch(pfa, rec)) {
|
||||
pfa->initstat = 2; /* horrible coupling to b.c */
|
||||
dprintf( ("match %s (%d chars)\n", patbeg, patlen) );
|
||||
strncpy(fr, rec, patbeg-rec);
|
||||
fr += patbeg - rec + 1;
|
||||
*(fr-1) = '\0';
|
||||
rec = patbeg + patlen;
|
||||
} else {
|
||||
dprintf( ("no match %s\n", rec) );
|
||||
strcpy(fr, rec);
|
||||
pfa->initstat = tempstat;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
void recbld(void) /* create $0 from $1..$NF if necessary */
|
||||
{
|
||||
int i;
|
||||
char *r, *p;
|
||||
|
||||
if (donerec == 1)
|
||||
return;
|
||||
r = record;
|
||||
for (i = 1; i <= *NF; i++) {
|
||||
p = getsval(fldtab[i]);
|
||||
if (!adjbuf(&record, &recsize, 1+strlen(p)+r-record, recsize, &r, "recbld 1"))
|
||||
FATAL("created $0 `%.30s...' too long", record);
|
||||
while ((*r = *p++) != 0)
|
||||
r++;
|
||||
if (i < *NF) {
|
||||
if (!adjbuf(&record, &recsize, 2+strlen(*OFS)+r-record, recsize, &r, "recbld 2"))
|
||||
FATAL("created $0 `%.30s...' too long", record);
|
||||
for (p = *OFS; (*r = *p++) != 0; )
|
||||
r++;
|
||||
}
|
||||
}
|
||||
if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
|
||||
FATAL("built giant record `%.30s...'", record);
|
||||
*r = '\0';
|
||||
dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
|
||||
|
||||
if (freeable(fldtab[0]))
|
||||
xfree(fldtab[0]->sval);
|
||||
fldtab[0]->tval = REC | STR | DONTFREE;
|
||||
fldtab[0]->sval = record;
|
||||
|
||||
dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
|
||||
dprintf( ("recbld = |%s|\n", record) );
|
||||
donerec = 1;
|
||||
}
|
||||
|
||||
/* set to 2 by SYNTAX() when a syntax error is reported; main() returns it */
int errorflag = 0;
|
||||
|
||||
/* yyerror: report the parser message s through SYNTAX() */
void yyerror(const char *s)
|
||||
{
|
||||
SYNTAX("%s", s);
|
||||
}
|
||||
|
||||
void SYNTAX(const char *fmt, ...)
|
||||
{
|
||||
extern char *cmdname, *curfname;
|
||||
static int been_here = 0;
|
||||
va_list varg;
|
||||
|
||||
if (been_here++ > 2)
|
||||
return;
|
||||
fprintf(stderr, "%s: ", cmdname);
|
||||
va_start(varg, fmt);
|
||||
vfprintf(stderr, fmt, varg);
|
||||
va_end(varg);
|
||||
fprintf(stderr, " at source line %d", lineno);
|
||||
if (curfname != NULL)
|
||||
fprintf(stderr, " in function %s", curfname);
|
||||
if (compile_time == 1 && cursource() != NULL)
|
||||
fprintf(stderr, " source file %s", cursource());
|
||||
fprintf(stderr, "\n");
|
||||
errorflag = 2;
|
||||
eprint();
|
||||
}
|
||||
|
||||
/* fpecatch: handler installed by main() for SIGFPE; n is the signal number. Ends the run via FATAL(). */
void fpecatch(int n)
|
||||
{
|
||||
FATAL("floating point exception %d", n);
|
||||
}
|
||||
|
||||
extern int bracecnt, brackcnt, parencnt;
|
||||
|
||||
void bracecheck(void)
|
||||
{
|
||||
int c;
|
||||
static int beenhere = 0;
|
||||
|
||||
if (beenhere++)
|
||||
return;
|
||||
while ((c = input()) != EOF && c != '\0')
|
||||
bclass(c);
|
||||
bcheck2(bracecnt, '{', '}');
|
||||
bcheck2(brackcnt, '[', ']');
|
||||
bcheck2(parencnt, '(', ')');
|
||||
}
|
||||
|
||||
/*
 * bcheck2: report the imbalance n of one bracket pair on stderr.
 * n > 0 means that many closers are missing, n < 0 that many are
 * extra, and 0 prints nothing.  Only the closing character c2 appears
 * in the message; c1 is accepted but not used.
 */
void bcheck2(int n, int c1, int c2)
{
	if (n == 0)
		return;
	if (n > 0) {
		if (n == 1)
			fprintf(stderr, "\tmissing %c\n", c2);
		else
			fprintf(stderr, "\t%d missing %c's\n", n, c2);
	} else {
		if (n == -1)
			fprintf(stderr, "\textra %c\n", c2);
		else
			fprintf(stderr, "\t%d extra %c's\n", -n, c2);
	}
}
|
||||
|
||||
void FATAL(const char *fmt, ...)
|
||||
{
|
||||
extern char *cmdname;
|
||||
va_list varg;
|
||||
|
||||
fflush(stdout);
|
||||
fprintf(stderr, "%s: ", cmdname);
|
||||
va_start(varg, fmt);
|
||||
vfprintf(stderr, fmt, varg);
|
||||
va_end(varg);
|
||||
error();
|
||||
if (dbg > 1) /* core dump if serious debugging on */
|
||||
abort();
|
||||
exit(2);
|
||||
}
|
||||
|
||||
/*
 * WARNING: print a printf-style message on stderr, prefixed with the
 * command name and followed by the context from error().  Unlike
 * FATAL, execution then continues.
 */
void WARNING(const char *fmt, ...)
{
	extern char *cmdname;
	va_list ap;

	fflush(stdout);		/* keep pending output ahead of the message */
	fprintf(stderr, "%s: ", cmdname);

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	error();
}
|
||||
|
||||
void error()
|
||||
{
|
||||
extern Node *curnode;
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
if (compile_time != 2 && NR && *NR > 0) {
|
||||
fprintf(stderr, " input record number %d", (int) (*FNR));
|
||||
if (strcmp(*FILENAME, "-") != 0)
|
||||
fprintf(stderr, ", file %s", *FILENAME);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
if (compile_time != 2 && curnode)
|
||||
fprintf(stderr, " source line number %d", curnode->lineno);
|
||||
else if (compile_time != 2 && lineno)
|
||||
fprintf(stderr, " source line number %d", lineno);
|
||||
if (compile_time == 1 && cursource() != NULL)
|
||||
fprintf(stderr, " source file %s", cursource());
|
||||
fprintf(stderr, "\n");
|
||||
eprint();
|
||||
}
|
||||
|
||||
void eprint(void) /* try to print context around error */
|
||||
{
|
||||
char *p, *q;
|
||||
int c;
|
||||
static int been_here = 0;
|
||||
extern char ebuf[], *ep;
|
||||
|
||||
if (compile_time == 2 || compile_time == 0 || been_here++ > 0)
|
||||
return;
|
||||
p = ep - 1;
|
||||
if (p > ebuf && *p == '\n')
|
||||
p--;
|
||||
for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--)
|
||||
;
|
||||
while (*p == '\n')
|
||||
p++;
|
||||
fprintf(stderr, " context is\n\t");
|
||||
for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
|
||||
;
|
||||
for ( ; p < q; p++)
|
||||
if (*p)
|
||||
putc(*p, stderr);
|
||||
fprintf(stderr, " >>> ");
|
||||
for ( ; p < ep; p++)
|
||||
if (*p)
|
||||
putc(*p, stderr);
|
||||
fprintf(stderr, " <<< ");
|
||||
if (*ep)
|
||||
while ((c = input()) != '\n' && c != '\0' && c != EOF) {
|
||||
putc(c, stderr);
|
||||
bclass(c);
|
||||
}
|
||||
putc('\n', stderr);
|
||||
ep = ebuf;
|
||||
}
|
||||
|
||||
void bclass(int c)
|
||||
{
|
||||
switch (c) {
|
||||
case '{': bracecnt++; break;
|
||||
case '}': bracecnt--; break;
|
||||
case '[': brackcnt++; break;
|
||||
case ']': brackcnt--; break;
|
||||
case '(': parencnt++; break;
|
||||
case ')': parencnt--; break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * errcheck: check errno after a math routine named s has produced x.
 * On EDOM or ERANGE errno is cleared, a warning naming s is issued and
 * 1 is returned in place of x; otherwise x is returned unchanged.
 */
double errcheck(double x, const char *s)
{
	switch (errno) {
	case EDOM:
		errno = 0;
		WARNING("%s argument out of domain", s);
		return 1;
	case ERANGE:
		errno = 0;
		WARNING("%s result out of range", s);
		return 1;
	default:
		return x;
	}
}
|
||||
|
||||
int isclvar(const char *s) /* is s of form var=something ? */
|
||||
{
|
||||
const char *os = s;
|
||||
|
||||
if (!isalpha((uschar) *s) && *s != '_')
|
||||
return 0;
|
||||
for ( ; *s; s++)
|
||||
if (!(isalnum((uschar) *s) || *s == '_'))
|
||||
break;
|
||||
return *s == '=' && s > os && *(s+1) != '=';
|
||||
}
|
||||
|
||||
/* strtod is supposed to be a proper test of what's a valid number */
|
||||
/* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */
|
||||
/* wrong: violates 4.10.1.4 of ansi C standard */
|
||||
|
||||
#include <math.h>
|
||||
/*
 * is_number: does s look like a number in its entirety?
 * strtod() must consume at least one character without a range error
 * and without yielding HUGE_VAL, and only blanks, tabs and newlines may
 * follow the converted part.  Returns 1 if so, otherwise 0.
 */
int is_number(const char *s)
{
	double val;
	char *rest;

	errno = 0;
	val = strtod(s, &rest);
	if (rest == s)
		return 0;		/* nothing was converted */
	if (val == HUGE_VAL || errno == ERANGE)
		return 0;
	for ( ; *rest == ' ' || *rest == '\t' || *rest == '\n'; rest++)
		;
	return *rest == '\0' ? 1 : 0;
}
|
|
@ -0,0 +1,194 @@
|
|||
/****************************************************************
|
||||
Copyright (C) Lucent Technologies 1997
|
||||
All Rights Reserved
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and
|
||||
its documentation for any purpose and without fee is hereby
|
||||
granted, provided that the above copyright notice appear in all
|
||||
copies and that both that the copyright notice and this
|
||||
permission notice and warranty disclaimer appear in supporting
|
||||
documentation, and that the name Lucent Technologies or any of
|
||||
its entities not be used in advertising or publicity pertaining
|
||||
to distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
|
||||
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
|
||||
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
||||
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
const char *version = "version 20100523";
|
||||
|
||||
#define DEBUG
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <locale.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <signal.h>
|
||||
#include "awk.h"
|
||||
#include "ytab.h"
|
||||
|
||||
extern char **environ; /* process environment; main() passes it to envinit() unless -safe was given */
|
||||
extern int nfields;
|
||||
|
||||
int dbg = 0; /* debug level set by -d; above 1, FATAL() aborts instead of exiting */
|
||||
char *cmdname; /* gets argv[0] for error messages */
|
||||
extern FILE *yyin; /* lex input file */
|
||||
char *lexprog; /* points to program argument if it exists */
|
||||
extern int errorflag; /* non-zero if any syntax errors; set by yyerror */
|
||||
int compile_time = 2; /* for error printing: */
|
||||
/* 2 = cmdline, 1 = compile, 0 = running */
|
||||
|
||||
#define MAX_PFILE 20 /* max number of -f's */
|
||||
|
||||
char *pfile[MAX_PFILE]; /* program filenames from -f's */
|
||||
int npfile = 0; /* number of filenames */
|
||||
int curpfile = 0; /* current filename */
|
||||
|
||||
int safe = 0; /* 1 => "safe" mode */
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
const char *fs = NULL;
|
||||
|
||||
setlocale(LC_CTYPE, "");
|
||||
setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */
|
||||
cmdname = argv[0];
|
||||
if (argc == 1) {
|
||||
fprintf(stderr,
|
||||
"usage: %s [-F fs] [-v var=value] [-f progfile | 'prog'] [file ...]\n",
|
||||
cmdname);
|
||||
exit(1);
|
||||
}
|
||||
signal(SIGFPE, fpecatch);
|
||||
yyin = NULL;
|
||||
symtab = makesymtab(NSYMTAB/NSYMTAB);
|
||||
while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {
|
||||
if (strcmp(argv[1],"-version") == 0 || strcmp(argv[1],"--version") == 0) {
|
||||
printf("awk %s\n", version);
|
||||
exit(0);
|
||||
break;
|
||||
}
|
||||
if (strncmp(argv[1], "--", 2) == 0) { /* explicit end of args */
|
||||
argc--;
|
||||
argv++;
|
||||
break;
|
||||
}
|
||||
switch (argv[1][1]) {
|
||||
case 's':
|
||||
if (strcmp(argv[1], "-safe") == 0)
|
||||
safe = 1;
|
||||
break;
|
||||
case 'f': /* next argument is program filename */
|
||||
argc--;
|
||||
argv++;
|
||||
if (argc <= 1)
|
||||
FATAL("no program filename");
|
||||
if (npfile >= MAX_PFILE - 1)
|
||||
FATAL("too many -f options");
|
||||
pfile[npfile++] = argv[1];
|
||||
break;
|
||||
case 'F': /* set field separator */
|
||||
if (argv[1][2] != 0) { /* arg is -Fsomething */
|
||||
if (argv[1][2] == 't' && argv[1][3] == 0) /* wart: t=>\t */
|
||||
fs = "\t";
|
||||
else if (argv[1][2] != 0)
|
||||
fs = &argv[1][2];
|
||||
} else { /* arg is -F something */
|
||||
argc--; argv++;
|
||||
if (argc > 1 && argv[1][0] == 't' && argv[1][1] == 0) /* wart: t=>\t */
|
||||
fs = "\t";
|
||||
else if (argc > 1 && argv[1][0] != 0)
|
||||
fs = &argv[1][0];
|
||||
}
|
||||
if (fs == NULL || *fs == '\0')
|
||||
WARNING("field separator FS is empty");
|
||||
break;
|
||||
case 'v': /* -v a=1 to be done NOW. one -v for each */
|
||||
if (argv[1][2] == '\0' && --argc > 1 && isclvar((++argv)[1]))
|
||||
setclvar(argv[1]);
|
||||
else if (argv[1][2] != '\0')
|
||||
setclvar(&argv[1][2]);
|
||||
break;
|
||||
case 'd':
|
||||
dbg = atoi(&argv[1][2]);
|
||||
if (dbg == 0)
|
||||
dbg = 1;
|
||||
printf("awk %s\n", version);
|
||||
break;
|
||||
default:
|
||||
WARNING("unknown option %s ignored", argv[1]);
|
||||
break;
|
||||
}
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
/* argv[1] is now the first argument */
|
||||
if (npfile == 0) { /* no -f; first argument is program */
|
||||
if (argc <= 1) {
|
||||
if (dbg)
|
||||
exit(0);
|
||||
FATAL("no program given");
|
||||
}
|
||||
dprintf( ("program = |%s|\n", argv[1]) );
|
||||
lexprog = argv[1];
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
recinit(recsize);
|
||||
syminit();
|
||||
compile_time = 1;
|
||||
argv[0] = cmdname; /* put prog name at front of arglist */
|
||||
dprintf( ("argc=%d, argv[0]=%s\n", argc, argv[0]) );
|
||||
arginit(argc, argv);
|
||||
if (!safe)
|
||||
envinit(environ);
|
||||
yyparse();
|
||||
setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */
|
||||
if (fs)
|
||||
*FS = qstring(fs, '\0');
|
||||
dprintf( ("errorflag=%d\n", errorflag) );
|
||||
if (errorflag == 0) {
|
||||
compile_time = 0;
|
||||
run(winner);
|
||||
} else
|
||||
bracecheck();
|
||||
return(errorflag);
|
||||
}
|
||||
|
||||
int pgetc(void) /* get 1 character from awk program */
|
||||
{
|
||||
int c;
|
||||
|
||||
for (;;) {
|
||||
if (yyin == NULL) {
|
||||
if (curpfile >= npfile)
|
||||
return EOF;
|
||||
if (strcmp(pfile[curpfile], "-") == 0)
|
||||
yyin = stdin;
|
||||
else if ((yyin = fopen(pfile[curpfile], "r")) == NULL)
|
||||
FATAL("can't open file %s", pfile[curpfile]);
|
||||
lineno = 1;
|
||||
}
|
||||
if ((c = getc(yyin)) != EOF)
|
||||
return c;
|
||||
if (yyin != stdin)
|
||||
fclose(yyin);
|
||||
yyin = NULL;
|
||||
curpfile++;
|
||||
}
|
||||
}
|
||||
|
||||
char *cursource(void) /* current source file name */
|
||||
{
|
||||
if (npfile > 0)
|
||||
return pfile[curpfile];
|
||||
else
|
||||
return NULL;
|
||||
}
|
|
@ -0,0 +1,86 @@
|
|||
# /****************************************************************
|
||||
# Copyright (C) Lucent Technologies 1997
|
||||
# All Rights Reserved
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its documentation for any purpose and without fee is hereby
|
||||
# granted, provided that the above copyright notice appear in all
|
||||
# copies and that both that the copyright notice and this
|
||||
# permission notice and warranty disclaimer appear in supporting
|
||||
# documentation, and that the name Lucent Technologies or any of
|
||||
# its entities not be used in advertising or publicity pertaining
|
||||
# to distribution of the software without specific, written prior
|
||||
# permission.
|
||||
#
|
||||
# LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
|
||||
# IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
|
||||
# SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
# IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
||||
# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||
# THIS SOFTWARE.
|
||||
# ****************************************************************/
|
||||
|
||||
# Alternative settings are listed one after another; make uses the last
# assignment of a variable, so reorder the lines to select another one.
CFLAGS = -g
|
||||
CFLAGS = -O2
|
||||
CFLAGS =
|
||||
|
||||
# Compiler command, including its warning/profiling options.
CC = gcc -Wall -g
|
||||
CC = cc
|
||||
CC = gcc -Wall -g -Wwrite-strings
|
||||
CC = gcc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov
|
||||
CC = gcc -O4
|
||||
|
||||
# Parser generator; -d makes it write the token header as well.
YACC = bison -y
|
||||
YACC = yacc
|
||||
YFLAGS = -d
|
||||
|
||||
# Objects linked together with ytab.o into the awk binary (a.out).
OFILES = b.o main.o parse.o proctab.o tran.o lib.o run.o lex.o
|
||||
|
||||
# File lists: SOURCE and SHIP are what gets packaged, LISTING is what
# the "names" target prints.
SOURCE = awk.h ytab.c ytab.h proto.h awkgram.y lex.c b.c main.c \
|
||||
maketab.c parse.c lib.c run.c tran.c proctab.c missing95.c
|
||||
|
||||
LISTING = awk.h proto.h awkgram.y lex.c b.c main.c maketab.c parse.c \
|
||||
lib.c run.c tran.c missing95.c
|
||||
|
||||
SHIP = README FIXES $(SOURCE) ytab[ch].bak makefile makefile.win \
|
||||
vcvars32.bat buildwin.bat awk.1
|
||||
|
||||
# Default target: link the awk binary, which is left in a.out.
a.out: ytab.o $(OFILES)
|
||||
$(CC) $(CFLAGS) ytab.o $(OFILES) $(ALLOC) -lm
|
||||
|
||||
$(OFILES): awk.h ytab.h proto.h
|
||||
|
||||
# Generate the parser from the grammar, rename yacc's output files to
# ytab.c / ytab.h, and compile the parser.
ytab.o: awk.h proto.h awkgram.y
|
||||
$(YACC) $(YFLAGS) awkgram.y
|
||||
mv y.tab.c ytab.c
|
||||
mv y.tab.h ytab.h
|
||||
$(CC) $(CFLAGS) -c ytab.c
|
||||
|
||||
# proctab.c is generated by running the maketab helper program.
proctab.c: maketab
|
||||
./maketab >proctab.c
|
||||
|
||||
maketab: ytab.h maketab.c
|
||||
$(CC) $(CFLAGS) maketab.c -o maketab
|
||||
|
||||
# Packaging targets: save copies of the generated parser files under
# .bak names, then bundle / tar / zip everything listed in SHIP.
bundle:
|
||||
@cp ytab.h ytabh.bak
|
||||
@cp ytab.c ytabc.bak
|
||||
@bundle $(SHIP)
|
||||
|
||||
tar:
|
||||
@cp ytab.h ytabh.bak
|
||||
@cp ytab.c ytabc.bak
|
||||
@bundle $(SHIP) >awk.shar
|
||||
@tar cf awk.tar $(SHIP)
|
||||
gzip awk.tar
|
||||
ls -l awk.tar.gz
|
||||
@zip awk.zip $(SHIP)
|
||||
ls -l awk.zip
|
||||
|
||||
names:
|
||||
@echo $(LISTING)
|
||||
|
||||
# Remove build products; proctab.c is deliberately kept (commented out).
clean:
|
||||
rm -f a.out *.o *.obj maketab maketab.exe *.bb *.bbg *.da *.gcov *.gcno *.gcda # proctab.c
|
|
@ -0,0 +1,168 @@
|
|||
/****************************************************************
|
||||
Copyright (C) Lucent Technologies 1997
|
||||
All Rights Reserved
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and
|
||||
its documentation for any purpose and without fee is hereby
|
||||
granted, provided that the above copyright notice appear in all
|
||||
copies and that both that the copyright notice and this
|
||||
permission notice and warranty disclaimer appear in supporting
|
||||
documentation, and that the name Lucent Technologies or any of
|
||||
its entities not be used in advertising or publicity pertaining
|
||||
to distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
|
||||
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
|
||||
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
||||
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
/*
|
||||
* this program makes the table to link function names
|
||||
* and type indices that is used by execute() in run.c.
|
||||
* it finds the indices in ytab.h, produced by yacc.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "awk.h"
|
||||
#include "ytab.h"
|
||||
|
||||
struct xx
|
||||
{ int token;
|
||||
const char *name;
|
||||
const char *pname;
|
||||
} proc[] = {
|
||||
{ PROGRAM, "program", NULL },
|
||||
{ BOR, "boolop", " || " },
|
||||
{ AND, "boolop", " && " },
|
||||
{ NOT, "boolop", " !" },
|
||||
{ NE, "relop", " != " },
|
||||
{ EQ, "relop", " == " },
|
||||
{ LE, "relop", " <= " },
|
||||
{ LT, "relop", " < " },
|
||||
{ GE, "relop", " >= " },
|
||||
{ GT, "relop", " > " },
|
||||
{ ARRAY, "array", NULL },
|
||||
{ INDIRECT, "indirect", "$(" },
|
||||
{ SUBSTR, "substr", "substr" },
|
||||
{ SUB, "sub", "sub" },
|
||||
{ GSUB, "gsub", "gsub" },
|
||||
{ INDEX, "sindex", "sindex" },
|
||||
{ SPRINTF, "awksprintf", "sprintf " },
|
||||
{ ADD, "arith", " + " },
|
||||
{ MINUS, "arith", " - " },
|
||||
{ MULT, "arith", " * " },
|
||||
{ DIVIDE, "arith", " / " },
|
||||
{ MOD, "arith", " % " },
|
||||
{ UMINUS, "arith", " -" },
|
||||
{ POWER, "arith", " **" },
|
||||
{ PREINCR, "incrdecr", "++" },
|
||||
{ POSTINCR, "incrdecr", "++" },
|
||||
{ PREDECR, "incrdecr", "--" },
|
||||
{ POSTDECR, "incrdecr", "--" },
|
||||
{ CAT, "cat", " " },
|
||||
{ PASTAT, "pastat", NULL },
|
||||
{ PASTAT2, "dopa2", NULL },
|
||||
{ MATCH, "matchop", " ~ " },
|
||||
{ NOTMATCH, "matchop", " !~ " },
|
||||
{ MATCHFCN, "matchop", "matchop" },
|
||||
{ INTEST, "intest", "intest" },
|
||||
{ PRINTF, "awkprintf", "printf" },
|
||||
{ PRINT, "printstat", "print" },
|
||||
{ CLOSE, "closefile", "closefile" },
|
||||
{ DELETE, "awkdelete", "awkdelete" },
|
||||
{ SPLIT, "split", "split" },
|
||||
{ ASSIGN, "assign", " = " },
|
||||
{ ADDEQ, "assign", " += " },
|
||||
{ SUBEQ, "assign", " -= " },
|
||||
{ MULTEQ, "assign", " *= " },
|
||||
{ DIVEQ, "assign", " /= " },
|
||||
{ MODEQ, "assign", " %= " },
|
||||
{ POWEQ, "assign", " ^= " },
|
||||
{ CONDEXPR, "condexpr", " ?: " },
|
||||
{ IF, "ifstat", "if(" },
|
||||
{ WHILE, "whilestat", "while(" },
|
||||
{ FOR, "forstat", "for(" },
|
||||
{ DO, "dostat", "do" },
|
||||
{ IN, "instat", "instat" },
|
||||
{ NEXT, "jump", "next" },
|
||||
{ NEXTFILE, "jump", "nextfile" },
|
||||
{ EXIT, "jump", "exit" },
|
||||
{ BREAK, "jump", "break" },
|
||||
{ CONTINUE, "jump", "continue" },
|
||||
{ RETURN, "jump", "ret" },
|
||||
{ BLTIN, "bltin", "bltin" },
|
||||
{ CALL, "call", "call" },
|
||||
{ ARG, "arg", "arg" },
|
||||
{ VARNF, "getnf", "NF" },
|
||||
{ GETLINE, "awkgetline", "getline" },
|
||||
{ 0, "", "" },
|
||||
};
|
||||
|
||||
#define SIZE (LASTTOKEN - FIRSTTOKEN + 1)
|
||||
const char *table[SIZE];
|
||||
char *names[SIZE];
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
const struct xx *p;
|
||||
int i, n, tok;
|
||||
char c;
|
||||
FILE *fp;
|
||||
char buf[200], name[200], def[200];
|
||||
|
||||
printf("#include <stdio.h>\n");
|
||||
printf("#include \"awk.h\"\n");
|
||||
printf("#include \"ytab.h\"\n\n");
|
||||
for (i = SIZE; --i >= 0; )
|
||||
names[i] = "";
|
||||
|
||||
if ((fp = fopen("ytab.h", "r")) == NULL) {
|
||||
fprintf(stderr, "maketab can't open ytab.h!\n");
|
||||
exit(1);
|
||||
}
|
||||
printf("static char *printname[%d] = {\n", SIZE);
|
||||
i = 0;
|
||||
while (fgets(buf, sizeof buf, fp) != NULL) {
|
||||
n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok);
|
||||
if (c != '#' || (n != 4 && strcmp(def,"define") != 0)) /* not a valid #define */
|
||||
continue;
|
||||
if (tok < FIRSTTOKEN || tok > LASTTOKEN) {
|
||||
/* fprintf(stderr, "maketab funny token %d %s ignored\n", tok, buf); */
|
||||
continue;
|
||||
}
|
||||
names[tok-FIRSTTOKEN] = (char *) malloc(strlen(name)+1);
|
||||
strcpy(names[tok-FIRSTTOKEN], name);
|
||||
printf("\t(char *) \"%s\",\t/* %d */\n", name, tok);
|
||||
i++;
|
||||
}
|
||||
printf("};\n\n");
|
||||
|
||||
for (p=proc; p->token!=0; p++)
|
||||
table[p->token-FIRSTTOKEN] = p->name;
|
||||
printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE);
|
||||
for (i=0; i<SIZE; i++)
|
||||
if (table[i]==0)
|
||||
printf("\tnullproc,\t/* %s */\n", names[i]);
|
||||
else
|
||||
printf("\t%s,\t/* %s */\n", table[i], names[i]);
|
||||
printf("};\n\n");
|
||||
|
||||
printf("char *tokname(int n)\n"); /* print a tokname() function */
|
||||
printf("{\n");
|
||||
printf(" static char buf[100];\n\n");
|
||||
printf(" if (n < FIRSTTOKEN || n > LASTTOKEN) {\n");
|
||||
printf(" sprintf(buf, \"token %%d\", n);\n");
|
||||
printf(" return buf;\n");
|
||||
printf(" }\n");
|
||||
printf(" return printname[n-FIRSTTOKEN];\n");
|
||||
printf("}\n");
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,276 @@
|
|||
/****************************************************************
|
||||
Copyright (C) Lucent Technologies 1997
|
||||
All Rights Reserved
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and
|
||||
its documentation for any purpose and without fee is hereby
|
||||
granted, provided that the above copyright notice appear in all
|
||||
copies and that both that the copyright notice and this
|
||||
permission notice and warranty disclaimer appear in supporting
|
||||
documentation, and that the name Lucent Technologies or any of
|
||||
its entities not be used in advertising or publicity pertaining
|
||||
to distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
|
||||
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
|
||||
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
||||
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
#define DEBUG
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "awk.h"
|
||||
#include "ytab.h"
|
||||
|
||||
Node *nodealloc(int n)
|
||||
{
|
||||
Node *x;
|
||||
|
||||
x = (Node *) malloc(sizeof(Node) + (n-1)*sizeof(Node *));
|
||||
if (x == NULL)
|
||||
FATAL("out of space in nodealloc");
|
||||
x->nnext = NULL;
|
||||
x->lineno = lineno;
|
||||
return(x);
|
||||
}
|
||||
|
||||
/* exptostat: retag node a as a statement (ntype = NSTAT) and return it */
Node *exptostat(Node *a)
{
	a->ntype = NSTAT;
	return a;
}
|
||||
|
||||
/* node1: new node with nobj a and one argument b; ntype is left unset */
Node *node1(int a, Node *b)
{
	Node *np = nodealloc(1);

	np->nobj = a;
	np->narg[0] = b;
	return np;
}
|
||||
|
||||
/* node2: new node with nobj a and arguments b, c; ntype is left unset */
Node *node2(int a, Node *b, Node *c)
{
	Node *np = nodealloc(2);

	np->nobj = a;
	np->narg[0] = b;
	np->narg[1] = c;
	return np;
}
|
||||
|
||||
/* node3: new node with nobj a and arguments b, c, d; ntype is left unset */
Node *node3(int a, Node *b, Node *c, Node *d)
{
	Node *np = nodealloc(3);

	np->nobj = a;
	np->narg[0] = b;
	np->narg[1] = c;
	np->narg[2] = d;
	return np;
}
|
||||
|
||||
/* node4: new node with nobj a and arguments b, c, d, e; ntype is left unset */
Node *node4(int a, Node *b, Node *c, Node *d, Node *e)
{
	Node *np = nodealloc(4);

	np->nobj = a;
	np->narg[0] = b;
	np->narg[1] = c;
	np->narg[2] = d;
	np->narg[3] = e;
	return np;
}
|
||||
|
||||
/* stat1: one-argument node tagged as a statement (NSTAT) */
Node *stat1(int a, Node *b)
{
	Node *np = node1(a, b);

	np->ntype = NSTAT;
	return np;
}
|
||||
|
||||
/* stat2: two-argument node tagged as a statement (NSTAT) */
Node *stat2(int a, Node *b, Node *c)
{
	Node *np = node2(a, b, c);

	np->ntype = NSTAT;
	return np;
}
|
||||
|
||||
/* stat3: three-argument node tagged as a statement (NSTAT) */
Node *stat3(int a, Node *b, Node *c, Node *d)
{
	Node *np = node3(a, b, c, d);

	np->ntype = NSTAT;
	return np;
}
|
||||
|
||||
/* stat4: four-argument node tagged as a statement (NSTAT) */
Node *stat4(int a, Node *b, Node *c, Node *d, Node *e)
{
	Node *np = node4(a, b, c, d, e);

	np->ntype = NSTAT;
	return np;
}
|
||||
|
||||
/* op1: one-argument node tagged as an expression (NEXPR) */
Node *op1(int a, Node *b)
{
	Node *np = node1(a, b);

	np->ntype = NEXPR;
	return np;
}
|
||||
|
||||
/* op2: two-argument node tagged as an expression (NEXPR) */
Node *op2(int a, Node *b, Node *c)
{
	Node *np = node2(a, b, c);

	np->ntype = NEXPR;
	return np;
}
|
||||
|
||||
/* op3: three-argument node tagged as an expression (NEXPR) */
Node *op3(int a, Node *b, Node *c, Node *d)
{
	Node *np = node3(a, b, c, d);

	np->ntype = NEXPR;
	return np;
}
|
||||
|
||||
/* op4: four-argument node tagged as an expression (NEXPR) */
Node *op4(int a, Node *b, Node *c, Node *d, Node *e)
{
	Node *np = node4(a, b, c, d, e);

	np->ntype = NEXPR;
	return np;
}
|
||||
|
||||
/*
 * celltonode: wrap Cell a in a value node.
 * Stamps the cell with ctype OCELL and csub b, then returns an NVALUE node
 * whose nobj is 0 and whose narg[0] holds the cell pointer cast to Node *.
 */
Node *celltonode(Cell *a, int b)
{
	Node *np;

	a->ctype = OCELL;
	a->csub = b;
	np = node1(0, (Node *) a);
	np->ntype = NVALUE;
	return np;
}
|
||||
|
||||
Node *rectonode(void) /* make $0 into a Node */
|
||||
{
|
||||
extern Cell *literal0;
|
||||
return op1(INDIRECT, celltonode(literal0, CUNK));
|
||||
}
|
||||
|
||||
/*
 * makearr: make the cell behind value node p usable as an array.
 * Non-value nodes and cells that are already arrays are returned untouched.
 * A function cell is reported through SYNTAX.  Any other cell has its
 * string value released and replaced by a fresh symbol table of NSYMTAB
 * buckets, with tval set to ARR.  Always returns p.
 */
Node *makearr(Node *p)
{
	Cell *cp;

	if (!(isvalue(p)))
		return p;

	cp = (Cell *) (p->narg[0]);
	if (isfcn(cp)) {
		SYNTAX( "%s is a function, not an array", cp->nval );
	} else if (!isarr(cp)) {
		xfree(cp->sval);
		cp->sval = (char *) makesymtab(NSYMTAB);
		cp->tval = ARR;
	}
	return p;
}
|
||||
|
||||
#define PA2NUM 50 /* max number of pat,pat patterns allowed */
|
||||
int paircnt; /* number of them in use */
|
||||
int pairstack[PA2NUM]; /* state of each pat,pat */
|
||||
|
||||
Node *pa2stat(Node *a, Node *b, Node *c) /* pat, pat {...} */
|
||||
{
|
||||
Node *x;
|
||||
|
||||
x = node4(PASTAT2, a, b, c, itonp(paircnt));
|
||||
if (paircnt++ >= PA2NUM)
|
||||
SYNTAX( "limited to %d pat,pat statements", PA2NUM );
|
||||
x->ntype = NSTAT;
|
||||
return(x);
|
||||
}
|
||||
|
||||
/*
 * linkum: append list b to the end of list a (lists are chained by nnext).
 * Returns a unchanged when errorflag is set or b is NULL, b when a is NULL,
 * and otherwise a with b hooked onto its last element.
 */
Node *linkum(Node *a, Node *b)
{
	Node *tail;

	if (errorflag)	/* don't link things that are wrong */
		return a;
	if (a == NULL)
		return b;
	if (b == NULL)
		return a;

	tail = a;
	while (tail->nnext != NULL)
		tail = tail->nnext;
	tail->nnext = b;
	return a;
}
|
||||
|
||||
/*
 * defn: record a function definition in cell v.
 * v is the function's cell, vl its argument list (chained by nnext) and
 * st its body.  Reports a SYNTAX error and leaves v untouched when v is an
 * array or its name is also an argument name.  Otherwise tval becomes FCN,
 * sval holds the body pointer and fval holds the argument count.
 */
void defn(Cell *v, Node *vl, Node *st)
{
	Node *formal;
	int nargs;

	if (isarr(v)) {
		SYNTAX( "`%s' is an array name and a function name", v->nval );
		return;
	}
	if (isarg(v->nval) != -1) {
		SYNTAX( "`%s' is both function name and argument name", v->nval );
		return;
	}

	v->tval = FCN;
	v->sval = (char *) st;

	/* count arguments */
	nargs = 0;
	formal = vl;
	while (formal) {
		nargs++;
		formal = formal->nnext;
	}
	v->fval = nargs;
	dprintf( ("defining func %s (%d args)\n", v->nval, nargs) );
}
|
||||
|
||||
int isarg(const char *s) /* is s in argument list for current function? */
|
||||
{ /* return -1 if not, otherwise arg # */
|
||||
extern Node *arglist;
|
||||
Node *p = arglist;
|
||||
int n;
|
||||
|
||||
for (n = 0; p != 0; p = p->nnext, n++)
|
||||
if (strcmp(((Cell *)(p->narg[0]))->nval, s) == 0)
|
||||
return n;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* ptoi: convert pointer to integer, going through long first */
int ptoi(void *p)
{
	long bits = (long) p;	/* swearing that p fits, of course */

	return (int) bits;
}
|
||||
|
||||
Node *itonp(int i) /* and vice versa */
|
||||
{
|
||||
return (Node *) (long) i;
|
||||
}
|
|
@ -0,0 +1,207 @@
|
|||
#include <stdio.h>
|
||||
#include "awk.h"
|
||||
#include "ytab.h"
|
||||
|
||||
/*
 * printname: printable name of every token, indexed by (token - FIRSTTOKEN);
 * tokname() in this file returns entries of this table.  The trailing
 * comment on each entry is the token's numeric value.
 */
static char *printname[93] = {
|
||||
(char *) "FIRSTTOKEN", /* 258 */
|
||||
(char *) "PROGRAM", /* 259 */
|
||||
(char *) "PASTAT", /* 260 */
|
||||
(char *) "PASTAT2", /* 261 */
|
||||
(char *) "XBEGIN", /* 262 */
|
||||
(char *) "XEND", /* 263 */
|
||||
(char *) "NL", /* 264 */
|
||||
(char *) "ARRAY", /* 265 */
|
||||
(char *) "MATCH", /* 266 */
|
||||
(char *) "NOTMATCH", /* 267 */
|
||||
(char *) "MATCHOP", /* 268 */
|
||||
(char *) "FINAL", /* 269 */
|
||||
(char *) "DOT", /* 270 */
|
||||
(char *) "ALL", /* 271 */
|
||||
(char *) "CCL", /* 272 */
|
||||
(char *) "NCCL", /* 273 */
|
||||
(char *) "CHAR", /* 274 */
|
||||
(char *) "OR", /* 275 */
|
||||
(char *) "STAR", /* 276 */
|
||||
(char *) "QUEST", /* 277 */
|
||||
(char *) "PLUS", /* 278 */
|
||||
(char *) "EMPTYRE", /* 279 */
|
||||
(char *) "AND", /* 280 */
|
||||
(char *) "BOR", /* 281 */
|
||||
(char *) "APPEND", /* 282 */
|
||||
(char *) "EQ", /* 283 */
|
||||
(char *) "GE", /* 284 */
|
||||
(char *) "GT", /* 285 */
|
||||
(char *) "LE", /* 286 */
|
||||
(char *) "LT", /* 287 */
|
||||
(char *) "NE", /* 288 */
|
||||
(char *) "IN", /* 289 */
|
||||
(char *) "ARG", /* 290 */
|
||||
(char *) "BLTIN", /* 291 */
|
||||
(char *) "BREAK", /* 292 */
|
||||
(char *) "CLOSE", /* 293 */
|
||||
(char *) "CONTINUE", /* 294 */
|
||||
(char *) "DELETE", /* 295 */
|
||||
(char *) "DO", /* 296 */
|
||||
(char *) "EXIT", /* 297 */
|
||||
(char *) "FOR", /* 298 */
|
||||
(char *) "FUNC", /* 299 */
|
||||
(char *) "SUB", /* 300 */
|
||||
(char *) "GSUB", /* 301 */
|
||||
(char *) "IF", /* 302 */
|
||||
(char *) "INDEX", /* 303 */
|
||||
(char *) "LSUBSTR", /* 304 */
|
||||
(char *) "MATCHFCN", /* 305 */
|
||||
(char *) "NEXT", /* 306 */
|
||||
(char *) "NEXTFILE", /* 307 */
|
||||
(char *) "ADD", /* 308 */
|
||||
(char *) "MINUS", /* 309 */
|
||||
(char *) "MULT", /* 310 */
|
||||
(char *) "DIVIDE", /* 311 */
|
||||
(char *) "MOD", /* 312 */
|
||||
(char *) "ASSIGN", /* 313 */
|
||||
(char *) "ASGNOP", /* 314 */
|
||||
(char *) "ADDEQ", /* 315 */
|
||||
(char *) "SUBEQ", /* 316 */
|
||||
(char *) "MULTEQ", /* 317 */
|
||||
(char *) "DIVEQ", /* 318 */
|
||||
(char *) "MODEQ", /* 319 */
|
||||
(char *) "POWEQ", /* 320 */
|
||||
(char *) "PRINT", /* 321 */
|
||||
(char *) "PRINTF", /* 322 */
|
||||
(char *) "SPRINTF", /* 323 */
|
||||
(char *) "ELSE", /* 324 */
|
||||
(char *) "INTEST", /* 325 */
|
||||
(char *) "CONDEXPR", /* 326 */
|
||||
(char *) "POSTINCR", /* 327 */
|
||||
(char *) "PREINCR", /* 328 */
|
||||
(char *) "POSTDECR", /* 329 */
|
||||
(char *) "PREDECR", /* 330 */
|
||||
(char *) "VAR", /* 331 */
|
||||
(char *) "IVAR", /* 332 */
|
||||
(char *) "VARNF", /* 333 */
|
||||
(char *) "CALL", /* 334 */
|
||||
(char *) "NUMBER", /* 335 */
|
||||
(char *) "STRING", /* 336 */
|
||||
(char *) "REGEXPR", /* 337 */
|
||||
(char *) "GETLINE", /* 338 */
|
||||
(char *) "SUBSTR", /* 339 */
|
||||
(char *) "SPLIT", /* 340 */
|
||||
(char *) "RETURN", /* 341 */
|
||||
(char *) "WHILE", /* 342 */
|
||||
(char *) "CAT", /* 343 */
|
||||
(char *) "UMINUS", /* 344 */
|
||||
(char *) "NOT", /* 345 */
|
||||
(char *) "POWER", /* 346 */
|
||||
(char *) "INCR", /* 347 */
|
||||
(char *) "DECR", /* 348 */
|
||||
(char *) "INDIRECT", /* 349 */
|
||||
(char *) "LASTTOKEN", /* 350 */
|
||||
};
|
||||
|
||||
|
||||
/*
 * proctab: one function pointer per token, listed in the same token order
 * as printname (the trailing comment names the token each entry belongs
 * to).  Tokens with no function of their own are given nullproc.
 */
Cell *(*proctab[93])(Node **, int) = {
|
||||
nullproc, /* FIRSTTOKEN */
|
||||
program, /* PROGRAM */
|
||||
pastat, /* PASTAT */
|
||||
dopa2, /* PASTAT2 */
|
||||
nullproc, /* XBEGIN */
|
||||
nullproc, /* XEND */
|
||||
nullproc, /* NL */
|
||||
array, /* ARRAY */
|
||||
matchop, /* MATCH */
|
||||
matchop, /* NOTMATCH */
|
||||
nullproc, /* MATCHOP */
|
||||
nullproc, /* FINAL */
|
||||
nullproc, /* DOT */
|
||||
nullproc, /* ALL */
|
||||
nullproc, /* CCL */
|
||||
nullproc, /* NCCL */
|
||||
nullproc, /* CHAR */
|
||||
nullproc, /* OR */
|
||||
nullproc, /* STAR */
|
||||
nullproc, /* QUEST */
|
||||
nullproc, /* PLUS */
|
||||
nullproc, /* EMPTYRE */
|
||||
boolop, /* AND */
|
||||
boolop, /* BOR */
|
||||
nullproc, /* APPEND */
|
||||
relop, /* EQ */
|
||||
relop, /* GE */
|
||||
relop, /* GT */
|
||||
relop, /* LE */
|
||||
relop, /* LT */
|
||||
relop, /* NE */
|
||||
instat, /* IN */
|
||||
arg, /* ARG */
|
||||
bltin, /* BLTIN */
|
||||
jump, /* BREAK */
|
||||
closefile, /* CLOSE */
|
||||
jump, /* CONTINUE */
|
||||
awkdelete, /* DELETE */
|
||||
dostat, /* DO */
|
||||
jump, /* EXIT */
|
||||
forstat, /* FOR */
|
||||
nullproc, /* FUNC */
|
||||
sub, /* SUB */
|
||||
gsub, /* GSUB */
|
||||
ifstat, /* IF */
|
||||
sindex, /* INDEX */
|
||||
nullproc, /* LSUBSTR */
|
||||
matchop, /* MATCHFCN */
|
||||
jump, /* NEXT */
|
||||
jump, /* NEXTFILE */
|
||||
arith, /* ADD */
|
||||
arith, /* MINUS */
|
||||
arith, /* MULT */
|
||||
arith, /* DIVIDE */
|
||||
arith, /* MOD */
|
||||
assign, /* ASSIGN */
|
||||
nullproc, /* ASGNOP */
|
||||
assign, /* ADDEQ */
|
||||
assign, /* SUBEQ */
|
||||
assign, /* MULTEQ */
|
||||
assign, /* DIVEQ */
|
||||
assign, /* MODEQ */
|
||||
assign, /* POWEQ */
|
||||
printstat, /* PRINT */
|
||||
awkprintf, /* PRINTF */
|
||||
awksprintf, /* SPRINTF */
|
||||
nullproc, /* ELSE */
|
||||
intest, /* INTEST */
|
||||
condexpr, /* CONDEXPR */
|
||||
incrdecr, /* POSTINCR */
|
||||
incrdecr, /* PREINCR */
|
||||
incrdecr, /* POSTDECR */
|
||||
incrdecr, /* PREDECR */
|
||||
nullproc, /* VAR */
|
||||
nullproc, /* IVAR */
|
||||
getnf, /* VARNF */
|
||||
call, /* CALL */
|
||||
nullproc, /* NUMBER */
|
||||
nullproc, /* STRING */
|
||||
nullproc, /* REGEXPR */
|
||||
awkgetline, /* GETLINE */
|
||||
substr, /* SUBSTR */
|
||||
split, /* SPLIT */
|
||||
jump, /* RETURN */
|
||||
whilestat, /* WHILE */
|
||||
cat, /* CAT */
|
||||
arith, /* UMINUS */
|
||||
boolop, /* NOT */
|
||||
arith, /* POWER */
|
||||
nullproc, /* INCR */
|
||||
nullproc, /* DECR */
|
||||
indirect, /* INDIRECT */
|
||||
nullproc, /* LASTTOKEN */
|
||||
};
|
||||
|
||||
char *tokname(int n)
|
||||
{
|
||||
static char buf[100];
|
||||
|
||||
if (n < FIRSTTOKEN || n > LASTTOKEN) {
|
||||
sprintf(buf, "token %d", n);
|
||||
return buf;
|
||||
}
|
||||
return printname[n-FIRSTTOKEN];
|
||||
}
|
|
@ -0,0 +1,195 @@
|
|||
/****************************************************************
|
||||
Copyright (C) Lucent Technologies 1997
|
||||
All Rights Reserved
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and
|
||||
its documentation for any purpose and without fee is hereby
|
||||
granted, provided that the above copyright notice appear in all
|
||||
copies and that both that the copyright notice and this
|
||||
permission notice and warranty disclaimer appear in supporting
|
||||
documentation, and that the name Lucent Technologies or any of
|
||||
its entities not be used in advertising or publicity pertaining
|
||||
to distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
|
||||
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
|
||||
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
||||
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
extern int yywrap(void);
|
||||
extern void setfname(Cell *);
|
||||
extern int constnode(Node *);
|
||||
extern char *strnode(Node *);
|
||||
extern Node *notnull(Node *);
|
||||
extern int yyparse(void);
|
||||
|
||||
extern int yylex(void);
|
||||
extern void startreg(void);
|
||||
extern int input(void);
|
||||
extern void unput(int);
|
||||
extern void unputstr(const char *);
|
||||
extern int yylook(void);
|
||||
extern int yyback(int *, int);
|
||||
extern int yyinput(void);
|
||||
|
||||
extern fa *makedfa(const char *, int);
|
||||
extern fa *mkdfa(const char *, int);
|
||||
extern int makeinit(fa *, int);
|
||||
extern void penter(Node *);
|
||||
extern void freetr(Node *);
|
||||
extern int hexstr(char **);
|
||||
extern int quoted(char **);
|
||||
extern char *cclenter(const char *);
|
||||
extern void overflo(const char *);
|
||||
extern void cfoll(fa *, Node *);
|
||||
extern int first(Node *);
|
||||
extern void follow(Node *);
|
||||
extern int member(int, const char *);
|
||||
extern int match(fa *, const char *);
|
||||
extern int pmatch(fa *, const char *);
|
||||
extern int nematch(fa *, const char *);
|
||||
extern Node *reparse(const char *);
|
||||
extern Node *regexp(void);
|
||||
extern Node *primary(void);
|
||||
extern Node *concat(Node *);
|
||||
extern Node *alt(Node *);
|
||||
extern Node *unary(Node *);
|
||||
extern int relex(void);
|
||||
extern int cgoto(fa *, int, int);
|
||||
extern void freefa(fa *);
|
||||
|
||||
extern int pgetc(void);
|
||||
extern char *cursource(void);
|
||||
|
||||
extern Node *nodealloc(int);
|
||||
extern Node *exptostat(Node *);
|
||||
extern Node *node1(int, Node *);
|
||||
extern Node *node2(int, Node *, Node *);
|
||||
extern Node *node3(int, Node *, Node *, Node *);
|
||||
extern Node *node4(int, Node *, Node *, Node *, Node *);
|
||||
extern Node *stat3(int, Node *, Node *, Node *);
|
||||
extern Node *op2(int, Node *, Node *);
|
||||
extern Node *op1(int, Node *);
|
||||
extern Node *stat1(int, Node *);
|
||||
extern Node *op3(int, Node *, Node *, Node *);
|
||||
extern Node *op4(int, Node *, Node *, Node *, Node *);
|
||||
extern Node *stat2(int, Node *, Node *);
|
||||
extern Node *stat4(int, Node *, Node *, Node *, Node *);
|
||||
extern Node *celltonode(Cell *, int);
|
||||
extern Node *rectonode(void);
|
||||
extern Node *makearr(Node *);
|
||||
extern Node *pa2stat(Node *, Node *, Node *);
|
||||
extern Node *linkum(Node *, Node *);
|
||||
extern void defn(Cell *, Node *, Node *);
|
||||
extern int isarg(const char *);
|
||||
extern char *tokname(int);
|
||||
extern Cell *(*proctab[])(Node **, int);
|
||||
extern int ptoi(void *);
|
||||
extern Node *itonp(int);
|
||||
|
||||
extern void syminit(void);
|
||||
extern void arginit(int, char **);
|
||||
extern void envinit(char **);
|
||||
extern Array *makesymtab(int);
|
||||
extern void freesymtab(Cell *);
|
||||
extern void freeelem(Cell *, const char *);
|
||||
extern Cell *setsymtab(const char *, const char *, double, unsigned int, Array *);
|
||||
extern int hash(const char *, int);
|
||||
extern void rehash(Array *);
|
||||
extern Cell *lookup(const char *, Array *);
|
||||
extern double setfval(Cell *, double);
|
||||
extern void funnyvar(Cell *, const char *);
|
||||
extern char *setsval(Cell *, const char *);
|
||||
extern double getfval(Cell *);
|
||||
extern char *getsval(Cell *);
|
||||
extern char *getpssval(Cell *); /* for print */
|
||||
extern char *tostring(const char *);
|
||||
extern char *qstring(const char *, int);
|
||||
|
||||
extern void recinit(unsigned int);
|
||||
extern void initgetrec(void);
|
||||
extern void makefields(int, int);
|
||||
extern void growfldtab(int n);
|
||||
extern int getrec(char **, int *, int);
|
||||
extern void nextfile(void);
|
||||
extern int readrec(char **buf, int *bufsize, FILE *inf);
|
||||
extern char *getargv(int);
|
||||
extern void setclvar(char *);
|
||||
extern void fldbld(void);
|
||||
extern void cleanfld(int, int);
|
||||
extern void newfld(int);
|
||||
extern int refldbld(const char *, const char *);
|
||||
extern void recbld(void);
|
||||
extern Cell *fieldadr(int);
|
||||
extern void yyerror(const char *);
|
||||
extern void fpecatch(int);
|
||||
extern void bracecheck(void);
|
||||
extern void bcheck2(int, int, int);
|
||||
extern void SYNTAX(const char *, ...);
|
||||
extern void FATAL(const char *, ...);
|
||||
extern void WARNING(const char *, ...);
|
||||
extern void error(void);
|
||||
extern void eprint(void);
|
||||
extern void bclass(int);
|
||||
extern double errcheck(double, const char *);
|
||||
extern int isclvar(const char *);
|
||||
extern int is_number(const char *);
|
||||
|
||||
extern int adjbuf(char **pb, int *sz, int min, int q, char **pbp, const char *what);
|
||||
extern void run(Node *);
|
||||
extern Cell *execute(Node *);
|
||||
extern Cell *program(Node **, int);
|
||||
extern Cell *call(Node **, int);
|
||||
extern Cell *copycell(Cell *);
|
||||
extern Cell *arg(Node **, int);
|
||||
extern Cell *jump(Node **, int);
|
||||
extern Cell *awkgetline(Node **, int);
|
||||
extern Cell *getnf(Node **, int);
|
||||
extern Cell *array(Node **, int);
|
||||
extern Cell *awkdelete(Node **, int);
|
||||
extern Cell *intest(Node **, int);
|
||||
extern Cell *matchop(Node **, int);
|
||||
extern Cell *boolop(Node **, int);
|
||||
extern Cell *relop(Node **, int);
|
||||
extern void tfree(Cell *);
|
||||
extern Cell *gettemp(void);
|
||||
extern Cell *field(Node **, int);
|
||||
extern Cell *indirect(Node **, int);
|
||||
extern Cell *substr(Node **, int);
|
||||
extern Cell *sindex(Node **, int);
|
||||
extern int format(char **, int *, const char *, Node *);
|
||||
extern Cell *awksprintf(Node **, int);
|
||||
extern Cell *awkprintf(Node **, int);
|
||||
extern Cell *arith(Node **, int);
|
||||
extern double ipow(double, int);
|
||||
extern Cell *incrdecr(Node **, int);
|
||||
extern Cell *assign(Node **, int);
|
||||
extern Cell *cat(Node **, int);
|
||||
extern Cell *pastat(Node **, int);
|
||||
extern Cell *dopa2(Node **, int);
|
||||
extern Cell *split(Node **, int);
|
||||
extern Cell *condexpr(Node **, int);
|
||||
extern Cell *ifstat(Node **, int);
|
||||
extern Cell *whilestat(Node **, int);
|
||||
extern Cell *dostat(Node **, int);
|
||||
extern Cell *forstat(Node **, int);
|
||||
extern Cell *instat(Node **, int);
|
||||
extern Cell *bltin(Node **, int);
|
||||
extern Cell *printstat(Node **, int);
|
||||
extern Cell *nullproc(Node **, int);
|
||||
extern FILE *redirect(int, Node *);
|
||||
extern FILE *openfile(int, const char *);
|
||||
extern const char *filename(FILE *);
|
||||
extern Cell *closefile(Node **, int);
|
||||
extern void closeall(void);
|
||||
extern Cell *sub(Node **, int);
|
||||
extern Cell *gsub(Node **, int);
|
||||
|
||||
extern FILE *popen(const char *, const char *);
|
||||
extern int pclose(FILE *);
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,455 @@
|
|||
/****************************************************************
|
||||
Copyright (C) Lucent Technologies 1997
|
||||
All Rights Reserved
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and
|
||||
its documentation for any purpose and without fee is hereby
|
||||
granted, provided that the above copyright notice appear in all
|
||||
copies and that both that the copyright notice and this
|
||||
permission notice and warranty disclaimer appear in supporting
|
||||
documentation, and that the name Lucent Technologies or any of
|
||||
its entities not be used in advertising or publicity pertaining
|
||||
to distribution of the software without specific, written prior
|
||||
permission.
|
||||
|
||||
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
|
||||
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
|
||||
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
||||
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
#define DEBUG
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "awk.h"
|
||||
#include "ytab.h"
|
||||
|
||||
/*
 * File-scope state of the symbol table module.  The char ** and Awkfloat *
 * variables are set in syminit/arginit to point at the sval/fval field of
 * the cell holding the corresponding builtin variable.
 */
#define FULLTAB 2 /* rehash when table gets this x full */
|
||||
#define GROWTAB 4 /* grow table by this factor */
|
||||
|
||||
Array *symtab; /* main symbol table */
|
||||
|
||||
char **FS; /* initial field sep */
|
||||
char **RS; /* initial record sep */
|
||||
char **OFS; /* output field sep */
|
||||
char **ORS; /* output record sep */
|
||||
char **OFMT; /* output format for numbers */
|
||||
char **CONVFMT; /* format for conversions in getsval */
|
||||
Awkfloat *NF; /* number of fields in current record */
|
||||
Awkfloat *NR; /* number of current record */
|
||||
Awkfloat *FNR; /* number of current record in current file */
|
||||
char **FILENAME; /* current filename argument */
|
||||
Awkfloat *ARGC; /* number of arguments from command line */
|
||||
char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */
|
||||
Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */
|
||||
Awkfloat *RLENGTH; /* length of same */
|
||||
|
||||
Cell *fsloc; /* FS */
|
||||
Cell *nrloc; /* NR */
|
||||
Cell *nfloc; /* NF */
|
||||
Cell *fnrloc; /* FNR */
|
||||
Array *ARGVtab; /* symbol table containing ARGV[...] */
|
||||
Array *ENVtab; /* symbol table containing ENVIRON[...] */
|
||||
Cell *rstartloc; /* RSTART */
|
||||
Cell *rlengthloc; /* RLENGTH */
|
||||
Cell *symtabloc; /* SYMTAB */
|
||||
|
||||
Cell *nullloc; /* a guaranteed empty cell */
|
||||
Node *nullnode; /* zero&null, converted into a node for comparisons */
|
||||
Cell *literal0; /* cell for the constant "0", created in syminit */
|
||||
|
||||
extern Cell **fldtab;
|
||||
|
||||
void syminit(void) /* initialize symbol table with builtin vars */
|
||||
{
|
||||
literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
|
||||
/* this is used for if(x)... tests: */
|
||||
nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
|
||||
nullnode = celltonode(nullloc, CCON);
|
||||
|
||||
fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
|
||||
FS = &fsloc->sval;
|
||||
RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
|
||||
OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
|
||||
ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
|
||||
OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
|
||||
CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
|
||||
FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
|
||||
nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
|
||||
NF = &nfloc->fval;
|
||||
nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
|
||||
NR = &nrloc->fval;
|
||||
fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
|
||||
FNR = &fnrloc->fval;
|
||||
SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
|
||||
rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
|
||||
RSTART = &rstartloc->fval;
|
||||
rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
|
||||
RLENGTH = &rlengthloc->fval;
|
||||
symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
|
||||
symtabloc->sval = (char *) symtab;
|
||||
}
|
||||
|
||||
void arginit(int ac, char **av) /* set up ARGV and ARGC */
|
||||
{
|
||||
Cell *cp;
|
||||
int i;
|
||||
char temp[50];
|
||||
|
||||
ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
|
||||
cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
|
||||
ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
|
||||
cp->sval = (char *) ARGVtab;
|
||||
for (i = 0; i < ac; i++) {
|
||||
sprintf(temp, "%d", i);
|
||||
if (is_number(*av))
|
||||
setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
|
||||
else
|
||||
setsymtab(temp, *av, 0.0, STR, ARGVtab);
|
||||
av++;
|
||||
}
|
||||
}
|
||||
|
||||
void envinit(char **envp) /* set up ENVIRON variable */
|
||||
{
|
||||
Cell *cp;
|
||||
char *p;
|
||||
|
||||
cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
|
||||
ENVtab = makesymtab(NSYMTAB);
|
||||
cp->sval = (char *) ENVtab;
|
||||
for ( ; *envp; envp++) {
|
||||
if ((p = strchr(*envp, '=')) == NULL)
|
||||
continue;
|
||||
if( p == *envp ) /* no left hand side name in env string */
|
||||
continue;
|
||||
*p++ = 0; /* split into two strings at = */
|
||||
if (is_number(p))
|
||||
setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
|
||||
else
|
||||
setsymtab(*envp, p, 0.0, STR, ENVtab);
|
||||
p[-1] = '='; /* restore in case env is passed down to a shell */
|
||||
}
|
||||
}
|
||||
|
||||
Array *makesymtab(int n) /* make a new symbol table */
|
||||
{
|
||||
Array *ap;
|
||||
Cell **tp;
|
||||
|
||||
ap = (Array *) malloc(sizeof(Array));
|
||||
tp = (Cell **) calloc(n, sizeof(Cell *));
|
||||
if (ap == NULL || tp == NULL)
|
||||
FATAL("out of space in makesymtab");
|
||||
ap->nelem = 0;
|
||||
ap->size = n;
|
||||
ap->tab = tp;
|
||||
return(ap);
|
||||
}
|
||||
|
||||
/*
 * freesymtab: free the symbol table held in array cell ap.
 * Does nothing when ap is not an array or has no table.  Every cell in
 * every bucket is released (name, string value when freeable, then the
 * cell), followed by the bucket vector and the Array.  A warning is
 * issued if the element count does not come out at zero.
 */
void freesymtab(Cell *ap)
{
	Cell *cp, *next;
	Array *tp;
	int i;

	if (!isarr(ap))
		return;
	tp = (Array *) ap->sval;
	if (tp == NULL)
		return;

	for (i = 0; i < tp->size; i++) {
		cp = tp->tab[i];
		while (cp != NULL) {
			next = cp->cnext;	/* remember the link before cp is freed */
			xfree(cp->nval);
			if (freeable(cp))
				xfree(cp->sval);
			free(cp);
			tp->nelem--;
			cp = next;
		}
		tp->tab[i] = 0;
	}
	if (tp->nelem != 0)
		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
	free(tp->tab);
	free(tp);
}
|
||||
|
||||
/*
 * freeelem: free elem s from ap (i.e., ap["s"]).
 * Looks for the first cell named s in the bucket that s hashes to, unlinks
 * it, frees its string value (when freeable), its name and the cell, and
 * decrements the element count.  Does nothing if s is not present.
 */
void freeelem(Cell *ap, const char *s)
{
	Array *tp = (Array *) ap->sval;
	int h = hash(s, tp->size);
	Cell **link;	/* the pointer that currently leads to p */
	Cell *p;

	for (link = &tp->tab[h]; (p = *link) != NULL; link = &p->cnext) {
		if (strcmp(s, p->nval) != 0)
			continue;
		*link = p->cnext;	/* works for the first cell and for later ones */
		if (freeable(p))
			xfree(p->sval);
		free(p->nval);
		free(p);
		tp->nelem--;
		return;
	}
}
|
||||
|
||||
/*
 * setsymtab: find or create the cell named n in table tp.
 * If n is non-NULL and already present, the existing cell is returned
 * untouched.  Otherwise a new cell is allocated with a copy of n as name,
 * a copy of s (or "" when s is NULL) as string value, f as numeric value
 * and t as type bits; the table is rehashed first when the new element
 * count exceeds FULLTAB times the bucket count, and the cell is pushed on
 * the front of its bucket.  Calls FATAL when the cell cannot be allocated.
 */
Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
{
	Cell *p = NULL;
	int bucket;

	if (n != NULL)
		p = lookup(n, tp);
	if (p != NULL) {
		dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
			p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
		return p;
	}

	p = (Cell *) malloc(sizeof(Cell));
	if (p == NULL)
		FATAL("out of space for symbol table at %s", n);
	p->nval = tostring(n);
	p->sval = tostring(s ? s : "");
	p->fval = f;
	p->tval = t;
	p->csub = CUNK;
	p->ctype = OCELL;

	tp->nelem++;
	if (tp->nelem > FULLTAB * tp->size)
		rehash(tp);
	/* hash only after a possible rehash, since the bucket count may have changed */
	bucket = hash(n, tp->size);
	p->cnext = tp->tab[bucket];
	tp->tab[bucket] = p;
	dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
		p, p->nval, p->sval, p->fval, p->tval) );
	return p;
}
|
||||
|
||||
/*
 * hash: form the hash value for string s, reduced modulo n.
 *
 * The accumulator is multiplied by 31 and the next character added for
 * each character of s; unsigned arithmetic makes overflow wrap.
 * n must be non-zero.
 */
int hash(const char *s, int n)
{
	unsigned acc = 0;
	const char *cp = s;

	while (*cp != '\0') {
		acc = 31 * acc + *cp;
		cp++;
	}
	return acc % n;
}
|
||||
|
||||
/*
 * rehash: move every cell of tp into a bucket array GROWTAB times larger.
 *
 * If the larger array cannot be allocated the table is left as it was and
 * the function returns quietly; the program can keep running with the
 * smaller table.  Otherwise each cell is relinked at the head of its new
 * chain, the old bucket array is freed, and tp->tab / tp->size are updated.
 */
void rehash(Array *tp)
{
	int newsize = GROWTAB * tp->size;
	Cell **bigger = (Cell **) calloc(newsize, sizeof(Cell *));
	int slot;

	if (bigger == NULL) {
		return;		/* not fatal here; a later allocation will report it */
	}

	for (slot = 0; slot < tp->size; slot++) {
		Cell *cur = tp->tab[slot];

		while (cur != NULL) {
			Cell *rest = cur->cnext;	/* saved before cnext is overwritten */
			int dest = hash(cur->nval, newsize);

			cur->cnext = bigger[dest];
			bigger[dest] = cur;
			cur = rest;
		}
	}

	free(tp->tab);
	tp->tab = bigger;
	tp->size = newsize;
}
|
||||
|
||||
/*
 * lookup: find the cell named s in table tp.
 *
 * Walks the hash chain selected by hash(s, tp->size) and returns the
 * first cell whose nval equals s, or NULL when there is none.
 */
Cell *lookup(const char *s, Array *tp)
{
	Cell *cur = tp->tab[hash(s, tp->size)];

	while (cur != NULL && strcmp(s, cur->nval) != 0) {
		cur = cur->cnext;
	}
	return cur;	/* NULL here means the chain was exhausted */
}
|
||||
|
||||
/*
 * setfval: store the numeric value f in cell vp and return it.
 *
 * A cell with neither NUM nor STR set is reported through funnyvar().
 * Assigning to a field marks $0 invalid and, when the field number
 * (parsed from the cell name) exceeds *NF, calls newfld(); assigning to
 * the record marks $1... invalid.  Any freeable previous string value is
 * released, STR is cleared and NUM is set.
 */
Awkfloat setfval(Cell *vp, Awkfloat f)
{
	if ((vp->tval & (NUM | STR)) == 0) {
		funnyvar(vp, "assign to");
	}

	if (isfld(vp)) {
		int fieldnum;

		donerec = 0;	/* $0 no longer reflects the fields */
		fieldnum = atoi(vp->nval);
		if (fieldnum > *NF) {
			newfld(fieldnum);
		}
		dprintf( ("setting field %d to %g\n", fieldnum, f) );
	} else if (isrec(vp)) {
		donefld = 0;	/* $1... no longer reflect the record */
		donerec = 1;
	}

	if (freeable(vp)) {
		xfree(vp->sval);	/* drop the stale string form */
	}
	vp->tval = (vp->tval & ~STR) | NUM;	/* string invalid, number valid */
	dprintf( ("setfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), f, vp->tval) );
	vp->fval = f;
	return vp->fval;
}
|
||||
|
||||
/*
 * funnyvar: complain about an attempt to use cell vp as a scalar.
 *
 * rw is the verb phrase spliced into the message (callers in this file
 * pass "assign to" or "read value of").  Array names and functions are
 * reported through FATAL; after those checks a WARNING dumps the cell.
 */
void funnyvar(Cell *vp, const char *rw)
{
	if (isarr(vp)) {
		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
	}
	if ((vp->tval & FCN) != 0) {
		FATAL("can't %s %s; it's a function.", rw, vp->nval);
	}
	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
		vp, vp->nval, vp->sval, vp->fval, vp->tval);
}
|
||||
|
||||
/*
 * setsval: store a copy of string s in cell vp and return that copy.
 *
 * A cell with neither NUM nor STR set is reported through funnyvar().
 * Assigning to a field marks $0 invalid and, when the field number
 * (parsed from the cell name) exceeds *NF, calls newfld(); assigning to
 * the record marks $1... invalid.  Afterwards NUM and DONTFREE are
 * cleared and STR is set.
 */
char *setsval(Cell *vp, const char *s)
{
	char *copy;

	dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
		vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
	if ((vp->tval & (NUM | STR)) == 0) {
		funnyvar(vp, "assign to");
	}

	if (isfld(vp)) {
		int fieldnum;

		donerec = 0;	/* $0 no longer reflects the fields */
		fieldnum = atoi(vp->nval);
		if (fieldnum > *NF) {
			newfld(fieldnum);
		}
		dprintf( ("setting field %d to %s (%p)\n", fieldnum, s, s) );
	} else if (isrec(vp)) {
		donefld = 0;	/* $1... no longer reflect the record */
		donerec = 1;
	}

	/*
	 * Duplicate before releasing the old value: s may be vp->sval itself
	 * (self-assignment), and freeing first would leave nothing to copy.
	 */
	copy = tostring(s);
	if (freeable(vp)) {
		xfree(vp->sval);
	}
	vp->tval = ((vp->tval & ~NUM) | STR) & ~DONTFREE;
	dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
		vp, NN(vp->nval), copy,copy, vp->tval, donerec, donefld) );
	vp->sval = copy;
	return copy;
}
|
||||
|
||||
/*
 * getfval: return the numeric value of cell vp.
 *
 * A cell with neither NUM nor STR set is reported through funnyvar().
 * A field read while the fields are stale triggers fldbld(); a record
 * read while the record is stale triggers recbld().  When the cell is not
 * already numeric its string is converted with atof(); NUM is then set
 * only if the string passes is_number() and the cell is not a CON.
 */
Awkfloat getfval(Cell *vp)
{
	if ((vp->tval & (NUM | STR)) == 0) {
		funnyvar(vp, "read value of");
	}

	if (isfld(vp) && !donefld) {
		fldbld();
	} else if (isrec(vp) && !donerec) {
		recbld();
	}

	if (!isnum(vp)) {
		char *text = vp->sval;

		vp->fval = atof(text);	/* best guess */
		if (is_number(text) && (vp->tval & CON) == 0) {
			vp->tval |= NUM;	/* make NUM only sparingly */
		}
	}

	dprintf( ("getfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), vp->fval, vp->tval) );
	return vp->fval;
}
|
||||
|
||||
/*
 * get_str_val: return the string value of cell vp, converting from the
 * numeric value when the cell has no valid string.
 *
 * vp  - the cell to read.
 * fmt - points at the printf-style format used for non-integral numbers
 *       (callers in this file pass CONVFMT or OFMT).
 *
 * A cell with neither NUM nor STR set is reported through funnyvar().
 * A field read while the fields are stale triggers fldbld(); a record
 * read while the record is stale triggers recbld().  When conversion is
 * needed, integral values are formatted with "%.30g" and everything else
 * with *fmt; the result replaces vp->sval (a freeable old value is
 * released), DONTFREE is cleared and STR is set.
 *
 * The conversion goes through a fixed-size scratch buffer.  snprintf()
 * bounds the write, so a format that expands to more than the buffer can
 * hold yields a truncated string instead of overrunning the stack.
 */
static char *get_str_val(Cell *vp, char **fmt)
{
	char s[100];
	double dtemp;

	if ((vp->tval & (NUM | STR)) == 0)
		funnyvar(vp, "read value of");
	if (isfld(vp) && donefld == 0)
		fldbld();
	else if (isrec(vp) && donerec == 0)
		recbld();
	if (isstr(vp) == 0) {
		if (freeable(vp))
			xfree(vp->sval);
		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
			snprintf(s, sizeof(s), "%.30g", vp->fval);
		else
			snprintf(s, sizeof(s), *fmt, vp->fval);
		vp->sval = tostring(s);
		vp->tval &= ~DONTFREE;
		vp->tval |= STR;
	}
	dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
	return(vp->sval);
}
|
||||
|
||||
/*
 * getsval: return the string value of cell vp, formatting non-integral
 * numbers with CONVFMT.  All the work is done by get_str_val().
 */
char *getsval(Cell *vp)
{
	char *text = get_str_val(vp, CONVFMT);

	return text;
}
|
||||
|
||||
/*
 * getpssval: return the string value of cell vp for printing, formatting
 * non-integral numbers with OFMT.  All the work is done by get_str_val().
 */
char *getpssval(Cell *vp)
{
	char *text = get_str_val(vp, OFMT);

	return text;
}
|
||||
|
||||
|
||||
/*
 * tostring: return a freshly malloc'ed copy of string s.
 *
 * s must be a valid NUL-terminated string.  Allocation failure is
 * reported through FATAL.
 */
char *tostring(const char *s)
{
	size_t nbytes = strlen(s) + 1;	/* include the terminating NUL */
	char *copy = (char *) malloc(nbytes);

	if (copy == NULL) {
		FATAL("out of space in tostring on %s", s);
	}
	memcpy(copy, s, nbytes);
	return copy;
}
|
||||
|
||||
char *qstring(const char *is, int delim) /* collect string up to next delim */
|
||||
{
|
||||
const char *os = is;
|
||||
int c, n;
|
||||
uschar *s = (uschar *) is;
|
||||
uschar *buf, *bp;
|
||||
|
||||
if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
|
||||
FATAL( "out of space in qstring(%s)", s);
|
||||
for (bp = buf; (c = *s) != delim; s++) {
|
||||
if (c == '\n')
|
||||
SYNTAX( "newline in string %.20s...", os );
|
||||
else if (c != '\\')
|
||||
*bp++ = c;
|
||||
else { /* \something */
|
||||
c = *++s;
|
||||
if (c == 0) { /* \ at end */
|
||||
*bp++ = '\\';
|
||||
break; /* for loop */
|
||||
}
|
||||
switch (c) {
|
||||
case '\\': *bp++ = '\\'; break;
|
||||
case 'n': *bp++ = '\n'; break;
|
||||
case 't': *bp++ = '\t'; break;
|
||||
case 'b': *bp++ = '\b'; break;
|
||||
case 'f': *bp++ = '\f'; break;
|
||||
case 'r': *bp++ = '\r'; break;
|
||||
default:
|
||||
if (!isdigit(c)) {
|
||||
*bp++ = c;
|
||||
break;
|
||||
}
|
||||
n = c - '0';
|
||||
if (isdigit(s[1])) {
|
||||
n = 8 * n + *++s - '0';
|
||||
if (isdigit(s[1]))
|
||||
n = 8 * n + *++s - '0';
|
||||
}
|
||||
*bp++ = n;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
*bp++ = 0;
|
||||
return (char *) buf;
|
||||
}
|
Loading…
Reference in New Issue