/*
 * 7f0ff9b2ee
 * XXX gensub() doesn't handle backreferences (\0 .... \9) yet
 */
/****************************************************************
|
|
Copyright (C) Lucent Technologies 1997
|
|
All Rights Reserved
|
|
|
|
Permission to use, copy, modify, and distribute this software and
|
|
its documentation for any purpose and without fee is hereby
|
|
granted, provided that the above copyright notice appear in all
|
|
copies and that both that the copyright notice and this
|
|
permission notice and warranty disclaimer appear in supporting
|
|
documentation, and that the name Lucent Technologies or any of
|
|
its entities not be used in advertising or publicity pertaining
|
|
to distribution of the software without specific, written prior
|
|
permission.
|
|
|
|
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
|
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
|
|
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
|
|
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
|
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
|
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
|
THIS SOFTWARE.
|
|
****************************************************************/
|
|
|
|
%{
/*
 * Prologue: C declarations copied verbatim into the generated parser.
 * The globals below are parser state that the grammar actions read and
 * update while a program is being parsed.
 */
#include <stdio.h>
#include <string.h>
#include "awk.h"

void checkdup(Node *list, Cell *item);
int yywrap(void) { return(1); }

Node	*beginloc = 0;	/* statements collected from XBEGIN blocks (see pa_stat) */
Node	*endloc = 0;	/* statements collected from XEND blocks (see pa_stat) */
int	infunc	= 0;	/* = 1 if in arglist or body of func */
int	inloop	= 0;	/* = 1 if in while, for, do */
char	*curfname = 0;	/* current function name */
Node	*arglist = 0;	/* list of args for current function */
%}
|
|
|
|
/* Semantic value carried by tokens and nonterminals. */
%union {
	Node	*p;	/* parse-tree node (<p> symbols) */
	Cell	*cp;	/* symbol-table cell (<cp> tokens: VAR, NUMBER, STRING, ...) */
	int	i;	/* integer value (<i> tokens and nonterminals) */
	char	*s;	/* string (<s>: REGEXPR and reg_expr) */
}
|
|
|
|
/*
 * Token declarations.  The tag in <> selects the %union member that
 * carries the token's value.
 */
%token	<i>	FIRSTTOKEN	/* must be first */
%token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
%token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
%token	<i>	ARRAY
%token	<i>	MATCH NOTMATCH MATCHOP
%token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS
%token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
%token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%token	<i>	SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token	<i>	ADD MINUS MULT DIVIDE MOD
%token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token	<i>	PRINT PRINTF SPRINTF
%token	<p>	ELSE INTEST CONDEXPR
%token	<i>	POSTINCR PREINCR POSTDECR PREDECR
%token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
%token	<s>	REGEXPR
|
|
|
|
/* Value types of the nonterminals, by %union member. */
%type	<p>	pas pattern ppattern plist pplist patlist prarg term re
%type	<p>	pa_pat pa_stat pa_stats
%type	<s>	reg_expr
%type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
%type	<p>	var varname funcname varlist
%type	<p>	for if else while
%type	<i>	do st
%type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
%type	<i>	subop print
|
|
|
|
/*
 * Operator precedence and associativity.  As usual for yacc, lines are
 * listed from lowest precedence to highest; the order of these lines is
 * significant and must not be changed.
 */
%right	ASGNOP
%right	'?'
%right	':'
%left	BOR
%left	AND
%left	GETLINE
%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
%left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%left	GENSUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
%left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
%left	REGEXPR VAR VARNF IVAR WHILE '('
%left	CAT
%left	'+' '-'
%left	'*' '/' '%'
%left	NOT UMINUS
%right	POWER
%right	DECR INCR
%left	INDIRECT
%token	LASTTOKEN	/* must be last */
|
|
|
|
%%
|
|
|
|
/*
 * program: start symbol.  When no error has been flagged, stores the
 * PROGRAM node (begin actions, pattern-action list, end actions) in
 * winner.  On a parse error, discards the lookahead, runs bracecheck()
 * and reports "bailing out".
 */
program:
	  pas	{ if (errorflag==0)
			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
	| error	{ yyclearin; bracecheck(); SYNTAX("bailing out"); }
	;
|
|
|
|
/* and: an AND token followed by any number of NL tokens. */
and:
	  AND | and NL
	;
|
|
|
|
/* bor: a BOR token followed by any number of NL tokens. */
bor:
	  BOR | bor NL
	;
|
|
|
|
/* comma: a ',' followed by any number of NL tokens. */
comma:
	  ',' | comma NL
	;
|
|
|
|
/* do: a DO token followed by any number of NL tokens. */
do:
	  DO | do NL
	;
|
|
|
|
/* else: an ELSE token followed by any number of NL tokens. */
else:
	  ELSE | else NL
	;
|
|
|
|
/*
 * for: the three forms of the for statement.
 *   - for (init; cond; incr) stmt  -> FOR node with notnull(cond)
 *   - for (init; ; incr) stmt      -> FOR node with NIL condition
 *   - for (var in array) stmt      -> IN node
 * The mid-rule action {inloop++;} runs before the body is parsed and
 * counts as a symbol, which is why the body is $12, $10 and $8.
 * inloop is decremented again once the body has been reduced.
 */
for:
	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
		{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
	| FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
		{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
	| FOR '(' varname IN varname rparen {inloop++;} stmt
		{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
	;
|
|
|
|
/*
 * funcname: name in a function definition; either a VAR or a CALL token.
 * setfname() validates the cell and records the name in curfname.  No
 * explicit $$ is assigned, so the value is yacc's default ($$ = $1).
 */
funcname:
	  VAR	{ setfname($1); }
	| CALL	{ setfname($1); }
	;
|
|
|
|
/* if: "IF ( pattern )" header; its value is the condition passed through notnull(). */
if:
	  IF '(' pattern rparen		{ $$ = notnull($3); }
	;
|
|
|
|
/* lbrace: a '{' followed by any number of NL tokens. */
lbrace:
	  '{' | lbrace NL
	;
|
|
|
|
/* nl: one or more NL tokens. */
nl:
	  NL | nl NL
	;
|
|
|
|
/* opt_nl: zero or more NL tokens; value is 0 when empty. */
opt_nl:
	  /* empty */	{ $$ = 0; }
	| nl
	;
|
|
|
|
/* opt_pst: optional run of pattern-statement terminators (see pst); 0 when empty. */
opt_pst:
	  /* empty */	{ $$ = 0; }
	| pst
	;
|
|
|
|
|
|
/* opt_simple_stmt: an optional simple_stmt; value is 0 when absent. */
opt_simple_stmt:
	  /* empty */			{ $$ = 0; }
	| simple_stmt
	;
|
|
|
|
/*
 * pas: the whole list of pattern-action statements, with optional
 * terminators before and after.  Value is 0 when there are none.
 */
pas:
	  opt_pst			{ $$ = 0; }
	| opt_pst pa_stats opt_pst	{ $$ = $2; }
	;
|
|
|
|
/* pa_pat: pattern part of a pattern-action statement, passed through notnull(). */
pa_pat:
	  pattern	{ $$ = notnull($1); }
	;
|
|
|
|
/*
 * pa_stat: one top-level item of the program.
 *   - pattern alone, or range "pat, pat" alone: the action defaults to
 *     a PRINT of rectonode().
 *   - pattern or range with a braced statement list.
 *   - braced statement list with no pattern (NIL pattern).
 *   - XBEGIN / XEND blocks: statements are appended to beginloc /
 *     endloc and the item itself yields 0.
 *   - FUNC definition: infunc is raised while the body is parsed
 *     (mid-rule action, so the body is $8), then defn() is called and
 *     curfname is cleared; the item yields 0.
 */
pa_stat:
	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
	| pa_pat ',' pa_pat		{ $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat ',' pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $3, $5); }
	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }
	| XBEGIN lbrace stmtlist '}'
		{ beginloc = linkum(beginloc, $3); $$ = 0; }
	| XEND lbrace stmtlist '}'
		{ endloc = linkum(endloc, $3); $$ = 0; }
	| FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
		{ infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
	;
|
|
|
|
/* pa_stats: one or more pa_stat items, chained together with linkum(). */
pa_stats:
	  pa_stat
	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
	;
|
|
|
|
/* patlist: one or more patterns separated by comma, chained with linkum(). */
patlist:
	  pattern
	| patlist comma pattern		{ $$ = linkum($1, $3); }
	;
|
|
|
|
/*
 * ppattern: expression form used for print arguments (via pplist and
 * prarg).  It mirrors pattern but has no relational-operator
 * (EQ GE GT LE LT NE) and no '|' GETLINE alternatives.
 * NOTE(review): presumably this keeps '>' and '|' after unparenthesized
 * print arguments available for output redirection -- confirm.
 *
 * For MATCHOP, a literal regular expression or a constant right-hand
 * side is compiled with makedfa() at parse time; otherwise the first
 * operand of the node is (Node *)1 and the expression is stored as-is.
 */
ppattern:
	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
	| ppattern '?' ppattern ':' ppattern %prec '?'
	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| ppattern bor ppattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| ppattern and ppattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| ppattern MATCHOP ppattern
		{ if (constnode($3))
			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
		  else
			$$ = op3($2, (Node *)1, $1, $3); }
	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;
|
|
|
|
/*
 * pattern: general expression.
 *   - assignment, ?:, boolean and relational operators build op2/op3
 *     nodes; boolean operands and the ?: condition go through notnull().
 *   - MATCHOP: a literal regular expression or a constant right-hand
 *     side is compiled with makedfa() at parse time; otherwise the
 *     first operand of the node is (Node *)1 and the expression is
 *     stored as-is.
 *   - IN: membership test, also for a parenthesized subscript list.
 *   - "pattern | GETLINE [var]": rejected with a syntax error when
 *     `safe` is set.  In that case $$ is not assigned explicitly and
 *     keeps yacc's default ($$ = $1).
 *   - juxtaposition of a pattern and a term is concatenation (CAT).
 */
pattern:
	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
	| pattern '?' pattern ':' pattern %prec '?'
	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| pattern bor pattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| pattern and pattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }
	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| pattern MATCHOP pattern
		{ if (constnode($3))
			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
		  else
			$$ = op3($2, (Node *)1, $1, $3); }
	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
	| pattern '|' GETLINE var	{
			if (safe) SYNTAX("cmd | getline is unsafe");
			else $$ = op3(GETLINE, $4, itonp($2), $1); }
	| pattern '|' GETLINE		{
			if (safe) SYNTAX("cmd | getline is unsafe");
			else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;
|
|
|
|
/* plist: two or more patterns separated by comma, chained with linkum(). */
plist:
	  pattern comma pattern		{ $$ = linkum($1, $3); }
	| plist comma pattern		{ $$ = linkum($1, $3); }
	;
|
|
|
|
/* pplist: one or more ppatterns separated by comma, chained with linkum(). */
pplist:
	  ppattern
	| pplist comma ppattern		{ $$ = linkum($1, $3); }
	;
|
|
|
|
/*
 * prarg: argument list of print/printf.  With no arguments the value is
 * rectonode(); otherwise a bare pplist or a parenthesized plist.
 */
prarg:
	  /* empty */			{ $$ = rectonode(); }
	| pplist
	| '(' plist ')'			{ $$ = $2; }
	;
|
|
|
|
/* print: either output keyword; the token value is passed on by the default action. */
print:
	  PRINT | PRINTF
	;
|
|
|
|
/* pst: one or more pattern-statement terminators, each an NL or a ';'. */
pst:
	  NL | ';' | pst NL | pst ';'
	;
|
|
|
|
/* rbrace: a '}' followed by any number of NL tokens. */
rbrace:
	  '}' | rbrace NL
	;
|
|
|
|
/*
 * re: a regular expression used on its own as an expression.  It becomes
 * a MATCH of rectonode() against the DFA compiled from the expression;
 * a leading NOT negates the result.
 */
re:
	   reg_expr
		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
	| NOT re	{ $$ = op1(NOT, notnull($2)); }
	;
|
|
|
|
/*
 * reg_expr: a /.../ literal.  The mid-rule action calls startreg()
 * right after the opening '/' and before REGEXPR is read; because the
 * action counts as a symbol, the REGEXPR string is $3.
 */
reg_expr:
	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
	;
|
|
|
|
/* rparen: a ')' followed by any number of NL tokens. */
rparen:
	  ')' | rparen NL
	;
|
|
|
|
/*
 * simple_stmt: statements without their terminator.
 *   - print/printf, optionally redirected with '|', APPEND or GT.  The
 *     redirected forms are rejected with a syntax error when `safe` is
 *     set; in that case $$ is not assigned explicitly.
 *   - DELETE of one array element or of a whole array.
 *   - an expression used as a statement (exptostat).
 *   - error recovery: discard the lookahead and report
 *     "illegal statement".
 */
simple_stmt:
	  print prarg '|' term		{
			if (safe) SYNTAX("print | is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg APPEND term	{
			if (safe) SYNTAX("print >> is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg GT term		{
			if (safe) SYNTAX("print > is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
	| DELETE varname		 { $$ = stat2(DELETE, makearr($2), 0); }
	| pattern			{ $$ = exptostat($1); }
	| error				{ yyclearin; SYNTAX("illegal statement"); }
	;
|
|
|
|
/* st: statement terminator -- one or more NL, or a ';' followed by optional NLs. */
st:
	  nl
	| ';' opt_nl
	;
|
|
|
|
/*
 * stmt: a complete statement.
 *   - BREAK / CONTINUE report a syntax error when inloop is zero, but
 *     the node is still built.
 *   - do ... WHILE: inloop is raised around the body by two mid-rule
 *     actions, so the body is $3 and the condition is $7.
 *   - NEXT / NEXTFILE report a syntax error when infunc is nonzero, but
 *     the node is still built.
 *   - "simple_stmt st" has no action; its value is yacc's default
 *     ($$ = $1).
 *   - while: inloop is raised around the body by a mid-rule action, so
 *     the body is $3.
 *   - a lone ';' is the empty statement (value 0).
 */
stmt:
	  BREAK st		{ if (!inloop) SYNTAX("break illegal outside of loops");
				  $$ = stat1(BREAK, NIL); }
	| CONTINUE st		{  if (!inloop) SYNTAX("continue illegal outside of loops");
			 	  $$ = stat1(CONTINUE, NIL); }
	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
		{ $$ = stat2(DO, $3, notnull($7)); }
	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
	| EXIT st		{ $$ = stat1(EXIT, NIL); }
	| for
	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
	| lbrace stmtlist rbrace { $$ = $2; }
	| NEXT st	{ if (infunc)
				SYNTAX("next is illegal inside a function");
			  $$ = stat1(NEXT, NIL); }
	| NEXTFILE st	{ if (infunc)
				SYNTAX("nextfile is illegal inside a function");
			  $$ = stat1(NEXTFILE, NIL); }
	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
	| RETURN st		{ $$ = stat1(RETURN, NIL); }
	| simple_stmt st
	| while {inloop++;} stmt	{ --inloop; $$ = stat2(WHILE, $1, $3); }
	| ';' opt_nl		{ $$ = 0; }
	;
|
|
|
|
/* stmtlist: one or more statements, chained together with linkum(). */
stmtlist:
	  stmt
	| stmtlist stmt		{ $$ = linkum($1, $2); }
	;
|
|
|
|
/* subop: SUB or GSUB; the token value is passed on and used as the node type in term. */
subop:
	  SUB | GSUB
	;
|
|
|
|
/*
 * term: arithmetic, built-in function calls, constants and variables.
 *
 * Notes on the less obvious alternatives:
 *   - "term '/' ASGNOP term" is built as a divide-assign (DIVEQ) node.
 *   - unary '+' yields its operand unchanged.
 *   - BLTIN with no argument list, or with "()", is applied to
 *     rectonode().
 *   - GENSUB, MATCHFCN and subop (SUB/GSUB): when the regular expression
 *     is a /.../ literal or a constant node it is compiled with
 *     makedfa() at parse time and the node's first operand is NIL;
 *     otherwise the first operand is (Node *)1 and the expression node
 *     is stored as-is.  When the target argument is omitted,
 *     rectonode() is used.
 *   - INDEX with a regular expression reports a syntax error; a node is
 *     still built.
 *   - SPLIT passes the token code STRING or REGEXPR, cast to Node *, as
 *     its fourth operand to identify the kind of separator.
 */
term:
 	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }
 	| term '+' term			{ $$ = op2(ADD, $1, $3); }
	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
	| term '*' term			{ $$ = op2(MULT, $1, $3); }
	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
	| term '%' term			{ $$ = op2(MOD, $1, $3); }
	| term POWER term		{ $$ = op2(POWER, $1, $3); }
	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
	| '+' term %prec UMINUS		{ $$ = $2; }
	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
	| CLOSE term			{ $$ = op1(CLOSE, $2); }
	| DECR var			{ $$ = op1(PREDECR, $2); }
	| INCR var			{ $$ = op1(PREINCR, $2); }
	| var DECR			{ $$ = op1(POSTDECR, $1); }
	| var INCR			{ $$ = op1(POSTINCR, $1); }
	| GENSUB '(' reg_expr comma pattern comma pattern ')'
		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); }
	| GENSUB '(' pattern comma pattern comma pattern ')'
		{ if (constnode($3))
			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
		  else
			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
		}
	| GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); }
	| GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
		{ if (constnode($3))
			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
		  else
			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
		}
	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
	| INDEX '(' pattern comma pattern ')'
		{ $$ = op2(INDEX, $3, $5); }
	| INDEX '(' pattern comma reg_expr ')'
		{ SYNTAX("index() doesn't permit regular expressions");
		  $$ = op2(INDEX, $3, (Node*)$5); }
	| '(' pattern ')'		{ $$ = $2; }
	| MATCHFCN '(' pattern comma reg_expr ')'
		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
	| MATCHFCN '(' pattern comma pattern ')'
		{ if (constnode($5))
			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
		  else
			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
	| NUMBER			{ $$ = celltonode($1, CCON); }
	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
	| SPLIT '(' pattern comma varname ')'
		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
	| STRING	 		{ $$ = celltonode($1, CCON); }
	| subop '(' reg_expr comma pattern ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
	| subop '(' pattern comma pattern ')'
		{ if (constnode($3))
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
		  else
			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
	| subop '(' reg_expr comma pattern comma var ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
	| subop '(' pattern comma pattern comma var ')'
		{ if (constnode($3))
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
		  else
			$$ = op4($1, (Node *)1, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, NIL); }
	| var
	;
|
|
|
|
/*
 * var: something that names a variable -- a plain varname, a
 * subscripted array element (ARRAY node), an IVAR token, or INDIRECT
 * applied to a term.  Both of the last two forms build an INDIRECT node.
 */
var:
	  varname
	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
	| IVAR				{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
	| INDIRECT term	 		{ $$ = op1(INDIRECT, $2); }
	;
|
|
|
|
/*
 * varlist: parameter list of a function definition (possibly empty).
 * Each alternative stores the list built so far in the global arglist
 * as well as in $$.  checkdup() reports a name that is already in the
 * list before the new one is appended.
 */
varlist:
	  /* nothing */		{ arglist = $$ = 0; }
	| VAR			{ arglist = $$ = celltonode($1,CVAR); }
	| varlist comma VAR	{
			checkdup($1, $3);
			arglist = $$ = linkum($1,celltonode($3,CVAR)); }
	;
|
|
|
|
/*
 * varname: a bare name.  VAR becomes a CVAR cell node, ARG becomes an
 * ARG node holding the token's integer value, and VARNF becomes a VARNF
 * node holding the cell pointer.
 */
varname:
	  VAR			{ $$ = celltonode($1, CVAR); }
	| ARG 			{ $$ = op1(ARG, itonp($1)); }
	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
	;
|
|
|
|
|
|
/* while: "WHILE ( pattern )" header; its value is the condition passed through notnull(). */
while:
	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
	;
|
|
|
|
%%
|
|
|
|
/*
 * setfname: note that the function named by cell p is being defined.
 *
 * Reports a syntax error if p is already an array (isarr) or already a
 * function (isfcn).  curfname is set to p->nval in every case, including
 * after an error has been reported.
 */
void setfname(Cell *p)
{
	if (isarr(p))
		SYNTAX("%s is an array, not a function", p->nval);
	else if (isfcn(p))
		SYNTAX("you can't define function %s more than once", p->nval);
	curfname = p->nval;
}
|
|
|
|
/*
 * constnode: return nonzero if p is a value node (isvalue) whose cell,
 * stored in p->narg[0], has csub == CCON; otherwise return 0.
 */
int constnode(Node *p)
{
	return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
}
|
|
|
|
/*
 * strnode: return the sval field of the cell stored in p->narg[0].
 * No check is made on p here; the grammar calls this only after
 * constnode(p) has succeeded.
 */
char *strnode(Node *p)
{
	return ((Cell *)(p->narg[0]))->sval;
}
|
|
|
|
/*
 * notnull: make n usable as a condition.
 *
 * A node that is already a comparison (LE LT EQ NE GT GE) or a boolean
 * operator (BOR AND NOT) is returned unchanged.  Any other node is
 * wrapped in an NE comparison against nullnode.
 */
Node *notnull(Node *n)
{
	switch (n->nobj) {
	case LE: case LT: case EQ: case NE: case GT: case GE:
	case BOR: case AND: case NOT:
		return n;
	default:
		return op2(NE, n, nullnode);
	}
}
|
|
|
|
/*
 * checkdup: check if name already in list.
 *
 * Walks the list vl through nnext, comparing cp->nval with the nval of
 * the cell in each node's narg[0].  On the first match a
 * "duplicate argument" syntax error is reported and the walk stops.
 */
void checkdup(Node *vl, Cell *cp)	/* check if name already in list */
{
	char *s = cp->nval;
	for ( ; vl; vl = vl->nnext) {
		if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
			SYNTAX("duplicate argument %s", s);
			break;
		}
	}
}
|