NetBSD/bin/sh/parser.c
christos ab28bede4c Prefix "Syntax Error" with the program name, if the command name is not
available. At least this way we get an idea of what program gives us the
message.
2006-10-04 15:00:38 +00:00

1657 lines
35 KiB
C

/* $NetBSD: parser.c,v 1.62 2006/10/04 15:00:38 christos Exp $ */
/*-
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Kenneth Almquist.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#ifndef lint
#if 0
static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95";
#else
__RCSID("$NetBSD: parser.c,v 1.62 2006/10/04 15:00:38 christos Exp $");
#endif
#endif /* not lint */
#include <stdlib.h>
#include "shell.h"
#include "parser.h"
#include "nodes.h"
#include "expand.h" /* defines rmescapes() */
#include "eval.h" /* defines commandname */
#include "redir.h" /* defines copyfd() */
#include "syntax.h"
#include "options.h"
#include "input.h"
#include "output.h"
#include "var.h"
#include "error.h"
#include "memalloc.h"
#include "mystring.h"
#include "alias.h"
#include "show.h"
#ifndef SMALL
#include "myhistedit.h"
#endif
/*
* Shell command parser.
*/
#define EOFMARKLEN 79
/* values returned by readtoken */
#include "token.h"
#define OPENBRACE '{'
#define CLOSEBRACE '}'
struct heredoc {
struct heredoc *next; /* next here document in list */
union node *here; /* redirection node */
char *eofmark; /* string indicating end of input */
int striptabs; /* if set, strip leading tabs */
};
static int noalias = 0; /* when set, don't handle aliases */
struct heredoc *heredoclist; /* list of here documents to read */
int parsebackquote; /* nonzero if we are inside backquotes */
int doprompt; /* if set, prompt the user */
int needprompt; /* true if interactive and at start of line */
int lasttoken; /* last token read */
MKINIT int tokpushback; /* last token pushed back */
char *wordtext; /* text of last word returned by readtoken */
MKINIT int checkkwd; /* 1 == check for kwds, 2 == also eat newlines */
struct nodelist *backquotelist;
union node *redirnode;
struct heredoc *heredoc;
int quoteflag; /* set if (part of) last token was quoted */
int startlinno; /* line # where last token started */
STATIC union node *list(int);
STATIC union node *andor(void);
STATIC union node *pipeline(void);
STATIC union node *command(void);
STATIC union node *simplecmd(union node **, union node *);
STATIC union node *makename(void);
STATIC void parsefname(void);
STATIC void parseheredoc(void);
STATIC int peektoken(void);
STATIC int readtoken(void);
STATIC int xxreadtoken(void);
STATIC int readtoken1(int, char const *, char *, int);
STATIC int noexpand(char *);
STATIC void synexpect(int) __attribute__((__noreturn__));
STATIC void synerror(const char *) __attribute__((__noreturn__));
STATIC void setprompt(int);
/*
* Read and parse a command. Returns NEOF on end of file. (NULL is a
* valid parse tree indicating a blank line.)
*/
union node *
parsecmd(int interact)
{
int t;
tokpushback = 0;
doprompt = interact;
if (doprompt)
setprompt(1);
else
setprompt(0);
needprompt = 0;
t = readtoken();
if (t == TEOF)
return NEOF;
if (t == TNL)
return NULL;
tokpushback++;
return list(1);
}
STATIC union node *
list(int nlflag)
{
union node *n1, *n2, *n3;
int tok;
checkkwd = 2;
if (nlflag == 0 && tokendlist[peektoken()])
return NULL;
n1 = NULL;
for (;;) {
n2 = andor();
tok = readtoken();
if (tok == TBACKGND) {
if (n2->type == NCMD || n2->type == NPIPE) {
n2->ncmd.backgnd = 1;
} else if (n2->type == NREDIR) {
n2->type = NBACKGND;
} else {
n3 = (union node *)stalloc(sizeof (struct nredir));
n3->type = NBACKGND;
n3->nredir.n = n2;
n3->nredir.redirect = NULL;
n2 = n3;
}
}
if (n1 == NULL) {
n1 = n2;
}
else {
n3 = (union node *)stalloc(sizeof (struct nbinary));
n3->type = NSEMI;
n3->nbinary.ch1 = n1;
n3->nbinary.ch2 = n2;
n1 = n3;
}
switch (tok) {
case TBACKGND:
case TSEMI:
tok = readtoken();
/* fall through */
case TNL:
if (tok == TNL) {
parseheredoc();
if (nlflag)
return n1;
} else {
tokpushback++;
}
checkkwd = 2;
if (tokendlist[peektoken()])
return n1;
break;
case TEOF:
if (heredoclist)
parseheredoc();
else
pungetc(); /* push back EOF on input */
return n1;
default:
if (nlflag)
synexpect(-1);
tokpushback++;
return n1;
}
}
}
STATIC union node *
andor(void)
{
union node *n1, *n2, *n3;
int t;
n1 = pipeline();
for (;;) {
if ((t = readtoken()) == TAND) {
t = NAND;
} else if (t == TOR) {
t = NOR;
} else {
tokpushback++;
return n1;
}
n2 = pipeline();
n3 = (union node *)stalloc(sizeof (struct nbinary));
n3->type = t;
n3->nbinary.ch1 = n1;
n3->nbinary.ch2 = n2;
n1 = n3;
}
}
STATIC union node *
pipeline(void)
{
union node *n1, *n2, *pipenode;
struct nodelist *lp, *prev;
int negate;
negate = 0;
TRACE(("pipeline: entered\n"));
while (readtoken() == TNOT)
negate = !negate;
tokpushback++;
n1 = command();
if (readtoken() == TPIPE) {
pipenode = (union node *)stalloc(sizeof (struct npipe));
pipenode->type = NPIPE;
pipenode->npipe.backgnd = 0;
lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
pipenode->npipe.cmdlist = lp;
lp->n = n1;
do {
prev = lp;
lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
lp->n = command();
prev->next = lp;
} while (readtoken() == TPIPE);
lp->next = NULL;
n1 = pipenode;
}
tokpushback++;
if (negate) {
n2 = (union node *)stalloc(sizeof (struct nnot));
n2->type = NNOT;
n2->nnot.com = n1;
return n2;
} else
return n1;
}
STATIC union node *
command(void)
{
union node *n1, *n2;
union node *ap, **app;
union node *cp, **cpp;
union node *redir, **rpp;
int t, negate = 0;
checkkwd = 2;
redir = NULL;
n1 = NULL;
rpp = &redir;
/* Check for redirection which may precede command */
while (readtoken() == TREDIR) {
*rpp = n2 = redirnode;
rpp = &n2->nfile.next;
parsefname();
}
tokpushback++;
while (readtoken() == TNOT) {
TRACE(("command: TNOT recognized\n"));
negate = !negate;
}
tokpushback++;
switch (readtoken()) {
case TIF:
n1 = (union node *)stalloc(sizeof (struct nif));
n1->type = NIF;
n1->nif.test = list(0);
if (readtoken() != TTHEN)
synexpect(TTHEN);
n1->nif.ifpart = list(0);
n2 = n1;
while (readtoken() == TELIF) {
n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
n2 = n2->nif.elsepart;
n2->type = NIF;
n2->nif.test = list(0);
if (readtoken() != TTHEN)
synexpect(TTHEN);
n2->nif.ifpart = list(0);
}
if (lasttoken == TELSE)
n2->nif.elsepart = list(0);
else {
n2->nif.elsepart = NULL;
tokpushback++;
}
if (readtoken() != TFI)
synexpect(TFI);
checkkwd = 1;
break;
case TWHILE:
case TUNTIL: {
int got;
n1 = (union node *)stalloc(sizeof (struct nbinary));
n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
n1->nbinary.ch1 = list(0);
if ((got=readtoken()) != TDO) {
TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
synexpect(TDO);
}
n1->nbinary.ch2 = list(0);
if (readtoken() != TDONE)
synexpect(TDONE);
checkkwd = 1;
break;
}
case TFOR:
if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
synerror("Bad for loop variable");
n1 = (union node *)stalloc(sizeof (struct nfor));
n1->type = NFOR;
n1->nfor.var = wordtext;
if (readtoken() == TWORD && ! quoteflag && equal(wordtext, "in")) {
app = &ap;
while (readtoken() == TWORD) {
n2 = (union node *)stalloc(sizeof (struct narg));
n2->type = NARG;
n2->narg.text = wordtext;
n2->narg.backquote = backquotelist;
*app = n2;
app = &n2->narg.next;
}
*app = NULL;
n1->nfor.args = ap;
if (lasttoken != TNL && lasttoken != TSEMI)
synexpect(-1);
} else {
static char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE,
'@', '=', '\0'};
n2 = (union node *)stalloc(sizeof (struct narg));
n2->type = NARG;
n2->narg.text = argvars;
n2->narg.backquote = NULL;
n2->narg.next = NULL;
n1->nfor.args = n2;
/*
* Newline or semicolon here is optional (but note
* that the original Bourne shell only allowed NL).
*/
if (lasttoken != TNL && lasttoken != TSEMI)
tokpushback++;
}
checkkwd = 2;
if ((t = readtoken()) == TDO)
t = TDONE;
else if (t == TBEGIN)
t = TEND;
else
synexpect(-1);
n1->nfor.body = list(0);
if (readtoken() != t)
synexpect(t);
checkkwd = 1;
break;
case TCASE:
n1 = (union node *)stalloc(sizeof (struct ncase));
n1->type = NCASE;
if (readtoken() != TWORD)
synexpect(TWORD);
n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg));
n2->type = NARG;
n2->narg.text = wordtext;
n2->narg.backquote = backquotelist;
n2->narg.next = NULL;
while (readtoken() == TNL);
if (lasttoken != TWORD || ! equal(wordtext, "in"))
synerror("expecting \"in\"");
cpp = &n1->ncase.cases;
noalias = 1;
checkkwd = 2, readtoken();
do {
*cpp = cp = (union node *)stalloc(sizeof (struct nclist));
if (lasttoken == TLP)
readtoken();
cp->type = NCLIST;
app = &cp->nclist.pattern;
for (;;) {
*app = ap = (union node *)stalloc(sizeof (struct narg));
ap->type = NARG;
ap->narg.text = wordtext;
ap->narg.backquote = backquotelist;
if (checkkwd = 2, readtoken() != TPIPE)
break;
app = &ap->narg.next;
readtoken();
}
ap->narg.next = NULL;
noalias = 0;
if (lasttoken != TRP) {
synexpect(TRP);
}
cp->nclist.body = list(0);
checkkwd = 2;
if ((t = readtoken()) != TESAC) {
if (t != TENDCASE) {
noalias = 0;
synexpect(TENDCASE);
} else {
noalias = 1;
checkkwd = 2;
readtoken();
}
}
cpp = &cp->nclist.next;
} while(lasttoken != TESAC);
noalias = 0;
*cpp = NULL;
checkkwd = 1;
break;
case TLP:
n1 = (union node *)stalloc(sizeof (struct nredir));
n1->type = NSUBSHELL;
n1->nredir.n = list(0);
n1->nredir.redirect = NULL;
if (readtoken() != TRP)
synexpect(TRP);
checkkwd = 1;
break;
case TBEGIN:
n1 = list(0);
if (readtoken() != TEND)
synexpect(TEND);
checkkwd = 1;
break;
/* Handle an empty command like other simple commands. */
case TSEMI:
/*
* An empty command before a ; doesn't make much sense, and
* should certainly be disallowed in the case of `if ;'.
*/
if (!redir)
synexpect(-1);
case TAND:
case TOR:
case TNL:
case TEOF:
case TWORD:
case TRP:
tokpushback++;
n1 = simplecmd(rpp, redir);
goto checkneg;
default:
synexpect(-1);
/* NOTREACHED */
}
/* Now check for redirection which may follow command */
while (readtoken() == TREDIR) {
*rpp = n2 = redirnode;
rpp = &n2->nfile.next;
parsefname();
}
tokpushback++;
*rpp = NULL;
if (redir) {
if (n1->type != NSUBSHELL) {
n2 = (union node *)stalloc(sizeof (struct nredir));
n2->type = NREDIR;
n2->nredir.n = n1;
n1 = n2;
}
n1->nredir.redirect = redir;
}
checkneg:
if (negate) {
n2 = (union node *)stalloc(sizeof (struct nnot));
n2->type = NNOT;
n2->nnot.com = n1;
return n2;
}
else
return n1;
}
STATIC union node *
simplecmd(union node **rpp, union node *redir)
{
union node *args, **app;
union node **orig_rpp = rpp;
union node *n = NULL, *n2;
int negate = 0;
/* If we don't have any redirections already, then we must reset */
/* rpp to be the address of the local redir variable. */
if (redir == 0)
rpp = &redir;
args = NULL;
app = &args;
/*
* We save the incoming value, because we need this for shell
* functions. There can not be a redirect or an argument between
* the function name and the open parenthesis.
*/
orig_rpp = rpp;
while (readtoken() == TNOT) {
TRACE(("command: TNOT recognized\n"));
negate = !negate;
}
tokpushback++;
for (;;) {
if (readtoken() == TWORD) {
n = (union node *)stalloc(sizeof (struct narg));
n->type = NARG;
n->narg.text = wordtext;
n->narg.backquote = backquotelist;
*app = n;
app = &n->narg.next;
} else if (lasttoken == TREDIR) {
*rpp = n = redirnode;
rpp = &n->nfile.next;
parsefname(); /* read name of redirection file */
} else if (lasttoken == TLP && app == &args->narg.next
&& rpp == orig_rpp) {
/* We have a function */
if (readtoken() != TRP)
synexpect(TRP);
#ifdef notdef
if (! goodname(n->narg.text))
synerror("Bad function name");
#endif
n->type = NDEFUN;
n->narg.next = command();
goto checkneg;
} else {
tokpushback++;
break;
}
}
*app = NULL;
*rpp = NULL;
n = (union node *)stalloc(sizeof (struct ncmd));
n->type = NCMD;
n->ncmd.backgnd = 0;
n->ncmd.args = args;
n->ncmd.redirect = redir;
checkneg:
if (negate) {
n2 = (union node *)stalloc(sizeof (struct nnot));
n2->type = NNOT;
n2->nnot.com = n;
return n2;
}
else
return n;
}
STATIC union node *
makename(void)
{
union node *n;
n = (union node *)stalloc(sizeof (struct narg));
n->type = NARG;
n->narg.next = NULL;
n->narg.text = wordtext;
n->narg.backquote = backquotelist;
return n;
}
void fixredir(union node *n, const char *text, int err)
{
TRACE(("Fix redir %s %d\n", text, err));
if (!err)
n->ndup.vname = NULL;
if (is_digit(text[0]) && text[1] == '\0')
n->ndup.dupfd = digit_val(text[0]);
else if (text[0] == '-' && text[1] == '\0')
n->ndup.dupfd = -1;
else {
if (err)
synerror("Bad fd number");
else
n->ndup.vname = makename();
}
}
STATIC void
parsefname(void)
{
union node *n = redirnode;
if (readtoken() != TWORD)
synexpect(-1);
if (n->type == NHERE) {
struct heredoc *here = heredoc;
struct heredoc *p;
int i;
if (quoteflag == 0)
n->type = NXHERE;
TRACE(("Here document %d\n", n->type));
if (here->striptabs) {
while (*wordtext == '\t')
wordtext++;
}
if (! noexpand(wordtext) || (i = strlen(wordtext)) == 0 || i > EOFMARKLEN)
synerror("Illegal eof marker for << redirection");
rmescapes(wordtext);
here->eofmark = wordtext;
here->next = NULL;
if (heredoclist == NULL)
heredoclist = here;
else {
for (p = heredoclist ; p->next ; p = p->next);
p->next = here;
}
} else if (n->type == NTOFD || n->type == NFROMFD) {
fixredir(n, wordtext, 0);
} else {
n->nfile.fname = makename();
}
}
/*
* Input any here documents.
*/
STATIC void
parseheredoc(void)
{
struct heredoc *here;
union node *n;
while (heredoclist) {
here = heredoclist;
heredoclist = here->next;
if (needprompt) {
setprompt(2);
needprompt = 0;
}
readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
here->eofmark, here->striptabs);
n = (union node *)stalloc(sizeof (struct narg));
n->narg.type = NARG;
n->narg.next = NULL;
n->narg.text = wordtext;
n->narg.backquote = backquotelist;
here->here->nhere.doc = n;
}
}
STATIC int
peektoken(void)
{
int t;
t = readtoken();
tokpushback++;
return (t);
}
STATIC int
readtoken(void)
{
int t;
int savecheckkwd = checkkwd;
#ifdef DEBUG
int alreadyseen = tokpushback;
#endif
struct alias *ap;
top:
t = xxreadtoken();
if (checkkwd) {
/*
* eat newlines
*/
if (checkkwd == 2) {
checkkwd = 0;
while (t == TNL) {
parseheredoc();
t = xxreadtoken();
}
} else
checkkwd = 0;
/*
* check for keywords and aliases
*/
if (t == TWORD && !quoteflag)
{
const char *const *pp;
for (pp = parsekwd; *pp; pp++) {
if (**pp == *wordtext && equal(*pp, wordtext))
{
lasttoken = t = pp -
parsekwd + KWDOFFSET;
TRACE(("keyword %s recognized\n", tokname[t]));
goto out;
}
}
if(!noalias &&
(ap = lookupalias(wordtext, 1)) != NULL) {
pushstring(ap->val, strlen(ap->val), ap);
checkkwd = savecheckkwd;
goto top;
}
}
out:
checkkwd = (t == TNOT) ? savecheckkwd : 0;
}
#ifdef DEBUG
if (!alreadyseen)
TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
else
TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
#endif
return (t);
}
/*
* Read the next input token.
* If the token is a word, we set backquotelist to the list of cmds in
* backquotes. We set quoteflag to true if any part of the word was
* quoted.
* If the token is TREDIR, then we set redirnode to a structure containing
* the redirection.
* In all cases, the variable startlinno is set to the number of the line
* on which the token starts.
*
* [Change comment: here documents and internal procedures]
* [Readtoken shouldn't have any arguments. Perhaps we should make the
* word parsing code into a separate routine. In this case, readtoken
* doesn't need to have any internal procedures, but parseword does.
* We could also make parseoperator in essence the main routine, and
* have parseword (readtoken1?) handle both words and redirection.]
*/
#define RETURN(token) return lasttoken = token
STATIC int
xxreadtoken(void)
{
int c;
if (tokpushback) {
tokpushback = 0;
return lasttoken;
}
if (needprompt) {
setprompt(2);
needprompt = 0;
}
startlinno = plinno;
for (;;) { /* until token or start of word found */
c = pgetc_macro();
if (c == ' ' || c == '\t')
continue; /* quick check for white space first */
switch (c) {
case ' ': case '\t':
continue;
case '#':
while ((c = pgetc()) != '\n' && c != PEOF);
pungetc();
continue;
case '\\':
if (pgetc() == '\n') {
startlinno = ++plinno;
if (doprompt)
setprompt(2);
else
setprompt(0);
continue;
}
pungetc();
goto breakloop;
case '\n':
plinno++;
needprompt = doprompt;
RETURN(TNL);
case PEOF:
RETURN(TEOF);
case '&':
if (pgetc() == '&')
RETURN(TAND);
pungetc();
RETURN(TBACKGND);
case '|':
if (pgetc() == '|')
RETURN(TOR);
pungetc();
RETURN(TPIPE);
case ';':
if (pgetc() == ';')
RETURN(TENDCASE);
pungetc();
RETURN(TSEMI);
case '(':
RETURN(TLP);
case ')':
RETURN(TRP);
default:
goto breakloop;
}
}
breakloop:
return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
#undef RETURN
}
/*
* If eofmark is NULL, read a word or a redirection symbol. If eofmark
* is not NULL, read a here document. In the latter case, eofmark is the
* word which marks the end of the document and striptabs is true if
* leading tabs should be stripped from the document. The argument firstc
* is the first character of the input token or document.
*
* Because C does not have internal subroutines, I have simulated them
* using goto's to implement the subroutine linkage. The following macros
* will run code that appears at the end of readtoken1.
*/
#define CHECKEND() {goto checkend; checkend_return:;}
#define PARSEREDIR() {goto parseredir; parseredir_return:;}
#define PARSESUB() {goto parsesub; parsesub_return:;}
#define PARSEBACKQOLD() {oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
#define PARSEBACKQNEW() {oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
#define PARSEARITH() {goto parsearith; parsearith_return:;}
/*
* Keep track of nested doublequotes in dblquote and doublequotep.
* We use dblquote for the first 32 levels, and we expand to a malloc'ed
* region for levels above that. Usually we never need to malloc.
* This code assumes that an int is 32 bits. We don't use uint32_t,
* because the rest of the code does not.
*/
#define ISDBLQUOTE() ((varnest < 32) ? (dblquote & (1 << varnest)) : \
(dblquotep[(varnest / 32) - 1] & (1 << (varnest % 32))))
#define SETDBLQUOTE() \
if (varnest < 32) \
dblquote |= (1 << varnest); \
else \
dblquotep[(varnest / 32) - 1] |= (1 << (varnest % 32))
#define CLRDBLQUOTE() \
if (varnest < 32) \
dblquote &= ~(1 << varnest); \
else \
dblquotep[(varnest / 32) - 1] &= ~(1 << (varnest % 32))
STATIC int
readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
{
int c = firstc;
char *out;
int len;
char line[EOFMARKLEN + 1];
struct nodelist *bqlist;
int quotef;
int *dblquotep = NULL;
size_t maxnest = 32;
int dblquote;
int varnest; /* levels of variables expansion */
int arinest; /* levels of arithmetic expansion */
int parenlevel; /* levels of parens in arithmetic */
int oldstyle;
char const *prevsyntax; /* syntax before arithmetic */
#if __GNUC__
/* Avoid longjmp clobbering */
(void) &maxnest;
(void) &dblquotep;
(void) &out;
(void) &quotef;
(void) &dblquote;
(void) &varnest;
(void) &arinest;
(void) &parenlevel;
(void) &oldstyle;
(void) &prevsyntax;
(void) &syntax;
prevsyntax = NULL; /* XXX gcc4 */
#endif
startlinno = plinno;
dblquote = 0;
varnest = 0;
if (syntax == DQSYNTAX) {
SETDBLQUOTE();
}
quotef = 0;
bqlist = NULL;
arinest = 0;
parenlevel = 0;
STARTSTACKSTR(out);
loop: { /* for each line, until end of word */
#if ATTY
if (c == '\034' && doprompt
&& attyset() && ! equal(termval(), "emacs")) {
attyline();
if (syntax == BASESYNTAX)
return readtoken();
c = pgetc();
goto loop;
}
#endif
CHECKEND(); /* set c to PEOF if at end of here document */
for (;;) { /* until end of line or end of word */
CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */
switch(syntax[c]) {
case CNL: /* '\n' */
if (syntax == BASESYNTAX)
goto endword; /* exit outer loop */
USTPUTC(c, out);
plinno++;
if (doprompt)
setprompt(2);
else
setprompt(0);
c = pgetc();
goto loop; /* continue outer loop */
case CWORD:
USTPUTC(c, out);
break;
case CCTL:
if (eofmark == NULL || ISDBLQUOTE())
USTPUTC(CTLESC, out);
USTPUTC(c, out);
break;
case CBACK: /* backslash */
c = pgetc();
if (c == PEOF) {
USTPUTC('\\', out);
pungetc();
break;
}
if (c == '\n') {
if (doprompt)
setprompt(2);
else
setprompt(0);
break;
}
quotef = 1;
if (ISDBLQUOTE() && c != '\\' &&
c != '`' && c != '$' &&
(c != '"' || eofmark != NULL))
USTPUTC('\\', out);
if (SQSYNTAX[c] == CCTL)
USTPUTC(CTLESC, out);
else if (eofmark == NULL) {
USTPUTC(CTLQUOTEMARK, out);
USTPUTC(c, out);
if (varnest != 0)
USTPUTC(CTLQUOTEEND, out);
break;
}
USTPUTC(c, out);
break;
case CSQUOTE:
if (syntax != SQSYNTAX) {
if (eofmark == NULL)
USTPUTC(CTLQUOTEMARK, out);
quotef = 1;
syntax = SQSYNTAX;
break;
}
if (eofmark != NULL && arinest == 0 &&
varnest == 0) {
/* Ignore inside quoted here document */
USTPUTC(c, out);
break;
}
/* End of single quotes... */
if (arinest)
syntax = ARISYNTAX;
else {
syntax = BASESYNTAX;
if (varnest != 0)
USTPUTC(CTLQUOTEEND, out);
}
break;
case CDQUOTE:
if (eofmark != NULL && arinest == 0 &&
varnest == 0) {
/* Ignore inside here document */
USTPUTC(c, out);
break;
}
quotef = 1;
if (arinest) {
if (ISDBLQUOTE()) {
syntax = ARISYNTAX;
CLRDBLQUOTE();
} else {
syntax = DQSYNTAX;
SETDBLQUOTE();
USTPUTC(CTLQUOTEMARK, out);
}
break;
}
if (eofmark != NULL)
break;
if (ISDBLQUOTE()) {
if (varnest != 0)
USTPUTC(CTLQUOTEEND, out);
syntax = BASESYNTAX;
CLRDBLQUOTE();
} else {
syntax = DQSYNTAX;
SETDBLQUOTE();
USTPUTC(CTLQUOTEMARK, out);
}
break;
case CVAR: /* '$' */
PARSESUB(); /* parse substitution */
break;
case CENDVAR: /* CLOSEBRACE */
if (varnest > 0 && !ISDBLQUOTE()) {
varnest--;
USTPUTC(CTLENDVAR, out);
} else {
USTPUTC(c, out);
}
break;
case CLP: /* '(' in arithmetic */
parenlevel++;
USTPUTC(c, out);
break;
case CRP: /* ')' in arithmetic */
if (parenlevel > 0) {
USTPUTC(c, out);
--parenlevel;
} else {
if (pgetc() == ')') {
if (--arinest == 0) {
USTPUTC(CTLENDARI, out);
syntax = prevsyntax;
if (syntax == DQSYNTAX)
SETDBLQUOTE();
else
CLRDBLQUOTE();
} else
USTPUTC(')', out);
} else {
/*
* unbalanced parens
* (don't 2nd guess - no error)
*/
pungetc();
USTPUTC(')', out);
}
}
break;
case CBQUOTE: /* '`' */
PARSEBACKQOLD();
break;
case CEOF:
goto endword; /* exit outer loop */
default:
if (varnest == 0)
goto endword; /* exit outer loop */
USTPUTC(c, out);
}
c = pgetc_macro();
}
}
endword:
if (syntax == ARISYNTAX)
synerror("Missing '))'");
if (syntax != BASESYNTAX && ! parsebackquote && eofmark == NULL)
synerror("Unterminated quoted string");
if (varnest != 0) {
startlinno = plinno;
/* { */
synerror("Missing '}'");
}
USTPUTC('\0', out);
len = out - stackblock();
out = stackblock();
if (eofmark == NULL) {
if ((c == '>' || c == '<')
&& quotef == 0
&& len <= 2
&& (*out == '\0' || is_digit(*out))) {
PARSEREDIR();
return lasttoken = TREDIR;
} else {
pungetc();
}
}
quoteflag = quotef;
backquotelist = bqlist;
grabstackblock(len);
wordtext = out;
if (dblquotep != NULL)
ckfree(dblquotep);
return lasttoken = TWORD;
/* end of readtoken routine */
/*
* Check to see whether we are at the end of the here document. When this
* is called, c is set to the first character of the next input line. If
* we are at the end of the here document, this routine sets the c to PEOF.
*/
checkend: {
if (eofmark) {
if (striptabs) {
while (c == '\t')
c = pgetc();
}
if (c == *eofmark) {
if (pfgets(line, sizeof line) != NULL) {
char *p, *q;
p = line;
for (q = eofmark + 1 ; *q && *p == *q ; p++, q++);
if (*p == '\n' && *q == '\0') {
c = PEOF;
plinno++;
needprompt = doprompt;
} else {
pushstring(line, strlen(line), NULL);
}
}
}
}
goto checkend_return;
}
/*
* Parse a redirection operator. The variable "out" points to a string
* specifying the fd to be redirected. The variable "c" contains the
* first character of the redirection operator.
*/
parseredir: {
char fd = *out;
union node *np;
np = (union node *)stalloc(sizeof (struct nfile));
if (c == '>') {
np->nfile.fd = 1;
c = pgetc();
if (c == '>')
np->type = NAPPEND;
else if (c == '|')
np->type = NCLOBBER;
else if (c == '&')
np->type = NTOFD;
else {
np->type = NTO;
pungetc();
}
} else { /* c == '<' */
np->nfile.fd = 0;
switch (c = pgetc()) {
case '<':
if (sizeof (struct nfile) != sizeof (struct nhere)) {
np = (union node *)stalloc(sizeof (struct nhere));
np->nfile.fd = 0;
}
np->type = NHERE;
heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
heredoc->here = np;
if ((c = pgetc()) == '-') {
heredoc->striptabs = 1;
} else {
heredoc->striptabs = 0;
pungetc();
}
break;
case '&':
np->type = NFROMFD;
break;
case '>':
np->type = NFROMTO;
break;
default:
np->type = NFROM;
pungetc();
break;
}
}
if (fd != '\0')
np->nfile.fd = digit_val(fd);
redirnode = np;
goto parseredir_return;
}
/*
* Parse a substitution. At this point, we have read the dollar sign
* and nothing else.
*/
parsesub: {
int subtype;
int typeloc;
int flags;
char *p;
static const char types[] = "}-+?=";
c = pgetc();
if (c != '(' && c != OPENBRACE && !is_name(c) && !is_special(c)) {
USTPUTC('$', out);
pungetc();
} else if (c == '(') { /* $(command) or $((arith)) */
if (pgetc() == '(') {
PARSEARITH();
} else {
pungetc();
PARSEBACKQNEW();
}
} else {
USTPUTC(CTLVAR, out);
typeloc = out - stackblock();
USTPUTC(VSNORMAL, out);
subtype = VSNORMAL;
if (c == OPENBRACE) {
c = pgetc();
if (c == '#') {
if ((c = pgetc()) == CLOSEBRACE)
c = '#';
else
subtype = VSLENGTH;
}
else
subtype = 0;
}
if (is_name(c)) {
do {
STPUTC(c, out);
c = pgetc();
} while (is_in_name(c));
} else if (is_digit(c)) {
do {
USTPUTC(c, out);
c = pgetc();
} while (is_digit(c));
}
else if (is_special(c)) {
USTPUTC(c, out);
c = pgetc();
}
else
badsub: synerror("Bad substitution");
STPUTC('=', out);
flags = 0;
if (subtype == 0) {
switch (c) {
case ':':
flags = VSNUL;
c = pgetc();
/*FALLTHROUGH*/
default:
p = strchr(types, c);
if (p == NULL)
goto badsub;
subtype = p - types + VSNORMAL;
break;
case '%':
case '#':
{
int cc = c;
subtype = c == '#' ? VSTRIMLEFT :
VSTRIMRIGHT;
c = pgetc();
if (c == cc)
subtype++;
else
pungetc();
break;
}
}
} else {
pungetc();
}
if (ISDBLQUOTE() || arinest)
flags |= VSQUOTE;
*(stackblock() + typeloc) = subtype | flags;
if (subtype != VSNORMAL) {
varnest++;
if (varnest >= maxnest) {
dblquotep = ckrealloc(dblquotep, maxnest / 8);
dblquotep[(maxnest / 32) - 1] = 0;
maxnest += 32;
}
}
}
goto parsesub_return;
}
/*
* Called to parse command substitutions. Newstyle is set if the command
* is enclosed inside $(...); nlpp is a pointer to the head of the linked
* list of commands (passed by reference), and savelen is the number of
* characters on the top of the stack which must be preserved.
*/
parsebackq: {
struct nodelist **nlpp;
int savepbq;
union node *n;
char *volatile str;
struct jmploc jmploc;
struct jmploc *volatile savehandler;
int savelen;
int saveprompt;
#ifdef __GNUC__
(void) &saveprompt;
#endif
savepbq = parsebackquote;
if (setjmp(jmploc.loc)) {
if (str)
ckfree(str);
parsebackquote = 0;
handler = savehandler;
longjmp(handler->loc, 1);
}
INTOFF;
str = NULL;
savelen = out - stackblock();
if (savelen > 0) {
str = ckmalloc(savelen);
memcpy(str, stackblock(), savelen);
}
savehandler = handler;
handler = &jmploc;
INTON;
if (oldstyle) {
/* We must read until the closing backquote, giving special
treatment to some slashes, and then push the string and
reread it as input, interpreting it normally. */
char *pout;
int pc;
int psavelen;
char *pstr;
STARTSTACKSTR(pout);
for (;;) {
if (needprompt) {
setprompt(2);
needprompt = 0;
}
switch (pc = pgetc()) {
case '`':
goto done;
case '\\':
if ((pc = pgetc()) == '\n') {
plinno++;
if (doprompt)
setprompt(2);
else
setprompt(0);
/*
* If eating a newline, avoid putting
* the newline into the new character
* stream (via the STPUTC after the
* switch).
*/
continue;
}
if (pc != '\\' && pc != '`' && pc != '$'
&& (!ISDBLQUOTE() || pc != '"'))
STPUTC('\\', pout);
break;
case '\n':
plinno++;
needprompt = doprompt;
break;
case PEOF:
startlinno = plinno;
synerror("EOF in backquote substitution");
break;
default:
break;
}
STPUTC(pc, pout);
}
done:
STPUTC('\0', pout);
psavelen = pout - stackblock();
if (psavelen > 0) {
pstr = grabstackstr(pout);
setinputstring(pstr, 1);
}
}
nlpp = &bqlist;
while (*nlpp)
nlpp = &(*nlpp)->next;
*nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
(*nlpp)->next = NULL;
parsebackquote = oldstyle;
if (oldstyle) {
saveprompt = doprompt;
doprompt = 0;
}
n = list(0);
if (oldstyle)
doprompt = saveprompt;
else {
if (readtoken() != TRP)
synexpect(TRP);
}
(*nlpp)->n = n;
if (oldstyle) {
/*
* Start reading from old file again, ignoring any pushed back
* tokens left from the backquote parsing
*/
popfile();
tokpushback = 0;
}
while (stackblocksize() <= savelen)
growstackblock();
STARTSTACKSTR(out);
if (str) {
memcpy(out, str, savelen);
STADJUST(savelen, out);
INTOFF;
ckfree(str);
str = NULL;
INTON;
}
parsebackquote = savepbq;
handler = savehandler;
if (arinest || ISDBLQUOTE())
USTPUTC(CTLBACKQ | CTLQUOTE, out);
else
USTPUTC(CTLBACKQ, out);
if (oldstyle)
goto parsebackq_oldreturn;
else
goto parsebackq_newreturn;
}
/*
* Parse an arithmetic expansion (indicate start of one and set state)
*/
parsearith: {
if (++arinest == 1) {
prevsyntax = syntax;
syntax = ARISYNTAX;
USTPUTC(CTLARI, out);
if (ISDBLQUOTE())
USTPUTC('"',out);
else
USTPUTC(' ',out);
} else {
/*
* we collapse embedded arithmetic expansion to
* parenthesis, which should be equivalent
*/
USTPUTC('(', out);
}
goto parsearith_return;
}
} /* end of readtoken */
#ifdef mkinit
RESET {
tokpushback = 0;
checkkwd = 0;
}
#endif
/*
* Returns true if the text contains nothing to expand (no dollar signs
* or backquotes).
*/
STATIC int
noexpand(char *text)
{
char *p;
char c;
p = text;
while ((c = *p++) != '\0') {
if (c == CTLQUOTEMARK)
continue;
if (c == CTLESC)
p++;
else if (BASESYNTAX[(int)c] == CCTL)
return 0;
}
return 1;
}
/*
* Return true if the argument is a legal variable name (a letter or
* underscore followed by zero or more letters, underscores, and digits).
*/
int
goodname(char *name)
{
char *p;
p = name;
if (! is_name(*p))
return 0;
while (*++p) {
if (! is_in_name(*p))
return 0;
}
return 1;
}
/*
* Called when an unexpected token is read during the parse. The argument
* is the token that is expected, or -1 if more than one type of token can
* occur at this point.
*/
STATIC void
synexpect(int token)
{
char msg[64];
if (token >= 0) {
fmtstr(msg, 64, "%s unexpected (expecting %s)",
tokname[lasttoken], tokname[token]);
} else {
fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]);
}
synerror(msg);
/* NOTREACHED */
}
STATIC void
synerror(const char *msg)
{
if (commandname)
outfmt(&errout, "%s: %d: ", commandname, startlinno);
else
outfmt(&errout, "%s: ", getprogname());
outfmt(&errout, "Syntax error: %s\n", msg);
error((char *)NULL);
/* NOTREACHED */
}
STATIC void
setprompt(int which)
{
whichprompt = which;
#ifndef SMALL
if (!el)
#endif
out2str(getprompt(NULL));
}
/*
* called by editline -- any expansions to the prompt
* should be added here.
*/
const char *
getprompt(void *unused)
{
switch (whichprompt) {
case 0:
return "";
case 1:
return ps1val();
case 2:
return ps2val();
default:
return "<internal prompt error>";
}
}