/*	$NetBSD: lex.c,v 1.12 2005/09/11 22:16:00 christos Exp $	*/

/*
 * lexical analysis and source input
 */

#include <sys/cdefs.h>

#ifndef lint
__RCSID("$NetBSD: lex.c,v 1.12 2005/09/11 22:16:00 christos Exp $");
#endif

#include "sh.h"
#include <ctype.h>

/*
 * One entry of the lexer state stack: the state code plus whatever extra
 * bookkeeping that particular state needs.  The per-state records share
 * storage in the ls_info union; the ls_* macros below give short names
 * for reaching each record from a Lex_state.
 */
typedef struct lex_state Lex_state;

/* $(...) */
struct scsparen_info {
	int nparen;		/* count open parenthesis */
	int csstate;		/* XXX remove */
};

/* $((...)) */
struct sasparen_info {
	int nparen;		/* count open parenthesis */
	int start;		/* marks start of $(( in output str */
};

/* ((...)) */
struct sletparen_info {
	int nparen;		/* count open parenthesis */
};

/* `...` */
struct sbquote_info {
	int indquotes;		/* true if in double quotes: "`...`" */
};

struct lex_state {
	int ls_state;
	union {
		struct scsparen_info u_scsparen;
		struct sasparen_info u_sasparen;
		struct sletparen_info u_sletparen;
		struct sbquote_info u_sbquote;
		struct lex_state *base;	/* used to point to next state block */
	} ls_info;
};

#define ls_scsparen	ls_info.u_scsparen
#define ls_sasparen	ls_info.u_sasparen
#define ls_sletparen	ls_info.u_sletparen
#define ls_sbquote	ls_info.u_sbquote
/*
 * Bounds of the state block currently in use: base is its first entry
 * (the link slot) and end is one past its last entry.
 */
typedef struct State_info {
	Lex_state *base;
	Lex_state *end;
} State_info;

/* Forward declarations of the file-local helpers defined below. */
static void	readhere ARGS((struct ioword *iop));
static int	getsc__ ARGS((void));
static void	getsc_line ARGS((Source *s));
static int	getsc_bn ARGS((void));
static char	*get_brace_var ARGS((XString *wsp, char *wp));
static int	arraysub ARGS((char **strp));
static const char *ungetsc ARGS((int c));
static void	gethere ARGS((void));
static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));

/*
 * backslash_skip: non-zero while getsc_bn() is in the middle of handing
 * out a backslash and the character after it (see getsc_bn()/ungetsc()).
 * ignore_backslash_newline: non-zero while \newline sequences must be
 * passed through untouched (comments, single quotes, literal here docs).
 */
static int backslash_skip;
static int ignore_backslash_newline;

/*
 * optimized getsc_bn(): take the next character straight from the source
 * buffer unless it is the end of the buffer, a backslash, or a backslash
 * skip is pending; those cases go through getsc_bn().
 */
#define getsc()		(*source->str != '\0' && *source->str != '\\' \
			 && !backslash_skip ? *source->str++ : getsc_bn())
/* optimized getsc__(): only the end of the buffer needs the slow path */
#define getsc_()	((*source->str != '\0') ? *source->str++ : getsc__())

/* Number of Lex_state entries in one state block */
#define STATE_BSIZE	32

/*
 * Enter lexing state s.  Uses yylex()'s locals statep, state_info and
 * state; a new block is chained on when the current one is full.
 */
#define PUSH_STATE(s)	do { \
			    if (++statep == state_info.end) \
				statep = push_state_(&state_info, statep); \
			    state = statep->ls_state = (s); \
			} while (0)

/*
 * Return to the enclosing lexing state, releasing the current block when
 * stepping back onto its link slot.
 */
#define POP_STATE()	do { \
			    if (--statep == state_info.base) \
				statep = pop_state_(&state_info, statep); \
			    state = statep->ls_state; \
			} while (0)

/*
* Lexical analyzer
*
* tokens are not regular expressions, they are LL(1).
* for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
* hence the state stack.
*/
int
yylex(cf)
int cf;
{
1999-10-20 19:09:58 +04:00
Lex_state states[STATE_BSIZE], *statep;
State_info state_info;
1996-09-22 03:35:13 +04:00
register int c, state;
XString ws; /* expandable output word */
register char *wp; /* output word pointer */
1999-10-20 19:09:58 +04:00
char *sp, *dp;
int c2;
1996-09-22 03:35:13 +04:00
Again:
1999-10-20 19:09:58 +04:00
states[0].ls_state = -1;
states[0].ls_info.base = (Lex_state *) 0;
statep = &states[1];
state_info.base = states;
state_info.end = &states[STATE_BSIZE];
1996-09-22 03:35:13 +04:00
Xinit(ws, wp, 64, ATEMP);
1996-10-09 19:29:01 +04:00
backslash_skip = 0;
ignore_backslash_newline = 0;
1996-09-22 03:35:13 +04:00
if (cf&ONEWORD)
1999-10-20 19:09:58 +04:00
state = SWORD;
1996-10-09 19:12:31 +04:00
#ifdef KSH
1996-09-22 03:35:13 +04:00
else if (cf&LETEXPR) {
*wp++ = OQUOTE; /* enclose arguments in (double) quotes */
1999-10-20 19:09:58 +04:00
state = SLETPAREN;
statep->ls_sletparen.nparen = 0;
1996-10-09 19:12:31 +04:00
}
#endif /* KSH */
else { /* normal lexing */
1999-10-20 19:09:58 +04:00
state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
1996-09-22 03:35:13 +04:00
while ((c = getsc()) == ' ' || c == '\t')
;
1996-10-09 19:29:01 +04:00
if (c == '#') {
ignore_backslash_newline++;
1996-09-22 03:35:13 +04:00
while ((c = getsc()) != '\0' && c != '\n')
;
1996-10-09 19:29:01 +04:00
ignore_backslash_newline--;
}
1996-09-22 03:35:13 +04:00
ungetsc(c);
}
if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */
source->flags &= ~SF_ALIAS;
/* In POSIX mode, a trailing space only counts if we are
* parsing a simple command
*/
if (!Flag(FPOSIX) || (cf & CMDWORD))
cf |= ALIAS;
}
1999-10-20 19:09:58 +04:00
/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
statep->ls_state = state;
1996-09-22 03:35:13 +04:00
/* collect non-special or quoted characters to form word */
1999-10-20 19:09:58 +04:00
while (!((c = getsc()) == 0
|| ((state == SBASE || state == SHEREDELIM)
&& ctype(c, C_LEX1))))
1996-09-22 03:35:13 +04:00
{
Xcheck(ws, wp);
switch (state) {
case SBASE:
if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
*wp = EOS; /* temporary */
if (is_wdvarname(Xstring(ws, wp), FALSE))
{
char *p, *tmp;
if (arraysub(&tmp)) {
*wp++ = CHAR;
*wp++ = c;
for (p = tmp; *p; ) {
Xcheck(ws, wp);
*wp++ = CHAR;
*wp++ = *p++;
}
afree(tmp, ATEMP);
break;
} else {
Source *s;
s = pushs(SREREAD,
source->areap);
s->start = s->str
= s->u.freeme = tmp;
s->next = source;
source = s;
}
}
*wp++ = CHAR;
*wp++ = c;
break;
}
/* fall through.. */
Sbase1: /* includes *(...|...) pattern (*+?@!) */
#ifdef KSH
if (c == '*' || c == '@' || c == '+' || c == '?'
|| c == '!')
{
c2 = getsc();
if (c2 == '(' /*)*/ ) {
*wp++ = OPAT;
*wp++ = c;
1999-10-20 19:09:58 +04:00
PUSH_STATE(SPATTERN);
1996-09-22 03:35:13 +04:00
break;
}
ungetsc(c2);
}
#endif /* KSH */
/* fall through.. */
Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
switch (c) {
case '\\':
c = getsc();
#ifdef OS2
if (isalnum((unsigned char)c)) {
1996-10-09 19:29:01 +04:00
*wp++ = CHAR, *wp++ = '\\';
*wp++ = CHAR, *wp++ = c;
} else
1996-09-22 03:35:13 +04:00
#endif
1996-10-09 19:29:01 +04:00
if (c) /* trailing \ is lost */
*wp++ = QCHAR, *wp++ = c;
1996-09-22 03:35:13 +04:00
break;
case '\'':
*wp++ = OQUOTE;
1996-10-09 19:29:01 +04:00
ignore_backslash_newline++;
1999-10-20 19:09:58 +04:00
PUSH_STATE(SSQUOTE);
1996-09-22 03:35:13 +04:00
break;
case '"':
*wp++ = OQUOTE;
1999-10-20 19:09:58 +04:00
PUSH_STATE(SDQUOTE);
1996-09-22 03:35:13 +04:00
break;
default:
goto Subst;
}
break;
Subst:
switch (c) {
case '\\':
c = getsc();
switch (c) {
case '\\':
1996-09-22 03:35:13 +04:00
case '$': case '`':
*wp++ = QCHAR, *wp++ = c;
break;
case '"':
if ((cf & HEREDOC) == 0) {
*wp++ = QCHAR, *wp++ = c;
break;
}
/* FALLTROUGH */
1996-09-22 03:35:13 +04:00
default:
Xcheck(ws, wp);
1996-10-09 19:29:01 +04:00
if (c) { /* trailing \ is lost */
*wp++ = CHAR, *wp++ = '\\';
*wp++ = CHAR, *wp++ = c;
}
1996-09-22 03:35:13 +04:00
break;
}
break;
case '$':
c = getsc();
if (c == '(') /*)*/ {
c = getsc();
if (c == '(') /*)*/ {
1999-10-20 19:09:58 +04:00
PUSH_STATE(SASPAREN);
statep->ls_sasparen.nparen = 2;
statep->ls_sasparen.start =
Xsavepos(ws, wp);
1996-09-22 03:35:13 +04:00
*wp++ = EXPRSUB;
} else {
ungetsc(c);
1999-10-20 19:09:58 +04:00
PUSH_STATE(SCSPAREN);
statep->ls_scsparen.nparen = 1;
statep->ls_scsparen.csstate = 0;
1996-09-22 03:35:13 +04:00
*wp++ = COMSUB;
}
} else if (c == '{') /*}*/ {
*wp++ = OSUBST;
1999-10-20 19:09:58 +04:00
*wp++ = '{'; /*}*/
1996-09-22 03:35:13 +04:00
wp = get_brace_var(&ws, wp);
1999-10-20 19:09:58 +04:00
c = getsc();
/* allow :# and :% (ksh88 compat) */
if (c == ':') {
*wp++ = CHAR, *wp++ = c;
c = getsc();
}
1996-09-22 03:35:13 +04:00
/* If this is a trim operation,
1999-10-20 19:09:58 +04:00
* treat (,|,) specially in STBRACE.
1996-09-22 03:35:13 +04:00
*/
if (c == '#' || c == '%') {
1999-10-20 19:09:58 +04:00
ungetsc(c);
PUSH_STATE(STBRACE);
1996-09-22 03:35:13 +04:00
} else {
ungetsc(c);
1999-10-20 19:09:58 +04:00
PUSH_STATE(SBRACE);
1996-09-22 03:35:13 +04:00
}
} else if (ctype(c, C_ALPHA)) {
*wp++ = OSUBST;
1999-10-20 19:09:58 +04:00
*wp++ = 'X';
1996-09-22 03:35:13 +04:00
do {
Xcheck(ws, wp);
*wp++ = c;
c = getsc();
} while (ctype(c, C_ALPHA|C_DIGIT));
*wp++ = '\0';
*wp++ = CSUBST;
1999-10-20 19:09:58 +04:00
*wp++ = 'X';
1996-09-22 03:35:13 +04:00
ungetsc(c);
} else if (ctype(c, C_DIGIT|C_VAR1)) {
Xcheck(ws, wp);
*wp++ = OSUBST;
1999-10-20 19:09:58 +04:00
*wp++ = 'X';
1996-09-22 03:35:13 +04:00
*wp++ = c;
*wp++ = '\0';
*wp++ = CSUBST;
1999-10-20 19:09:58 +04:00
*wp++ = 'X';
1996-09-22 03:35:13 +04:00
} else {
*wp++ = CHAR, *wp++ = '$';
ungetsc(c);
}
break;
case '`':
1999-10-20 19:09:58 +04:00
PUSH_STATE(SBQUOTE);
1996-09-22 03:35:13 +04:00
*wp++ = COMSUB;
/* Need to know if we are inside double quotes
* since sh/at&t-ksh translate the \" to " in
* "`..\"..`".
1999-10-20 19:09:58 +04:00
* This is not done in posix mode (section
* 3.2.3, Double Quotes: "The backquote shall
* retain its special meaning introducing the
* other form of command substitution (see
* 3.6.3). The portion of the quoted string
* from the initial backquote and the
* characters up to the next backquote that
* is not preceded by a backslash (having
* escape characters removed) defines that
* command whose output replaces `...` when
* the word is expanded."
* Section 3.6.3, Command Substitution:
* "Within the backquoted style of command
* substitution, backslash shall retain its
* literal meaning, except when followed by
* $ ` \.").
1996-09-22 03:35:13 +04:00
*/
1999-10-20 19:09:58 +04:00
statep->ls_sbquote.indquotes = 0;
if (!Flag(FPOSIX)) {
Lex_state *s = statep;
Lex_state *base = state_info.base;
while (1) {
for (; s != base; s--) {
if (s->ls_state == SDQUOTE) {
statep->ls_sbquote.indquotes = 1;
break;
}
}
if (s != base)
break;
if (!(s = s->ls_info.base))
break;
base = s-- - STATE_BSIZE;
}
}
1996-09-22 03:35:13 +04:00
break;
default:
*wp++ = CHAR, *wp++ = c;
}
break;
case SSQUOTE:
if (c == '\'') {
1999-10-20 19:09:58 +04:00
POP_STATE();
1996-09-22 03:35:13 +04:00
*wp++ = CQUOTE;
1996-10-09 19:29:01 +04:00
ignore_backslash_newline--;
1996-09-22 03:35:13 +04:00
} else
*wp++ = QCHAR, *wp++ = c;
break;
case SDQUOTE:
if (c == '"') {
1999-10-20 19:09:58 +04:00
POP_STATE();
1996-09-22 03:35:13 +04:00
*wp++ = CQUOTE;
} else
goto Subst;
break;
1999-10-20 19:09:58 +04:00
case SCSPAREN: /* $( .. ) */
1996-09-22 03:35:13 +04:00
/* todo: deal with $(...) quoting properly
* kludge to partly fake quoting inside $(..): doesn't
* really work because nested $(..) or ${..} inside
* double quotes aren't dealt with.
*/
1999-10-20 19:09:58 +04:00
switch (statep->ls_scsparen.csstate) {
1996-09-22 03:35:13 +04:00
case 0: /* normal */
switch (c) {
case '(':
1999-10-20 19:09:58 +04:00
statep->ls_scsparen.nparen++;
1996-09-22 03:35:13 +04:00
break;
case ')':
1999-10-20 19:09:58 +04:00
statep->ls_scsparen.nparen--;
1996-09-22 03:35:13 +04:00
break;
case '\\':
1999-10-20 19:09:58 +04:00
statep->ls_scsparen.csstate = 1;
1996-09-22 03:35:13 +04:00
break;
case '"':
1999-10-20 19:09:58 +04:00
statep->ls_scsparen.csstate = 2;
1996-09-22 03:35:13 +04:00
break;
case '\'':
1999-10-20 19:09:58 +04:00
statep->ls_scsparen.csstate = 4;
1996-10-09 19:29:01 +04:00
ignore_backslash_newline++;
1996-09-22 03:35:13 +04:00
break;
}
break;
case 1: /* backslash in normal mode */
case 3: /* backslash in double quotes */
1999-10-20 19:09:58 +04:00
--statep->ls_scsparen.csstate;
1996-09-22 03:35:13 +04:00
break;
case 2: /* double quotes */
if (c == '"')
1999-10-20 19:09:58 +04:00
statep->ls_scsparen.csstate = 0;
1996-09-22 03:35:13 +04:00
else if (c == '\\')
1999-10-20 19:09:58 +04:00
statep->ls_scsparen.csstate = 3;
1996-09-22 03:35:13 +04:00
break;
case 4: /* single quotes */
1996-10-09 19:29:01 +04:00
if (c == '\'') {
1999-10-20 19:09:58 +04:00
statep->ls_scsparen.csstate = 0;
1996-10-09 19:29:01 +04:00
ignore_backslash_newline--;
}
1996-09-22 03:35:13 +04:00
break;
}
1999-10-20 19:09:58 +04:00
if (statep->ls_scsparen.nparen == 0) {
POP_STATE();
1996-09-22 03:35:13 +04:00
*wp++ = 0; /* end of COMSUB */
} else
*wp++ = c;
break;
1999-10-20 19:09:58 +04:00
case SASPAREN: /* $(( .. )) */
1996-09-22 03:35:13 +04:00
/* todo: deal with $((...); (...)) properly */
/* XXX should nest using existing state machine
* (embed "..", $(...), etc.) */
if (c == '(')
1999-10-20 19:09:58 +04:00
statep->ls_sasparen.nparen++;
1996-09-22 03:35:13 +04:00
else if (c == ')') {
1999-10-20 19:09:58 +04:00
statep->ls_sasparen.nparen--;
if (statep->ls_sasparen.nparen == 1) {
1996-09-22 03:35:13 +04:00
/*(*/
if ((c2 = getsc()) == ')') {
1999-10-20 19:09:58 +04:00
POP_STATE();
1996-09-22 03:35:13 +04:00
*wp++ = 0; /* end of EXPRSUB */
break;
} else {
1999-10-20 19:09:58 +04:00
char *s;
1996-09-22 03:35:13 +04:00
ungetsc(c2);
/* mismatched parenthesis -
* assume we were really
* parsing a $(..) expression
*/
1999-10-20 19:09:58 +04:00
s = Xrestpos(ws, wp,
statep->ls_sasparen.start);
memmove(s + 1, s, wp - s);
*s++ = COMSUB;
*s = '('; /*)*/
1996-09-22 03:35:13 +04:00
wp++;
1999-10-20 19:09:58 +04:00
statep->ls_scsparen.nparen = 1;
statep->ls_scsparen.csstate = 0;
state = statep->ls_state
= SCSPAREN;
1996-09-22 03:35:13 +04:00
}
}
}
*wp++ = c;
break;
case SBRACE:
/*{*/
if (c == '}') {
1999-10-20 19:09:58 +04:00
POP_STATE();
1996-09-22 03:35:13 +04:00
*wp++ = CSUBST;
1999-10-20 19:09:58 +04:00
*wp++ = /*{*/ '}';
1996-09-22 03:35:13 +04:00
} else
goto Sbase1;
break;
case STBRACE:
1999-10-20 19:09:58 +04:00
/* Same as SBRACE, except (,|,) treated specially */
1996-09-22 03:35:13 +04:00
/*{*/
if (c == '}') {
1999-10-20 19:09:58 +04:00
POP_STATE();
1996-09-22 03:35:13 +04:00
*wp++ = CSUBST;
1999-10-20 19:09:58 +04:00
*wp++ = /*{*/ '}';
1996-09-22 03:35:13 +04:00
} else if (c == '|') {
*wp++ = SPAT;
1999-10-20 19:09:58 +04:00
} else if (c == '(') {
*wp++ = OPAT;
*wp++ = ' '; /* simile for @ */
PUSH_STATE(SPATTERN);
1996-09-22 03:35:13 +04:00
} else
goto Sbase1;
break;
case SBQUOTE:
if (c == '`') {
*wp++ = 0;
1999-10-20 19:09:58 +04:00
POP_STATE();
1996-09-22 03:35:13 +04:00
} else if (c == '\\') {
switch (c = getsc()) {
case '\\':
case '$': case '`':
*wp++ = c;
break;
case '"':
1999-10-20 19:09:58 +04:00
if (statep->ls_sbquote.indquotes) {
1996-09-22 03:35:13 +04:00
*wp++ = c;
break;
}
/* fall through.. */
default:
1996-10-09 19:29:01 +04:00
if (c) { /* trailing \ is lost */
*wp++ = '\\';
*wp++ = c;
}
1996-09-22 03:35:13 +04:00
break;
}
} else
*wp++ = c;
break;
case SWORD: /* ONEWORD */
goto Subst;
1996-10-09 19:12:31 +04:00
#ifdef KSH
1999-10-20 19:09:58 +04:00
case SLETPAREN: /* LETEXPR: (( ... )) */
1996-09-22 03:35:13 +04:00
/*(*/
if (c == ')') {
1999-10-20 19:09:58 +04:00
if (statep->ls_sletparen.nparen > 0)
--statep->ls_sletparen.nparen;
1996-09-22 03:35:13 +04:00
/*(*/
else if ((c2 = getsc()) == ')') {
c = 0;
*wp++ = CQUOTE;
goto Done;
} else
ungetsc(c2);
} else if (c == '(')
/* parenthesis inside quotes and backslashes
* are lost, but at&t ksh doesn't count them
* either
*/
1999-10-20 19:09:58 +04:00
++statep->ls_sletparen.nparen;
1996-09-22 03:35:13 +04:00
goto Sbase2;
1996-10-09 19:12:31 +04:00
#endif /* KSH */
1996-09-22 03:35:13 +04:00
case SHEREDELIM: /* <<,<<- delimiter */
/* XXX chuck this state (and the next) - use
* the existing states ($ and \`..` should be
* stripped of their specialness after the
* fact).
*/
/* here delimiters need a special case since
* $ and `..` are not to be treated specially
*/
if (c == '\\') {
c = getsc();
1996-10-09 19:29:01 +04:00
if (c) { /* trailing \ is lost */
1996-09-22 03:35:13 +04:00
*wp++ = QCHAR;
*wp++ = c;
}
} else if (c == '\'') {
1999-10-20 19:09:58 +04:00
PUSH_STATE(SSQUOTE);
1996-09-22 03:35:13 +04:00
*wp++ = OQUOTE;
1996-10-09 19:29:01 +04:00
ignore_backslash_newline++;
1996-09-22 03:35:13 +04:00
} else if (c == '"') {
1999-10-20 19:09:58 +04:00
state = statep->ls_state = SHEREDQUOTE;
1996-09-22 03:35:13 +04:00
*wp++ = OQUOTE;
} else {
*wp++ = CHAR;
*wp++ = c;
}
break;
case SHEREDQUOTE: /* " in <<,<<- delimiter */
if (c == '"') {
*wp++ = CQUOTE;
1999-10-20 19:09:58 +04:00
state = statep->ls_state = SHEREDELIM;
1996-09-22 03:35:13 +04:00
} else {
1996-10-09 19:29:01 +04:00
if (c == '\\') {
switch (c = getsc()) {
case '\\': case '"':
case '$': case '`':
break;
default:
if (c) { /* trailing \ lost */
*wp++ = CHAR;
*wp++ = '\\';
}
break;
}
}
1996-09-22 03:35:13 +04:00
*wp++ = CHAR;
*wp++ = c;
}
break;
case SPATTERN: /* in *(...|...) pattern (*+?@!) */
if ( /*(*/ c == ')') {
*wp++ = CPAT;
1999-10-20 19:09:58 +04:00
POP_STATE();
} else if (c == '|') {
1996-09-22 03:35:13 +04:00
*wp++ = SPAT;
1999-10-20 19:09:58 +04:00
} else if (c == '(') {
*wp++ = OPAT;
*wp++ = ' '; /* simile for @ */
PUSH_STATE(SPATTERN);
} else
1996-09-22 03:35:13 +04:00
goto Sbase1;
break;
}
}
Done:
Xcheck(ws, wp);
1999-10-20 19:09:58 +04:00
if (statep != &states[1])
/* XXX figure out what is missing */
1996-09-22 03:35:13 +04:00
yyerror("no closing quote\n");
/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
if (state == SHEREDELIM)
state = SBASE;
1999-10-20 19:09:58 +04:00
dp = Xstring(ws, wp);
if ((c == '<' || c == '>') && state == SBASE
&& ((c2 = Xlength(ws, wp)) == 0
|| (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
{
struct ioword *iop =
(struct ioword *) alloc(sizeof(*iop), ATEMP);
if (c2 == 2)
iop->unit = dp[1] - '0';
else
iop->unit = c == '>'; /* 0 for <, 1 for > */
c2 = getsc();
/* <<, >>, <> are ok, >< is not */
if (c == c2 || (c == '<' && c2 == '>')) {
iop->flag = c == c2 ?
(c == '>' ? IOCAT : IOHERE) : IORDWR;
1999-10-20 19:49:15 +04:00
if (iop->flag == IOHERE) {
if ((c2 = getsc()) == '-') {
1999-10-20 19:09:58 +04:00
iop->flag |= IOSKIP;
1999-10-20 19:49:15 +04:00
} else {
1999-10-20 19:09:58 +04:00
ungetsc(c2);
1999-10-20 19:49:15 +04:00
}
}
1999-10-20 19:09:58 +04:00
} else if (c2 == '&')
iop->flag = IODUP | (c == '<' ? IORDUP : 0);
else {
iop->flag = c == '>' ? IOWRITE : IOREAD;
if (c == '>' && c2 == '|')
iop->flag |= IOCLOB;
else
ungetsc(c2);
}
iop->name = (char *) 0;
iop->delim = (char *) 0;
iop->heredoc = (char *) 0;
Xfree(ws, wp); /* free word */
yylval.iop = iop;
return REDIR;
1996-09-22 03:35:13 +04:00
}
1999-10-20 19:09:58 +04:00
if (wp == dp && state == SBASE) {
1996-09-22 03:35:13 +04:00
Xfree(ws, wp); /* free word */
/* no word, process LEX1 character */
switch (c) {
default:
return c;
case '|':
case '&':
case ';':
if ((c2 = getsc()) == c)
c = (c == ';') ? BREAK :
(c == '|') ? LOGOR :
(c == '&') ? LOGAND :
YYERRCODE;
#ifdef KSH
else if (c == '|' && c2 == '&')
c = COPROC;
#endif /* KSH */
else
ungetsc(c2);
return c;
case '\n':
gethere();
if (cf & CONTIN)
goto Again;
return c;
case '(': /*)*/
1996-10-09 19:12:31 +04:00
#ifdef KSH
1996-09-22 03:35:13 +04:00
if ((c2 = getsc()) == '(') /*)*/
1999-10-20 19:09:58 +04:00
/* XXX need to handle ((...); (...)) */
1996-09-22 03:35:13 +04:00
c = MDPAREN;
else
ungetsc(c2);
1996-10-09 19:12:31 +04:00
#endif /* KSH */
1996-09-22 03:35:13 +04:00
return c;
/*(*/
case ')':
return c;
}
}
*wp++ = EOS; /* terminate word */
yylval.cp = Xclose(ws, wp);
1996-10-09 19:12:31 +04:00
if (state == SWORD
#ifdef KSH
1999-10-20 19:09:58 +04:00
|| state == SLETPAREN
1996-10-09 19:12:31 +04:00
#endif /* KSH */
) /* ONEWORD? */
1996-09-22 03:35:13 +04:00
return LWORD;
ungetsc(c); /* unget terminator */
/* copy word to unprefixed string ident */
for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
*dp++ = *sp++;
/* Make sure the ident array stays '\0' padded */
1996-09-22 03:35:13 +04:00
memset(dp, 0, (ident+IDENT) - dp + 1);
if (c != EOS)
*ident = '\0'; /* word is not unquoted */
if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
struct tbl *p;
int h = hash(ident);
/* { */
if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
&& (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
{
afree(yylval.cp, ATEMP);
return p->val.i;
}
if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
&& (p->flag & ISSET))
{
register Source *s;
for (s = source; s->type == SALIAS; s = s->next)
if (s->u.tblp == p)
return LWORD;
/* push alias expansion */
s = pushs(SALIAS, source->areap);
s->start = s->str = p->val.s;
s->u.tblp = p;
s->next = source;
source = s;
afree(yylval.cp, ATEMP);
goto Again;
}
}
return LWORD;
}
/*
 * Read the text of every here document queued between heres and herep,
 * then reset the queue to empty.
 */
static void
gethere()
{
	register struct ioword **pending = heres;

	while (pending < herep) {
		readhere(*pending);
		pending++;
	}
	herep = heres;
}
/*
* read "<<word" text into temp file
*/
static void
readhere(iop)
1999-10-20 19:09:58 +04:00
struct ioword *iop;
1996-09-22 03:35:13 +04:00
{
register int c;
char *volatile eof;
char *eofp;
1996-10-09 19:29:01 +04:00
int skiptabs;
1999-10-20 19:09:58 +04:00
XString xs;
char *xp;
int xpos;
1996-09-22 03:35:13 +04:00
eof = evalstr(iop->delim, 0);
1996-10-09 19:29:01 +04:00
if (!(iop->flag & IOEVAL))
ignore_backslash_newline++;
1999-10-20 19:09:58 +04:00
Xinit(xs, xp, 256, ATEMP);
1996-09-22 03:35:13 +04:00
for (;;) {
eofp = eof;
skiptabs = iop->flag & IOSKIP;
1999-10-20 19:09:58 +04:00
xpos = Xsavepos(xs, xp);
1996-10-09 19:29:01 +04:00
while ((c = getsc()) != 0) {
1996-09-22 03:35:13 +04:00
if (skiptabs) {
if (c == '\t')
continue;
skiptabs = 0;
}
if (c != *eofp)
break;
1999-10-20 19:09:58 +04:00
Xcheck(xs, xp);
Xput(xs, xp, c);
1996-09-22 03:35:13 +04:00
eofp++;
}
/* Allow EOF here so commands with out trailing newlines
* will work (eg, ksh -c '...', $(...), etc).
*/
1999-10-20 19:09:58 +04:00
if (*eofp == '\0' && (c == 0 || c == '\n')) {
xp = Xrestpos(xs, xp, xpos);
1996-09-22 03:35:13 +04:00
break;
1999-10-20 19:09:58 +04:00
}
1996-09-22 03:35:13 +04:00
ungetsc(c);
1996-10-09 19:29:01 +04:00
while ((c = getsc()) != '\n') {
1996-09-22 03:35:13 +04:00
if (c == 0)
yyerror("here document `%s' unclosed\n", eof);
1999-10-20 19:09:58 +04:00
Xcheck(xs, xp);
Xput(xs, xp, c);
1996-09-22 03:35:13 +04:00
}
1999-10-20 19:09:58 +04:00
Xcheck(xs, xp);
Xput(xs, xp, c);
1996-09-22 03:35:13 +04:00
}
1999-10-20 19:09:58 +04:00
Xput(xs, xp, '\0');
iop->heredoc = Xclose(xs, xp);
1996-10-09 19:29:01 +04:00
if (!(iop->flag & IOEVAL))
ignore_backslash_newline--;
1996-09-22 03:35:13 +04:00
}
/*
 * Report a lexer/parser error.  Alias and re-read sources are dropped,
 * the remaining input of the underlying source is discarded, the
 * formatted message is written to shl_out after the error prefix, and
 * errorf() is called to finish error processing.
 */
void
#ifdef HAVE_PROTOTYPES
yyerror(const char *fmt, ...)
#else
yyerror(fmt, va_alist)
	const char *fmt;
	va_dcl
#endif
{
	va_list args;

	/* pop aliases and re-reads */
	for (;;) {
		if (source->type != SALIAS && source->type != SREREAD)
			break;
		source = source->next;
	}
	/* zap pending input */
	source->str = null;

	error_prefix(TRUE);
	SH_VA_START(args, fmt);
	shf_vfprintf(shl_out, fmt, args);
	va_end(args);
	errorf(null);
}
/*
 * input for yylex with alias expansion
 *
 * Allocate a Source of the given type from areap and reset its fields.
 * Only SFILE and SSTDIN sources get a line buffer; for every other type
 * the buffer descriptor is zeroed.  The caller links the result into the
 * source list.
 */
Source *
pushs(type, areap)
	int type;
	Area *areap;
{
	register Source *src = (Source *) alloc(sizeof(Source), areap);

	src->areap = areap;
	src->type = type;
	src->flags = 0;
	src->line = 0;
	src->errline = 0;
	src->file = NULL;
	src->next = NULL;
	src->start = NULL;
	src->str = null;

	if (type != SFILE && type != SSTDIN)
		memset(&src->xs, 0, sizeof(src->xs));
	else {
		char *unused;

		Xinit(src->xs, unused, 256, src->areap);
	}
	return src;
}
static int
1996-10-09 19:29:01 +04:00
getsc__()
1996-09-22 03:35:13 +04:00
{
register Source *s = source;
register int c;
while ((c = *s->str++) == 0) {
s->str = NULL; /* return 0 for EOF by default */
switch (s->type) {
case SEOF:
s->str = null;
return 0;
case SSTDIN:
case SFILE:
getsc_line(s);
break;
case SWSTR:
break;
case SSTRING:
break;
case SWORDS:
s->start = s->str = *s->u.strv++;
s->type = SWORDSEP;
break;
case SWORDSEP:
if (*s->u.strv == NULL) {
s->start = s->str = newline;
s->type = SEOF;
} else {
s->start = s->str = space;
s->type = SWORDS;
}
break;
case SALIAS:
if (s->flags & SF_ALIASEND) {
/* pass on an unused SF_ALIAS flag */
source = s->next;
source->flags |= s->flags & SF_ALIAS;
s = source;
} else if (*s->u.tblp->val.s
1998-11-04 21:27:20 +03:00
&& isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1]))
1996-09-22 03:35:13 +04:00
{
source = s = s->next; /* pop source stack */
1996-10-09 19:29:01 +04:00
/* Note that this alias ended with a space,
* enabling alias expansion on the following
* word.
*/
1996-09-22 03:35:13 +04:00
s->flags |= SF_ALIAS;
} else {
1996-10-09 19:29:01 +04:00
/* At this point, we need to keep the current
* alias in the source list so recursive
* aliases can be detected and we also need
* to return the next character. Do this
* by temporarily popping the alias to get
* the next character and then put it back
* in the source list with the SF_ALIASEND
* flag set.
1996-09-22 03:35:13 +04:00
*/
1996-10-09 19:29:01 +04:00
source = s->next; /* pop source stack */
source->flags |= s->flags & SF_ALIAS;
c = getsc__();
if (c) {
s->flags |= SF_ALIASEND;
s->ugbuf[0] = c; s->ugbuf[1] = '\0';
s->start = s->str = s->ugbuf;
s->next = source;
source = s;
} else {
s = source;
/* avoid reading eof twice */
s->str = NULL;
1996-12-18 07:50:04 +03:00
break;
1996-10-09 19:29:01 +04:00
}
1996-09-22 03:35:13 +04:00
}
continue;
case SREREAD:
1996-10-09 19:29:01 +04:00
if (s->start != s->ugbuf) /* yuck */
1996-09-22 03:35:13 +04:00
afree(s->u.freeme, ATEMP);
source = s = s->next;
continue;
}
if (s->str == NULL) {
s->type = SEOF;
s->start = s->str = null;
return '\0';
}
if (s->flags & SF_ECHO) {
shf_puts(s->str, shl_out);
shf_flush(shl_out);
}
}
return c;
}
/*
 * Read the next chunk of input for source s into its buffer s->xs and
 * point s->start and s->str at it.  At end of input s->str is set to
 * NULL (and an SFILE source has its descriptor closed).  For interactive
 * input the line is also saved in the history (when HISTORY is defined)
 * and the prompt is switched to PS2.
 */
static void
getsc_line(s)
Source *s;
{
char *xp = Xstring(s->xs, xp);
/* interactive: shell is talking to a user and reading its standard input */
int interactive = Flag(FTALKING) && s->type == SSTDIN;
int have_tty = interactive && (s->flags & SF_TTY);
/* Done here to ensure nothing odd happens when a timeout occurs */
XcheckN(s->xs, xp, LINE);
*xp = '\0';
s->start = s->str = xp;
#ifdef KSH
/* arm the input timeout while waiting on a tty */
if (have_tty && ksh_tmout) {
ksh_tmout_state = TMOUT_READING;
alarm(ksh_tmout);
}
#endif /* KSH */
#ifdef EDIT
/* use the command line editor when on a tty with an edit mode enabled */
if (have_tty && (0
# ifdef VI
|| Flag(FVI)
# endif /* VI */
# ifdef EMACS
|| Flag(FEMACS) || Flag(FGMACS)
# endif /* EMACS */
))
{
int nread;
nread = x_read(xp, LINE);
if (nread < 0) /* read error */
nread = 0;
xp[nread] = '\0';
xp += nread;
}
else
#endif /* EDIT */
{
if (interactive) {
pprompt(prompt, 0);
} else
s->line++;
/* read until a newline or end of input, growing the buffer as needed */
while (1) {
char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
/* interrupted read: run pending traps and try again */
if (!p && shf_error(s->u.shf)
&& shf_errno(s->u.shf) == EINTR)
{
shf_clearerr(s->u.shf);
if (trap)
runtraps(0);
continue;
}
if (!p || (xp = p, xp[-1] == '\n'))
break;
/* double buffer size */
xp++; /* move past null so doubling works... */
XcheckN(s->xs, xp, Xlength(s->xs, xp));
xp--; /* ...and move back again */
}
/* flush any unwanted input so other programs/builtins
* can read it. Not very optimal, but less error prone
* than flushing else where, dealing with redirections,
* etc..
* todo: reduce size of shf buffer (~128?) if SSTDIN
*/
if (s->type == SSTDIN)
shf_flush(s->u.shf);
}
/* XXX: temporary kludge to restore source after a
* trap may have been executed.
*/
source = s;
#ifdef KSH
/* input has arrived: cancel the timeout */
if (have_tty && ksh_tmout)
{
ksh_tmout_state = TMOUT_EXECUTING;
alarm(0);
}
#endif /* KSH */
s->start = s->str = Xstring(s->xs, xp);
strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
/* Note: if input is all nulls, this is not eof */
if (Xlength(s->xs, xp) == 0) { /* EOF */
if (s->type == SFILE)
shf_fdclose(s->u.shf);
s->str = NULL;
} else if (interactive) {
#ifdef HISTORY
char *p = Xstring(s->xs, xp);
/* at PS1, a line holding only IFS white space is not saved */
if (cur_prompt == PS1)
while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
p++;
if (*p) {
# ifdef EASY_HISTORY
if (cur_prompt == PS2)
histappend(Xstring(s->xs, xp), 1);
else
# endif /* EASY_HISTORY */
{
s->line++;
histsave(s->line, s->str, 1);
}
}
#endif /* HISTORY */
}
/* any further input for this command is a continuation line */
if (interactive)
set_prompt(PS2, (Source *) 0);
}
/*
 * Select which prompt (PS1 or PS2) is current and compute its text into
 * the global `prompt'.  s, when not NULL, supplies the line number that
 * replaces a lone ! in PS1 (KSH only).
 */
void
set_prompt(to, s)
	int to;
	Source *s;
{
	cur_prompt = to;

	switch (to) {
	case PS1: /* command */
#ifdef KSH
		/* Substitute ! and !! here, before substitutions are done
		 * so ! in expanded variables are not expanded.
		 * NOTE: this is not what at&t ksh does (it does it after
		 * substitutions); POSIX doesn't say which is to be done.
		 */
		{
			struct shf *shf;
			char * volatile ps1;
			Area *saved_atemp;

			ps1 = str_val(global("PS1"));
			shf = shf_sopen((char *) 0, strlen(ps1) * 2,
				SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
			/* !! becomes !, a lone ! becomes the line number */
			while (*ps1) {
				if (*ps1 != '!' || *++ps1 == '!')
					shf_putchar(*ps1++, shf);
				else
					shf_fprintf(shf, "%d",
						s ? s->line + 1 : 0);
			}
			ps1 = shf_sclose(shf);
			/* expand in a fresh environment so that an error
			 * during substitution lands back here
			 */
			saved_atemp = ATEMP;
			newenv(E_ERRH);
			if (ksh_sigsetjmp(e->jbuf, 0)) {
				prompt = safe_prompt;
				/* Don't print an error - assume it has already
				 * been printed.  Reason is we may have forked
				 * to run a command and the child may be
				 * unwinding its stack through this code as it
				 * exits.
				 */
			} else
				prompt = str_save(substitute(ps1, 0),
						 saved_atemp);
			quitenv();
		}
#else /* KSH */
		prompt = str_val(global("PS1"));
#endif /* KSH */
		break;

	case PS2: /* command continuation */
		prompt = str_val(global("PS2"));
		break;
	}
}
/* See also related routine, promptlen() in edit.c */
/*
 * Write prompt string cp to shl_out, leaving out its first ntruncate
 * characters, and flush the output.
 */
void
pprompt(cp, ntruncate)
	const char *cp;
	int ntruncate;
{
	/* Skip at most ntruncate characters without stepping past the
	 * terminating NUL; same result as cp + ntruncate whenever
	 * 0 <= ntruncate <= strlen(cp).
	 */
	while (ntruncate > 0 && *cp != '\0') {
		cp++;
		ntruncate--;
	}
	shf_puts(cp, shl_out);
	shf_flush(shl_out);
}
/* Read the variable part of a ${...} expression (ie, up to but not including
* the :[-+?=#%] or close-brace.
*/
static char *
get_brace_var(wsp, wp)
XString *wsp;
char *wp;
{
enum parse_state {
PS_INITIAL, PS_SAW_HASH, PS_IDENT,
PS_NUMBER, PS_VAR1, PS_END
}
state;
char c;
state = PS_INITIAL;
while (1) {
1996-10-09 19:29:01 +04:00
c = getsc();
1996-09-22 03:35:13 +04:00
/* State machine to figure out where the variable part ends. */
switch (state) {
case PS_INITIAL:
if (c == '#') {
state = PS_SAW_HASH;
break;
}
/* fall through.. */
case PS_SAW_HASH:
if (letter(c))
state = PS_IDENT;
else if (digit(c))
state = PS_NUMBER;
else if (ctype(c, C_VAR1))
state = PS_VAR1;
else
state = PS_END;
break;
case PS_IDENT:
if (!letnum(c)) {
state = PS_END;
if (c == '[') {
char *tmp, *p;
if (!arraysub(&tmp))
yyerror("missing ]\n");
*wp++ = c;
for (p = tmp; *p; ) {
Xcheck(*wsp, wp);
*wp++ = *p++;
}
afree(tmp, ATEMP);
1996-10-09 19:29:01 +04:00
c = getsc(); /* the ] */
1996-09-22 03:35:13 +04:00
}
}
break;
case PS_NUMBER:
if (!digit(c))
state = PS_END;
break;
case PS_VAR1:
state = PS_END;
break;
case PS_END: /* keep gcc happy */
break;
}
if (state == PS_END) {
*wp++ = '\0'; /* end of variable part */
ungetsc(c);
break;
}
Xcheck(*wsp, wp);
*wp++ = c;
}
return wp;
}
/*
 * Save an array subscript - returns true if matching bracket found, false
 * if eof or newline was found.
 * (Returned string double null terminated)
 *
 * Called just after the opening [ has been read.  Everything read,
 * including the closing ] (or the newline / end-of-input character that
 * stopped the scan), is stored in a string allocated from ATEMP and
 * returned through *strp; the caller frees it.  Nested brackets are
 * counted.
 * NOTE(review): this function writes one '\0' after the text; a second
 * one is only present when the scan stopped at end of input - confirm
 * what the "double null" remark above relies on.
 */
static int
arraysub(strp)
	char **strp;
{
	XString ws;
	char	*wp;
	char	c;
	int	depth = 1;	/* we are just past the initial [ */

	Xinit(ws, wp, 32, ATEMP);

	do {
		c = getsc();
		Xcheck(ws, wp);
		*wp++ = c;
		if (c == '[')
			depth++;
		else if (c == ']')
			depth--;
	} while (depth > 0 && c && c != '\n');

	/* make room for the terminator like for every other store */
	Xcheck(ws, wp);
	*wp++ = '\0';
	*strp = Xclose(ws, wp);

	return depth == 0 ? 1 : 0;
}
/* Unget a char: handles case when we are already at the start of the buffer */
static const char *
1996-10-09 19:29:01 +04:00
ungetsc(c)
1996-09-22 03:35:13 +04:00
int c;
{
1996-10-09 19:29:01 +04:00
if (backslash_skip)
backslash_skip--;
1996-09-22 03:35:13 +04:00
/* Don't unget eof... */
if (source->str == null && c == '\0')
return source->str;
if (source->str > source->start)
source->str--;
else {
Source *s;
s = pushs(SREREAD, source->areap);
1996-10-09 19:29:01 +04:00
s->ugbuf[0] = c; s->ugbuf[1] = '\0';
s->start = s->str = s->ugbuf;
1996-09-22 03:35:13 +04:00
s->next = source;
source = s;
}
return source->str;
}
1996-10-09 19:29:01 +04:00
1996-09-22 03:35:13 +04:00
/* Called to get a char that isn't a \newline sequence. */
static int
1996-10-09 19:29:01 +04:00
getsc_bn ARGS((void))
1996-09-22 03:35:13 +04:00
{
1996-10-09 19:29:01 +04:00
int c, c2;
if (ignore_backslash_newline)
return getsc_();
if (backslash_skip == 1) {
backslash_skip = 2;
return getsc_();
}
backslash_skip = 0;
1996-09-22 03:35:13 +04:00
while (1) {
c = getsc_();
1996-10-09 19:29:01 +04:00
if (c == '\\') {
if ((c2 = getsc_()) == '\n')
/* ignore the \newline; get the next char... */
continue;
ungetsc(c2);
backslash_skip = 1;
1996-09-22 03:35:13 +04:00
}
1996-10-09 19:29:01 +04:00
return c;
1996-09-22 03:35:13 +04:00
}
}
1999-10-20 19:09:58 +04:00
/*
 * Chain a fresh block of STATE_BSIZE states (allocated from ATEMP) onto
 * the state stack.  Entry 0 of the new block records old_end, the end of
 * the block being left, so pop_state_() can find its way back.  si is
 * updated to describe the new block.  Returns the first usable entry.
 */
static Lex_state *
push_state_(si, old_end)
	State_info *si;
	Lex_state *old_end;
{
	Lex_state *block = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);

	si->base = block;
	si->end = block + STATE_BSIZE;
	block->ls_info.base = old_end;

	return block + 1;
}
/*
 * Undo push_state_(): old_end is entry 0 of the block being left, whose
 * link holds the end of the previous block.  si is reset to describe the
 * previous block, the block being left is freed, and the last entry of
 * the previous block is returned.
 */
static Lex_state *
pop_state_(si, old_end)
	State_info *si;
	Lex_state *old_end;
{
	Lex_state *old_base = si->base;

	si->base = old_end->ls_info.base - STATE_BSIZE;
	si->end = old_end->ls_info.base;

	afree(old_base, ATEMP);

	return si->base + STATE_BSIZE - 1;
}