862 lines
21 KiB
C
862 lines
21 KiB
C
|
|
/********************************************
scan.c
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the AWK programming language.

Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/
|
|
|
|
|
|
/* $Log: scan.c,v $
/* Revision 1.2  1993/07/02 23:57:54  jtc
/* Updated to mawk 1.1.4
/*
 * Revision 5.4.1.1  1993/01/15  03:33:50  mike
 * patch3: safer double to int conversion
 *
 * Revision 5.4  1992/11/29  18:57:50  mike
 * field expressions convert to long so 16 bit and 32 bit
 * systems behave the same
 *
 * Revision 5.3  1992/07/08  15:43:41  brennan
 * patch2: length returns.  I am a wimp
 *
 * Revision 5.2  1992/02/21  14:16:53  brennan
 * fix:  getline <=
 *
 * Revision 5.1  91/12/05  07:56:27  brennan
 * 1.1 pre-release
 *
*/
|
|
|
|
|
|
#include "mawk.h"
|
|
#include "sizes.h"
|
|
#include "scan.h"
|
|
#include "memory.h"
|
|
#include "field.h"
|
|
#include "init.h"
|
|
#include "fin.h"
|
|
#include "repl.h"
|
|
#include "code.h"
|
|
|
|
#if HAVE_FCNTL_H
|
|
#include <fcntl.h>
|
|
#endif
|
|
|
|
#include "files.h"
|
|
|
|
|
|
/* static functions */
|
|
static void PROTO(scan_fillbuff, (void) ) ;
|
|
static void PROTO(scan_open, (void) ) ;
|
|
static int PROTO(slow_next, (void) ) ;
|
|
static void PROTO(eat_comment, (void) ) ;
|
|
static void PROTO(eat_semi_colon, (void) ) ;
|
|
static double PROTO(collect_decimal, (int, int *) ) ;
|
|
static int PROTO(collect_string, (void) ) ;
|
|
static int PROTO(collect_RE, (void) ) ;
|
|
|
|
|
|
/*-----------------------------
|
|
program file management
|
|
*----------------------------*/
|
|
|
|
char *pfile_name ;
|
|
STRING *program_string ;
|
|
PFILE *pfile_list ;
|
|
static unsigned char *buffer ;
|
|
static unsigned char *buffp ;
|
|
/* unsigned so it works with 8 bit chars */
|
|
static int program_fd ;
|
|
static int eof_flag ;
|
|
|
|
void scan_init(cmdline_program)
|
|
char * cmdline_program ;
|
|
{
|
|
if ( cmdline_program )
|
|
{
|
|
program_fd = -1 ; /* command line program */
|
|
program_string = new_STRING((char *)0,
|
|
strlen(cmdline_program) + 1 ) ;
|
|
(void) strcpy(program_string->str, cmdline_program) ;
|
|
/* simulate file termination */
|
|
program_string->str[program_string->len-1] = '\n' ;
|
|
buffp = (unsigned char *) program_string->str ;
|
|
eof_flag = 1 ;
|
|
}
|
|
else /* program from file[s] */
|
|
{
|
|
scan_open() ;
|
|
buffp = buffer = (unsigned char *) zmalloc( BUFFSZ+1 ) ;
|
|
scan_fillbuff() ;
|
|
}
|
|
|
|
eat_nl() ; /* scan to first token */
|
|
if ( next() == 0 ) { errmsg(0, "no program") ; mawk_exit(1) ; }
|
|
un_next() ;
|
|
|
|
}
|
|
|
|
static void scan_open() /* open pfile_name */
|
|
{
|
|
if ( pfile_name[0] == '-' && pfile_name[1] == 0 )
|
|
program_fd = 0 ;
|
|
else
|
|
if ( (program_fd = open(pfile_name, O_RDONLY, 0)) == -1 )
|
|
{ errmsg( errno, "cannot open %s", pfile_name) ; mawk_exit(1) ; }
|
|
}
|
|
|
|
void scan_cleanup()
|
|
{
|
|
if ( program_fd >= 0 ) zfree(buffer, BUFFSZ+1) ;
|
|
else free_STRING(program_string) ;
|
|
|
|
if ( program_fd > 0 ) (void) close(program_fd) ;
|
|
|
|
/* redefine SPACE as [ \t\n] */
|
|
|
|
scan_code['\n'] = posix_space_flag && rs_shadow.type != SEP_MLR
|
|
? SC_UNEXPECTED : SC_SPACE ;
|
|
scan_code['\f'] = SC_UNEXPECTED ; /*value doesn't matter */
|
|
scan_code['\013'] = SC_UNEXPECTED ; /* \v not space */
|
|
scan_code['\r'] = SC_UNEXPECTED ;
|
|
}
|
|
|
|
/*--------------------------------
|
|
global variables shared by yyparse() and yylex()
|
|
and used for error messages too
|
|
*-------------------------------*/
|
|
|
|
int current_token = -1 ;
|
|
unsigned token_lineno ;
|
|
unsigned compile_error_count ;
|
|
int NR_flag ; /* are we tracking NR */
|
|
int paren_cnt ;
|
|
int brace_cnt ;
|
|
int print_flag ; /* changes meaning of '>' */
|
|
int getline_flag ; /* changes meaning of '<' */
|
|
|
|
extern YYSTYPE yylval ;
|
|
|
|
/*----------------------------------------
|
|
file reading functions
|
|
next() and un_next(c) are macros in scan.h
|
|
|
|
*---------------------*/
|
|
|
|
static unsigned lineno = 1 ;
|
|
|
|
|
|
static void scan_fillbuff()
|
|
{ unsigned r ;
|
|
|
|
r = fillbuff(program_fd, (char *)buffer, BUFFSZ) ;
|
|
if ( r < BUFFSZ )
|
|
{ eof_flag = 1 ;
|
|
/* check eof is terminated */
|
|
if ( r && buffer[r-1] != '\n' )
|
|
{ buffer[r] = '\n' ; buffer[r+1] = 0 ; }
|
|
}
|
|
}
|
|
|
|
/* read one character -- slowly */
|
|
static int slow_next()
|
|
{
|
|
|
|
while ( *buffp == 0 )
|
|
{
|
|
if ( !eof_flag )
|
|
{ buffp = buffer ; scan_fillbuff() ; }
|
|
else
|
|
if ( pfile_list /* open another program file */ )
|
|
{
|
|
PFILE *q ;
|
|
|
|
if ( program_fd > 0 ) (void) close(program_fd) ;
|
|
eof_flag = 0 ;
|
|
pfile_name = pfile_list->fname ;
|
|
q = pfile_list ;
|
|
pfile_list = pfile_list->link ;
|
|
ZFREE(q) ;
|
|
scan_open() ;
|
|
token_lineno = lineno = 1 ;
|
|
}
|
|
else break /* real eof */ ;
|
|
}
|
|
|
|
return *buffp++ ; /* note can un_next() , eof which is zero */
|
|
}
|
|
|
|
static void eat_comment()
|
|
{ register int c ;
|
|
|
|
while ( (c = next()) != '\n' && scan_code[c] ) ;
|
|
un_next() ;
|
|
}
|
|
|
|
/* this is how we handle extra semi-colons that are
|
|
now allowed to separate pattern-action blocks
|
|
|
|
A proof that they are useless clutter to the language:
|
|
we throw them away
|
|
*/
|
|
|
|
static void eat_semi_colon()
|
|
/* eat one semi-colon on the current line */
|
|
{ register int c ;
|
|
|
|
while ( scan_code[c = next()] == SC_SPACE ) ;
|
|
if ( c != ';' ) un_next() ;
|
|
}
|
|
|
|
void eat_nl() /* eat all space including newlines */
|
|
{
|
|
while ( 1 )
|
|
switch( scan_code[next()] )
|
|
{
|
|
case SC_COMMENT :
|
|
eat_comment() ;
|
|
break ;
|
|
|
|
case SC_NL : lineno++ ;
|
|
/* fall thru */
|
|
case SC_SPACE : break ;
|
|
default :
|
|
un_next() ; return ;
|
|
}
|
|
}
|
|
|
|
int yylex()
|
|
{
|
|
register int c ;
|
|
|
|
token_lineno = lineno ;
|
|
|
|
reswitch:
|
|
|
|
switch( scan_code[c = next()] )
|
|
{
|
|
case 0 :
|
|
ct_ret(EOF) ;
|
|
|
|
case SC_SPACE : goto reswitch ;
|
|
|
|
case SC_COMMENT :
|
|
eat_comment() ; goto reswitch ;
|
|
|
|
case SC_NL :
|
|
lineno++ ; eat_nl() ;
|
|
ct_ret(NL) ;
|
|
|
|
case SC_ESCAPE :
|
|
while ( scan_code[ c = next() ] == SC_SPACE ) ;
|
|
if ( c == '\n')
|
|
{ token_lineno = ++lineno ; goto reswitch ; }
|
|
if ( c == 0 ) ct_ret(EOF) ;
|
|
un_next() ;
|
|
yylval.ival = '\\' ;
|
|
ct_ret(UNEXPECTED) ;
|
|
|
|
|
|
case SC_SEMI_COLON :
|
|
eat_nl() ;
|
|
ct_ret(SEMI_COLON) ;
|
|
|
|
case SC_LBRACE :
|
|
eat_nl() ; brace_cnt++ ;
|
|
ct_ret(LBRACE) ;
|
|
|
|
case SC_PLUS :
|
|
switch( next() )
|
|
{
|
|
case '+' :
|
|
yylval.ival = '+' ;
|
|
string_buff[0] =
|
|
string_buff[1] = '+' ;
|
|
string_buff[2] = 0 ;
|
|
ct_ret(INC_or_DEC) ;
|
|
|
|
case '=' :
|
|
ct_ret(ADD_ASG) ;
|
|
|
|
default : un_next() ; ct_ret(PLUS) ;
|
|
}
|
|
|
|
case SC_MINUS :
|
|
switch( next() )
|
|
{
|
|
case '-' :
|
|
yylval.ival = '-' ;
|
|
string_buff[0] =
|
|
string_buff[1] = '-' ;
|
|
string_buff[2] = 0 ;
|
|
ct_ret(INC_or_DEC) ;
|
|
|
|
case '=' :
|
|
ct_ret(SUB_ASG) ;
|
|
|
|
default : un_next() ; ct_ret(MINUS) ;
|
|
}
|
|
|
|
case SC_COMMA : eat_nl() ; ct_ret(COMMA) ;
|
|
|
|
case SC_MUL : test1_ret('=', MUL_ASG, MUL) ;
|
|
case SC_DIV :
|
|
{ static int can_precede_div[] =
|
|
{ DOUBLE, STRING_, RPAREN, ID, D_ID, RE, RBOX, FIELD,
|
|
GETLINE, INC_or_DEC, -1 } ;
|
|
|
|
int *p = can_precede_div ;
|
|
|
|
do
|
|
if ( *p == current_token )
|
|
{
|
|
if ( *p != INC_or_DEC )
|
|
test1_ret('=', DIV_ASG, DIV) ;
|
|
|
|
if ( next() == '=' )
|
|
{ un_next() ; ct_ret( collect_RE() ) ; }
|
|
}
|
|
|
|
while ( * ++p != -1 ) ;
|
|
|
|
ct_ret( collect_RE() ) ;
|
|
}
|
|
|
|
case SC_MOD : test1_ret('=', MOD_ASG, MOD) ;
|
|
case SC_POW : test1_ret('=' , POW_ASG, POW) ;
|
|
case SC_LPAREN :
|
|
paren_cnt++ ;
|
|
ct_ret(LPAREN) ;
|
|
|
|
case SC_RPAREN :
|
|
if ( --paren_cnt < 0 )
|
|
{ compile_error( "extra ')'" ) ;
|
|
paren_cnt = 0 ;
|
|
goto reswitch ; }
|
|
|
|
ct_ret(RPAREN) ;
|
|
|
|
case SC_LBOX : ct_ret(LBOX) ;
|
|
case SC_RBOX : ct_ret(RBOX) ;
|
|
|
|
case SC_MATCH :
|
|
string_buff[0] = '~' ; string_buff[0] = 0 ;
|
|
yylval.ival = 1 ;
|
|
ct_ret(MATCH) ;
|
|
|
|
case SC_EQUAL :
|
|
test1_ret( '=', EQ, ASSIGN ) ;
|
|
|
|
case SC_NOT : /* ! */
|
|
if ( (c = next()) == '~' )
|
|
{
|
|
string_buff[0] = '!' ;
|
|
string_buff[1] = '~' ;
|
|
string_buff[2] = 0 ;
|
|
yylval.ival = 0 ;
|
|
ct_ret(MATCH) ;
|
|
}
|
|
else
|
|
if ( c == '=' ) ct_ret(NEQ) ;
|
|
|
|
un_next() ;
|
|
ct_ret(NOT) ;
|
|
|
|
|
|
case SC_LT : /* '<' */
|
|
if ( next() == '=' ) ct_ret(LTE) ;
|
|
else un_next() ;
|
|
|
|
if ( getline_flag )
|
|
{ getline_flag = 0 ; ct_ret(IO_IN) ; }
|
|
else ct_ret(LT) ;
|
|
|
|
case SC_GT : /* '>' */
|
|
if ( print_flag && paren_cnt == 0 )
|
|
{ print_flag = 0 ;
|
|
/* there are 3 types of IO_OUT
|
|
-- build the error string in string_buff */
|
|
string_buff[0] = '>' ;
|
|
if ( next() == '>' )
|
|
{
|
|
yylval.ival = F_APPEND ;
|
|
string_buff[1] = '>' ;
|
|
string_buff[2] = 0 ;
|
|
}
|
|
else
|
|
{ un_next() ;
|
|
yylval.ival = F_TRUNC ;
|
|
string_buff[1] = 0 ;
|
|
}
|
|
return current_token = IO_OUT ;
|
|
}
|
|
|
|
test1_ret('=', GTE, GT) ;
|
|
|
|
case SC_OR :
|
|
if ( next() == '|' )
|
|
{ eat_nl() ; ct_ret(OR) ; }
|
|
else
|
|
{ un_next() ;
|
|
|
|
if ( print_flag && paren_cnt == 0 )
|
|
{ print_flag = 0 ;
|
|
yylval.ival = PIPE_OUT;
|
|
string_buff[0] = '|' ;
|
|
string_buff[1] = 0 ;
|
|
ct_ret(IO_OUT) ;
|
|
}
|
|
else ct_ret(PIPE) ;
|
|
}
|
|
|
|
case SC_AND :
|
|
if ( next() == '&' )
|
|
{ eat_nl() ; ct_ret(AND) ; }
|
|
else
|
|
{ un_next() ; yylval.ival = '&' ; ct_ret(UNEXPECTED) ; }
|
|
|
|
case SC_QMARK : ct_ret(QMARK) ;
|
|
case SC_COLON : ct_ret(COLON) ;
|
|
case SC_RBRACE :
|
|
if ( --brace_cnt < 0 )
|
|
{ compile_error("extra '}'" ) ;
|
|
eat_semi_colon() ;
|
|
brace_cnt = 0 ; goto reswitch ; }
|
|
|
|
if ( (c = current_token) == NL || c == SEMI_COLON
|
|
|| c == SC_FAKE_SEMI_COLON || c == RBRACE )
|
|
{
|
|
/* if the brace_cnt is zero , we've completed
|
|
a pattern action block. If the user insists
|
|
on adding a semi-colon on the same line
|
|
we will eat it. Note what we do below:
|
|
physical law -- conservation of semi-colons */
|
|
|
|
if ( brace_cnt == 0 ) eat_semi_colon() ;
|
|
eat_nl() ;
|
|
ct_ret(RBRACE) ;
|
|
}
|
|
|
|
/* supply missing semi-colon to statement that
|
|
precedes a '}' */
|
|
brace_cnt++ ; un_next() ;
|
|
current_token = SC_FAKE_SEMI_COLON ;
|
|
return SEMI_COLON ;
|
|
|
|
case SC_DIGIT :
|
|
case SC_DOT :
|
|
{ double d ;
|
|
int flag ;
|
|
static double double_zero = 0.0 ;
|
|
static double double_one = 1.0 ;
|
|
|
|
if ( (d = collect_decimal(c, &flag)) == 0.0 )
|
|
if ( flag ) ct_ret(flag) ;
|
|
else yylval.ptr = (PTR) &double_zero ;
|
|
else if ( d == 1.0 ) yylval.ptr = (PTR) &double_one ;
|
|
else
|
|
{ yylval.ptr = (PTR) ZMALLOC(double) ;
|
|
*(double*)yylval.ptr = d ;
|
|
}
|
|
ct_ret( DOUBLE ) ;
|
|
}
|
|
|
|
case SC_DOLLAR : /* '$' */
|
|
{ double d ;
|
|
int flag ;
|
|
|
|
while ( scan_code[c = next()] == SC_SPACE ) ;
|
|
if ( scan_code[c] != SC_DIGIT &&
|
|
scan_code[c] != SC_DOT )
|
|
{ un_next() ; ct_ret(DOLLAR) ; }
|
|
/* compute field address at compile time */
|
|
if ( (d = collect_decimal(c, &flag)) == 0.0 )
|
|
if ( flag ) ct_ret(flag) ; /* an error */
|
|
else yylval.cp = &field[0] ;
|
|
else
|
|
{
|
|
if ( d > MAX_FIELD )
|
|
{ compile_error(
|
|
"$%g exceeds maximum field(%d)" , d, MAX_FIELD) ;
|
|
d = MAX_FIELD ;
|
|
}
|
|
yylval.cp = field_ptr((int)d) ;
|
|
}
|
|
|
|
ct_ret(FIELD) ;
|
|
}
|
|
|
|
case SC_DQUOTE :
|
|
return current_token = collect_string() ;
|
|
|
|
case SC_IDCHAR : /* collect an identifier */
|
|
{ unsigned char *p =
|
|
(unsigned char *)string_buff + 1 ;
|
|
SYMTAB *stp ;
|
|
|
|
string_buff[0] = c ;
|
|
|
|
while (
|
|
(c = scan_code[ *p++ = next()]) == SC_IDCHAR ||
|
|
c == SC_DIGIT ) ;
|
|
|
|
un_next() ; * --p = 0 ;
|
|
|
|
switch( (stp = find(string_buff))->type )
|
|
{ case ST_NONE :
|
|
/* check for function call before defined */
|
|
if ( next() == '(' )
|
|
{ stp->type = ST_FUNCT ;
|
|
stp->stval.fbp = (FBLOCK *)
|
|
zmalloc(sizeof(FBLOCK)) ;
|
|
stp->stval.fbp->name = stp->name ;
|
|
stp->stval.fbp->code = (INST *) 0 ;
|
|
yylval.fbp = stp->stval.fbp ;
|
|
current_token = FUNCT_ID ;
|
|
}
|
|
else
|
|
{ yylval.stp = stp ;
|
|
current_token =
|
|
current_token == DOLLAR ? D_ID : ID ;
|
|
}
|
|
un_next() ;
|
|
break ;
|
|
|
|
case ST_NR :
|
|
NR_flag = 1 ;
|
|
stp->type = ST_VAR ;
|
|
/* fall thru */
|
|
|
|
case ST_VAR :
|
|
case ST_ARRAY :
|
|
case ST_LOCAL_NONE :
|
|
case ST_LOCAL_VAR :
|
|
case ST_LOCAL_ARRAY :
|
|
|
|
yylval.stp = stp ;
|
|
current_token =
|
|
current_token == DOLLAR ? D_ID : ID ;
|
|
break ;
|
|
|
|
case ST_ENV :
|
|
stp->type = ST_ARRAY ;
|
|
stp->stval.array = new_ARRAY() ;
|
|
load_environ(stp->stval.array) ;
|
|
yylval.stp = stp ;
|
|
current_token =
|
|
current_token == DOLLAR ? D_ID : ID ;
|
|
break ;
|
|
|
|
case ST_FUNCT :
|
|
yylval.fbp = stp->stval.fbp ;
|
|
current_token = FUNCT_ID ;
|
|
break ;
|
|
|
|
case ST_KEYWORD :
|
|
current_token = stp->stval.kw ;
|
|
break ;
|
|
|
|
case ST_BUILTIN :
|
|
yylval.bip = stp->stval.bip ;
|
|
current_token = BUILTIN ;
|
|
break ;
|
|
|
|
case ST_LENGTH :
|
|
|
|
yylval.bip = stp->stval.bip ;
|
|
|
|
/* check for length alone, this is an ugly
|
|
hack */
|
|
while ( scan_code[ c = next() ] == SC_SPACE ) ;
|
|
un_next() ;
|
|
|
|
current_token = c == '(' ? BUILTIN : LENGTH ;
|
|
break ;
|
|
|
|
case ST_FIELD :
|
|
yylval.cp = stp->stval.cp ;
|
|
current_token = FIELD ;
|
|
break ;
|
|
|
|
default :
|
|
bozo("find returned bad st type") ;
|
|
}
|
|
return current_token ;
|
|
}
|
|
|
|
|
|
case SC_UNEXPECTED :
|
|
yylval.ival = c & 0xff ;
|
|
ct_ret(UNEXPECTED) ;
|
|
}
|
|
return 0 ; /* never get here make lint happy */
|
|
}
|
|
|
|
/* collect a decimal constant in temp_buff.
|
|
Return the value and error conditions by reference */
|
|
|
|
static double collect_decimal(c, flag)
|
|
int c ; int *flag ;
|
|
{ register unsigned char *p = (unsigned char*) string_buff + 1;
|
|
unsigned char *endp ;
|
|
double d ;
|
|
|
|
*flag = 0 ;
|
|
string_buff[0] = c ;
|
|
|
|
if ( c == '.' )
|
|
{ if ( scan_code[*p++ = next()] != SC_DIGIT )
|
|
{ *flag = UNEXPECTED ; yylval.ival = '.' ;
|
|
return 0.0 ; }
|
|
}
|
|
else
|
|
{ while ( scan_code[*p++ = next()] == SC_DIGIT ) ;
|
|
if ( p[-1] != '.' )
|
|
{ un_next() ; p-- ; }
|
|
}
|
|
/* get rest of digits after decimal point */
|
|
while ( scan_code[*p++ = next()] == SC_DIGIT ) ;
|
|
|
|
/* check for exponent */
|
|
if ( p[-1] != 'e' && p[-1] != 'E' )
|
|
{ un_next() ; * --p = 0 ; }
|
|
else /* get the exponent */
|
|
if ( scan_code[*p = next()] != SC_DIGIT &&
|
|
*p != '-' && *p != '+' )
|
|
{ *++p = 0 ; *flag = BAD_DECIMAL ;
|
|
return 0.0 ; }
|
|
else /* get the rest of the exponent */
|
|
{ p++ ;
|
|
while ( scan_code[*p++ = next()] == SC_DIGIT ) ;
|
|
un_next() ; * --p = 0 ;
|
|
}
|
|
|
|
errno = 0 ; /* check for overflow/underflow */
|
|
d = strtod( string_buff, (char **)&endp ) ;
|
|
|
|
#ifndef STRTOD_UNDERFLOW_ON_ZERO_BUG
|
|
if ( errno )
|
|
compile_error( "%s : decimal %sflow" , string_buff,
|
|
d == 0.0 ? "under" : "over") ;
|
|
#else /* sun4 bug */
|
|
if ( errno && d != 0.0 )
|
|
compile_error( "%s : decimal overflow", string_buff) ;
|
|
#endif
|
|
|
|
if ( endp < p )
|
|
{ *flag = BAD_DECIMAL ; return 0.0 ; }
|
|
return d ;
|
|
}
|
|
|
|
/*---------- process escape characters ---------------*/
|
|
|
|
static char hex_val['f' - 'A' + 1] = {
|
|
10,11,12,13,14,15, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
10,11,12,13,14,15 } ;
|
|
|
|
#define isoctal(x) ((x)>='0'&&(x)<='7')
|
|
|
|
#define hex_value(x) hex_val[(x)-'A']
|
|
|
|
#define ishex(x) (scan_code[x] == SC_DIGIT ||\
|
|
'A' <= (x) && (x) <= 'f' && hex_value(x))
|
|
|
|
static int PROTO(octal, (char **)) ;
|
|
static int PROTO(hex, (char **)) ;
|
|
|
|
/* octal -- process one, two or three octal digits, moving a pointer
   forward by reference.

   start_p : address of a pointer to the first digit, which the caller
             has already checked to be an octal digit; on return the
             pointer is just past the last digit used.

   Returns the value of the digits reduced to the range 0..255.
*/
static int octal( start_p )
    char **start_p ;
{
    register char *scan = *start_p ;
    register unsigned value ;
    int more ;

    value = *scan++ - '0' ;

    /* at most two further digits, stopping at the first character
       outside '0'..'7' */
    for ( more = 0 ; more < 2 && *scan >= '0' && *scan <= '7' ; more++ )
        value = (value<<3) + *scan++ - '0' ;

    *start_p = scan ;
    return value & 0xff ;
}
|
|
|
|
/* process one or two hex digits
|
|
moving a pointer forward by reference */
|
|
|
|
static int hex( start_p )
|
|
char **start_p ;
|
|
{ register unsigned char *p = (unsigned char*) *start_p ;
|
|
register unsigned x ;
|
|
unsigned t ;
|
|
|
|
if ( scan_code[*p] == SC_DIGIT )
|
|
x = *p++ - '0' ;
|
|
else x = hex_value(*p++) ;
|
|
|
|
if ( scan_code[*p] == SC_DIGIT )
|
|
x = (x<<4) + *p++ - '0' ;
|
|
else
|
|
if ( 'A' <= *p && *p <= 'f' && (t = hex_value(*p)) )
|
|
{ x = (x<<4) + t ; p++ ; }
|
|
|
|
*start_p = (char *) p ;
|
|
return x ;
|
|
}
|
|
|
|
/* index of the last slot of escape_test[]; rm_escape() stores the
   character it is looking for there, as a search sentinel */
#define  ET_END  9

/* single-character escapes: letter after the backslash -> character */
static struct { char in , out ; } escape_test[ET_END+1] = {
    { 'n' ,  '\n'   },
    { 't' ,  '\t'   },
    { 'f' ,  '\f'   },
    { 'b' ,  '\b'   },
    { 'r' ,  '\r'   },
    { 'a' ,  '\07'  },
    { 'v' ,  '\013' },
    { '\\',  '\\'   },
    { '\"',  '\"'   },
    { 0   ,  0      }   /* sentinel slot */
} ;
|
|
|
|
|
|
/* process the escape characters in a string, in place . */
|
|
|
|
char *rm_escape(s)
|
|
char *s ;
|
|
{ register char *p, *q ;
|
|
char *t ;
|
|
int i ;
|
|
|
|
q = p = s ;
|
|
|
|
while ( *p )
|
|
if ( *p == '\\' )
|
|
{
|
|
escape_test[ET_END].in = * ++p ; /* sentinal */
|
|
i = 0 ;
|
|
while ( escape_test[i].in != *p ) i++ ;
|
|
|
|
if ( i != ET_END ) /* in table */
|
|
{
|
|
p++ ; *q++ = escape_test[i].out ;
|
|
}
|
|
else
|
|
if ( isoctal(*p) )
|
|
{
|
|
t = p ; *q++ = octal(&t) ; p = t ;
|
|
}
|
|
else
|
|
if ( *p == 'x' && ishex(*(unsigned char*)(p+1)) )
|
|
{
|
|
t = p+1 ; *q++ = hex(&t) ; p = t ;
|
|
}
|
|
else
|
|
if ( *p == 0 ) /* can only happen with command line assign */
|
|
*q++ = '\\' ;
|
|
else /* not an escape sequence */
|
|
{
|
|
*q++ = '\\' ; *q++ = *p++ ;
|
|
}
|
|
}
|
|
else *q++ = *p++ ;
|
|
|
|
*q = 0 ;
|
|
return s ;
|
|
}
|
|
|
|
static int collect_string()
|
|
{ register unsigned char *p = (unsigned char *)string_buff ;
|
|
int c ;
|
|
int e_flag = 0 ; /* on if have an escape char */
|
|
|
|
while ( 1 )
|
|
switch( scan_code[ *p++ = next() ] )
|
|
{ case SC_DQUOTE : /* done */
|
|
* --p = 0 ; goto out ;
|
|
|
|
case SC_NL :
|
|
p[-1] = 0 ;
|
|
/* fall thru */
|
|
|
|
case 0 : /* unterminated string */
|
|
compile_error(
|
|
"runaway string constant \"%.10s ..." ,
|
|
string_buff, token_lineno ) ;
|
|
mawk_exit(1) ;
|
|
|
|
case SC_ESCAPE :
|
|
if ( (c = next()) == '\n' )
|
|
{ p-- ; lineno++ ; }
|
|
else
|
|
if ( c == 0 ) un_next() ;
|
|
else
|
|
{ *p++ = c ; e_flag = 1 ; }
|
|
|
|
break ;
|
|
|
|
default : break ;
|
|
}
|
|
|
|
out:
|
|
yylval.ptr = (PTR) new_STRING(
|
|
e_flag ? rm_escape( string_buff )
|
|
: string_buff ) ;
|
|
return STRING_ ;
|
|
}
|
|
|
|
|
|
static int collect_RE()
|
|
{ register unsigned char *p = (unsigned char*) string_buff ;
|
|
int c ;
|
|
STRING *sval ;
|
|
|
|
while ( 1 )
|
|
switch( scan_code[ *p++ = next() ] )
|
|
{ case SC_DIV : /* done */
|
|
* --p = 0 ; goto out ;
|
|
|
|
case SC_NL :
|
|
p[-1] = 0 ;
|
|
/* fall thru */
|
|
|
|
case 0 : /* unterminated re */
|
|
compile_error(
|
|
"runaway regular expression /%.10s ..." ,
|
|
string_buff, token_lineno ) ;
|
|
mawk_exit(1) ;
|
|
|
|
case SC_ESCAPE :
|
|
switch( c = next() )
|
|
{ case '/' :
|
|
p[-1] = '/' ; break ;
|
|
|
|
case '\n' :
|
|
p-- ; break ;
|
|
|
|
case 0 :
|
|
un_next() ; break ;
|
|
|
|
default :
|
|
*p++ = c ; break ;
|
|
}
|
|
break ;
|
|
}
|
|
|
|
out:
|
|
/* now we've got the RE, so compile it */
|
|
sval = new_STRING( string_buff ) ;
|
|
yylval.ptr = re_compile(sval) ;
|
|
free_STRING(sval) ;
|
|
return RE ;
|
|
}
|
|
|