NetBSD/gnu/usr.bin/gawk/re.c

223 lines
4.5 KiB
C
Raw Normal View History

1993-07-08 02:18:43 +04:00
/*
* re.c - compile regular expressions.
*/
/*
1995-04-05 04:11:36 +04:00
* Copyright (C) 1991-1995 the Free Software Foundation, Inc.
1993-07-08 02:18:43 +04:00
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GAWK is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GAWK; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "awk.h"
/* Generate compiled regular expressions */
Regexp *
make_regexp(s, len, ignorecase, dfa)
char *s;
1994-02-17 04:21:51 +03:00
size_t len;
1993-07-08 02:18:43 +04:00
int ignorecase;
int dfa;
{
Regexp *rp;
1994-02-17 04:21:51 +03:00
const char *rerr;
1993-07-08 02:18:43 +04:00
char *src = s;
char *temp;
char *end = s + len;
register char *dest;
register int c;
/* Handle escaped characters first. */
/* Build a copy of the string (in dest) with the
escaped characters translated, and generate the regex
from that.
*/
emalloc(dest, char *, len + 2, "make_regexp");
temp = dest;
while (src < end) {
if (*src == '\\') {
c = *++src;
switch (c) {
case 'a':
case 'b':
case 'f':
case 'n':
case 'r':
case 't':
case 'v':
case 'x':
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
c = parse_escape(&src);
if (c < 0)
cant_happen();
*dest++ = (char)c;
break;
default:
*dest++ = '\\';
*dest++ = (char)c;
src++;
break;
} /* switch */
} else {
*dest++ = *src++; /* not '\\' */
}
} /* for */
*dest = '\0' ; /* Only necessary if we print dest ? */
emalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
memset((char *) rp, 0, sizeof(*rp));
1994-02-17 04:21:51 +03:00
emalloc(rp->pat.buffer, unsigned char *, 16, "make_regexp");
1993-07-08 02:18:43 +04:00
rp->pat.allocated = 16;
emalloc(rp->pat.fastmap, char *, 256, "make_regexp");
if (ignorecase)
rp->pat.translate = casetable;
else
rp->pat.translate = NULL;
len = dest - temp;
1994-02-17 04:21:51 +03:00
if ((rerr = re_compile_pattern(temp, len, &(rp->pat))) != NULL)
fatal("%s: /%s/", rerr, temp);
1995-04-05 04:11:36 +04:00
/* gack. this must be done *after* re_compile_pattern */
rp->pat.newline_anchor = 0; /* don't get \n in middle of string */
1993-07-08 02:18:43 +04:00
if (dfa && !ignorecase) {
1994-02-17 04:21:51 +03:00
dfacomp(temp, len, &(rp->dfareg), 1);
1993-07-08 02:18:43 +04:00
rp->dfa = 1;
} else
rp->dfa = 0;
1994-02-17 04:21:51 +03:00
1993-07-08 02:18:43 +04:00
free(temp);
return rp;
}
int
research(rp, str, start, len, need_start)
Regexp *rp;
register char *str;
int start;
1994-02-17 04:21:51 +03:00
register size_t len;
1993-07-08 02:18:43 +04:00
int need_start;
{
char *ret = str;
if (rp->dfa) {
1994-02-17 04:21:51 +03:00
char save;
1993-07-08 02:18:43 +04:00
int count = 0;
int try_backref;
1994-02-17 04:21:51 +03:00
/*
* dfa likes to stick a '\n' right after the matched
* text. So we just save and restore the character.
*/
save = str[start+len];
ret = dfaexec(&(rp->dfareg), str+start, str+start+len, 1,
1993-07-08 02:18:43 +04:00
&count, &try_backref);
1994-02-17 04:21:51 +03:00
str[start+len] = save;
1993-07-08 02:18:43 +04:00
}
if (ret) {
1995-04-05 04:11:36 +04:00
if (need_start || rp->dfa == 0) {
int result = re_search(&(rp->pat), str, start+len,
start, len, &(rp->regs));
/* recover any space from C based alloca */
(void) alloca(0);
return result;
} else
1993-07-08 02:18:43 +04:00
return 1;
} else
return -1;
}
void
refree(rp)
Regexp *rp;
{
free(rp->pat.buffer);
free(rp->pat.fastmap);
1995-04-05 04:11:36 +04:00
if (rp->regs.start)
free(rp->regs.start);
if (rp->regs.end)
free(rp->regs.end);
1993-07-08 02:18:43 +04:00
if (rp->dfa)
1994-02-17 04:21:51 +03:00
dfafree(&(rp->dfareg));
1993-07-08 02:18:43 +04:00
free(rp);
}
/*
 * dfaerror --- report the message s as a fatal error.
 * (Presumably the error hook called by the dfa routines -- confirm
 * against dfa.h.)
 *
 * The message is passed to fatal() as data for a "%s" format rather
 * than as the format string itself, so a '%' inside s is printed
 * literally instead of being interpreted as a conversion.
 */
void
dfaerror(s)
const char *s;
{
	fatal("%s", s);
}
/*
 * re_update --- return a compiled regexp for node t, recompiling it
 * only when the cached one can no longer be used.
 *
 * The cached t->re_reg is returned as-is when the CASE bit of
 * t->re_flags equals the current IGNORECASE and either the node is
 * flagged CONST or t->re_exp still evaluates to the text saved in
 * t->re_text.  Otherwise the old regexp is freed and a new one is
 * built from t->re_text, and the CASE bit is updated to the current
 * IGNORECASE.
 *
 * A non-zero t->re_cnt is incremented on each recompilation and reset
 * to 0 once it exceeds 10; its value is passed as the dfa argument of
 * make_regexp().
 */
Regexp *
re_update(t)
NODE *t;
{
	NODE *val;

# define CASE 1
	if ((t->re_flags & CASE) == IGNORECASE) {
		/* case setting unchanged: the cached regexp may still do */
		if (t->re_flags & CONST)
			return t->re_reg;
		val = force_string(tree_eval(t->re_exp));
		if (t->re_text != NULL) {
			if (cmp_nodes(t->re_text, val) == 0) {
				/* same text as last time */
				free_temp(val);
				return t->re_reg;
			}
			unref(t->re_text);
		}
		t->re_text = dupnode(val);
		free_temp(val);
	}

	/* a recompile is needed; drop the stale regexp first */
	if (t->re_reg != NULL)
		refree(t->re_reg);

	if (t->re_cnt != 0)
		t->re_cnt++;
	if (t->re_cnt > 10)
		t->re_cnt = 0;

	if (t->re_text == NULL) {
		val = force_string(tree_eval(t->re_exp));
		t->re_text = dupnode(val);
		free_temp(val);
	}

	t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen,
				IGNORECASE, t->re_cnt);
	/* remember which case setting this regexp was compiled under */
	t->re_flags = (t->re_flags & ~CASE) | IGNORECASE;
	return t->re_reg;
}
void
resetup()
{
1994-02-17 04:21:51 +03:00
reg_syntax_t syn = RE_SYNTAX_AWK;
(void) re_set_syntax(syn);
dfasyntax(syn, 0);
1993-07-08 02:18:43 +04:00
}