312 lines
6.9 KiB
C
312 lines
6.9 KiB
C
/* $NetBSD: rxp.c,v 1.13 2009/08/27 00:31:12 dholland Exp $ */
|
|
|
|
/*-
|
|
* Copyright (c) 1991, 1993
|
|
* The Regents of the University of California. All rights reserved.
|
|
*
|
|
* This code is derived from software contributed to Berkeley by
|
|
* Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
|
|
* Commodore Business Machines.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
#ifndef lint
|
|
#if 0
|
|
static char sccsid[] = "@(#)rxp.c 8.1 (Berkeley) 5/31/93";
|
|
#else
|
|
__RCSID("$NetBSD: rxp.c,v 1.13 2009/08/27 00:31:12 dholland Exp $");
|
|
#endif
|
|
#endif /* not lint */
|
|
|
|
/*
|
|
* regular expression parser
|
|
*
|
|
* external functions and return values are:
|
|
* rxp_compile(s)
|
|
* TRUE success
|
|
* FALSE parse failure; error message will be in char rxperr[]
|
|
* metas are:
|
|
* {...} optional pattern, equialent to [...|]
|
|
* | alternate pattern
|
|
* [...] pattern delimiters
|
|
*
|
|
* rxp_match(s)
|
|
* TRUE string s matches compiled pattern
|
|
* FALSE match failure or regexp error
|
|
*
|
|
* rxp_expand()
|
|
* char * reverse-engineered regular expression string
|
|
* NULL regexp error
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <ctype.h>
|
|
#include "quiz.h"
|
|
/* regexp tokens, arg */
|
|
#define LIT (-1) /* literal character, char */
|
|
#define SOT (-2) /* start text anchor, - */
|
|
#define EOT (-3) /* end text anchor, - */
|
|
#define GRP_S (-4) /* start alternate grp, ptr_to_end */
|
|
#define GRP_E (-5) /* end group, - */
|
|
#define ALT_S (-6) /* alternate starts, ptr_to_next */
|
|
#define ALT_E (-7) /* alternate ends, - */
|
|
#define END (-8) /* end of regexp, - */
|
|
|
|
typedef short Rxp_t; /* type for regexp tokens */
|
|
|
|
static Rxp_t rxpbuf[RXP_LINE_SZ]; /* compiled regular expression buffer */
|
|
char rxperr[128]; /* parser error message */
|
|
|
|
static int rxp__compile(const char *, int);
|
|
static char *rxp__expand(int);
|
|
static int rxp__match(const char *, int, Rxp_t *, Rxp_t *, const char *);
|
|
|
|
int
|
|
rxp_compile(const char *s)
|
|
{
|
|
return (rxp__compile(s, TRUE));
|
|
}
|
|
|
|
static int
|
|
rxp__compile(const char *s, int first)
|
|
{
|
|
static Rxp_t *rp;
|
|
static const char *sp;
|
|
Rxp_t *grp_ptr;
|
|
Rxp_t *alt_ptr;
|
|
int esc, err;
|
|
|
|
esc = 0;
|
|
if (first) {
|
|
rp = rxpbuf;
|
|
sp = s;
|
|
*rp++ = SOT; /* auto-anchor: pat is really ^pat$ */
|
|
*rp++ = GRP_S; /* auto-group: ^pat$ is really ^[pat]$ */
|
|
*rp++ = 0;
|
|
}
|
|
*rp++ = ALT_S;
|
|
alt_ptr = rp;
|
|
*rp++ = 0;
|
|
for (; *sp; ++sp) {
|
|
if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
|
|
(void)snprintf(rxperr, sizeof(rxperr),
|
|
"regular expression too long %s", s);
|
|
return (FALSE);
|
|
}
|
|
if (*sp == ':' && !esc)
|
|
break;
|
|
if (esc) {
|
|
*rp++ = LIT;
|
|
*rp++ = *sp;
|
|
esc = 0;
|
|
}
|
|
else switch (*sp) {
|
|
case '\\':
|
|
esc = 1;
|
|
break;
|
|
case '{':
|
|
case '[':
|
|
*rp++ = GRP_S;
|
|
grp_ptr = rp;
|
|
*rp++ = 0;
|
|
sp++;
|
|
if ((err = rxp__compile(s, FALSE)) != TRUE)
|
|
return (err);
|
|
*rp++ = GRP_E;
|
|
*grp_ptr = rp - rxpbuf;
|
|
break;
|
|
case '}':
|
|
case ']':
|
|
case '|':
|
|
*rp++ = ALT_E;
|
|
*alt_ptr = rp - rxpbuf;
|
|
if (*sp != ']') {
|
|
*rp++ = ALT_S;
|
|
alt_ptr = rp;
|
|
*rp++ = 0;
|
|
}
|
|
if (*sp != '|') {
|
|
if (*sp != ']') {
|
|
*rp++ = ALT_E;
|
|
*alt_ptr = rp - rxpbuf;
|
|
}
|
|
if (first) {
|
|
(void)snprintf(rxperr, sizeof(rxperr),
|
|
"unmatched alternator in regexp %s",
|
|
s);
|
|
return (FALSE);
|
|
}
|
|
return (TRUE);
|
|
}
|
|
break;
|
|
default:
|
|
*rp++ = LIT;
|
|
*rp++ = *sp;
|
|
esc = 0;
|
|
break;
|
|
}
|
|
}
|
|
if (!first) {
|
|
(void)snprintf(rxperr, sizeof(rxperr),
|
|
"unmatched alternator in regexp %s", s);
|
|
return (FALSE);
|
|
}
|
|
*rp++ = ALT_E;
|
|
*alt_ptr = rp - rxpbuf;
|
|
*rp++ = GRP_E;
|
|
*(rxpbuf + 2) = rp - rxpbuf;
|
|
*rp++ = EOT;
|
|
*rp = END;
|
|
return (TRUE);
|
|
}
|
|
|
|
/*
|
|
* match string against compiled regular expression
|
|
*/
|
|
int
|
|
rxp_match(const char *s)
|
|
{
|
|
return (rxp__match(s, TRUE, NULL, NULL, NULL));
|
|
}
|
|
|
|
static int
|
|
rxp__match(const char *s,
|
|
int first,
|
|
Rxp_t *j_succ, /* jump here on successful alt match */
|
|
Rxp_t *j_fail, /* jump here on failed match */
|
|
const char *sp_fail) /* reset sp to here on failed match */
|
|
{
|
|
static Rxp_t *rp;
|
|
static const char *sp;
|
|
int ch;
|
|
Rxp_t *grp_end = NULL;
|
|
|
|
if (first) {
|
|
rp = rxpbuf;
|
|
sp = s;
|
|
}
|
|
while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
|
|
switch(*rp) {
|
|
case LIT:
|
|
rp++;
|
|
ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
|
|
if (ch != *sp++) {
|
|
rp = j_fail;
|
|
sp = sp_fail;
|
|
return (FALSE);
|
|
}
|
|
rp++;
|
|
break;
|
|
case SOT:
|
|
if (sp != s)
|
|
return (FALSE);
|
|
rp++;
|
|
break;
|
|
case EOT:
|
|
if (*sp != 0)
|
|
return (FALSE);
|
|
rp++;
|
|
break;
|
|
case GRP_S:
|
|
rp++;
|
|
grp_end = rxpbuf + *rp++;
|
|
break;
|
|
case ALT_S:
|
|
rp++;
|
|
rxp__match(sp, FALSE, grp_end, rxpbuf + *rp++, sp);
|
|
break;
|
|
case ALT_E:
|
|
rp = j_succ;
|
|
return (TRUE);
|
|
case GRP_E:
|
|
rp = j_fail;
|
|
sp = sp_fail;
|
|
return (FALSE);
|
|
default:
|
|
abort();
|
|
}
|
|
return (*rp != END ? FALSE : TRUE);
|
|
}
|
|
|
|
/*
|
|
* Reverse engineer the regular expression, by picking first of all alternates.
|
|
*/
|
|
char *
|
|
rxp_expand(void)
|
|
{
|
|
return (rxp__expand(TRUE));
|
|
}
|
|
|
|
static char *
|
|
rxp__expand(int first)
|
|
{
|
|
static char buf[RXP_LINE_SZ/2];
|
|
static Rxp_t *rp;
|
|
static char *bp;
|
|
Rxp_t *grp_ptr;
|
|
char *err;
|
|
|
|
if (first) {
|
|
rp = rxpbuf;
|
|
bp = buf;
|
|
}
|
|
while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
|
|
switch(*rp) {
|
|
case LIT:
|
|
rp++;
|
|
*bp++ = *rp++;
|
|
break;
|
|
case GRP_S:
|
|
rp++;
|
|
grp_ptr = rxpbuf + *rp;
|
|
rp++;
|
|
if ((err = rxp__expand(FALSE)) == NULL)
|
|
return (err);
|
|
rp = grp_ptr;
|
|
break;
|
|
case ALT_E:
|
|
return (buf);
|
|
case ALT_S:
|
|
rp++;
|
|
/* FALLTHROUGH */
|
|
case SOT:
|
|
case EOT:
|
|
case GRP_E:
|
|
rp++;
|
|
break;
|
|
default:
|
|
return (NULL);
|
|
}
|
|
if (first) {
|
|
if (*rp != END)
|
|
return (NULL);
|
|
*bp = '\0';
|
|
}
|
|
return (buf);
|
|
}
|