410 lines
10 KiB
C
410 lines
10 KiB
C
|
/*++
|
||
|
/* NAME
|
||
|
/* dict_regexp 3
|
||
|
/* SUMMARY
|
||
|
/* dictionary manager interface to REGEXP regular expression library
|
||
|
/* SYNOPSIS
|
||
|
/* #include <dict_regexp.h>
|
||
|
/*
|
||
|
/* DICT *dict_regexp_open(name, dummy, dict_flags)
|
||
|
/* const char *name;
|
||
|
/* int dummy;
|
||
|
/* int dict_flags;
|
||
|
/* DESCRIPTION
|
||
|
/* dict_regexp_open() opens the named file and compiles the contained
|
||
|
/* regular expressions.
|
||
|
/*
|
||
|
/* The lookup interface will match only user@domain form addresses.
|
||
|
/* SEE ALSO
|
||
|
/* dict(3) generic dictionary manager
|
||
|
/* AUTHOR(S)
|
||
|
/* LaMont Jones
|
||
|
/* lamont@hp.com
|
||
|
/*
|
||
|
/* Based on PCRE dictionary contributed by Andrew McNamara
|
||
|
/* andrewm@connect.com.au
|
||
|
/* connect.com.au Pty. Ltd.
|
||
|
/* Level 3, 213 Miller St
|
||
|
/* North Sydney, NSW, Australia
|
||
|
/*
|
||
|
/* Wietse Venema
|
||
|
/* IBM T.J. Watson Research
|
||
|
/* P.O. Box 704
|
||
|
/* Yorktown Heights, NY 10598, USA
|
||
|
/*--*/
|
||
|
|
||
|
/* System library. */
|
||
|
|
||
|
#include "sys_defs.h"
|
||
|
|
||
|
#ifdef HAS_POSIX_REGEXP
|
||
|
|
||
|
#include <stdlib.h>
|
||
|
#include <unistd.h>
|
||
|
#include <string.h>
|
||
|
#include <ctype.h>
|
||
|
#include <regex.h>
|
||
|
|
||
|
/* Utility library. */
|
||
|
|
||
|
#include "mymalloc.h"
|
||
|
#include "msg.h"
|
||
|
#include "safe.h"
|
||
|
#include "vstream.h"
|
||
|
#include "vstring.h"
|
||
|
#include "stringops.h"
|
||
|
#include "readlline.h"
|
||
|
#include "dict.h"
|
||
|
#include "dict_regexp.h"
|
||
|
#include "mac_parse.h"
|
||
|
|
||
|
/*
 * One rule of a regexp map. Rules are chained into a singly-linked list
 * in the order in which they are appended by the map loader.
 */
struct dict_regexp_list {
    struct dict_regexp_list *next;      /* following rule, null at the end */
    regex_t *expr[2];                   /* [0]: pattern that must match;
                                         * [1]: optional second pattern,
                                         * null when absent */
    char   *replace;                    /* replacement text */
    int     lineno;                     /* line number in the map file */
};

typedef struct dict_regexp_list DICT_REGEXP_RULE;
|
||
|
|
||
|
typedef struct {
|
||
|
DICT dict; /* generic members */
|
||
|
char *map; /* map name */
|
||
|
int flags; /* unused at the moment */
|
||
|
regmatch_t *pmatch; /* Cut substrings */
|
||
|
int nmatch; /* number of elements in pmatch */
|
||
|
DICT_REGEXP_RULE *head; /* first rule */
|
||
|
} DICT_REGEXP;
|
||
|
|
||
|
/*
|
||
|
* Context for macro expansion callback.
|
||
|
*/
|
||
|
struct dict_regexp_context {
|
||
|
DICT_REGEXP *dict; /* the dictionary entry */
|
||
|
DICT_REGEXP_RULE *rule; /* the rule we matched */
|
||
|
VSTRING *buf; /* target string buffer */
|
||
|
const char *subject; /* str against which we match */
|
||
|
};
|
||
|
|
||
|
/*
|
||
|
* dict_regexp_update - not supported
|
||
|
*/
|
||
|
static void dict_regexp_update(DICT *dict, const char *unused_name,
|
||
|
const char *unused_value)
|
||
|
{
|
||
|
DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
|
||
|
|
||
|
msg_fatal("dict_regexp_update: attempt to update regexp map %s",
|
||
|
dict_regexp->map);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Macro expansion callback - replace $0-${99} with strings cut from
|
||
|
* matched string.
|
||
|
*/
|
||
|
static int dict_regexp_action(int type, VSTRING *buf, char *ptr)
|
||
|
{
|
||
|
struct dict_regexp_context *ctxt = (struct dict_regexp_context *) ptr;
|
||
|
DICT_REGEXP_RULE *rule = ctxt->rule;
|
||
|
DICT_REGEXP *dict = ctxt->dict;
|
||
|
int n;
|
||
|
|
||
|
if (type == MAC_PARSE_VARNAME) {
|
||
|
n = atoi(vstring_str(buf));
|
||
|
if (n >= dict->nmatch)
|
||
|
msg_fatal("regexp %s, line %d: replacement index out of range",
|
||
|
dict->map, rule->lineno);
|
||
|
if (dict->pmatch[n].rm_so < 0 ||
|
||
|
dict->pmatch[n].rm_so == dict->pmatch[n].rm_eo) {
|
||
|
return (MAC_PARSE_UNDEF); /* empty string or not
|
||
|
* matched */
|
||
|
}
|
||
|
vstring_strncat(ctxt->buf, ctxt->subject + dict->pmatch[n].rm_so,
|
||
|
dict->pmatch[n].rm_eo - dict->pmatch[n].rm_so);
|
||
|
} else
|
||
|
/* Straight text - duplicate with no substitution */
|
||
|
vstring_strcat(ctxt->buf, vstring_str(buf));
|
||
|
return (0);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Look up regexp dict and perform string substitution on matched
|
||
|
* strings.
|
||
|
*/
|
||
|
static const char *dict_regexp_lookup(DICT *dict, const char *name)
|
||
|
{
|
||
|
DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
|
||
|
DICT_REGEXP_RULE *rule;
|
||
|
struct dict_regexp_context ctxt;
|
||
|
static VSTRING *buf;
|
||
|
int error;
|
||
|
|
||
|
dict_errno = 0;
|
||
|
|
||
|
if (msg_verbose)
|
||
|
msg_info("dict_regexp_lookup: %s: %s", dict_regexp->map, name);
|
||
|
|
||
|
/* Search for a matching expression */
|
||
|
for (rule = dict_regexp->head; rule; rule = rule->next) {
|
||
|
error = regexec(rule->expr[0], name, rule->expr[0]->re_nsub + 1,
|
||
|
dict_regexp->pmatch, 0);
|
||
|
if (!error) {
|
||
|
if (rule->expr[1]) {
|
||
|
error = regexec(rule->expr[1], name, rule->expr[1]->re_nsub + 1,
|
||
|
dict_regexp->pmatch + rule->expr[0]->re_nsub + 1, 0);
|
||
|
if (!error) {
|
||
|
continue;
|
||
|
} else if (error != REG_NOMATCH) {
|
||
|
char errbuf[256];
|
||
|
|
||
|
(void) regerror(error, rule->expr[1], errbuf, sizeof(errbuf));
|
||
|
msg_fatal("regexp map %s, line %d: %s.",
|
||
|
dict_regexp->map, rule->lineno, errbuf);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* We found a match. Do some final initialization on the
|
||
|
* subexpression fields, and perform substitution on replacement
|
||
|
* string
|
||
|
*/
|
||
|
if (!buf)
|
||
|
buf = vstring_alloc(10);
|
||
|
VSTRING_RESET(buf);
|
||
|
ctxt.buf = buf;
|
||
|
ctxt.subject = name;
|
||
|
ctxt.rule = rule;
|
||
|
ctxt.dict = dict_regexp;
|
||
|
|
||
|
if (mac_parse(rule->replace, dict_regexp_action, (char *) &ctxt) & MAC_PARSE_ERROR)
|
||
|
msg_fatal("regexp map %s, line %d: bad replacement syntax.",
|
||
|
dict_regexp->map, rule->lineno);
|
||
|
|
||
|
VSTRING_TERMINATE(buf);
|
||
|
return (vstring_str(buf));
|
||
|
} else if (error && error != REG_NOMATCH) {
|
||
|
char errbuf[256];
|
||
|
|
||
|
(void) regerror(error, rule->expr[0], errbuf, sizeof(errbuf));
|
||
|
msg_fatal("regexp map %s, line %d: %s.",
|
||
|
dict_regexp->map, rule->lineno, errbuf);
|
||
|
return ((char *) 0);
|
||
|
}
|
||
|
}
|
||
|
return ((char *) 0);
|
||
|
}
|
||
|
|
||
|
/* dict_regexp_close - close regexp dictionary */
|
||
|
|
||
|
static void dict_regexp_close(DICT *dict)
|
||
|
{
|
||
|
DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
|
||
|
DICT_REGEXP_RULE *rule,
|
||
|
*next;
|
||
|
int i;
|
||
|
|
||
|
for (rule = dict_regexp->head; rule; rule = next) {
|
||
|
next = rule->next;
|
||
|
for (i = 0; i < 2; i++) {
|
||
|
if (rule->expr[i]) {
|
||
|
regfree(rule->expr[i]);
|
||
|
myfree((char *) rule->expr[i]);
|
||
|
}
|
||
|
}
|
||
|
myfree((char *) rule->replace);
|
||
|
myfree((char *) rule);
|
||
|
}
|
||
|
if (dict_regexp->pmatch)
|
||
|
myfree((char *) dict_regexp->pmatch);
|
||
|
myfree(dict_regexp->map);
|
||
|
myfree((char *) dict_regexp);
|
||
|
}
|
||
|
|
||
|
/*
 * dict_regexp_get_expr - parse and compile one delimited pattern
 *
 * lineno  line number, used in warnings only.
 * bufp    in: *bufp points at the opening delimiter of the pattern.
 *         out (success only): *bufp points at the first character after
 *         the pattern's option letters.
 * map_fp  stream the line came from; its path is used in warnings.
 *
 * The pattern ends at the next unescaped occurrence of the delimiter.
 * A backslash protects the character that follows it. The closing
 * delimiter in the caller's buffer is overwritten with a null byte.
 *
 * Option letters directly after the closing delimiter toggle a flag:
 * 'i' REG_ICASE, 'm' REG_NEWLINE, 'x' REG_EXTENDED. The initial flags
 * are REG_EXTENDED | REG_ICASE. Unknown letters produce a warning and
 * are skipped. The option list ends at whitespace, at '!' followed by
 * the delimiter, or at the end of the line.
 *
 * Returns a newly allocated compiled expression, or NULL after a
 * warning when the closing delimiter is missing or regcomp() fails.
 */
static regex_t *dict_regexp_get_expr(int lineno, char **bufp, VSTREAM *map_fp)
{
    char   *p = *bufp;
    char   *regexp;
    char    re_delim;
    char    errbuf[256];
    int     re_options;
    int     error;
    regex_t *expr;

    re_delim = *p++;
    regexp = p;

    /* Search for second delimiter, handling backslash escape */
    for (; *p; p++) {
        if (*p == '\\') {
            if (p[1] == 0) {

                /*
                 * A backslash with nothing after it cannot close the
                 * pattern. Step onto the terminator so that this is
                 * reported as a missing delimiter below.
                 */
                p++;
                break;
            }
            p++;                        /* skip the protected character */
        } else if (*p == re_delim) {
            break;
        }
    }
    if (!*p) {
        msg_warn("%s, line %d: no closing regexp delimiter: %c",
                 VSTREAM_PATH(map_fp), lineno, re_delim);
        return NULL;
    }
    *p++ = '\0';                        /* Null term the regexp */

    /*
     * Collect option letters. The end of the line ends the option list
     * just like whitespace does, so a pattern that is the last thing on
     * the line is still compiled and returned.
     */
    re_options = REG_EXTENDED | REG_ICASE;
    for (; *p && !ISSPACE(*p) && !(*p == '!' && p[1] == re_delim); p++) {
        switch (*p) {
        case 'i':
            re_options ^= REG_ICASE;
            break;
        case 'm':
            re_options ^= REG_NEWLINE;
            break;
        case 'x':
            re_options ^= REG_EXTENDED;
            break;
        default:
            msg_warn("%s, line %d: unknown regexp option '%c'",
                     VSTREAM_PATH(map_fp), lineno, *p);
            break;
        }
    }

    /* end of the regexp */
    expr = (regex_t *) mymalloc(sizeof(*expr));
    error = regcomp(expr, regexp, re_options);
    if (error != 0) {
        (void) regerror(error, expr, errbuf, sizeof(errbuf));
        msg_warn("%s, line %d: error in regexp: %s.",
                 VSTREAM_PATH(map_fp), lineno, errbuf);
        myfree((char *) expr);
        return NULL;
    }
    *bufp = p;
    return expr;
}
|
||
|
|
||
|
/*
 * dict_regexp_parseline - turn one map file line into a rule
 *
 * lineno  line number, stored in the rule and used in warnings.
 * line    the line text, starting with the pattern's opening delimiter;
 *         the pattern parser writes null bytes into it.
 * nsub    when not null, receives the number of pmatch slots the rule
 *         needs: one per subexpression plus one per pattern.
 * map_fp  stream the line came from; its path is used in warnings.
 *
 * Accepted forms are "/pattern/flags replacement" and
 * "/pattern/flags!/pattern/flags replacement". The replacement is the
 * rest of the line after the whitespace that follows the pattern(s); a
 * missing replacement produces a warning and an empty string is used.
 *
 * Returns a newly allocated rule, or NULL when the line is rejected. On
 * rejection every pattern that was compiled so far is released with
 * regfree() before its storage is freed.
 */
static DICT_REGEXP_RULE *dict_regexp_parseline(int lineno, char *line, int *nsub, VSTREAM *map_fp)
{
    DICT_REGEXP_RULE *rule;
    char   *p = line;
    char    re_delim = *p;
    regex_t *expr1;
    regex_t *expr2 = NULL;
    int     total_nsub;

    expr1 = dict_regexp_get_expr(lineno, &p, map_fp);
    if (!expr1)
        return NULL;

    if (*p == '!' && p[1] == re_delim) {
        p++;                            /* step onto the second delimiter */
        expr2 = dict_regexp_get_expr(lineno, &p, map_fp);
        if (!expr2) {
            regfree(expr1);
            myfree((char *) expr1);
            return NULL;
        }
        total_nsub = expr1->re_nsub + expr2->re_nsub + 2;
    } else {
        total_nsub = expr1->re_nsub + 1;
    }
    if (nsub)
        *nsub = total_nsub;

    /*
     * Only whitespace or the end of the line may follow the pattern(s).
     * The end of the line is not an error here; it is handled below as
     * a missing replacement.
     */
    if (*p && !ISSPACE(*p)) {
        msg_warn("%s, line %d: Too many expressions.",
                 VSTREAM_PATH(map_fp), lineno);
        regfree(expr1);
        myfree((char *) expr1);
        if (expr2) {
            regfree(expr2);
            myfree((char *) expr2);
        }
        return NULL;
    }
    rule = (DICT_REGEXP_RULE *) mymalloc(sizeof(DICT_REGEXP_RULE));

    while (*p && ISSPACE(*p))
        ++p;

    if (!*p) {
        msg_warn("%s, line %d: no replacement text: using empty string",
                 VSTREAM_PATH(map_fp), lineno);
        p = "";
    }
    rule->expr[0] = expr1;
    rule->expr[1] = expr2;
    rule->replace = mystrdup(p);
    rule->lineno = lineno;
    rule->next = NULL;
    return rule;
}
|
||
|
|
||
|
/*
|
||
|
* dict_regexp_open - load and compile a file containing regular expressions
|
||
|
*/
|
||
|
DICT *dict_regexp_open(const char *map, int unused_flags, int dict_flags)
|
||
|
{
|
||
|
DICT_REGEXP *dict_regexp;
|
||
|
VSTREAM *map_fp;
|
||
|
VSTRING *line_buffer;
|
||
|
DICT_REGEXP_RULE *rule,
|
||
|
*last_rule = NULL;
|
||
|
int lineno = 0;
|
||
|
int max_nsub = 0;
|
||
|
int nsub;
|
||
|
char *p;
|
||
|
|
||
|
line_buffer = vstring_alloc(100);
|
||
|
|
||
|
dict_regexp = (DICT_REGEXP *) mymalloc(sizeof(*dict_regexp));
|
||
|
dict_regexp->dict.lookup = dict_regexp_lookup;
|
||
|
dict_regexp->dict.update = dict_regexp_update;
|
||
|
dict_regexp->dict.close = dict_regexp_close;
|
||
|
dict_regexp->dict.fd = -1;
|
||
|
dict_regexp->map = mystrdup(map);
|
||
|
dict_regexp->dict.flags = dict_flags | DICT_FLAG_PATTERN;
|
||
|
dict_regexp->head = 0;
|
||
|
dict_regexp->pmatch = 0;
|
||
|
|
||
|
if ((map_fp = vstream_fopen(map, O_RDONLY, 0)) == 0) {
|
||
|
msg_fatal("open %s: %m", map);
|
||
|
}
|
||
|
while (readlline(line_buffer, map_fp, &lineno, READLL_STRIPNL)) {
|
||
|
p = vstring_str(line_buffer);
|
||
|
|
||
|
if (*p == '#') /* Skip comments */
|
||
|
continue;
|
||
|
|
||
|
if (*p == 0) /* Skip blank lines */
|
||
|
continue;
|
||
|
|
||
|
rule = dict_regexp_parseline(lineno, p, &nsub, map_fp);
|
||
|
if (rule) {
|
||
|
if (nsub > max_nsub)
|
||
|
max_nsub = nsub;
|
||
|
|
||
|
if (last_rule == NULL)
|
||
|
dict_regexp->head = rule;
|
||
|
else
|
||
|
last_rule->next = rule;
|
||
|
last_rule = rule;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (max_nsub > 0)
|
||
|
dict_regexp->pmatch =
|
||
|
(regmatch_t *) mymalloc(sizeof(regmatch_t) * max_nsub);
|
||
|
dict_regexp->nmatch = max_nsub;
|
||
|
|
||
|
vstring_free(line_buffer);
|
||
|
vstream_fclose(map_fp);
|
||
|
|
||
|
return (&dict_regexp->dict);
|
||
|
}
|
||
|
|
||
|
#endif
|