387 lines
9.6 KiB
C
387 lines
9.6 KiB
C
/*++
/* NAME
/*	dict_pcre 3
/* SUMMARY
/*	dictionary manager interface to PCRE regular expression library
/* SYNOPSIS
/*	#include <dict_pcre.h>
/*
/*	DICT	*dict_pcre_open(name, dummy, dict_flags)
/*	const char *name;
/*	int	dummy;
/*	int	dict_flags;
/* DESCRIPTION
/*	dict_pcre_open() opens the named file and compiles the contained
/*	regular expressions.
/*
/*	The lookup interface will match only user@domain form addresses.
/* SEE ALSO
/*	dict(3) generic dictionary manager
/* AUTHOR(S)
/*	Andrew McNamara
/*	andrewm@connect.com.au
/*	connect.com.au Pty. Ltd.
/*	Level 3, 213 Miller St
/*	North Sydney, NSW, Australia
/*
/*	Wietse Venema
/*	IBM T.J. Watson Research
/*	P.O. Box 704
/*	Yorktown Heights, NY 10598, USA
/*--*/
|
|
|
|
#include "sys_defs.h"
|
|
|
|
#ifdef HAS_PCRE
|
|
|
|
/* System library. */
|
|
|
|
#include <stdio.h> /* sprintf() prototype */
|
|
#include <stdlib.h>
|
|
#include <unistd.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
|
|
/* Utility library. */
|
|
|
|
#include "mymalloc.h"
|
|
#include "msg.h"
|
|
#include "safe.h"
|
|
#include "vstream.h"
|
|
#include "vstring.h"
|
|
#include "stringops.h"
|
|
#include "readlline.h"
|
|
#include "dict.h"
|
|
#include "dict_pcre.h"
|
|
#include "mac_parse.h"
|
|
|
|
/* PCRE library */
|
|
|
|
#include "pcre.h"
|
|
|
|
/*
 * Upper bound on the number of strings one regexp match may capture;
 * essentially the maximum number of (..) groups. The offsets vector
 * handed to the matcher is sized as three ints per capture.
 */
#define PCRE_MAX_CAPTURE	99
|
|
|
|
struct dict_pcre_list {
|
|
pcre *pattern; /* The compiled pattern */
|
|
pcre_extra *hints; /* Hints to speed pattern execution */
|
|
char *replace; /* Replacement string */
|
|
int lineno; /* Source file line number */
|
|
struct dict_pcre_list *next; /* Next regexp in dict */
|
|
};
|
|
|
|
typedef struct {
|
|
DICT dict; /* generic members */
|
|
char *map; /* map name */
|
|
int flags; /* unused at the moment */
|
|
struct dict_pcre_list *head;
|
|
} DICT_PCRE;
|
|
|
|
/*
 * Nonzero once dict_pcre_open() has pointed the PCRE library's
 * pcre_malloc/pcre_free hooks at mymalloc/myfree. The type is spelled out
 * because implicit int is not valid C99 or later.
 */
static int dict_pcre_init = 0;
|
|
|
|
/*
|
|
* dict_pcre_update - not supported
|
|
*/
|
|
static void dict_pcre_update(DICT *dict, const char *unused_name,
|
|
const char *unused_value)
|
|
{
|
|
DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
|
|
|
|
msg_fatal("dict_pcre_update: attempt to update regexp map %s",
|
|
dict_pcre->map);
|
|
}
|
|
|
|
/*
|
|
* Context for macro expansion callback.
|
|
*/
|
|
struct dict_pcre_context {
|
|
const char *dict_name; /* source dict name */
|
|
int lineno; /* source file line number */
|
|
VSTRING *buf; /* target string buffer */
|
|
const char *subject; /* str against which we match */
|
|
int offsets[PCRE_MAX_CAPTURE * 3]; /* Cut substrings */
|
|
int matches; /* Count of cuts */
|
|
};
|
|
|
|
/*
|
|
* Macro expansion callback - replace $0-${99} with strings cut from
|
|
* matched string.
|
|
*/
|
|
static int dict_pcre_action(int type, VSTRING *buf, char *ptr)
|
|
{
|
|
struct dict_pcre_context *ctxt = (struct dict_pcre_context *) ptr;
|
|
const char *pp;
|
|
int n,
|
|
ret;
|
|
|
|
if (type == MAC_PARSE_VARNAME) {
|
|
n = atoi(vstring_str(buf));
|
|
ret = pcre_get_substring(ctxt->subject, ctxt->offsets, ctxt->matches,
|
|
n, &pp);
|
|
if (ret < 0) {
|
|
if (ret == PCRE_ERROR_NOSUBSTRING)
|
|
msg_fatal("regexp %s, line %d: replace index out of range",
|
|
ctxt->dict_name, ctxt->lineno);
|
|
else
|
|
msg_fatal("regexp %s, line %d: pcre_get_substring error: %d",
|
|
ctxt->dict_name, ctxt->lineno, ret);
|
|
}
|
|
if (*pp == 0) {
|
|
myfree((char *) pp);
|
|
return (MAC_PARSE_UNDEF);
|
|
}
|
|
vstring_strcat(ctxt->buf, pp);
|
|
myfree((char *) pp);
|
|
return (0);
|
|
} else
|
|
/* Straight text - duplicate with no substitution */
|
|
vstring_strcat(ctxt->buf, vstring_str(buf));
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* Look up regexp dict and perform string substitution on matched
|
|
* strings.
|
|
*/
|
|
static const char *dict_pcre_lookup(DICT *dict, const char *name)
|
|
{
|
|
DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
|
|
struct dict_pcre_list *pcre_list;
|
|
int name_len = strlen(name);
|
|
struct dict_pcre_context ctxt;
|
|
static VSTRING *buf;
|
|
|
|
dict_errno = 0;
|
|
|
|
if (msg_verbose)
|
|
msg_info("dict_pcre_lookup: %s: %s", dict_pcre->map, name);
|
|
|
|
/* Search for a matching expression */
|
|
ctxt.matches = 0;
|
|
for (pcre_list = dict_pcre->head; pcre_list; pcre_list = pcre_list->next) {
|
|
if (pcre_list->pattern) {
|
|
ctxt.matches = pcre_exec(pcre_list->pattern, pcre_list->hints,
|
|
name, name_len, 0, 0, ctxt.offsets, PCRE_MAX_CAPTURE * 3);
|
|
if (ctxt.matches != PCRE_ERROR_NOMATCH) {
|
|
if (ctxt.matches > 0)
|
|
break; /* Got a match! */
|
|
else {
|
|
/* An error */
|
|
switch (ctxt.matches) {
|
|
case 0:
|
|
msg_warn("regexp map %s, line %d: too many (...)",
|
|
dict_pcre->map, pcre_list->lineno);
|
|
break;
|
|
case PCRE_ERROR_NULL:
|
|
case PCRE_ERROR_BADOPTION:
|
|
msg_fatal("regexp map %s, line %d: bad args to re_exec",
|
|
dict_pcre->map, pcre_list->lineno);
|
|
break;
|
|
case PCRE_ERROR_BADMAGIC:
|
|
case PCRE_ERROR_UNKNOWN_NODE:
|
|
msg_fatal("regexp map %s, line %d: corrupt compiled regexp",
|
|
dict_pcre->map, pcre_list->lineno);
|
|
break;
|
|
default:
|
|
msg_fatal("regexp map %s, line %d: unknown re_exec error: %d",
|
|
dict_pcre->map, pcre_list->lineno, ctxt.matches);
|
|
break;
|
|
}
|
|
return ((char *) 0);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* If we've got a match, */
|
|
if (ctxt.matches > 0) {
|
|
/* Then perform substitution on replacement string */
|
|
if (buf == 0)
|
|
buf = vstring_alloc(10);
|
|
VSTRING_RESET(buf);
|
|
ctxt.buf = buf;
|
|
ctxt.subject = name;
|
|
ctxt.dict_name = dict_pcre->map;
|
|
ctxt.lineno = pcre_list->lineno;
|
|
|
|
if (mac_parse(pcre_list->replace, dict_pcre_action, (char *) &ctxt) & MAC_PARSE_ERROR)
|
|
msg_fatal("regexp map %s, line %d: bad replacement syntax",
|
|
dict_pcre->map, pcre_list->lineno);
|
|
|
|
VSTRING_TERMINATE(buf);
|
|
return (vstring_str(buf));
|
|
}
|
|
return ((char *) 0);
|
|
}
|
|
|
|
/* dict_pcre_close - close pcre dictionary */
|
|
|
|
static void dict_pcre_close(DICT *dict)
|
|
{
|
|
DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
|
|
struct dict_pcre_list *pcre_list;
|
|
|
|
for (pcre_list = dict_pcre->head; pcre_list; pcre_list = pcre_list->next) {
|
|
if (pcre_list->pattern)
|
|
myfree((char *) pcre_list->pattern);
|
|
if (pcre_list->hints)
|
|
myfree((char *) pcre_list->hints);
|
|
if (pcre_list->replace)
|
|
myfree((char *) pcre_list->replace);
|
|
}
|
|
myfree(dict_pcre->map);
|
|
myfree((char *) dict_pcre);
|
|
}
|
|
|
|
/*
|
|
* dict_pcre_open - load and compile a file containing regular expressions
|
|
*/
|
|
DICT *dict_pcre_open(const char *map, int unused_flags, int dict_flags)
|
|
{
|
|
DICT_PCRE *dict_pcre;
|
|
VSTREAM *map_fp;
|
|
VSTRING *line_buffer;
|
|
struct dict_pcre_list *pcre_list = NULL,
|
|
*pl;
|
|
int lineno = 0;
|
|
char *regexp,
|
|
*p,
|
|
re_delimiter;
|
|
int re_options;
|
|
pcre *pattern;
|
|
pcre_extra *hints;
|
|
const char *error;
|
|
int errptr;
|
|
|
|
line_buffer = vstring_alloc(100);
|
|
|
|
dict_pcre = (DICT_PCRE *) mymalloc(sizeof(*dict_pcre));
|
|
dict_pcre->dict.lookup = dict_pcre_lookup;
|
|
dict_pcre->dict.update = dict_pcre_update;
|
|
dict_pcre->dict.close = dict_pcre_close;
|
|
dict_pcre->dict.fd = -1;
|
|
dict_pcre->map = mystrdup(map);
|
|
dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN;
|
|
dict_pcre->head = NULL;
|
|
|
|
if (dict_pcre_init == 0) {
|
|
pcre_malloc = (void *(*) (size_t)) mymalloc;
|
|
pcre_free = (void (*) (void *)) myfree;
|
|
dict_pcre_init = 1;
|
|
}
|
|
if ((map_fp = vstream_fopen(map, O_RDONLY, 0)) == 0) {
|
|
msg_fatal("open %s: %m", map);
|
|
}
|
|
while (readlline(line_buffer, map_fp, &lineno, READLL_STRIPNL)) {
|
|
|
|
if (*vstring_str(line_buffer) == '#') /* Skip comments */
|
|
continue;
|
|
|
|
if (*vstring_str(line_buffer) == 0) /* Skip blank lines */
|
|
continue;
|
|
|
|
p = vstring_str(line_buffer);
|
|
re_delimiter = *p++;
|
|
regexp = p;
|
|
|
|
/* Search for second delimiter, handling backslash escape */
|
|
while (*p) {
|
|
if (*p == '\\') {
|
|
++p;
|
|
if (*p == 0)
|
|
break;
|
|
} else if (*p == re_delimiter)
|
|
break;
|
|
++p;
|
|
}
|
|
|
|
if (!*p) {
|
|
msg_warn("%s, line %d: no closing regexp delimiter: %c",
|
|
VSTREAM_PATH(map_fp), lineno, re_delimiter);
|
|
continue;
|
|
}
|
|
*p++ = '\0'; /* Null term the regexp */
|
|
|
|
/* Now parse any regexp options */
|
|
re_options = PCRE_CASELESS;
|
|
while (*p && !ISSPACE(*p)) {
|
|
switch (*p) {
|
|
case 'i':
|
|
re_options ^= PCRE_CASELESS;
|
|
break;
|
|
case 'm':
|
|
re_options ^= PCRE_MULTILINE;
|
|
break;
|
|
case 's':
|
|
re_options ^= PCRE_DOTALL;
|
|
break;
|
|
case 'x':
|
|
re_options ^= PCRE_EXTENDED;
|
|
break;
|
|
case 'A':
|
|
re_options ^= PCRE_ANCHORED;
|
|
break;
|
|
case 'E':
|
|
re_options ^= PCRE_DOLLAR_ENDONLY;
|
|
break;
|
|
case 'U':
|
|
re_options ^= PCRE_UNGREEDY;
|
|
break;
|
|
case 'X':
|
|
re_options ^= PCRE_EXTRA;
|
|
break;
|
|
default:
|
|
msg_warn("%s, line %d: unknown regexp option '%c'",
|
|
VSTREAM_PATH(map_fp), lineno, *p);
|
|
}
|
|
++p;
|
|
}
|
|
|
|
while (*p && ISSPACE(*p))
|
|
++p;
|
|
|
|
if (!*p) {
|
|
msg_warn("%s, line %d: no replacement text",
|
|
VSTREAM_PATH(map_fp), lineno);
|
|
p = "";
|
|
}
|
|
/* Compile the patern */
|
|
pattern = pcre_compile(regexp, re_options, &error, &errptr, NULL);
|
|
if (pattern == NULL) {
|
|
msg_warn("%s, line %d: error in regex at offset %d: %s",
|
|
VSTREAM_PATH(map_fp), lineno, errptr, error);
|
|
continue;
|
|
}
|
|
hints = pcre_study(pattern, 0, &error);
|
|
if (error != NULL) {
|
|
msg_warn("%s, line %d: error while studying regex: %s",
|
|
VSTREAM_PATH(map_fp), lineno, error);
|
|
myfree((char *) pattern);
|
|
continue;
|
|
}
|
|
/* Add it to the list */
|
|
pl = (struct dict_pcre_list *) mymalloc(sizeof(struct dict_pcre_list));
|
|
|
|
/* Save the replacement string (if any) */
|
|
pl->replace = mystrdup(p);
|
|
pl->pattern = pattern;
|
|
pl->hints = hints;
|
|
pl->next = NULL;
|
|
pl->lineno = lineno;
|
|
|
|
if (pcre_list == NULL)
|
|
dict_pcre->head = pl;
|
|
else
|
|
pcre_list->next = pl;
|
|
pcre_list = pl;
|
|
}
|
|
|
|
vstring_free(line_buffer);
|
|
vstream_fclose(map_fp);
|
|
|
|
return (&dict_pcre->dict);
|
|
}
|
|
|
|
#endif /* HAS_PCRE */
|