Reliability fix: merge the libc/regex fix to avoid the memory exhaustion problem.
This commit is contained in:
parent
d03cf6c291
commit
b70d1274ae
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: engine.c,v 1.6 2009/04/12 14:47:51 tnozaki Exp $ */
|
||||
/* $NetBSD: engine.c,v 1.7 2011/11/19 17:45:11 tnozaki Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
|
@ -168,8 +168,8 @@ int eflags;
|
|||
/* prescreening; this does wonders for this rather slow code */
|
||||
if (g->must != NULL) {
|
||||
for (dp = start; dp < stop; dp++)
|
||||
if (*dp == g->must[0] && stop - dp >= g->mlen &&
|
||||
MEMCMP(dp, g->must, (size_t)g->mlen) == 0)
|
||||
if (*dp == g->must[0] && (size_t)(stop - dp) >= g->mlen &&
|
||||
MEMCMP(dp, g->must, g->mlen) == 0)
|
||||
break;
|
||||
if (dp == stop) /* we didn't find g->must */
|
||||
return(REG_NOMATCH);
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: regcomp.c,v 1.6 2011/03/21 14:53:03 tnozaki Exp $ */
|
||||
/* $NetBSD: regcomp.c,v 1.7 2011/11/19 17:45:11 tnozaki Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
|
@ -82,11 +82,11 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
/* === regcomp.c === */
|
||||
static void p_ere __P((struct parse *p, int stop));
|
||||
static void p_ere_exp __P((struct parse *p));
|
||||
static void p_ere __P((struct parse *p, int stop, size_t reclimit));
|
||||
static void p_ere_exp __P((struct parse *p, size_t reclimit));
|
||||
static void p_str __P((struct parse *p));
|
||||
static void p_bre __P((struct parse *p, int end1, int end2));
|
||||
static int p_simp_re __P((struct parse *p, int starordinary));
|
||||
static void p_bre __P((struct parse *p, int end1, int end2, size_t reclimit));
|
||||
static int p_simp_re __P((struct parse *p, int starordinary, size_t reclimit));
|
||||
static int p_count __P((struct parse *p));
|
||||
static void p_bracket __P((struct parse *p));
|
||||
static void p_b_term __P((struct parse *p, cset *cs));
|
||||
|
@ -98,7 +98,7 @@ static char othercase __P((int ch));
|
|||
static void bothcases __P((struct parse *p, int ch));
|
||||
static void ordinary __P((struct parse *p, int ch));
|
||||
static void nonnewline __P((struct parse *p));
|
||||
static void repeat __P((struct parse *p, sopno start, int from, int to));
|
||||
static void repeat __P((struct parse *p, sopno start, int from, int to, size_t reclimit));
|
||||
static int seterr __P((struct parse *p, int e));
|
||||
static cset *allocset __P((struct parse *p));
|
||||
static void freeset __P((struct parse *p, cset *cs));
|
||||
|
@ -122,7 +122,7 @@ static sopno dupl __P((struct parse *p, sopno start, sopno finish));
|
|||
static void doemit __P((struct parse *p, sop op, size_t opnd));
|
||||
static void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos));
|
||||
static void dofwd __P((struct parse *p, sopno pos, sop value));
|
||||
static void enlarge __P((struct parse *p, sopno size));
|
||||
static int enlarge __P((struct parse *p, sopno size));
|
||||
static void stripsnug __P((struct parse *p, struct re_guts *g));
|
||||
static void findmust __P((struct parse *p, struct re_guts *g));
|
||||
static sopno pluscount __P((struct parse *p, struct re_guts *g));
|
||||
|
@ -170,6 +170,13 @@ static int never = 0; /* for use in asserts; shuts lint up */
|
|||
#define never 0 /* some <assert.h>s have bugs too */
|
||||
#endif
|
||||
|
||||
#define MEMLIMIT 0x8000000
|
||||
#define MEMSIZE(p) \
|
||||
((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \
|
||||
(p)->ncsalloc * sizeof(cset) + \
|
||||
(p)->ssize * sizeof(sop))
|
||||
#define RECLIMIT 256
|
||||
|
||||
/*
|
||||
- regcomp - interface for parser and compilation
|
||||
= extern int regcomp(regex_t *, const RCHAR_T *, int);
|
||||
|
@ -258,11 +265,11 @@ regcomp(regex_t *preg, const RCHAR_T *pattern, int cflags)
|
|||
EMIT(OEND, 0);
|
||||
g->firststate = THERE();
|
||||
if (cflags&REG_EXTENDED)
|
||||
p_ere(p, OUT);
|
||||
p_ere(p, OUT, 0);
|
||||
else if (cflags&REG_NOSPEC)
|
||||
p_str(p);
|
||||
else
|
||||
p_bre(p, OUT, OUT);
|
||||
p_bre(p, OUT, OUT, 0);
|
||||
EMIT(OEND, 0);
|
||||
g->laststate = THERE();
|
||||
|
||||
|
@ -289,10 +296,10 @@ regcomp(regex_t *preg, const RCHAR_T *pattern, int cflags)
|
|||
|
||||
/*
|
||||
- p_ere - ERE parser top level, concatenation and alternation
|
||||
== static void p_ere(register struct parse *p, int stop);
|
||||
== static void p_ere(register struct parse *p, int stop, size_t reclimit);
|
||||
*/
|
||||
static void
|
||||
p_ere(register struct parse *p, int stop)
|
||||
p_ere(register struct parse *p, int stop, size_t reclimit)
|
||||
|
||||
/* character this ERE should end at */
|
||||
{
|
||||
|
@ -302,11 +309,16 @@ p_ere(register struct parse *p, int stop)
|
|||
register sopno conc;
|
||||
register int first = 1; /* is this the first alternative? */
|
||||
|
||||
if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
|
||||
p->error = REG_ESPACE;
|
||||
return;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
/* do a bunch of concatenated expressions */
|
||||
conc = HERE();
|
||||
while (MORE() && (c = PEEK()) != '|' && c != stop)
|
||||
p_ere_exp(p);
|
||||
p_ere_exp(p, reclimit);
|
||||
(void)REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
|
||||
|
||||
if (!EAT('|'))
|
||||
|
@ -338,7 +350,7 @@ p_ere(register struct parse *p, int stop)
|
|||
== static void p_ere_exp(register struct parse *p);
|
||||
*/
|
||||
static void
|
||||
p_ere_exp(register struct parse *p)
|
||||
p_ere_exp(register struct parse *p, size_t reclimit)
|
||||
{
|
||||
register char c;
|
||||
register sopno pos;
|
||||
|
@ -360,7 +372,7 @@ p_ere_exp(register struct parse *p)
|
|||
p->pbegin[subno] = HERE();
|
||||
EMIT(OLPAREN, subno);
|
||||
if (!SEE(')'))
|
||||
p_ere(p, ')');
|
||||
p_ere(p, ')', reclimit);
|
||||
if (subno < NPAREN) {
|
||||
p->pend[subno] = HERE();
|
||||
assert(p->pend[subno] != 0);
|
||||
|
@ -462,7 +474,7 @@ p_ere_exp(register struct parse *p)
|
|||
count2 = INFINITY;
|
||||
} else /* just a single number */
|
||||
count2 = count;
|
||||
repeat(p, pos, count, count2);
|
||||
repeat(p, pos, count, count2, 0);
|
||||
if (!EAT('}')) { /* error heuristics */
|
||||
while (MORE() && PEEK() != '}')
|
||||
NEXT();
|
||||
|
@ -496,7 +508,7 @@ p_str(register struct parse *p)
|
|||
/*
|
||||
- p_bre - BRE parser top level, anchoring and concatenation
|
||||
== static void p_bre(register struct parse *p, register int end1, \
|
||||
== register int end2);
|
||||
== register int end2, size_t reclimit);
|
||||
* Giving end1 as OUT essentially eliminates the end1/end2 check.
|
||||
*
|
||||
* This implementation is a bit of a kludge, in that a trailing $ is first
|
||||
|
@ -506,22 +518,29 @@ p_str(register struct parse *p)
|
|||
* The amount of lookahead needed to avoid this kludge is excessive.
|
||||
*/
|
||||
static void
|
||||
p_bre(register struct parse *p, register int end1, register int end2)
|
||||
p_bre(register struct parse *p, register int end1, register int end2, size_t reclimit)
|
||||
|
||||
/* first terminating character */
|
||||
/* second terminating character */
|
||||
{
|
||||
register sopno start = HERE();
|
||||
register sopno start;
|
||||
register int first = 1; /* first subexpression? */
|
||||
register int wasdollar = 0;
|
||||
|
||||
if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
|
||||
p->error = REG_ESPACE;
|
||||
return;
|
||||
}
|
||||
|
||||
start = HERE();
|
||||
|
||||
if (EAT('^')) {
|
||||
EMIT(OBOL, 0);
|
||||
p->g->iflags |= USEBOL;
|
||||
p->g->nbol++;
|
||||
}
|
||||
while (MORE() && !SEETWO(end1, end2)) {
|
||||
wasdollar = p_simp_re(p, first);
|
||||
wasdollar = p_simp_re(p, first, reclimit);
|
||||
first = 0;
|
||||
}
|
||||
if (wasdollar) { /* oops, that was a trailing anchor */
|
||||
|
@ -536,10 +555,10 @@ p_bre(register struct parse *p, register int end1, register int end2)
|
|||
|
||||
/*
|
||||
- p_simp_re - parse a simple RE, an atom possibly followed by a repetition
|
||||
== static int p_simp_re(register struct parse *p, int starordinary);
|
||||
== static int p_simp_re(register struct parse *p, int starordinary, size_t reclimit);
|
||||
*/
|
||||
static int /* was the simple RE an unbackslashed $? */
|
||||
p_simp_re(register struct parse *p, int starordinary)
|
||||
p_simp_re(register struct parse *p, int starordinary, size_t reclimit)
|
||||
|
||||
/* is a leading * an ordinary character? */
|
||||
{
|
||||
|
@ -571,7 +590,7 @@ p_simp_re(register struct parse *p, int starordinary)
|
|||
EMIT(OLPAREN, subno);
|
||||
/* the MORE here is an error heuristic */
|
||||
if (MORE() && !SEETWO('\\', ')'))
|
||||
p_bre(p, '\\', ')');
|
||||
p_bre(p, '\\', ')', reclimit);
|
||||
if (subno < NPAREN) {
|
||||
p->pend[subno] = HERE();
|
||||
assert(p->pend[subno] != 0);
|
||||
|
@ -646,7 +665,7 @@ p_simp_re(register struct parse *p, int starordinary)
|
|||
count2 = INFINITY;
|
||||
} else /* just a single number */
|
||||
count2 = count;
|
||||
repeat(p, pos, count, count2);
|
||||
repeat(p, pos, count, count2, reclimit);
|
||||
if (!EATTWO('\\', '}')) { /* error heuristics */
|
||||
while (MORE() && !SEETWO('\\', '}'))
|
||||
NEXT();
|
||||
|
@ -688,11 +707,15 @@ p_count(register struct parse *p)
|
|||
static void
|
||||
p_bracket(register struct parse *p)
|
||||
{
|
||||
register cset *cs = allocset(p);
|
||||
register cset *cs;
|
||||
register int invert = 0;
|
||||
static RCHAR_T bow[] = { '[', ':', '<', ':', ']', ']' };
|
||||
static RCHAR_T eow[] = { '[', ':', '>', ':', ']', ']' };
|
||||
|
||||
cs = allocset(p);
|
||||
if (cs == NULL)
|
||||
return;
|
||||
|
||||
/* Dept of Truly Sickening Special-Case Kludges */
|
||||
if (p->next + 5 < p->end && MEMCMP(p->next, bow, 6) == 0) {
|
||||
EMIT(OBOW, 0);
|
||||
|
@ -1005,25 +1028,29 @@ nonnewline(register struct parse *p)
|
|||
|
||||
/*
|
||||
- repeat - generate code for a bounded repetition, recursively if needed
|
||||
== static void repeat(register struct parse *p, sopno start, int from, int to);
|
||||
== static void repeat(register struct parse *p, sopno start, int from, int to, size_t reclimit);
|
||||
*/
|
||||
static void
|
||||
repeat(register struct parse *p, sopno start, int from, int to)
|
||||
repeat(register struct parse *p, sopno start, int from, int to, size_t reclimit)
|
||||
|
||||
/* operand from here to end of strip */
|
||||
/* repeated from this number */
|
||||
/* to this number of times (maybe INFINITY) */
|
||||
{
|
||||
register sopno finish = HERE();
|
||||
register sopno finish;
|
||||
# define N 2
|
||||
# define INF 3
|
||||
# define REP(f, t) ((f)*8 + (t))
|
||||
# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
|
||||
register sopno copy;
|
||||
|
||||
if (p->error != 0) /* head off possible runaway recursion */
|
||||
if (reclimit++ > RECLIMIT)
|
||||
p->error = REG_ESPACE;
|
||||
if (p->error)
|
||||
return;
|
||||
|
||||
finish = HERE();
|
||||
|
||||
assert(from <= to);
|
||||
|
||||
switch (REP(MAP(from), MAP(to))) {
|
||||
|
@ -1035,7 +1062,7 @@ repeat(register struct parse *p, sopno start, int from, int to)
|
|||
case REP(0, INF): /* as x{1,}? */
|
||||
/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
|
||||
INSERT(OCH_, start); /* offset is wrong... */
|
||||
repeat(p, start+1, 1, to);
|
||||
repeat(p, start+1, 1, to, reclimit);
|
||||
ASTERN(OOR1, start);
|
||||
AHEAD(start); /* ... fix it */
|
||||
EMIT(OOR2, 0);
|
||||
|
@ -1055,7 +1082,7 @@ repeat(register struct parse *p, sopno start, int from, int to)
|
|||
ASTERN(O_CH, THERETHERE());
|
||||
copy = dupl(p, start+1, finish+1);
|
||||
assert(copy == finish+4);
|
||||
repeat(p, copy, 1, to-1);
|
||||
repeat(p, copy, 1, to-1, reclimit);
|
||||
break;
|
||||
case REP(1, INF): /* as x+ */
|
||||
INSERT(OPLUS_, start);
|
||||
|
@ -1063,11 +1090,11 @@ repeat(register struct parse *p, sopno start, int from, int to)
|
|||
break;
|
||||
case REP(N, N): /* as xx{m-1,n-1} */
|
||||
copy = dupl(p, start, finish);
|
||||
repeat(p, copy, from-1, to-1);
|
||||
repeat(p, copy, from-1, to-1, reclimit);
|
||||
break;
|
||||
case REP(N, INF): /* as xx{n-1,INF} */
|
||||
copy = dupl(p, start, finish);
|
||||
repeat(p, copy, from-1, to);
|
||||
repeat(p, copy, from-1, to, reclimit);
|
||||
break;
|
||||
default: /* "can't happen" */
|
||||
SETERROR(REG_ASSERT); /* just in case */
|
||||
|
@ -1108,6 +1135,8 @@ allocset(register struct parse *p)
|
|||
nc = p->ncsalloc;
|
||||
assert(nc % CHAR_BIT == 0);
|
||||
nbytes = nc / CHAR_BIT * css;
|
||||
if (MEMSIZE(p) > MEMLIMIT)
|
||||
goto oomem;
|
||||
if (p->g->sets == NULL)
|
||||
p->g->sets = (cset *)malloc(nc * sizeof(cset));
|
||||
else
|
||||
|
@ -1126,13 +1155,14 @@ allocset(register struct parse *p)
|
|||
(void) memset((char *)p->g->setbits + (nbytes - css),
|
||||
0, css);
|
||||
else {
|
||||
oomem:
|
||||
no = 0;
|
||||
SETERROR(REG_ESPACE);
|
||||
/* caller's responsibility not to do set ops */
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
assert(p->g->sets != NULL); /* xxx */
|
||||
cs = &p->g->sets[no];
|
||||
cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
|
||||
cs->mask = 1 << ((no) % CHAR_BIT);
|
||||
|
@ -1419,7 +1449,8 @@ dupl(register struct parse *p, sopno start, sopno finish)
|
|||
assert(finish >= start);
|
||||
if (len == 0)
|
||||
return(ret);
|
||||
enlarge(p, p->ssize + len); /* this many unexpected additions */
|
||||
if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */
|
||||
return ret;
|
||||
assert(p->ssize >= p->slen + len);
|
||||
(void) memcpy((char *)(p->strip + p->slen),
|
||||
(char *)(p->strip + start), (size_t)len*sizeof(sop));
|
||||
|
@ -1449,8 +1480,8 @@ doemit(register struct parse *p, sop op, size_t opnd)
|
|||
|
||||
/* deal with undersized strip */
|
||||
if (p->slen >= p->ssize)
|
||||
enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
|
||||
assert(p->slen < p->ssize);
|
||||
if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */
|
||||
return;
|
||||
|
||||
/* finally, it's all reduced to the easy case */
|
||||
p->strip[p->slen] = op;
|
||||
|
@ -1516,30 +1547,35 @@ dofwd(register struct parse *p, register sopno pos, sop value)
|
|||
|
||||
/*
|
||||
- enlarge - enlarge the strip
|
||||
== static void enlarge(register struct parse *p, sopno size);
|
||||
== static int enlarge(register struct parse *p, sopno size);
|
||||
*/
|
||||
static void
|
||||
static int
|
||||
enlarge(register struct parse *p, register sopno size)
|
||||
{
|
||||
register sop *sp;
|
||||
register RCHAR_T *dp;
|
||||
sopno osize;
|
||||
|
||||
if (p->ssize >= size)
|
||||
return;
|
||||
return 1;
|
||||
|
||||
sp = (sop *)realloc(p->strip, size*sizeof(sop));
|
||||
if (sp == NULL) {
|
||||
SETERROR(REG_ESPACE);
|
||||
return;
|
||||
}
|
||||
osize = p->ssize;
|
||||
p->ssize = size;
|
||||
if (MEMSIZE(p) > MEMLIMIT)
|
||||
goto oomem;
|
||||
sp = realloc(p->strip, p->ssize * sizeof(sop));
|
||||
if (sp == NULL)
|
||||
goto oomem;
|
||||
p->strip = sp;
|
||||
dp = (RCHAR_T *)realloc(p->stripdata, size*sizeof(RCHAR_T));
|
||||
dp = realloc(p->stripdata, p->ssize * sizeof(RCHAR_T));
|
||||
if (dp == NULL) {
|
||||
oomem:
|
||||
p->ssize = osize;
|
||||
SETERROR(REG_ESPACE);
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
p->stripdata = dp;
|
||||
p->ssize = size;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: regex2.h,v 1.3 2011/03/21 14:53:03 tnozaki Exp $ */
|
||||
/* $NetBSD: regex2.h,v 1.4 2011/11/19 17:45:11 tnozaki Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
|
@ -78,7 +78,7 @@
|
|||
* immediately *preceding* "execution" of that operator.
|
||||
*/
|
||||
typedef char sop; /* strip operator */
|
||||
typedef int sopno;
|
||||
typedef size_t sopno;
|
||||
/* operators meaning operand */
|
||||
/* (back, fwd are offsets) */
|
||||
#define OEND (1) /* endmarker - */
|
||||
|
@ -140,8 +140,8 @@ struct re_guts {
|
|||
# define MAGIC2 ((('R'^0200)<<8)|'E')
|
||||
sop *strip; /* malloced area for strip */
|
||||
RCHAR_T *stripdata; /* malloced area for stripdata */
|
||||
int csetsize; /* number of bits in a cset vector */
|
||||
int ncsets; /* number of csets in use */
|
||||
size_t csetsize; /* number of bits in a cset vector */
|
||||
size_t ncsets; /* number of csets in use */
|
||||
cset *sets; /* -> cset [ncsets] */
|
||||
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
|
||||
int cflags; /* copy of regcomp() cflags argument */
|
||||
|
@ -152,14 +152,14 @@ struct re_guts {
|
|||
# define USEBOL 01 /* used ^ */
|
||||
# define USEEOL 02 /* used $ */
|
||||
# define BAD 04 /* something wrong */
|
||||
int nbol; /* number of ^ used */
|
||||
int neol; /* number of $ used */
|
||||
size_t nbol; /* number of ^ used */
|
||||
size_t neol; /* number of $ used */
|
||||
#if 0
|
||||
int ncategories; /* how many character categories */
|
||||
size_t ncategories; /* how many character categories */
|
||||
cat_t *categories; /* ->catspace[-CHAR_MIN] */
|
||||
#endif
|
||||
RCHAR_T *must; /* match must contain this string */
|
||||
int mlen; /* length of must */
|
||||
size_t mlen; /* length of must */
|
||||
size_t nsub; /* copy of re_nsub */
|
||||
int backrefs; /* does it use back references? */
|
||||
sopno nplus; /* how deep does it nest +s? */
|
||||
|
|
Loading…
Reference in New Issue