diff --git a/dist/nvi/regex/engine.c b/dist/nvi/regex/engine.c index 69dc50059f51..06dd4ec7148e 100644 --- a/dist/nvi/regex/engine.c +++ b/dist/nvi/regex/engine.c @@ -1,4 +1,4 @@ -/* $NetBSD: engine.c,v 1.6 2009/04/12 14:47:51 tnozaki Exp $ */ +/* $NetBSD: engine.c,v 1.7 2011/11/19 17:45:11 tnozaki Exp $ */ /*- * Copyright (c) 1992, 1993, 1994 Henry Spencer. @@ -168,8 +168,8 @@ int eflags; /* prescreening; this does wonders for this rather slow code */ if (g->must != NULL) { for (dp = start; dp < stop; dp++) - if (*dp == g->must[0] && stop - dp >= g->mlen && - MEMCMP(dp, g->must, (size_t)g->mlen) == 0) + if (*dp == g->must[0] && (size_t)(stop - dp) >= g->mlen && + MEMCMP(dp, g->must, g->mlen) == 0) break; if (dp == stop) /* we didn't find g->must */ return(REG_NOMATCH); diff --git a/dist/nvi/regex/regcomp.c b/dist/nvi/regex/regcomp.c index 5abc6a7a5fef..7d198687e30e 100644 --- a/dist/nvi/regex/regcomp.c +++ b/dist/nvi/regex/regcomp.c @@ -1,4 +1,4 @@ -/* $NetBSD: regcomp.c,v 1.6 2011/03/21 14:53:03 tnozaki Exp $ */ +/* $NetBSD: regcomp.c,v 1.7 2011/11/19 17:45:11 tnozaki Exp $ */ /*- * Copyright (c) 1992, 1993, 1994 Henry Spencer. @@ -82,11 +82,11 @@ extern "C" { #endif /* === regcomp.c === */ -static void p_ere __P((struct parse *p, int stop)); -static void p_ere_exp __P((struct parse *p)); +static void p_ere __P((struct parse *p, int stop, size_t reclimit)); +static void p_ere_exp __P((struct parse *p, size_t reclimit)); static void p_str __P((struct parse *p)); -static void p_bre __P((struct parse *p, int end1, int end2)); -static int p_simp_re __P((struct parse *p, int starordinary)); +static void p_bre __P((struct parse *p, int end1, int end2, size_t reclimit)); +static int p_simp_re __P((struct parse *p, int starordinary, size_t reclimit)); static int p_count __P((struct parse *p)); static void p_bracket __P((struct parse *p)); static void p_b_term __P((struct parse *p, cset *cs)); @@ -98,7 +98,7 @@ static char othercase __P((int ch)); static void bothcases __P((struct parse *p, int ch)); static void ordinary __P((struct parse *p, int ch)); static void nonnewline __P((struct parse *p)); -static void repeat __P((struct parse *p, sopno start, int from, int to)); +static void repeat __P((struct parse *p, sopno start, int from, int to, size_t reclimit)); static int seterr __P((struct parse *p, int e)); static cset *allocset __P((struct parse *p)); static void freeset __P((struct parse *p, cset *cs)); @@ -122,7 +122,7 @@ static sopno dupl __P((struct parse *p, sopno start, sopno finish)); static void doemit __P((struct parse *p, sop op, size_t opnd)); static void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos)); static void dofwd __P((struct parse *p, sopno pos, sop value)); -static void enlarge __P((struct parse *p, sopno size)); +static int enlarge __P((struct parse *p, sopno size)); static void stripsnug __P((struct parse *p, struct re_guts *g)); static void findmust __P((struct parse *p, struct re_guts *g)); static sopno pluscount __P((struct parse *p, struct re_guts *g)); @@ -170,6 +170,13 @@ static int never = 0; /* for use in asserts; shuts lint up */ #define never 0 /* some s have bugs too */ #endif +#define MEMLIMIT 0x8000000 +#define MEMSIZE(p) \ + ((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \ + (p)->ncsalloc * sizeof(cset) + \ + (p)->ssize * sizeof(sop)) +#define RECLIMIT 256 + /* - regcomp - interface for parser and compilation = extern int regcomp(regex_t *, const RCHAR_T *, int); @@ -258,11 +265,11 @@ regcomp(regex_t *preg, const RCHAR_T *pattern, int cflags) EMIT(OEND, 0); g->firststate = THERE(); if (cflags®_EXTENDED) - p_ere(p, OUT); + p_ere(p, OUT, 0); else if (cflags®_NOSPEC) p_str(p); else - p_bre(p, OUT, OUT); + p_bre(p, OUT, OUT, 0); EMIT(OEND, 0); g->laststate = THERE(); @@ -289,10 +296,10 @@ regcomp(regex_t *preg, const RCHAR_T *pattern, int cflags) /* - p_ere - ERE parser top level, concatenation and alternation - == static void p_ere(register struct parse *p, int stop); + == static void p_ere(register struct parse *p, int stop, size_t reclimit); */ static void -p_ere(register struct parse *p, int stop) +p_ere(register struct parse *p, int stop, size_t reclimit) /* character this ERE should end at */ { @@ -302,11 +309,16 @@ p_ere(register struct parse *p, int stop) register sopno conc; register int first = 1; /* is this the first alternative? */ + if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { + p->error = REG_ESPACE; + return; + } + for (;;) { /* do a bunch of concatenated expressions */ conc = HERE(); while (MORE() && (c = PEEK()) != '|' && c != stop) - p_ere_exp(p); + p_ere_exp(p, reclimit); (void)REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ if (!EAT('|')) @@ -338,7 +350,7 @@ p_ere(register struct parse *p, int stop) == static void p_ere_exp(register struct parse *p); */ static void -p_ere_exp(register struct parse *p) +p_ere_exp(register struct parse *p, size_t reclimit) { register char c; register sopno pos; @@ -360,7 +372,7 @@ p_ere_exp(register struct parse *p) p->pbegin[subno] = HERE(); EMIT(OLPAREN, subno); if (!SEE(')')) - p_ere(p, ')'); + p_ere(p, ')', reclimit); if (subno < NPAREN) { p->pend[subno] = HERE(); assert(p->pend[subno] != 0); @@ -462,7 +474,7 @@ p_ere_exp(register struct parse *p) count2 = INFINITY; } else /* just a single number */ count2 = count; - repeat(p, pos, count, count2); + repeat(p, pos, count, count2, 0); if (!EAT('}')) { /* error heuristics */ while (MORE() && PEEK() != '}') NEXT(); @@ -496,7 +508,7 @@ p_str(register struct parse *p) /* - p_bre - BRE parser top level, anchoring and concatenation == static void p_bre(register struct parse *p, register int end1, \ - == register int end2); + == register int end2, size_t reclimit); * Giving end1 as OUT essentially eliminates the end1/end2 check. * * This implementation is a bit of a kludge, in that a trailing $ is first @@ -506,22 +518,29 @@ p_str(register struct parse *p) * The amount of lookahead needed to avoid this kludge is excessive. */ static void -p_bre(register struct parse *p, register int end1, register int end2) +p_bre(register struct parse *p, register int end1, register int end2, size_t reclimit) /* first terminating character */ /* second terminating character */ { - register sopno start = HERE(); + register sopno start; register int first = 1; /* first subexpression? */ register int wasdollar = 0; + if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { + p->error = REG_ESPACE; + return; + } + + start = HERE(); + if (EAT('^')) { EMIT(OBOL, 0); p->g->iflags |= USEBOL; p->g->nbol++; } while (MORE() && !SEETWO(end1, end2)) { - wasdollar = p_simp_re(p, first); + wasdollar = p_simp_re(p, first, reclimit); first = 0; } if (wasdollar) { /* oops, that was a trailing anchor */ @@ -536,10 +555,10 @@ p_bre(register struct parse *p, register int end1, register int end2) /* - p_simp_re - parse a simple RE, an atom possibly followed by a repetition - == static int p_simp_re(register struct parse *p, int starordinary); + == static int p_simp_re(register struct parse *p, int starordinary, size_t reclimit); */ static int /* was the simple RE an unbackslashed $? */ -p_simp_re(register struct parse *p, int starordinary) +p_simp_re(register struct parse *p, int starordinary, size_t reclimit) /* is a leading * an ordinary character? */ { @@ -571,7 +590,7 @@ p_simp_re(register struct parse *p, int starordinary) EMIT(OLPAREN, subno); /* the MORE here is an error heuristic */ if (MORE() && !SEETWO('\\', ')')) - p_bre(p, '\\', ')'); + p_bre(p, '\\', ')', reclimit); if (subno < NPAREN) { p->pend[subno] = HERE(); assert(p->pend[subno] != 0); @@ -646,7 +665,7 @@ p_simp_re(register struct parse *p, int starordinary) count2 = INFINITY; } else /* just a single number */ count2 = count; - repeat(p, pos, count, count2); + repeat(p, pos, count, count2, reclimit); if (!EATTWO('\\', '}')) { /* error heuristics */ while (MORE() && !SEETWO('\\', '}')) NEXT(); @@ -688,11 +707,15 @@ p_count(register struct parse *p) static void p_bracket(register struct parse *p) { - register cset *cs = allocset(p); + register cset *cs; register int invert = 0; static RCHAR_T bow[] = { '[', ':', '<', ':', ']', ']' }; static RCHAR_T eow[] = { '[', ':', '>', ':', ']', ']' }; + cs = allocset(p); + if (cs == NULL) + return; + /* Dept of Truly Sickening Special-Case Kludges */ if (p->next + 5 < p->end && MEMCMP(p->next, bow, 6) == 0) { EMIT(OBOW, 0); @@ -1005,25 +1028,29 @@ nonnewline(register struct parse *p) /* - repeat - generate code for a bounded repetition, recursively if needed - == static void repeat(register struct parse *p, sopno start, int from, int to); + == static void repeat(register struct parse *p, sopno start, int from, int to, size_t reclimit); */ static void -repeat(register struct parse *p, sopno start, int from, int to) +repeat(register struct parse *p, sopno start, int from, int to, size_t reclimit) /* operand from here to end of strip */ /* repeated from this number */ /* to this number of times (maybe INFINITY) */ { - register sopno finish = HERE(); + register sopno finish; # define N 2 # define INF 3 # define REP(f, t) ((f)*8 + (t)) # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) register sopno copy; - if (p->error != 0) /* head off possible runaway recursion */ + if (reclimit++ > RECLIMIT) + p->error = REG_ESPACE; + if (p->error) return; + finish = HERE(); + assert(from <= to); switch (REP(MAP(from), MAP(to))) { @@ -1035,7 +1062,7 @@ repeat(register struct parse *p, sopno start, int from, int to) case REP(0, INF): /* as x{1,}? */ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ INSERT(OCH_, start); /* offset is wrong... */ - repeat(p, start+1, 1, to); + repeat(p, start+1, 1, to, reclimit); ASTERN(OOR1, start); AHEAD(start); /* ... fix it */ EMIT(OOR2, 0); @@ -1055,7 +1082,7 @@ repeat(register struct parse *p, sopno start, int from, int to) ASTERN(O_CH, THERETHERE()); copy = dupl(p, start+1, finish+1); assert(copy == finish+4); - repeat(p, copy, 1, to-1); + repeat(p, copy, 1, to-1, reclimit); break; case REP(1, INF): /* as x+ */ INSERT(OPLUS_, start); @@ -1063,11 +1090,11 @@ repeat(register struct parse *p, sopno start, int from, int to) break; case REP(N, N): /* as xx{m-1,n-1} */ copy = dupl(p, start, finish); - repeat(p, copy, from-1, to-1); + repeat(p, copy, from-1, to-1, reclimit); break; case REP(N, INF): /* as xx{n-1,INF} */ copy = dupl(p, start, finish); - repeat(p, copy, from-1, to); + repeat(p, copy, from-1, to, reclimit); break; default: /* "can't happen" */ SETERROR(REG_ASSERT); /* just in case */ @@ -1108,6 +1135,8 @@ allocset(register struct parse *p) nc = p->ncsalloc; assert(nc % CHAR_BIT == 0); nbytes = nc / CHAR_BIT * css; + if (MEMSIZE(p) > MEMLIMIT) + goto oomem; if (p->g->sets == NULL) p->g->sets = (cset *)malloc(nc * sizeof(cset)); else @@ -1126,13 +1155,14 @@ allocset(register struct parse *p) (void) memset((char *)p->g->setbits + (nbytes - css), 0, css); else { +oomem: no = 0; SETERROR(REG_ESPACE); /* caller's responsibility not to do set ops */ + return NULL; } } - assert(p->g->sets != NULL); /* xxx */ cs = &p->g->sets[no]; cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); cs->mask = 1 << ((no) % CHAR_BIT); @@ -1419,7 +1449,8 @@ dupl(register struct parse *p, sopno start, sopno finish) assert(finish >= start); if (len == 0) return(ret); - enlarge(p, p->ssize + len); /* this many unexpected additions */ + if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */ + return ret; assert(p->ssize >= p->slen + len); (void) memcpy((char *)(p->strip + p->slen), (char *)(p->strip + start), (size_t)len*sizeof(sop)); @@ -1449,8 +1480,8 @@ doemit(register struct parse *p, sop op, size_t opnd) /* deal with undersized strip */ if (p->slen >= p->ssize) - enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ - assert(p->slen < p->ssize); + if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */ + return; /* finally, it's all reduced to the easy case */ p->strip[p->slen] = op; @@ -1516,30 +1547,35 @@ dofwd(register struct parse *p, register sopno pos, sop value) /* - enlarge - enlarge the strip - == static void enlarge(register struct parse *p, sopno size); + == static int enlarge(register struct parse *p, sopno size); */ -static void +static int enlarge(register struct parse *p, register sopno size) { register sop *sp; register RCHAR_T *dp; + sopno osize; if (p->ssize >= size) - return; + return 1; - sp = (sop *)realloc(p->strip, size*sizeof(sop)); - if (sp == NULL) { - SETERROR(REG_ESPACE); - return; - } + osize = p->ssize; + p->ssize = size; + if (MEMSIZE(p) > MEMLIMIT) + goto oomem; + sp = realloc(p->strip, p->ssize * sizeof(sop)); + if (sp == NULL) + goto oomem; p->strip = sp; - dp = (RCHAR_T *)realloc(p->stripdata, size*sizeof(RCHAR_T)); + dp = realloc(p->stripdata, p->ssize * sizeof(RCHAR_T)); if (dp == NULL) { +oomem: + p->ssize = osize; SETERROR(REG_ESPACE); - return; + return 0; } p->stripdata = dp; - p->ssize = size; + return 1; } /* diff --git a/dist/nvi/regex/regex2.h b/dist/nvi/regex/regex2.h index 50764db38a5d..8fe338876073 100644 --- a/dist/nvi/regex/regex2.h +++ b/dist/nvi/regex/regex2.h @@ -1,4 +1,4 @@ -/* $NetBSD: regex2.h,v 1.3 2011/03/21 14:53:03 tnozaki Exp $ */ +/* $NetBSD: regex2.h,v 1.4 2011/11/19 17:45:11 tnozaki Exp $ */ /*- * Copyright (c) 1992, 1993, 1994 Henry Spencer. @@ -78,7 +78,7 @@ * immediately *preceding* "execution" of that operator. */ typedef char sop; /* strip operator */ -typedef int sopno; +typedef size_t sopno; /* operators meaning operand */ /* (back, fwd are offsets) */ #define OEND (1) /* endmarker - */ @@ -140,8 +140,8 @@ struct re_guts { # define MAGIC2 ((('R'^0200)<<8)|'E') sop *strip; /* malloced area for strip */ RCHAR_T *stripdata; /* malloced area for stripdata */ - int csetsize; /* number of bits in a cset vector */ - int ncsets; /* number of csets in use */ + size_t csetsize; /* number of bits in a cset vector */ + size_t ncsets; /* number of csets in use */ cset *sets; /* -> cset [ncsets] */ uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */ int cflags; /* copy of regcomp() cflags argument */ @@ -152,14 +152,14 @@ struct re_guts { # define USEBOL 01 /* used ^ */ # define USEEOL 02 /* used $ */ # define BAD 04 /* something wrong */ - int nbol; /* number of ^ used */ - int neol; /* number of $ used */ + size_t nbol; /* number of ^ used */ + size_t neol; /* number of $ used */ #if 0 - int ncategories; /* how many character categories */ + size_t ncategories; /* how many character categories */ cat_t *categories; /* ->catspace[-CHAR_MIN] */ #endif RCHAR_T *must; /* match must contain this string */ - int mlen; /* length of must */ + size_t mlen; /* length of must */ size_t nsub; /* copy of re_nsub */ int backrefs; /* does it use back references? */ sopno nplus; /* how deep does it nest +s? */