diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 9f71177d31..3d7f11af8c 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -43,7 +43,7 @@ static int freev(struct vars *, int); static void makesearch(struct vars *, struct nfa *); static struct subre *parse(struct vars *, int, int, struct state *, struct state *); static struct subre *parsebranch(struct vars *, int, int, struct state *, struct state *, int); -static void parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *); +static struct subre *parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *); static void nonword(struct vars *, int, struct state *, struct state *); static void word(struct vars *, int, struct state *, struct state *); static void charclass(struct vars *, enum char_classes, @@ -756,7 +756,7 @@ parsebranch(struct vars *v, seencontent = 1; /* NB, recursion in parseqatom() may swallow rest of branch */ - parseqatom(v, stopper, type, lp, right, t); + t = parseqatom(v, stopper, type, lp, right, t); NOERRN(); } @@ -777,8 +777,12 @@ parsebranch(struct vars *v, * The bookkeeping near the end cooperates very closely with parsebranch(); * in particular, it contains a recursion that can involve parsing the rest * of the branch, making this function's name somewhat inaccurate. + * + * Usually, the return value is just "top", but in some cases where we + * have parsed the rest of the branch, we may deem "top" redundant and + * free it, returning some child subre instead. */ -static void +static struct subre * parseqatom(struct vars *v, int stopper, /* EOS or ')' */ int type, /* LACON (lookaround subRE) or PLAIN */ @@ -818,84 +822,84 @@ parseqatom(struct vars *v, if (v->cflags & REG_NLANCH) ARCV(BEHIND, v->nlcolor); NEXT(); - return; + return top; break; case '$': ARCV('$', 1); if (v->cflags & REG_NLANCH) ARCV(AHEAD, v->nlcolor); NEXT(); - return; + return top; break; case SBEGIN: ARCV('^', 1); /* BOL */ ARCV('^', 0); /* or BOS */ NEXT(); - return; + return top; break; case SEND: ARCV('$', 1); /* EOL */ ARCV('$', 0); /* or EOS */ NEXT(); - return; + return top; break; case '<': wordchrs(v); s = newstate(v->nfa); - NOERR(); + NOERRN(); nonword(v, BEHIND, lp, s); word(v, AHEAD, s, rp); NEXT(); - return; + return top; break; case '>': wordchrs(v); s = newstate(v->nfa); - NOERR(); + NOERRN(); word(v, BEHIND, lp, s); nonword(v, AHEAD, s, rp); NEXT(); - return; + return top; break; case WBDRY: wordchrs(v); s = newstate(v->nfa); - NOERR(); + NOERRN(); nonword(v, BEHIND, lp, s); word(v, AHEAD, s, rp); s = newstate(v->nfa); - NOERR(); + NOERRN(); word(v, BEHIND, lp, s); nonword(v, AHEAD, s, rp); NEXT(); - return; + return top; break; case NWBDRY: wordchrs(v); s = newstate(v->nfa); - NOERR(); + NOERRN(); word(v, BEHIND, lp, s); word(v, AHEAD, s, rp); s = newstate(v->nfa); - NOERR(); + NOERRN(); nonword(v, BEHIND, lp, s); nonword(v, AHEAD, s, rp); NEXT(); - return; + return top; break; case LACON: /* lookaround constraint */ latype = v->nextvalue; NEXT(); s = newstate(v->nfa); s2 = newstate(v->nfa); - NOERR(); + NOERRN(); t = parse(v, ')', LACON, s, s2); freesubre(v, t); /* internal structure irrelevant */ - NOERR(); + NOERRN(); assert(SEE(')')); NEXT(); processlacon(v, s, s2, latype, lp, rp); - return; + return top; break; /* then errors, to get them out of the way */ case '*': @@ -903,18 +907,18 @@ parseqatom(struct vars *v, case '?': case '{': ERR(REG_BADRPT); - return; + return top; break; default: ERR(REG_ASSERT); - return; + return top; break; /* then plain characters, and minor variants on that theme */ case ')': /* unbalanced paren */ if ((v->cflags & REG_ADVANCED) != REG_EXTENDED) { ERR(REG_EPAREN); - return; + return top; } /* legal in EREs due to specification botch */ NOTE(REG_UPBOTCH); @@ -923,7 +927,7 @@ parseqatom(struct vars *v, case PLAIN: onechr(v, v->nextvalue, lp, rp); okcolors(v->nfa, v->cm); - NOERR(); + NOERRN(); NEXT(); break; case '[': @@ -972,14 +976,14 @@ parseqatom(struct vars *v, */ s = newstate(v->nfa); s2 = newstate(v->nfa); - NOERR(); + NOERRN(); EMPTYARC(lp, s); EMPTYARC(s2, rp); - NOERR(); + NOERRN(); atom = parse(v, ')', type, s, s2); assert(SEE(')') || ISERR()); NEXT(); - NOERR(); + NOERRN(); if (cap) { assert(v->subs[subno] == NULL); @@ -994,7 +998,7 @@ parseqatom(struct vars *v, { /* generate no-op wrapper node to handle "((x))" */ t = subre(v, '(', atom->flags | CAP, lp, rp); - NOERR(); + NOERRN(); t->capno = subno; t->child = atom; atom = t; @@ -1006,10 +1010,10 @@ parseqatom(struct vars *v, INSIST(type != LACON, REG_ESUBREG); INSIST(v->nextvalue < v->nsubs, REG_ESUBREG); INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG); - NOERR(); + NOERRN(); assert(v->nextvalue > 0); atom = subre(v, 'b', BACKR, lp, rp); - NOERR(); + NOERRN(); subno = v->nextvalue; atom->backno = subno; EMPTYARC(lp, rp); /* temporarily, so there's something */ @@ -1050,7 +1054,7 @@ parseqatom(struct vars *v, if (m > n) { ERR(REG_BADBR); - return; + return top; } /* {m,n} exercises preference, even if it's {m,m} */ qprefer = (v->nextvalue) ? LONGER : SHORTER; @@ -1064,7 +1068,7 @@ parseqatom(struct vars *v, if (!SEE('}')) { /* catches errors too */ ERR(REG_BADBR); - return; + return top; } NEXT(); break; @@ -1083,7 +1087,7 @@ parseqatom(struct vars *v, v->subs[subno] = NULL; delsub(v->nfa, lp, rp); EMPTYARC(lp, rp); - return; + return top; } /* if not a messy case, avoid hard part */ @@ -1096,7 +1100,7 @@ parseqatom(struct vars *v, if (atom != NULL) freesubre(v, atom); top->flags = f; - return; + return top; } /* @@ -1110,7 +1114,7 @@ parseqatom(struct vars *v, if (atom == NULL) { atom = subre(v, '=', 0, lp, rp); - NOERR(); + NOERRN(); } /*---------- @@ -1131,20 +1135,20 @@ parseqatom(struct vars *v, */ s = newstate(v->nfa); /* first, new endpoints for the atom */ s2 = newstate(v->nfa); - NOERR(); + NOERRN(); moveouts(v->nfa, lp, s); moveins(v->nfa, rp, s2); - NOERR(); + NOERRN(); atom->begin = s; atom->end = s2; s = newstate(v->nfa); /* set up starting state */ - NOERR(); + NOERRN(); EMPTYARC(lp, s); - NOERR(); + NOERRN(); /* break remaining subRE into x{...} and what follows */ t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp); - NOERR(); + NOERRN(); t->child = atom; atomp = &t->child; @@ -1163,7 +1167,7 @@ parseqatom(struct vars *v, */ assert(top->op == '=' && top->child == NULL); top->child = subre(v, '=', top->flags, top->begin, lp); - NOERR(); + NOERRN(); top->op = '.'; top->child->sibling = t; /* top->flags will get updated later */ @@ -1182,11 +1186,11 @@ parseqatom(struct vars *v, */ dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end, atom->begin, atom->end); - NOERR(); + NOERRN(); /* The backref node's NFA should not enforce any constraints */ removeconstraints(v->nfa, atom->begin, atom->end); - NOERR(); + NOERRN(); } /* @@ -1226,7 +1230,7 @@ parseqatom(struct vars *v, repeat(v, atom->begin, atom->end, m, n); f = COMBINE(qprefer, atom->flags); t = subre(v, '=', f, atom->begin, atom->end); - NOERR(); + NOERRN(); freesubre(v, atom); *atomp = t; /* rest of branch can be strung starting from t->end */ @@ -1247,9 +1251,9 @@ parseqatom(struct vars *v, repeat(v, s, atom->begin, m - 1, (n == DUPINF) ? n : n - 1); f = COMBINE(qprefer, atom->flags); t = subre(v, '.', f, s, atom->end); /* prefix and atom */ - NOERR(); + NOERRN(); t->child = subre(v, '=', PREF(f), s, atom->begin); - NOERR(); + NOERRN(); t->child->sibling = atom; *atomp = t; /* rest of branch can be strung starting from atom->end */ @@ -1259,14 +1263,14 @@ parseqatom(struct vars *v, { /* general case: need an iteration node */ s2 = newstate(v->nfa); - NOERR(); + NOERRN(); moveouts(v->nfa, atom->end, s2); - NOERR(); + NOERRN(); dupnfa(v->nfa, atom->begin, atom->end, s, s2); repeat(v, s, s2, m, n); f = COMBINE(qprefer, atom->flags); t = subre(v, '*', f, s, s2); - NOERR(); + NOERRN(); t->min = (short) m; t->max = (short) n; t->child = atom; @@ -1280,7 +1284,7 @@ parseqatom(struct vars *v, { /* parse all the rest of the branch, and insert in t->child->sibling */ t->child->sibling = parsebranch(v, stopper, type, s2, rp, 1); - NOERR(); + NOERRN(); assert(SEE('|') || SEE(stopper) || SEE(EOS)); /* here's the promised update of the flags */ @@ -1299,9 +1303,7 @@ parseqatom(struct vars *v, * * If the messy atom was the first thing in the branch, then * top->child is vacuous and we can get rid of one level of - * concatenation. Since the caller is holding a pointer to the top - * node, we can't remove that node; but we're allowed to change its - * properties. + * concatenation. */ assert(top->child->op == '='); if (top->child->begin == top->child->end) @@ -1351,21 +1353,13 @@ parseqatom(struct vars *v, { assert(!MESSY(top->child->flags)); t = top->child->sibling; - freesubre(v, top->child); - top->op = t->op; - top->flags = t->flags; - top->latype = t->latype; - top->id = t->id; - top->capno = t->capno; - top->backno = t->backno; - top->min = t->min; - top->max = t->max; - top->child = t->child; - top->begin = t->begin; - top->end = t->end; - freesrnode(v, t); + top->child->sibling = NULL; + freesubre(v, top); + top = t; } } + + return top; } /* @@ -2109,7 +2103,9 @@ freesrnode(struct vars *v, /* might be NULL */ if (!NULLCNFA(sr->cnfa)) freecnfa(&sr->cnfa); - sr->flags = 0; + sr->flags = 0; /* in particular, not INUSE */ + sr->child = sr->sibling = NULL; + sr->begin = sr->end = NULL; if (v != NULL && v->treechain != NULL) { diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out index 01d50ec1e3..44da7d2019 100644 --- a/src/test/modules/test_regex/expected/test_regex.out +++ b/src/test/modules/test_regex/expected/test_regex.out @@ -3468,6 +3468,14 @@ select * from test_regex(' TO (([a-z0-9._]+|"([^"]+|"")+")+)', 'asd TO foo', 'M' {" TO foo",foo,o,NULL} (2 rows) +-- expectMatch 21.36 RPQ ((.))(\2){0} xy x x x {} +select * from test_regex('((.))(\2){0}', 'xy', 'RPQ'); + test_regex +-------------------------------------------- + {3,REG_UBACKREF,REG_UBOUNDS,REG_UNONPOSIX} + {x,x,x,NULL} +(2 rows) + -- doing 22 "multicharacter collating elements" -- # again ugh -- MCCEs are not implemented in Postgres, so we skip all these tests diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql index 7f5bc6e418..9224fdfdd3 100644 --- a/src/test/modules/test_regex/sql/test_regex.sql +++ b/src/test/modules/test_regex/sql/test_regex.sql @@ -1009,6 +1009,8 @@ select * from test_regex('(.*).*', 'abc', 'N'); select * from test_regex('(a*)*', 'bc', 'N'); -- expectMatch 21.35 M { TO (([a-z0-9._]+|"([^"]+|"")+")+)} {asd TO foo} { TO foo} foo o {} select * from test_regex(' TO (([a-z0-9._]+|"([^"]+|"")+")+)', 'asd TO foo', 'M'); +-- expectMatch 21.36 RPQ ((.))(\2){0} xy x x x {} +select * from test_regex('((.))(\2){0}', 'xy', 'RPQ'); -- doing 22 "multicharacter collating elements" -- # again ugh