diff --git a/src/backend/regex/regc_color.c b/src/backend/regex/regc_color.c index 30bda0e5ad..8ae788f519 100644 --- a/src/backend/regex/regc_color.c +++ b/src/backend/regex/regc_color.c @@ -1075,9 +1075,19 @@ colorcomplement(struct nfa *nfa, assert(of != from); - /* A RAINBOW arc matches all colors, making the complement empty */ + /* + * A RAINBOW arc matches all colors, making the complement empty. But we + * can't just return without making any arcs, because that would leave the + * NFA disconnected which would break any future delsub(). Instead, make + * a CANTMATCH arc. Also set the HASCANTMATCH flag so we know we need to + * clean that up at the start of NFA optimization. + */ if (findarc(of, PLAIN, RAINBOW) != NULL) + { + newarc(nfa, CANTMATCH, 0, from, to); + nfa->flags |= HASCANTMATCH; return; + } /* Otherwise, transiently mark the colors that appear in of's out-arcs */ for (a = of->outs; a != NULL; a = a->outchain) @@ -1089,6 +1099,12 @@ colorcomplement(struct nfa *nfa, assert(!UNUSEDCOLOR(cd)); cd->flags |= COLMARK; } + + /* + * There's no syntax for re-complementing a color set, so we cannot + * see CANTMATCH arcs here. + */ + assert(a->type != CANTMATCH); } /* Scan colors, clear transient marks, add arcs for unmarked colors */ diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c index f1819a24f6..acd2286def 100644 --- a/src/backend/regex/regc_nfa.c +++ b/src/backend/regex/regc_nfa.c @@ -1462,6 +1462,7 @@ removetraverse(struct nfa *nfa, { case PLAIN: case EMPTY: + case CANTMATCH: /* nothing to do */ break; case AHEAD: @@ -1599,6 +1600,12 @@ optimize(struct nfa *nfa, if (verbose) fprintf(f, "\ninitial cleanup:\n"); #endif + /* If we have any CANTMATCH arcs, drop them; but this is uncommon */ + if (nfa->flags & HASCANTMATCH) + { + removecantmatch(nfa); + nfa->flags &= ~HASCANTMATCH; + } cleanup(nfa); /* may simplify situation */ #ifdef REG_DEBUG if (verbose) @@ -2922,6 +2929,34 @@ clonesuccessorstates(struct nfa *nfa, } } +/* + * removecantmatch - remove CANTMATCH arcs, which are no longer useful + * once we are done with the parsing phase. (We need them only to + * preserve connectedness of NFA subgraphs during parsing.) + */ +static void +removecantmatch(struct nfa *nfa) +{ + struct state *s; + + for (s = nfa->states; s != NULL; s = s->next) + { + struct arc *a; + struct arc *nexta; + + for (a = s->outs; a != NULL; a = nexta) + { + nexta = a->outchain; + if (a->type == CANTMATCH) + { + freearc(nfa, a); + if (NISERR()) + return; + } + } + } +} + /* * cleanup - clean up NFA after optimizations */ @@ -3627,6 +3662,8 @@ dumpnfa(struct nfa *nfa, fprintf(f, ", eol [%ld]", (long) nfa->eos[1]); if (nfa->flags & HASLACONS) fprintf(f, ", haslacons"); + if (nfa->flags & HASCANTMATCH) + fprintf(f, ", hascantmatch"); if (nfa->flags & MATCHALL) { fprintf(f, ", minmatchall %d", nfa->minmatchall); @@ -3749,6 +3786,9 @@ dumparc(struct arc *a, break; case EMPTY: break; + case CANTMATCH: + fprintf(f, "X"); + break; default: fprintf(f, "0x%x/0%lo", a->type, (long) a->co); break; diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 8a6cfb2973..15b264e50f 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -215,6 +215,7 @@ static void clonesuccessorstates(struct nfa *nfa, struct state *ssource, struct state *spredecessor, struct arc *refarc, char *curdonemap, char *outerdonemap, int nstates); +static void removecantmatch(struct nfa *nfa); static void cleanup(struct nfa *nfa); static void markreachable(struct nfa *nfa, struct state *s, struct state *okay, struct state *mark); @@ -342,6 +343,7 @@ struct vars #define BEHIND 'r' /* color-lookbehind arc */ #define WBDRY 'w' /* word boundary constraint */ #define NWBDRY 'W' /* non-word-boundary constraint */ +#define CANTMATCH 'x' /* arc that cannot match anything */ #define SBEGIN 'A' /* beginning of string (even if not BOL) */ #define SEND 'Z' /* end of string (even if not EOL) */ @@ -2368,6 +2370,7 @@ nfanode(struct vars *v, nfa = newnfa(v, v->cm, v->nfa); NOERRZ(); dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final); + nfa->flags = v->nfa->flags; if (!ISERR()) specialcolors(nfa); if (!ISERR()) diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h index 3ca3647e11..fd69299a16 100644 --- a/src/include/regex/regguts.h +++ b/src/include/regex/regguts.h @@ -410,6 +410,8 @@ struct cnfa int flags; /* bitmask of the following flags: */ #define HASLACONS 01 /* uses lookaround constraints */ #define MATCHALL 02 /* matches all strings of a range of lengths */ +#define HASCANTMATCH 04 /* contains CANTMATCH arcs */ + /* Note: HASCANTMATCH appears in nfa structs' flags, but never in cnfas */ int pre; /* setup state number */ int post; /* teardown state number */ color bos[2]; /* colors, if any, assigned to BOS and BOL */ diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out index 731ba506d3..c44c717edf 100644 --- a/src/test/modules/test_regex/expected/test_regex.out +++ b/src/test/modules/test_regex/expected/test_regex.out @@ -2071,6 +2071,20 @@ select * from test_regex('[\s\S]*', '012 3456789abc_*', 'LNPE'); {"012 3456789abc_*"} (2 rows) +-- bug #18708: +select * from test_regex('(?:[^\d\D]){0}', '0123456789abc*', 'LNPQE'); + test_regex +-------------------------------------------------------------------- + {0,REG_UBOUNDS,REG_UBBS,REG_UNONPOSIX,REG_ULOCALE,REG_UEMPTYMATCH} + {""} +(2 rows) + +select * from test_regex('[^\d\D]', '0123456789abc*', 'ILPE'); + test_regex +-------------------------------------------------------- + {0,REG_UBBS,REG_UNONPOSIX,REG_ULOCALE,REG_UIMPOSSIBLE} +(1 row) + -- check char classes' handling of newlines select * from test_regex('\s+', E'abc \n def', 'LP'); test_regex diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql index 478fa2c547..b2a847577e 100644 --- a/src/test/modules/test_regex/sql/test_regex.sql +++ b/src/test/modules/test_regex/sql/test_regex.sql @@ -619,6 +619,9 @@ select * from test_regex('[^1\D0]', 'abc0123456789*', 'LPE'); select * from test_regex('\W', '0123456789abc_*', 'LP'); select * from test_regex('[\W]', '0123456789abc_*', 'LPE'); select * from test_regex('[\s\S]*', '012 3456789abc_*', 'LNPE'); +-- bug #18708: +select * from test_regex('(?:[^\d\D]){0}', '0123456789abc*', 'LNPQE'); +select * from test_regex('[^\d\D]', '0123456789abc*', 'ILPE'); -- check char classes' handling of newlines select * from test_regex('\s+', E'abc \n def', 'LP');