From 3068e45799327298a3f4c22b03db2aa48e2ab0da Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 27 Aug 2021 12:18:58 -0400 Subject: [PATCH] Handle interaction of regexp's makesearch and MATCHALL more honestly. Second thoughts about commit 824bf7190: we apply makesearch() to an NFA after having determined whether it is a MATCHALL pattern. Prepending ".*" doesn't make it non-MATCHALL, but it does change the maximum possible match length, and makesearch() failed to update that. This has no ill effects given the stylized usage of search NFAs, but it seems like it's better to keep the data structure consistent. In particular, fixing this allows more honest handling of the MATCHALL check in matchuntil(): we can now assert that maxmatchall is infinity, instead of lamely assuming that it should act that way. In passing, improve the code in dump[c]nfa so that infinite maxmatchall is printed as "inf" not a magic number. --- src/backend/regex/regc_nfa.c | 18 ++++++++++++++---- src/backend/regex/regcomp.c | 7 +++++++ src/backend/regex/rege_dfa.c | 8 ++------ 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c index 6d77c59e12..0e93c74287 100644 --- a/src/backend/regex/regc_nfa.c +++ b/src/backend/regex/regc_nfa.c @@ -3602,8 +3602,13 @@ dumpnfa(struct nfa *nfa, if (nfa->flags & HASLACONS) fprintf(f, ", haslacons"); if (nfa->flags & MATCHALL) - fprintf(f, ", minmatchall %d, maxmatchall %d", - nfa->minmatchall, nfa->maxmatchall); + { + fprintf(f, ", minmatchall %d", nfa->minmatchall); + if (nfa->maxmatchall == DUPINF) + fprintf(f, ", maxmatchall inf"); + else + fprintf(f, ", maxmatchall %d", nfa->maxmatchall); + } fprintf(f, "\n"); for (s = nfa->states; s != NULL; s = s->next) { @@ -3766,8 +3771,13 @@ dumpcnfa(struct cnfa *cnfa, if (cnfa->flags & HASLACONS) fprintf(f, ", haslacons"); if (cnfa->flags & MATCHALL) - fprintf(f, ", minmatchall %d, maxmatchall %d", - cnfa->minmatchall, cnfa->maxmatchall); + { + fprintf(f, ", minmatchall %d", cnfa->minmatchall); + if (cnfa->maxmatchall == DUPINF) + fprintf(f, ", maxmatchall inf"); + else + fprintf(f, ", maxmatchall %d", cnfa->maxmatchall); + } fprintf(f, "\n"); for (st = 0; st < cnfa->nstates; st++) dumpcstate(st, cnfa, f); diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index bfe12eb08b..b735fa6eaf 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -598,6 +598,13 @@ makesearch(struct vars *v, /* and ^* and \A* too -- not always necessary, but harmless */ newarc(nfa, PLAIN, nfa->bos[0], pre, pre); newarc(nfa, PLAIN, nfa->bos[1], pre, pre); + + /* + * The pattern is still MATCHALL if it was before, but the max match + * length is now infinity. + */ + if (nfa->flags & MATCHALL) + nfa->maxmatchall = DUPINF; } /* diff --git a/src/backend/regex/rege_dfa.c b/src/backend/regex/rege_dfa.c index 1d79d73446..ba1289c64a 100644 --- a/src/backend/regex/rege_dfa.c +++ b/src/backend/regex/rege_dfa.c @@ -385,14 +385,10 @@ matchuntil(struct vars *v, { size_t nchr = probe - v->start; - /* - * It might seem that we should check maxmatchall too, but the .* at - * the front of the pattern absorbs any extra characters (and it was - * tacked on *after* computing minmatchall/maxmatchall). Thus, we - * should match if there are at least minmatchall characters. - */ if (nchr < d->cnfa->minmatchall) return 0; + /* maxmatchall will always be infinity, cf. makesearch() */ + assert(d->cnfa->maxmatchall == DUPINF); return 1; }