Fix performance issue in new regex match-all detection code.

Commit 824bf7190 introduced a new search of the NFAs generated by regex compilation. I failed to think hard about the performance characteristics of that search, with the predictable outcome that it's bad: weird regexes can trigger exponential search time. Worse, there's no check-for-interrupt in that code, so you can't even cancel the query if this happens. Fix by introducing memo-ization of the search results, so that any one NFA state need be examined in detail just once. This potentially uses a lot of memory, but we can bound the memory usage by putting a limit on the number of states for which we'll try to prove match-all-ness. That is sane because we already have a limit (DUPINF) on the maximum finite string length that a matchall regex can match; and patterns that involve much more than DUPINF states would probably exceed that limit anyway. Also, rearrange the logic so that we check the basic is-the-graph- all-RAINBOW-arcs property before we start the recursive search to determine path lengths. This will ensure that we fall out quickly whenever the NFA couldn't possibly be matchall. Also stick in a check-for-interrupt, just in case these measures don't completely eliminate the risk of slowness. Discussion: https://postgr.es/m/3483895.1619898362@sss.pgh.pa.us
2021-05-03 11:42:31 -04:00 · 2021-05-03 11:42:31 -04:00 · f68970e33f
commit f68970e33f
parent b94409a02f
4 changed files with 378 additions and 175 deletions
--- a/src/backend/regex/regc_nfa.c
+++ b/src/backend/regex/regc_nfa.c
@ -3032,96 +3032,189 @@ analyze(struct nfa *nfa)
 static void
 checkmatchall(struct nfa *nfa)
 {
-	bool		hasmatch[DUPINF + 1];
-	int			minmatch,
-				maxmatch,
-				morematch;
+	bool	  **haspaths;
+	struct state *s;
+	int			i;

 	/*
-	 * hasmatch[i] will be set true if a match of length i is feasible, for i
-	 * from 0 to DUPINF-1.  hasmatch[DUPINF] will be set true if every match
-	 * length of DUPINF or more is feasible.
+	 * If there are too many states, don't bother trying to detect matchall.
+	 * This limit serves to bound the time and memory we could consume below.
+	 * Note that even if the graph is all-RAINBOW, if there are significantly
+	 * more than DUPINF states then it's likely that there are paths of length
+	 * more than DUPINF, which would force us to fail anyhow.  In practice,
+	 * plausible ways of writing a matchall regex with maximum finite path
+	 * length K tend not to have very many more than K states.
 	 */
-	memset(hasmatch, 0, sizeof(hasmatch));
+	if (nfa->nstates > DUPINF * 2)
+		return;
+
+	/*
+	 * First, scan all the states to verify that only RAINBOW arcs appear,
+	 * plus pseudocolor arcs adjacent to the pre and post states.  This lets
+	 * us quickly eliminate most cases that aren't matchall NFAs.
+	 */
+	for (s = nfa->states; s != NULL; s = s->next)
+	{
+		struct arc *a;
+
+		for (a = s->outs; a != NULL; a = a->outchain)
+		{
+			if (a->type != PLAIN)
+				return;			/* any LACONs make it non-matchall */
+			if (a->co != RAINBOW)
+			{
+				if (nfa->cm->cd[a->co].flags & PSEUDO)
+				{
+					/*
+					 * Pseudocolor arc: verify it's in a valid place (this
+					 * seems quite unlikely to fail, but let's be sure).
+					 */
+					if (s == nfa->pre &&
+						(a->co == nfa->bos[0] || a->co == nfa->bos[1]))
+						 /* okay BOS/BOL arc */ ;
+					else if (a->to == nfa->post &&
+							 (a->co == nfa->eos[0] || a->co == nfa->eos[1]))
+						 /* okay EOS/EOL arc */ ;
+					else
+						return; /* unexpected pseudocolor arc */
+					/* We'll check these arcs some more below. */
+				}
+				else
+					return;		/* any other color makes it non-matchall */
+			}
+		}
+		/* Also, assert that the tmp fields are available for use. */
+		assert(s->tmp == NULL);
+	}
+
+	/*
+	 * The next cheapest check we can make is to verify that the BOS/BOL
+	 * outarcs of the pre state reach the same states as its RAINBOW outarcs.
+	 * If they don't, the NFA expresses some constraints on the character
+	 * before the matched string, making it non-matchall.  Likewise, the
+	 * EOS/EOL inarcs of the post state must match its RAINBOW inarcs.
+	 */
+	if (!check_out_colors_match(nfa->pre, RAINBOW, nfa->bos[0]) ||
+		!check_out_colors_match(nfa->pre, RAINBOW, nfa->bos[1]) ||
+		!check_in_colors_match(nfa->post, RAINBOW, nfa->eos[0]) ||
+		!check_in_colors_match(nfa->post, RAINBOW, nfa->eos[1]))
+		return;
+
+	/*
+	 * Initialize an array of path-length arrays, in which
+	 * checkmatchall_recurse will return per-state results.  This lets us
+	 * memo-ize the recursive search and avoid exponential time consumption.
+	 */
+	haspaths = (bool **) MALLOC(nfa->nstates * sizeof(bool *));
+	if (haspaths == NULL)
+		return;					/* fail quietly */
+	memset(haspaths, 0, nfa->nstates * sizeof(bool *));

 	/*
 	 * Recursively search the graph for all-RAINBOW paths to the "post" state,
-	 * starting at the "pre" state.  The -1 initial depth accounts for the
-	 * fact that transitions out of the "pre" state are not part of the
-	 * matched string.  We likewise don't count the final transition to the
-	 * "post" state as part of the match length.  (But we still insist that
-	 * those transitions have RAINBOW arcs, otherwise there are lookbehind or
-	 * lookahead constraints at the start/end of the pattern.)
+	 * starting at the "pre" state, and computing the lengths of the paths.
+	 * (Given the preceding checks, there should be at least one such path.
+	 * However we could get back a false result anyway, in case there are
+	 * multi-state loops, paths exceeding DUPINF+1 length, or non-algorithmic
+	 * failures such as ENOMEM.)
 	 */
-	if (!checkmatchall_recurse(nfa, nfa->pre, false, -1, hasmatch))
-		return;
+	if (checkmatchall_recurse(nfa, nfa->pre, haspaths))
+	{
+		/* The useful result is the path length array for the pre state */
+		bool	   *haspath = haspaths[nfa->pre->no];
+		int			minmatch,
+					maxmatch,
+					morematch;

-	/*
-	 * We found some all-RAINBOW paths, and not anything that we couldn't
-	 * handle.  Now verify that pseudocolor arcs adjacent to the pre and post
-	 * states match the RAINBOW arcs there.  (We could do this while
-	 * recursing, but it's expensive and unlikely to fail, so do it last.)
-	 */
-	if (!check_out_colors_match(nfa->pre, RAINBOW, nfa->bos[0]) ||
-		!check_out_colors_match(nfa->pre, nfa->bos[0], RAINBOW) ||
-		!check_out_colors_match(nfa->pre, RAINBOW, nfa->bos[1]) ||
-		!check_out_colors_match(nfa->pre, nfa->bos[1], RAINBOW))
-		return;
-	if (!check_in_colors_match(nfa->post, RAINBOW, nfa->eos[0]) ||
-		!check_in_colors_match(nfa->post, nfa->eos[0], RAINBOW) ||
-		!check_in_colors_match(nfa->post, RAINBOW, nfa->eos[1]) ||
-		!check_in_colors_match(nfa->post, nfa->eos[1], RAINBOW))
-		return;
+		assert(haspath != NULL);

-	/*
-	 * hasmatch[] now represents the set of possible match lengths; but we
-	 * want to reduce that to a min and max value, because it doesn't seem
-	 * worth complicating regexec.c to deal with nonconsecutive possible match
-	 * lengths.  Find min and max of first run of lengths, then verify there
-	 * are no nonconsecutive lengths.
-	 */
-	for (minmatch = 0; minmatch <= DUPINF; minmatch++)
-	{
-		if (hasmatch[minmatch])
-			break;
-	}
-	assert(minmatch <= DUPINF); /* else checkmatchall_recurse lied */
-	for (maxmatch = minmatch; maxmatch < DUPINF; maxmatch++)
-	{
-		if (!hasmatch[maxmatch + 1])
-			break;
-	}
-	for (morematch = maxmatch + 1; morematch <= DUPINF; morematch++)
-	{
-		if (hasmatch[morematch])
-			return;				/* fail, there are nonconsecutive lengths */
+		/*
+		 * haspath[] now represents the set of possible path lengths; but we
+		 * want to reduce that to a min and max value, because it doesn't seem
+		 * worth complicating regexec.c to deal with nonconsecutive possible
+		 * match lengths.  Find min and max of first run of lengths, then
+		 * verify there are no nonconsecutive lengths.
+		 */
+		for (minmatch = 0; minmatch <= DUPINF + 1; minmatch++)
+		{
+			if (haspath[minmatch])
+				break;
+		}
+		assert(minmatch <= DUPINF + 1); /* else checkmatchall_recurse lied */
+		for (maxmatch = minmatch; maxmatch < DUPINF + 1; maxmatch++)
+		{
+			if (!haspath[maxmatch + 1])
+				break;
+		}
+		for (morematch = maxmatch + 1; morematch <= DUPINF + 1; morematch++)
+		{
+			if (haspath[morematch])
+			{
+				haspath = NULL; /* fail, there are nonconsecutive lengths */
+				break;
+			}
+		}
+
+		if (haspath != NULL)
+		{
+			/*
+			 * Success, so record the info.  Here we have a fine point: the
+			 * path length from the pre state includes the pre-to-initial
+			 * transition, so it's one more than the actually matched string
+			 * length.  (We avoided counting the final-to-post transition
+			 * within checkmatchall_recurse, but not this one.)  This is why
+			 * checkmatchall_recurse allows one more level of path length than
+			 * might seem necessary.  This decrement also takes care of
+			 * converting checkmatchall_recurse's definition of "infinity" as
+			 * "DUPINF+1" to our normal representation as "DUPINF".
+			 */
+			assert(minmatch > 0);	/* else pre and post states were adjacent */
+			nfa->minmatchall = minmatch - 1;
+			nfa->maxmatchall = maxmatch - 1;
+			nfa->flags |= MATCHALL;
+		}
 	}

-	/* Success, so record the info */
-	nfa->minmatchall = minmatch;
-	nfa->maxmatchall = maxmatch;
-	nfa->flags |= MATCHALL;
+	/* Clean up */
+	for (i = 0; i < nfa->nstates; i++)
+	{
+		if (haspaths[i] != NULL)
+			FREE(haspaths[i]);
+	}
+	FREE(haspaths);
 }

 /*
 * checkmatchall_recurse - recursive search for checkmatchall
 *
- * s is the current state
- * foundloop is true if any predecessor state has a loop-to-self
- * depth is the current recursion depth (starting at -1)
- * hasmatch[] is the output area for recording feasible match lengths
+ * s is the state to be examined in this recursion level.
+ * haspaths[] is an array of per-state exit path length arrays.
 *
- * We return true if there is at least one all-RAINBOW path to the "post"
- * state and no non-matchall paths; otherwise false.  Note we assume that
- * any dead-end paths have already been removed, else we might return
- * false unnecessarily.
+ * We return true if the search was performed successfully, false if
+ * we had to fail because of multi-state loops or other internal reasons.
+ * (Because "dead" states that can't reach the post state have been
+ * eliminated, and we already verified that only RAINBOW and matching
+ * pseudocolor arcs exist, every state should have RAINBOW path(s) to
+ * the post state.  Hence we take a false result from recursive calls
+ * as meaning that we'd better fail altogether, not just that that
+ * particular state can't reach the post state.)
+ *
+ * On success, we store a malloc'd result array in haspaths[s->no],
+ * showing the possible path lengths from s to the post state.
+ * Each state's haspath[] array is of length DUPINF+2.  The entries from
+ * k = 0 to DUPINF are true if there is an all-RAINBOW path of length k
+ * from this state to the string end.  haspath[DUPINF+1] is true if all
+ * path lengths >= DUPINF+1 are possible.  (Situations that cannot be
+ * represented under these rules cause failure.)
+ *
+ * checkmatchall is responsible for eventually freeing the haspath[] arrays.
 */
 static bool
-checkmatchall_recurse(struct nfa *nfa, struct state *s,
-					  bool foundloop, int depth,
-					  bool *hasmatch)
+checkmatchall_recurse(struct nfa *nfa, struct state *s, bool **haspaths)
 {
 	bool		result = false;
+	bool		foundloop = false;
+	bool	   *haspath;
 	struct arc *a;

 	/*
@ -3131,61 +3224,20 @@ checkmatchall_recurse(struct nfa *nfa, struct state *s,
 	if (STACK_TOO_DEEP(nfa->v->re))
 		return false;

-	/*
-	 * Likewise, if we get to a depth too large to represent correctly in
-	 * maxmatchall, fail quietly.
-	 */
-	if (depth >= DUPINF)
-		return false;
-
-	/*
-	 * Scan the outarcs to detect cases we can't handle, and to see if there
-	 * is a loop-to-self here.  We need to know about any such loop before we
-	 * recurse, so it's hard to avoid making two passes over the outarcs.  In
-	 * any case, checking for showstoppers before we recurse is probably best.
-	 */
-	for (a = s->outs; a != NULL; a = a->outchain)
+	/* In case the search takes a long time, check for cancel */
+	if (CANCEL_REQUESTED(nfa->v->re))
 	{
-		if (a->type != PLAIN)
-			return false;		/* any LACONs make it non-matchall */
-		if (a->co != RAINBOW)
-		{
-			if (nfa->cm->cd[a->co].flags & PSEUDO)
-			{
-				/*
-				 * Pseudocolor arc: verify it's in a valid place (this seems
-				 * quite unlikely to fail, but let's be sure).
-				 */
-				if (s == nfa->pre &&
-					(a->co == nfa->bos[0] || a->co == nfa->bos[1]))
-					 /* okay BOS/BOL arc */ ;
-				else if (a->to == nfa->post &&
-						 (a->co == nfa->eos[0] || a->co == nfa->eos[1]))
-					 /* okay EOS/EOL arc */ ;
-				else
-					return false;	/* unexpected pseudocolor arc */
-				/* We'll finish checking these arcs after the recursion */
-				continue;
-			}
-			return false;		/* any other color makes it non-matchall */
-		}
-		if (a->to == s)
-		{
-			/*
-			 * We found a cycle of length 1, so remember that to pass down to
-			 * successor states.  (It doesn't matter if there was also such a
-			 * loop at a predecessor state.)
-			 */
-			foundloop = true;
-		}
-		else if (a->to->tmp)
-		{
-			/* We found a cycle of length > 1, so fail. */
-			return false;
-		}
+		NERR(REG_CANCEL);
+		return false;
 	}

-	/* We need to recurse, so mark state as under consideration */
+	/* Create a haspath array for this state */
+	haspath = (bool *) MALLOC((DUPINF + 2) * sizeof(bool));
+	if (haspath == NULL)
+		return false;			/* again, treat as non-matchall */
+	memset(haspath, 0, (DUPINF + 2) * sizeof(bool));
+
+	/* Mark this state as being visited */
 	assert(s->tmp == NULL);
 	s->tmp = s;

@ -3197,95 +3249,202 @@ checkmatchall_recurse(struct nfa *nfa, struct state *s,
 		{
 			/* We found an all-RAINBOW path to the post state */
 			result = true;
-			/* ... which should not be adjacent to the pre state */
-			if (depth < 0)
-			{
-				NERR(REG_ASSERT);
-				return false;
-			}
-			/* Record potential match lengths */
-			hasmatch[depth] = true;
-			if (foundloop)
-			{
-				/* A predecessor loop makes all larger lengths match, too */
-				int			i;

-				for (i = depth + 1; i <= DUPINF; i++)
-					hasmatch[i] = true;
-			}
+			/*
+			 * Mark this state as being zero steps away from the string end
+			 * (the transition to the post state isn't counted).
+			 */
+			haspath[0] = true;
 		}
-		else if (a->to != s)
+		else if (a->to == s)
 		{
-			/* This is a new path forward; recurse to investigate */
-			result = checkmatchall_recurse(nfa, a->to,
-										   foundloop, depth + 1,
-										   hasmatch);
-			/* Fail if any recursive path fails */
-			if (!result)
+			/* We found a cycle of length 1, which we'll deal with below. */
+			foundloop = true;
+		}
+		else if (a->to->tmp != NULL)
+		{
+			/* It's busy, so we found a cycle of length > 1, so fail. */
+			result = false;
+			break;
+		}
+		else
+		{
+			/* Consider paths forward through this to-state. */
+			bool	   *nexthaspath;
+			int			i;
+
+			/* If to-state was not already visited, recurse */
+			if (haspaths[a->to->no] == NULL)
+			{
+				result = checkmatchall_recurse(nfa, a->to, haspaths);
+				/* Fail if any recursive path fails */
+				if (!result)
+					break;
+			}
+			else
+			{
+				/* The previous visit must have found path(s) to the end */
+				result = true;
+			}
+			assert(a->to->tmp == NULL);
+			nexthaspath = haspaths[a->to->no];
+			assert(nexthaspath != NULL);
+
+			/*
+			 * Now, for every path of length i from a->to to the string end,
+			 * there is a path of length i + 1 from s to the string end.
+			 */
+			if (nexthaspath[DUPINF] != nexthaspath[DUPINF + 1])
+			{
+				/*
+				 * a->to has a path of length exactly DUPINF, but not longer;
+				 * or it has paths of all lengths > DUPINF but not one of
+				 * exactly that length.  In either case, we cannot represent
+				 * the possible path lengths from s correctly, so fail.
+				 */
+				result = false;
 				break;
+			}
+			/* Merge knowledge of these path lengths into what we have */
+			for (i = 0; i < DUPINF; i++)
+				haspath[i + 1] |= nexthaspath[i];
+			/* Infinity + 1 is still infinity */
+			haspath[DUPINF + 1] |= nexthaspath[DUPINF + 1];
 		}
 	}

+	if (result && foundloop)
+	{
+		/*
+		 * If there is a length-1 loop at this state, then find the shortest
+		 * known path length to the end.  The loop means that every larger
+		 * path length is possible, too.  (It doesn't matter whether any of
+		 * the longer lengths were already known possible.)
+		 */
+		int			i;
+
+		for (i = 0; i <= DUPINF; i++)
+		{
+			if (haspath[i])
+				break;
+		}
+		for (i++; i <= DUPINF + 1; i++)
+			haspath[i] = true;
+	}
+
+	/* Report out the completed path length map */
+	assert(s->no < nfa->nstates);
+	assert(haspaths[s->no] == NULL);
+	haspaths[s->no] = haspath;
+
+	/* Mark state no longer busy */
 	s->tmp = NULL;
+
 	return result;
 }

 /*
 * check_out_colors_match - subroutine for checkmatchall
 *
- * Check if every s outarc of color co1 has a matching outarc of color co2.
- * (checkmatchall_recurse already verified that all of the outarcs are PLAIN,
- * so we need not examine arc types here.  Also, since it verified that there
- * are only RAINBOW and pseudocolor arcs, there shouldn't be enough arcs for
- * this brute-force O(N^2) implementation to cause problems.)
+ * Check whether the set of states reachable from s by arcs of color co1
+ * is equivalent to the set reachable by arcs of color co2.
+ * checkmatchall already verified that all of the NFA's arcs are PLAIN,
+ * so we need not examine arc types here.
 */
 static bool
 check_out_colors_match(struct state *s, color co1, color co2)
 {
-	struct arc *a1;
-	struct arc *a2;
+	bool		result = true;
+	struct arc *a;

-	for (a1 = s->outs; a1 != NULL; a1 = a1->outchain)
+	/*
+	 * To do this in linear time, we assume that the NFA contains no duplicate
+	 * arcs.  Run through the out-arcs, marking states reachable by arcs of
+	 * color co1.  Run through again, un-marking states reachable by arcs of
+	 * color co2; if we see a not-marked state, we know this co2 arc is
+	 * unmatched.  Then run through again, checking for still-marked states,
+	 * and in any case leaving all the tmp fields reset to NULL.
+	 */
+	for (a = s->outs; a != NULL; a = a->outchain)
 	{
-		if (a1->co != co1)
-			continue;
-		for (a2 = s->outs; a2 != NULL; a2 = a2->outchain)
+		if (a->co == co1)
 		{
-			if (a2->co == co2 && a2->to == a1->to)
-				break;
+			assert(a->to->tmp == NULL);
+			a->to->tmp = a->to;
 		}
-		if (a2 == NULL)
-			return false;
 	}
-	return true;
+	for (a = s->outs; a != NULL; a = a->outchain)
+	{
+		if (a->co == co2)
+		{
+			if (a->to->tmp != NULL)
+				a->to->tmp = NULL;
+			else
+				result = false; /* unmatched co2 arc */
+		}
+	}
+	for (a = s->outs; a != NULL; a = a->outchain)
+	{
+		if (a->co == co1)
+		{
+			if (a->to->tmp != NULL)
+			{
+				result = false; /* unmatched co1 arc */
+				a->to->tmp = NULL;
+			}
+		}
+	}
+	return result;
 }

 /*
 * check_in_colors_match - subroutine for checkmatchall
 *
- * Check if every s inarc of color co1 has a matching inarc of color co2.
- * (For paranoia's sake, ignore any non-PLAIN arcs here.  But we're still
- * not expecting very many arcs.)
+ * Check whether the set of states that can reach s by arcs of color co1
+ * is equivalent to the set that can reach s by arcs of color co2.
+ * checkmatchall already verified that all of the NFA's arcs are PLAIN,
+ * so we need not examine arc types here.
 */
 static bool
 check_in_colors_match(struct state *s, color co1, color co2)
 {
-	struct arc *a1;
-	struct arc *a2;
+	bool		result = true;
+	struct arc *a;

-	for (a1 = s->ins; a1 != NULL; a1 = a1->inchain)
+	/*
+	 * Identical algorithm to check_out_colors_match, except examine the
+	 * from-states of s' inarcs.
+	 */
+	for (a = s->ins; a != NULL; a = a->inchain)
 	{
-		if (a1->type != PLAIN || a1->co != co1)
-			continue;
-		for (a2 = s->ins; a2 != NULL; a2 = a2->inchain)
+		if (a->co == co1)
 		{
-			if (a2->type == PLAIN && a2->co == co2 && a2->from == a1->from)
-				break;
+			assert(a->from->tmp == NULL);
+			a->from->tmp = a->from;
 		}
-		if (a2 == NULL)
-			return false;
 	}
-	return true;
+	for (a = s->ins; a != NULL; a = a->inchain)
+	{
+		if (a->co == co2)
+		{
+			if (a->from->tmp != NULL)
+				a->from->tmp = NULL;
+			else
+				result = false; /* unmatched co2 arc */
+		}
+	}
+	for (a = s->ins; a != NULL; a = a->inchain)
+	{
+		if (a->co == co1)
+		{
+			if (a->from->tmp != NULL)
+			{
+				result = false; /* unmatched co1 arc */
+				a->from->tmp = NULL;
+			}
+		}
+	}
+	return result;
 }

 /*
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@ -182,8 +182,7 @@ static void markreachable(struct nfa *, struct state *, struct state *, struct s
 static void markcanreach(struct nfa *, struct state *, struct state *, struct state *);
 static long analyze(struct nfa *);
 static void checkmatchall(struct nfa *);
-static bool checkmatchall_recurse(struct nfa *, struct state *,
-								  bool, int, bool *);
+static bool checkmatchall_recurse(struct nfa *, struct state *, bool **);
 static bool check_out_colors_match(struct state *, color, color);
 static bool check_in_colors_match(struct state *, color, color);
 static void compact(struct nfa *, struct cnfa *);
--- a/src/test/regress/expected/regex.out
+++ b/src/test/regress/expected/regex.out
@ -567,6 +567,43 @@ select 'a' ~ '()+\1';
 t
 (1 row)

+-- Add coverage for some cases in checkmatchall
+select regexp_match('xy', '.|...');
+ regexp_match 
+--------------
+ {x}
+(1 row)
+
+select regexp_match('xyz', '.|...');
+ regexp_match 
+--------------
+ {xyz}
+(1 row)
+
+select regexp_match('xy', '.*');
+ regexp_match 
+--------------
+ {xy}
+(1 row)
+
+select regexp_match('fooba', '(?:..)*');
+ regexp_match 
+--------------
+ {foob}
+(1 row)
+
+select regexp_match('xyz', repeat('.', 260));
+ regexp_match 
+--------------
+ 
+(1 row)
+
+select regexp_match('foo', '(?:.|){99}');
+ regexp_match 
+--------------
+ {foo}
+(1 row)
+
 -- Error conditions
 select 'xyz' ~ 'x(\w)(?=\1)';  -- no backrefs in LACONs
 ERROR:  invalid regular expression: invalid backreference number
--- a/src/test/regress/sql/regex.sql
+++ b/src/test/regress/sql/regex.sql
@ -135,6 +135,14 @@ select 'a' ~ '.. ()|\1';
 select 'a' ~ '()*\1';
 select 'a' ~ '()+\1';

+-- Add coverage for some cases in checkmatchall
+select regexp_match('xy', '.|...');
+select regexp_match('xyz', '.|...');
+select regexp_match('xy', '.*');
+select regexp_match('fooba', '(?:..)*');
+select regexp_match('xyz', repeat('.', 260));
+select regexp_match('foo', '(?:.|){99}');
+
 -- Error conditions
 select 'xyz' ~ 'x(\w)(?=\1)';  -- no backrefs in LACONs
 select 'xyz' ~ 'x(\w)(?=(\1))';