diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index 3d7f11af8c..4e160d54b8 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -233,6 +233,13 @@ static int	cmp(const chr *, const chr *, size_t);
 static int	casecmp(const chr *, const chr *, size_t);
 
 
+/* info we need during compilation about a known capturing subexpression */
+struct subinfo
+{
+	struct state *left;			/* left end of its sub-NFA */
+	struct state *right;		/* right end of its sub-NFA */
+};
+
 /* internal variables, bundled for easy passing around */
 struct vars
 {
@@ -245,10 +252,10 @@ struct vars
 	int			nexttype;		/* type of next token */
 	chr			nextvalue;		/* value (if any) of next token */
 	int			lexcon;			/* lexical context type (see regc_lex.c) */
-	int			nsubexp;		/* subexpression count */
-	struct subre **subs;		/* subRE pointer vector */
-	size_t		nsubs;			/* length of vector */
-	struct subre *sub10[10];	/* initial vector, enough for most */
+	int			nsubexp;		/* number of known capturing subexpressions */
+	struct subinfo *subs;		/* info about known capturing subexpressions */
+	size_t		nsubs;			/* allocated length of subs[] vector */
+	struct subinfo sub10[10];	/* initial vector, enough for most */
 	struct nfa *nfa;			/* the NFA */
 	struct colormap *cm;		/* character color map */
 	color		nlcolor;		/* color of newline */
@@ -368,7 +375,7 @@ pg_regcomp(regex_t *re,
 	v->subs = v->sub10;
 	v->nsubs = 10;
 	for (j = 0; j < v->nsubs; j++)
-		v->subs[j] = NULL;
+		v->subs[j].left = v->subs[j].right = NULL;
 	v->nfa = NULL;
 	v->cm = NULL;
 	v->nlcolor = COLORLESS;
@@ -504,13 +511,13 @@ pg_regcomp(regex_t *re,
 }
 
 /*
- * moresubs - enlarge subRE vector
+ * moresubs - enlarge capturing-subexpressions vector
  */
 static void
 moresubs(struct vars *v,
 		 int wanted)			/* want enough room for this one */
 {
-	struct subre **p;
+	struct subinfo *p;
 	size_t		n;
 
 	assert(wanted > 0 && (size_t) wanted >= v->nsubs);
@@ -518,13 +525,13 @@ moresubs(struct vars *v,
 
 	if (v->subs == v->sub10)
 	{
-		p = (struct subre **) MALLOC(n * sizeof(struct subre *));
+		p = (struct subinfo *) MALLOC(n * sizeof(struct subinfo));
 		if (p != NULL)
 			memcpy(VS(p), VS(v->subs),
-				   v->nsubs * sizeof(struct subre *));
+				   v->nsubs * sizeof(struct subinfo));
 	}
 	else
-		p = (struct subre **) REALLOC(v->subs, n * sizeof(struct subre *));
+		p = (struct subinfo *) REALLOC(v->subs, n * sizeof(struct subinfo));
 	if (p == NULL)
 	{
 		ERR(REG_ESPACE);
@@ -532,7 +539,7 @@ moresubs(struct vars *v,
 	}
 	v->subs = p;
 	for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++)
-		*p = NULL;
+		p->left = p->right = NULL;
 	assert(v->nsubs == n);
 	assert((size_t) wanted < v->nsubs);
 }
@@ -969,10 +976,14 @@ parseqatom(struct vars *v,
 			NEXT();
 
 			/*
-			 * Make separate endpoints to ensure we keep this sub-NFA cleanly
-			 * separate from what surrounds it.  We need to be sure that when
-			 * we duplicate the sub-NFA for a backref, we get the right states
-			 * and no others.
+			 * Make separate endpoint states to keep this sub-NFA distinct
+			 * from what surrounds it.  We need to be sure that when we
+			 * duplicate the sub-NFA for a backref, we get the right
+			 * states/arcs and no others.  In particular, letting a backref
+			 * duplicate the sub-NFA from lp to rp would be quite wrong,
+			 * because we may add quantification superstructure around this
+			 * atom below.  (Perhaps we could skip the extra states for
+			 * non-capturing parens, but it seems not worth the trouble.)
 			 */
 			s = newstate(v->nfa);
 			s2 = newstate(v->nfa);
@@ -986,8 +997,10 @@ parseqatom(struct vars *v,
 			NOERRN();
 			if (cap)
 			{
-				assert(v->subs[subno] == NULL);
-				v->subs[subno] = atom;
+				/* save the sub-NFA's endpoints for future backrefs to use */
+				assert(v->subs[subno].left == NULL);
+				v->subs[subno].left = s;
+				v->subs[subno].right = s2;
 				if (atom->capno == 0)
 				{
 					/* normal case: just mark the atom as capturing */
@@ -997,7 +1010,7 @@ parseqatom(struct vars *v,
 				else
 				{
 					/* generate no-op wrapper node to handle "((x))" */
-					t = subre(v, '(', atom->flags | CAP, lp, rp);
+					t = subre(v, '(', atom->flags | CAP, s, s2);
 					NOERRN();
 					t->capno = subno;
 					t->child = atom;
@@ -1009,7 +1022,7 @@ parseqatom(struct vars *v,
 		case BACKREF:			/* the Feature From The Black Lagoon */
 			INSIST(type != LACON, REG_ESUBREG);
 			INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
-			INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG);
+			INSIST(v->subs[v->nextvalue].left != NULL, REG_ESUBREG);
 			NOERRN();
 			assert(v->nextvalue > 0);
 			atom = subre(v, 'b', BACKR, lp, rp);
@@ -1084,7 +1097,7 @@ parseqatom(struct vars *v,
 		if (atom != NULL)
 			freesubre(v, atom);
 		if (atomtype == '(')
-			v->subs[subno] = NULL;
+			v->subs[subno].left = v->subs[subno].right = NULL;
 		delsub(v->nfa, lp, rp);
 		EMPTYARC(lp, rp);
 		return top;
@@ -1177,14 +1190,14 @@ parseqatom(struct vars *v,
 	{
 		assert(atom->begin->nouts == 1);	/* just the EMPTY */
 		delsub(v->nfa, atom->begin, atom->end);
-		assert(v->subs[subno] != NULL);
+		assert(v->subs[subno].left != NULL);
 
 		/*
 		 * And here's why the recursion got postponed: it must wait until the
 		 * skeleton is filled in, because it may hit a backref that wants to
 		 * copy the filled-in skeleton.
 		 */
-		dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,
+		dupnfa(v->nfa, v->subs[subno].left, v->subs[subno].right,
 			   atom->begin, atom->end);
 		NOERRN();