diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c index 4c03ff1403..ecc91a8ede 100644 --- a/src/backend/regex/regc_nfa.c +++ b/src/backend/regex/regc_nfa.c @@ -1513,6 +1513,7 @@ pullback(struct nfa * nfa, struct state *nexts; struct arc *a; struct arc *nexta; + struct state *intermediates; int progress; /* find and pull until there are no more */ @@ -1522,14 +1523,25 @@ pullback(struct nfa * nfa, for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; + intermediates = NULL; for (a = s->outs; a != NULL && !NISERR(); a = nexta) { nexta = a->outchain; if (a->type == '^' || a->type == BEHIND) - if (pull(nfa, a)) + if (pull(nfa, a, &intermediates)) progress = 1; - assert(nexta == NULL || s->no != FREESTATE); } + /* clear tmp fields of intermediate states created here */ + while (intermediates != NULL) + { + struct state *ns = intermediates->tmp; + + intermediates->tmp = NULL; + intermediates = ns; + } + /* if s is now useless, get rid of it */ + if ((s->nins == 0 || s->nouts == 0) && !s->flag) + dropstate(nfa, s); } if (progress && f != NULL) dumpnfa(nfa, f); @@ -1557,13 +1569,26 @@ pullback(struct nfa * nfa, /* * pull - pull a back constraint backward past its source state - * A significant property of this function is that it deletes at most - * one state -- the constraint's from state -- and only if the constraint - * was that state's last outarc. + * + * Returns 1 if successful (which it always is unless the source is the + * start state or we have an internal error), 0 if nothing happened. + * + * A significant property of this function is that it deletes no pre-existing + * states, and no outarcs of the constraint's from state other than the given + * constraint arc. This makes the loops in pullback() safe, at the cost that + * we may leave useless states behind. Therefore, we leave it to pullback() + * to delete such states. + * + * If the from state has multiple back-constraint outarcs, and/or multiple + * compatible constraint inarcs, we only need to create one new intermediate + * state per combination of predecessor and successor states. *intermediates + * points to a list of such intermediate states for this from state (chained + * through their tmp fields). */ -static int /* 0 couldn't, 1 could */ +static int pull(struct nfa * nfa, - struct arc * con) + struct arc * con, + struct state ** intermediates) { struct state *from = con->from; struct state *to = con->to; @@ -1580,7 +1605,11 @@ pull(struct nfa * nfa, return 1; } - /* first, clone from state if necessary to avoid other outarcs */ + /* + * First, clone from state if necessary to avoid other outarcs. This may + * seem wasteful, but it simplifies the logic, and we'll get rid of the + * clone state again at the bottom. + */ if (from->nouts > 1) { s = newstate(nfa); @@ -1589,13 +1618,15 @@ pull(struct nfa * nfa, copyins(nfa, from, s, 1); /* duplicate inarcs */ cparc(nfa, con, s, to); /* move constraint arc */ freearc(nfa, con); + if (NISERR()) + return 0; from = s; con = from->outs; } assert(from->nouts == 1); /* propagate the constraint into the from state's inarcs */ - for (a = from->ins; a != NULL; a = nexta) + for (a = from->ins; a != NULL && !NISERR(); a = nexta) { nexta = a->inchain; switch (combine(con, a)) @@ -1606,13 +1637,23 @@ pull(struct nfa * nfa, case SATISFIED: /* no action needed */ break; case COMPATIBLE: /* swap the two arcs, more or less */ - s = newstate(nfa); - if (NISERR()) - return 0; - cparc(nfa, a, s, to); /* anticipate move */ + /* need an intermediate state, but might have one already */ + for (s = *intermediates; s != NULL; s = s->tmp) + { + assert(s->nins > 0 && s->nouts > 0); + if (s->ins->from == a->from && s->outs->to == to) + break; + } + if (s == NULL) + { + s = newstate(nfa); + if (NISERR()) + return 0; + s->tmp = *intermediates; + *intermediates = s; + } cparc(nfa, con, a->from, s); - if (NISERR()) - return 0; + cparc(nfa, a, s, to); freearc(nfa, a); break; default: @@ -1623,7 +1664,8 @@ pull(struct nfa * nfa, /* remaining inarcs, if any, incorporate the constraint */ moveins(nfa, from, to); - dropstate(nfa, from); /* will free the constraint */ + freearc(nfa, con); + /* from state is now useless, but we leave it to pullback() to clean up */ return 1; } @@ -1638,6 +1680,7 @@ pushfwd(struct nfa * nfa, struct state *nexts; struct arc *a; struct arc *nexta; + struct state *intermediates; int progress; /* find and push until there are no more */ @@ -1647,14 +1690,25 @@ pushfwd(struct nfa * nfa, for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; + intermediates = NULL; for (a = s->ins; a != NULL && !NISERR(); a = nexta) { nexta = a->inchain; if (a->type == '$' || a->type == AHEAD) - if (push(nfa, a)) + if (push(nfa, a, &intermediates)) progress = 1; - assert(nexta == NULL || s->no != FREESTATE); } + /* clear tmp fields of intermediate states created here */ + while (intermediates != NULL) + { + struct state *ns = intermediates->tmp; + + intermediates->tmp = NULL; + intermediates = ns; + } + /* if s is now useless, get rid of it */ + if ((s->nins == 0 || s->nouts == 0) && !s->flag) + dropstate(nfa, s); } if (progress && f != NULL) dumpnfa(nfa, f); @@ -1682,13 +1736,26 @@ pushfwd(struct nfa * nfa, /* * push - push a forward constraint forward past its destination state - * A significant property of this function is that it deletes at most - * one state -- the constraint's to state -- and only if the constraint - * was that state's last inarc. + * + * Returns 1 if successful (which it always is unless the destination is the + * post state or we have an internal error), 0 if nothing happened. + * + * A significant property of this function is that it deletes no pre-existing + * states, and no inarcs of the constraint's to state other than the given + * constraint arc. This makes the loops in pushfwd() safe, at the cost that + * we may leave useless states behind. Therefore, we leave it to pushfwd() + * to delete such states. + * + * If the to state has multiple forward-constraint inarcs, and/or multiple + * compatible constraint outarcs, we only need to create one new intermediate + * state per combination of predecessor and successor states. *intermediates + * points to a list of such intermediate states for this to state (chained + * through their tmp fields). */ -static int /* 0 couldn't, 1 could */ +static int push(struct nfa * nfa, - struct arc * con) + struct arc * con, + struct state ** intermediates) { struct state *from = con->from; struct state *to = con->to; @@ -1705,22 +1772,28 @@ push(struct nfa * nfa, return 1; } - /* first, clone to state if necessary to avoid other inarcs */ + /* + * First, clone to state if necessary to avoid other inarcs. This may + * seem wasteful, but it simplifies the logic, and we'll get rid of the + * clone state again at the bottom. + */ if (to->nins > 1) { s = newstate(nfa); if (NISERR()) return 0; copyouts(nfa, to, s, 1); /* duplicate outarcs */ - cparc(nfa, con, from, s); /* move constraint */ + cparc(nfa, con, from, s); /* move constraint arc */ freearc(nfa, con); + if (NISERR()) + return 0; to = s; con = to->ins; } assert(to->nins == 1); /* propagate the constraint into the to state's outarcs */ - for (a = to->outs; a != NULL; a = nexta) + for (a = to->outs; a != NULL && !NISERR(); a = nexta) { nexta = a->outchain; switch (combine(con, a)) @@ -1731,13 +1804,23 @@ push(struct nfa * nfa, case SATISFIED: /* no action needed */ break; case COMPATIBLE: /* swap the two arcs, more or less */ - s = newstate(nfa); - if (NISERR()) - return 0; - cparc(nfa, con, s, a->to); /* anticipate move */ + /* need an intermediate state, but might have one already */ + for (s = *intermediates; s != NULL; s = s->tmp) + { + assert(s->nins > 0 && s->nouts > 0); + if (s->ins->from == from && s->outs->to == a->to) + break; + } + if (s == NULL) + { + s = newstate(nfa); + if (NISERR()) + return 0; + s->tmp = *intermediates; + *intermediates = s; + } + cparc(nfa, con, s, a->to); cparc(nfa, a, from, s); - if (NISERR()) - return 0; freearc(nfa, a); break; default: @@ -1748,7 +1831,8 @@ push(struct nfa * nfa, /* remaining outarcs, if any, incorporate the constraint */ moveouts(nfa, to, from); - dropstate(nfa, to); /* will free the constraint */ + freearc(nfa, con); + /* to state is now useless, but we leave it to pushfwd() to clean up */ return 1; } diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 91eb77124c..6b031075e7 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -149,9 +149,9 @@ static void cleartraverse(struct nfa *, struct state *); static void specialcolors(struct nfa *); static long optimize(struct nfa *, FILE *); static void pullback(struct nfa *, FILE *); -static int pull(struct nfa *, struct arc *); +static int pull(struct nfa *, struct arc *, struct state **); static void pushfwd(struct nfa *, FILE *); -static int push(struct nfa *, struct arc *); +static int push(struct nfa *, struct arc *, struct state **); #define INCOMPATIBLE 1 /* destroys arc */ #define SATISFIED 2 /* constraint satisfied */