Instead of trying to force WHERE clauses into CNF or DNF normal form,
just look for common clauses that can be pulled out of ORs. Per recent discussion, extracting common clauses seems to be the only really useful effect of normalization, and if we do it explicitly then we can avoid cluttering the qual with partially-redundant duplicated expressions, which was an unpleasant side-effect of the old approach.
This commit is contained in:
parent
7af16b2a25
commit
9888192fb7
@ -200,15 +200,11 @@ planner()
|
||||
do final cleanup after planning.
|
||||
-subquery_planner()
|
||||
pull up subqueries from rangetable, if possible
|
||||
simplify constant expressions
|
||||
canonicalize qual
|
||||
Attempt to reduce WHERE clause to either CNF or DNF canonical form.
|
||||
CNF (top-level-AND) is preferred, since the optimizer can then use
|
||||
any of the AND subclauses to filter tuples; but quals that are in
|
||||
or close to DNF form will suffer exponential expansion if we try to
|
||||
force them to CNF. In pathological cases either transform may expand
|
||||
the qual unreasonably; so we may have to leave it un-normalized,
|
||||
thereby reducing the accuracy of selectivity estimates.
|
||||
Attempt to simplify WHERE clause to the most useful form; this includes
|
||||
flattening nested AND/ORs and detecting clauses that are duplicated in
|
||||
different branches of an OR.
|
||||
simplify constant expressions
|
||||
process sublinks
|
||||
convert Vars of outer query levels into Params
|
||||
--grouping_planner()
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/prep/prepqual.c,v 1.40 2003/12/28 21:57:37 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/prep/prepqual.c,v 1.41 2003/12/30 21:49:19 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -20,6 +20,7 @@
|
||||
#include "optimizer/prep.h"
|
||||
#include "utils/lsyscache.h"
|
||||
|
||||
|
||||
static Node *flatten_andors_mutator(Node *node, void *context);
|
||||
static void flatten_andors_and_walker(FastList *out_list, List *andlist);
|
||||
static void flatten_andors_or_walker(FastList *out_list, List *orlist);
|
||||
@ -29,73 +30,33 @@ static List *pull_ors(List *orlist);
|
||||
static void pull_ors_walker(FastList *out_list, List *orlist);
|
||||
static Expr *find_nots(Expr *qual);
|
||||
static Expr *push_nots(Expr *qual);
|
||||
static Expr *find_ors(Expr *qual);
|
||||
static Expr *or_normalize(List *orlist);
|
||||
static Expr *find_ands(Expr *qual);
|
||||
static Expr *and_normalize(List *andlist);
|
||||
static Expr *qual_cleanup(Expr *qual);
|
||||
static List *remove_duplicates(List *list);
|
||||
static void count_bool_nodes(Expr *qual, double *nodes,
|
||||
double *cnfnodes, double *dnfnodes);
|
||||
|
||||
/*****************************************************************************
|
||||
*
|
||||
* CNF/DNF CONVERSION ROUTINES
|
||||
*
|
||||
* These routines convert an arbitrary boolean expression into
|
||||
* conjunctive normal form or disjunctive normal form.
|
||||
*
|
||||
* Normalization is only carried out in the top AND/OR/NOT portion
|
||||
* of the given tree; we do not attempt to normalize boolean expressions
|
||||
* that may appear as arguments of operators or functions in the tree.
|
||||
*
|
||||
* Query qualifications (WHERE clauses) are ordinarily transformed into
|
||||
* CNF, ie, AND-of-ORs form, because then the optimizer can use any one
|
||||
* of the independent AND clauses as a filtering qualification. However,
|
||||
* quals that are naturally expressed as OR-of-ANDs can suffer an
|
||||
* exponential growth in size in this transformation, so we also consider
|
||||
* converting to DNF (OR-of-ANDs), and we may also leave well enough alone
|
||||
* if both transforms cause unreasonable growth. The OR-of-ANDs format
|
||||
* is useful for indexscan implementation, so we prefer that format when
|
||||
* there is just one relation involved.
|
||||
*
|
||||
* canonicalize_qual() does "smart" conversion to either CNF or DNF, per
|
||||
* the above considerations, while cnfify() and dnfify() simply perform
|
||||
* the demanded transformation. The latter two are presently dead code.
|
||||
*****************************************************************************/
|
||||
static Expr *find_duplicate_ors(Expr *qual);
|
||||
static Expr *process_duplicate_ors(List *orlist);
|
||||
|
||||
|
||||
/*
|
||||
* canonicalize_qual
|
||||
* Convert a qualification to the most useful normalized form.
|
||||
* Convert a qualification expression to the most useful form.
|
||||
*
|
||||
* The name of this routine is a holdover from a time when it would try to
|
||||
* force the expression into canonical AND-of-ORs or OR-of-ANDs form.
|
||||
* Eventually, we recognized that that had more theoretical purity than
|
||||
* actual usefulness, and so now the transformation doesn't involve any
|
||||
* notion of reaching a canonical form.
|
||||
*
|
||||
* Returns the modified qualification.
|
||||
*
|
||||
* XXX This code could be much smarter, at the cost of also being slower,
|
||||
* if we tried to compute selectivities and/or see whether there are
|
||||
* actually indexes to support an indexscan implementation of a DNF qual.
|
||||
* We could even try converting the CNF clauses that mention a single
|
||||
* relation into a single DNF clause to see if that looks cheaper to
|
||||
* implement. For now, though, we just try to avoid doing anything
|
||||
* quite as stupid as unconditionally converting to CNF was...
|
||||
*/
|
||||
Expr *
|
||||
canonicalize_qual(Expr *qual)
|
||||
{
|
||||
Expr *newqual;
|
||||
double nodes,
|
||||
cnfnodes,
|
||||
dnfnodes;
|
||||
bool cnfok,
|
||||
dnfok;
|
||||
|
||||
/* Quick exit for empty qual */
|
||||
if (qual == NULL)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Flatten AND and OR groups throughout the tree. This improvement is
|
||||
* always worthwhile, so do it unconditionally.
|
||||
* Flatten AND and OR groups throughout the expression tree.
|
||||
*/
|
||||
newqual = (Expr *) flatten_andors((Node *) qual);
|
||||
|
||||
@ -108,152 +69,14 @@ canonicalize_qual(Expr *qual)
|
||||
newqual = find_nots(newqual);
|
||||
|
||||
/*
|
||||
* Choose whether to convert to CNF, or DNF, or leave well enough
|
||||
* alone.
|
||||
*
|
||||
* We make an approximate estimate of the number of bottom-level nodes
|
||||
* that will appear in the CNF and DNF forms of the query.
|
||||
* Pull up redundant subclauses in OR-of-AND trees. Again, we do this
|
||||
* only within the top-level AND/OR structure.
|
||||
*/
|
||||
count_bool_nodes(newqual, &nodes, &cnfnodes, &dnfnodes);
|
||||
|
||||
/*
|
||||
* First heuristic is to forget about *both* normal forms if there are
|
||||
* a huge number of terms in the qual clause. This would only happen
|
||||
* with machine-generated queries, presumably; and most likely such a
|
||||
* query is already in either CNF or DNF.
|
||||
*/
|
||||
cnfok = dnfok = true;
|
||||
if (nodes >= 500.0)
|
||||
cnfok = dnfok = false;
|
||||
|
||||
/*
|
||||
* Second heuristic is to forget about either CNF or DNF if it shows
|
||||
* unreasonable growth compared to the original form of the qual,
|
||||
* where we define "unreasonable" a tad arbitrarily as 4x more
|
||||
* operators.
|
||||
*/
|
||||
if (cnfnodes >= 4.0 * nodes)
|
||||
cnfok = false;
|
||||
if (dnfnodes >= 4.0 * nodes)
|
||||
dnfok = false;
|
||||
|
||||
/*
|
||||
* Third heuristic is to prefer DNF if top level is already an OR, and
|
||||
* only one relation is mentioned, and DNF is no larger than the CNF
|
||||
* representation. (Pretty shaky; can we improve on this?)
|
||||
*/
|
||||
if (cnfok && dnfok && dnfnodes <= cnfnodes &&
|
||||
or_clause((Node *) newqual) &&
|
||||
NumRelids((Node *) newqual) == 1)
|
||||
cnfok = false;
|
||||
|
||||
/*
|
||||
* Otherwise, we prefer CNF.
|
||||
*
|
||||
* XXX obviously, these rules could be improved upon.
|
||||
*/
|
||||
if (cnfok)
|
||||
{
|
||||
/*
|
||||
* Normalize into conjunctive normal form, and clean up the
|
||||
* result.
|
||||
*/
|
||||
newqual = qual_cleanup(find_ors(newqual));
|
||||
}
|
||||
else if (dnfok)
|
||||
{
|
||||
/*
|
||||
* Normalize into disjunctive normal form, and clean up the
|
||||
* result.
|
||||
*/
|
||||
newqual = qual_cleanup(find_ands(newqual));
|
||||
}
|
||||
newqual = find_duplicate_ors(newqual);
|
||||
|
||||
return newqual;
|
||||
}
|
||||
|
||||
#ifdef NOT_USED
|
||||
/*
|
||||
* cnfify
|
||||
* Convert a qualification to conjunctive normal form by applying
|
||||
* successive normalizations.
|
||||
*
|
||||
* Returns the modified qualification.
|
||||
*
|
||||
* If 'removeAndFlag' is true then it removes explicit AND at the top level,
|
||||
* producing a list of implicitly-ANDed conditions. Otherwise, a regular
|
||||
* boolean expression is returned. Since most callers pass 'true', we
|
||||
* prefer to declare the result as List *, not Expr *.
|
||||
*/
|
||||
List *
|
||||
cnfify(Expr *qual, bool removeAndFlag)
|
||||
{
|
||||
Expr *newqual;
|
||||
|
||||
if (qual == NULL)
|
||||
return NIL;
|
||||
|
||||
/*
|
||||
* Flatten AND and OR groups throughout the tree. This improvement is
|
||||
* always worthwhile.
|
||||
*/
|
||||
newqual = flatten_andors(qual);
|
||||
|
||||
/*
|
||||
* Push down NOTs. We do this only in the top-level boolean
|
||||
* expression, without examining arguments of operators/functions.
|
||||
*/
|
||||
newqual = find_nots(newqual);
|
||||
/* Normalize into conjunctive normal form. */
|
||||
newqual = find_ors(newqual);
|
||||
/* Clean up the result. */
|
||||
newqual = qual_cleanup(newqual);
|
||||
|
||||
if (removeAndFlag)
|
||||
newqual = (Expr *) make_ands_implicit(newqual);
|
||||
|
||||
return (List *) newqual;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef NOT_USED
|
||||
/*
|
||||
* dnfify
|
||||
* Convert a qualification to disjunctive normal form by applying
|
||||
* successive normalizations.
|
||||
*
|
||||
* Returns the modified qualification.
|
||||
*
|
||||
* We do not offer a 'removeOrFlag' in this case; the usages are
|
||||
* different.
|
||||
*/
|
||||
static Expr *
|
||||
dnfify(Expr *qual)
|
||||
{
|
||||
Expr *newqual;
|
||||
|
||||
if (qual == NULL)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Flatten AND and OR groups throughout the tree. This improvement is
|
||||
* always worthwhile.
|
||||
*/
|
||||
newqual = flatten_andors(qual);
|
||||
|
||||
/*
|
||||
* Push down NOTs. We do this only in the top-level boolean
|
||||
* expression, without examining arguments of operators/functions.
|
||||
*/
|
||||
newqual = find_nots(newqual);
|
||||
/* Normalize into disjunctive normal form. */
|
||||
newqual = find_ands(newqual);
|
||||
/* Clean up the result. */
|
||||
newqual = qual_cleanup(newqual);
|
||||
|
||||
return newqual;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*--------------------
|
||||
* The parser regards AND and OR as purely binary operators, so a qual like
|
||||
@ -271,13 +94,16 @@ dnfify(Expr *qual)
|
||||
*--------------------
|
||||
*/
|
||||
|
||||
/*--------------------
|
||||
/*
|
||||
* flatten_andors
|
||||
* Given an expression tree, simplify nested AND/OR clauses into flat
|
||||
* AND/OR clauses with more arguments. The entire tree is processed.
|
||||
*
|
||||
* Returns the rebuilt expr (note original structure is not touched).
|
||||
*--------------------
|
||||
*
|
||||
* This is exported so that other modules can perform the part of
|
||||
* canonicalize_qual processing that applies to entire trees, rather
|
||||
* than just the top-level boolean expressions.
|
||||
*/
|
||||
Node *
|
||||
flatten_andors(Node *node)
|
||||
@ -413,6 +239,7 @@ pull_ors_walker(FastList *out_list, List *orlist)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* find_nots
|
||||
* Traverse the qualification, looking for NOTs to take care of.
|
||||
@ -469,7 +296,7 @@ push_nots(Expr *qual)
|
||||
* possible? */
|
||||
|
||||
/*
|
||||
* Negate an operator clause if possible: ("NOT" (< A B)) => (> A B)
|
||||
* Negate an operator clause if possible: (NOT (< A B)) => (> A B)
|
||||
* Otherwise, retain the clause as it is (the NOT can't be pushed
|
||||
* down any farther).
|
||||
*/
|
||||
@ -491,8 +318,8 @@ push_nots(Expr *qual)
|
||||
{
|
||||
/*--------------------
|
||||
* Apply DeMorgan's Laws:
|
||||
* ("NOT" ("AND" A B)) => ("OR" ("NOT" A) ("NOT" B))
|
||||
* ("NOT" ("OR" A B)) => ("AND" ("NOT" A) ("NOT" B))
|
||||
* (NOT (AND A B)) => (OR (NOT A) (NOT B))
|
||||
* (NOT (OR A B)) => (AND (NOT A) (NOT B))
|
||||
* i.e., swap AND for OR and negate all the subclauses.
|
||||
*--------------------
|
||||
*/
|
||||
@ -532,145 +359,38 @@ push_nots(Expr *qual)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* find_ors
|
||||
* Given a qualification tree with the NOTs pushed down, convert it
|
||||
* to a tree in CNF by repeatedly applying the rule:
|
||||
* ("OR" A ("AND" B C)) => ("AND" ("OR" A B) ("OR" A C))
|
||||
|
||||
/*--------------------
|
||||
* The following code attempts to apply the inverse OR distributive law:
|
||||
* ((A AND B) OR (A AND C)) => (A AND (B OR C))
|
||||
* That is, locate OR clauses in which every subclause contains an
|
||||
* identical term, and pull out the duplicated terms.
|
||||
*
|
||||
* Note that 'or' clauses will always be turned into 'and' clauses
|
||||
* if they contain any 'and' subclauses. Also, we do not descend
|
||||
* below the top-level AND/OR structure.
|
||||
*
|
||||
* Returns the modified qualification. AND/OR flatness is preserved.
|
||||
* This may seem like a fairly useless activity, but it turns out to be
|
||||
* applicable to many machine-generated queries, and there are also queries
|
||||
* in some of the TPC benchmarks that need it. This was in fact almost the
|
||||
* sole useful side-effect of the old prepqual code that tried to force
|
||||
* the query into canonical AND-of-ORs form: the canonical equivalent of
|
||||
* ((A AND B) OR (A AND C))
|
||||
* is
|
||||
* ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
|
||||
* which the code was able to simplify to
|
||||
* (A AND (A OR C) AND (B OR A) AND (B OR C))
|
||||
* thus successfully extracting the common condition A --- but at the cost
|
||||
* of cluttering the qual with many redundant clauses.
|
||||
*--------------------
|
||||
*/
|
||||
static Expr *
|
||||
find_ors(Expr *qual)
|
||||
{
|
||||
if (qual == NULL)
|
||||
return NULL;
|
||||
|
||||
if (and_clause((Node *) qual))
|
||||
{
|
||||
List *andlist = NIL;
|
||||
List *temp;
|
||||
|
||||
foreach(temp, ((BoolExpr *) qual)->args)
|
||||
andlist = lappend(andlist, find_ors(lfirst(temp)));
|
||||
return make_andclause(pull_ands(andlist));
|
||||
}
|
||||
else if (or_clause((Node *) qual))
|
||||
{
|
||||
List *orlist = NIL;
|
||||
List *temp;
|
||||
|
||||
foreach(temp, ((BoolExpr *) qual)->args)
|
||||
orlist = lappend(orlist, find_ors(lfirst(temp)));
|
||||
return or_normalize(pull_ors(orlist));
|
||||
}
|
||||
else
|
||||
return qual;
|
||||
}
|
||||
|
||||
/*
|
||||
* or_normalize
|
||||
* Given a list of exprs which are 'or'ed together, try to apply
|
||||
* the distributive law
|
||||
* ("OR" A ("AND" B C)) => ("AND" ("OR" A B) ("OR" A C))
|
||||
* to convert the top-level OR clause to a top-level AND clause.
|
||||
* find_duplicate_ors
|
||||
* Given a qualification tree with the NOTs pushed down, search for
|
||||
* OR clauses to which the inverse OR distributive law might apply.
|
||||
* Only the top-level AND/OR structure is searched.
|
||||
*
|
||||
* Returns the resulting expression (could be an AND clause, an OR
|
||||
* clause, or maybe even a single subexpression).
|
||||
* Returns the modified qualification. AND/OR flatness is preserved.
|
||||
*/
|
||||
static Expr *
|
||||
or_normalize(List *orlist)
|
||||
{
|
||||
Expr *distributable = NULL;
|
||||
int num_subclauses = 1;
|
||||
List *andclauses = NIL;
|
||||
List *temp;
|
||||
|
||||
if (orlist == NIL)
|
||||
return NULL; /* probably can't happen */
|
||||
if (lnext(orlist) == NIL)
|
||||
return lfirst(orlist); /* single-expression OR (can this happen?) */
|
||||
|
||||
/*
|
||||
* If we have a choice of AND clauses, pick the one with the most
|
||||
* subclauses. Because we initialized num_subclauses = 1, any AND
|
||||
* clauses with only one arg will be ignored as useless.
|
||||
*/
|
||||
foreach(temp, orlist)
|
||||
{
|
||||
Expr *clause = lfirst(temp);
|
||||
|
||||
if (and_clause((Node *) clause))
|
||||
{
|
||||
int nclauses = length(((BoolExpr *) clause)->args);
|
||||
|
||||
if (nclauses > num_subclauses)
|
||||
{
|
||||
distributable = clause;
|
||||
num_subclauses = nclauses;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* if there's no suitable AND clause, we can't transform the OR */
|
||||
if (!distributable)
|
||||
return make_orclause(orlist);
|
||||
|
||||
/*
|
||||
* Caution: lremove destructively modifies the input orlist. This
|
||||
* should be OK, since or_normalize is only called with freshly
|
||||
* constructed lists that are not referenced elsewhere.
|
||||
*/
|
||||
orlist = lremove(distributable, orlist);
|
||||
|
||||
foreach(temp, ((BoolExpr *) distributable)->args)
|
||||
{
|
||||
Expr *andclause = lfirst(temp);
|
||||
List *neworlist;
|
||||
|
||||
/*
|
||||
* We are going to insert the orlist into multiple places in the
|
||||
* result expression. For most expression types, it'd be OK to
|
||||
* just have multiple links to the same subtree, but this fails
|
||||
* badly for SubLinks (and perhaps other cases?). For safety, we
|
||||
* make a distinct copy for each place the orlist is inserted.
|
||||
*/
|
||||
if (lnext(temp) == NIL)
|
||||
neworlist = orlist; /* can use original tree at the end */
|
||||
else
|
||||
neworlist = copyObject(orlist);
|
||||
|
||||
/*
|
||||
* pull_ors is needed here in case andclause has a top-level OR.
|
||||
* Then we recursively apply or_normalize, since there might be an
|
||||
* AND subclause in the resulting OR-list.
|
||||
*/
|
||||
andclause = or_normalize(pull_ors(lcons(andclause, neworlist)));
|
||||
andclauses = lappend(andclauses, andclause);
|
||||
}
|
||||
|
||||
/* pull_ands is needed in case any sub-or_normalize succeeded */
|
||||
return make_andclause(pull_ands(andclauses));
|
||||
}
|
||||
|
||||
/*
|
||||
* find_ands
|
||||
* Given a qualification tree with the NOTs pushed down, convert it
|
||||
* to a tree in DNF by repeatedly applying the rule:
|
||||
* ("AND" A ("OR" B C)) => ("OR" ("AND" A B) ("AND" A C))
|
||||
*
|
||||
* Note that 'and' clauses will always be turned into 'or' clauses
|
||||
* if they contain any 'or' subclauses. Also, we do not descend
|
||||
* below the top-level AND/OR structure.
|
||||
*
|
||||
* Returns the modified qualification. AND/OR flatness is preserved.
|
||||
*/
|
||||
static Expr *
|
||||
find_ands(Expr *qual)
|
||||
find_duplicate_ors(Expr *qual)
|
||||
{
|
||||
if (qual == NULL)
|
||||
return NULL;
|
||||
@ -680,261 +400,195 @@ find_ands(Expr *qual)
|
||||
List *orlist = NIL;
|
||||
List *temp;
|
||||
|
||||
/* Recurse */
|
||||
foreach(temp, ((BoolExpr *) qual)->args)
|
||||
orlist = lappend(orlist, find_ands(lfirst(temp)));
|
||||
return make_orclause(pull_ors(orlist));
|
||||
orlist = lappend(orlist, find_duplicate_ors(lfirst(temp)));
|
||||
/*
|
||||
* Don't need pull_ors() since this routine will never introduce
|
||||
* an OR where there wasn't one before.
|
||||
*/
|
||||
return process_duplicate_ors(orlist);
|
||||
}
|
||||
else if (and_clause((Node *) qual))
|
||||
{
|
||||
List *andlist = NIL;
|
||||
List *temp;
|
||||
|
||||
/* Recurse */
|
||||
foreach(temp, ((BoolExpr *) qual)->args)
|
||||
andlist = lappend(andlist, find_ands(lfirst(temp)));
|
||||
return and_normalize(pull_ands(andlist));
|
||||
andlist = lappend(andlist, find_duplicate_ors(lfirst(temp)));
|
||||
/* Flatten any ANDs introduced just below here */
|
||||
andlist = pull_ands(andlist);
|
||||
/* The AND list can't get shorter, so result is always an AND */
|
||||
return make_andclause(andlist);
|
||||
}
|
||||
else
|
||||
return qual;
|
||||
}
|
||||
|
||||
/*
|
||||
* and_normalize
|
||||
* Given a list of exprs which are 'and'ed together, try to apply
|
||||
* the distributive law
|
||||
* ("AND" A ("OR" B C)) => ("OR" ("AND" A B) ("AND" A C))
|
||||
* to convert the top-level AND clause to a top-level OR clause.
|
||||
* process_duplicate_ors
|
||||
* Given a list of exprs which are ORed together, try to apply
|
||||
* the inverse OR distributive law.
|
||||
*
|
||||
* Returns the resulting expression (could be an AND clause, an OR
|
||||
* clause, or maybe even a single subexpression).
|
||||
*/
|
||||
static Expr *
|
||||
and_normalize(List *andlist)
|
||||
process_duplicate_ors(List *orlist)
|
||||
{
|
||||
Expr *distributable = NULL;
|
||||
int num_subclauses = 1;
|
||||
List *orclauses = NIL;
|
||||
List *reference = NIL;
|
||||
int num_subclauses = 0;
|
||||
List *winners;
|
||||
List *neworlist;
|
||||
List *temp;
|
||||
List *temp2;
|
||||
|
||||
if (andlist == NIL)
|
||||
if (orlist == NIL)
|
||||
return NULL; /* probably can't happen */
|
||||
if (lnext(andlist) == NIL)
|
||||
return lfirst(andlist); /* single-expression AND (can this
|
||||
* happen?) */
|
||||
if (lnext(orlist) == NIL)
|
||||
return lfirst(orlist); /* single-expression OR (can this happen?) */
|
||||
|
||||
/*
|
||||
* If we have a choice of OR clauses, pick the one with the most
|
||||
* subclauses. Because we initialized num_subclauses = 1, any OR
|
||||
* clauses with only one arg will be ignored as useless.
|
||||
* Choose the shortest AND clause as the reference list --- obviously,
|
||||
* any subclause not in this clause isn't in all the clauses.
|
||||
* If we find a clause that's not an AND, we can treat it as a
|
||||
* one-element AND clause, which necessarily wins as shortest.
|
||||
*/
|
||||
foreach(temp, andlist)
|
||||
foreach(temp, orlist)
|
||||
{
|
||||
Expr *clause = lfirst(temp);
|
||||
|
||||
if (or_clause((Node *) clause))
|
||||
if (and_clause((Node *) clause))
|
||||
{
|
||||
int nclauses = length(((BoolExpr *) clause)->args);
|
||||
List *subclauses = ((BoolExpr *) clause)->args;
|
||||
int nclauses = length(subclauses);
|
||||
|
||||
if (nclauses > num_subclauses)
|
||||
if (reference == NIL || nclauses < num_subclauses)
|
||||
{
|
||||
distributable = clause;
|
||||
reference = subclauses;
|
||||
num_subclauses = nclauses;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
reference = makeList1(clause);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Just in case, eliminate any duplicates in the reference list.
|
||||
*/
|
||||
reference = set_union(NIL, reference);
|
||||
|
||||
/*
|
||||
* Check each element of the reference list to see if it's in all the
|
||||
* OR clauses. Build a new list of winning clauses.
|
||||
*/
|
||||
winners = NIL;
|
||||
foreach(temp, reference)
|
||||
{
|
||||
Expr *refclause = lfirst(temp);
|
||||
bool win = true;
|
||||
|
||||
foreach(temp2, orlist)
|
||||
{
|
||||
Expr *clause = lfirst(temp2);
|
||||
|
||||
if (and_clause((Node *) clause))
|
||||
{
|
||||
if (!member(refclause, ((BoolExpr *) clause)->args))
|
||||
{
|
||||
win = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!equal(refclause, clause))
|
||||
{
|
||||
win = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (win)
|
||||
winners = lappend(winners, refclause);
|
||||
}
|
||||
|
||||
/*
|
||||
* If no winners, we can't transform the OR
|
||||
*/
|
||||
if (winners == NIL)
|
||||
return make_orclause(orlist);
|
||||
|
||||
/*
|
||||
* Generate new OR list consisting of the remaining sub-clauses.
|
||||
*
|
||||
* If any clause degenerates to empty, then we have a situation like
|
||||
* (A AND B) OR (A), which can be reduced to just A --- that is, the
|
||||
* additional conditions in other arms of the OR are irrelevant.
|
||||
*
|
||||
* Note that because we use set_difference, any multiple occurrences of
|
||||
* a winning clause in an AND sub-clause will be removed automatically.
|
||||
*/
|
||||
neworlist = NIL;
|
||||
foreach(temp, orlist)
|
||||
{
|
||||
Expr *clause = lfirst(temp);
|
||||
|
||||
if (and_clause((Node *) clause))
|
||||
{
|
||||
List *subclauses = ((BoolExpr *) clause)->args;
|
||||
|
||||
subclauses = set_difference(subclauses, winners);
|
||||
if (subclauses != NIL)
|
||||
{
|
||||
if (lnext(subclauses) == NIL)
|
||||
neworlist = lappend(neworlist, lfirst(subclauses));
|
||||
else
|
||||
neworlist = lappend(neworlist, make_andclause(subclauses));
|
||||
}
|
||||
else
|
||||
{
|
||||
neworlist = NIL; /* degenerate case, see above */
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!member(clause, winners))
|
||||
neworlist = lappend(neworlist, clause);
|
||||
else
|
||||
{
|
||||
neworlist = NIL; /* degenerate case, see above */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* if there's no suitable OR clause, we can't transform the AND */
|
||||
if (!distributable)
|
||||
return make_andclause(andlist);
|
||||
/*
|
||||
* Append reduced OR to the winners list, if it's not degenerate, handling
|
||||
* the special case of one element correctly (can that really happen?).
|
||||
* Also be careful to maintain AND/OR flatness in case we pulled up a
|
||||
* sub-sub-OR-clause.
|
||||
*/
|
||||
if (neworlist != NIL)
|
||||
{
|
||||
if (lnext(neworlist) == NIL)
|
||||
winners = lappend(winners, lfirst(neworlist));
|
||||
else
|
||||
winners = lappend(winners, make_orclause(pull_ors(neworlist)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Caution: lremove destructively modifies the input andlist. This
|
||||
* should be OK, since and_normalize is only called with freshly
|
||||
* constructed lists that are not referenced elsewhere.
|
||||
* And return the constructed AND clause, again being wary of a single
|
||||
* element and AND/OR flatness.
|
||||
*/
|
||||
andlist = lremove(distributable, andlist);
|
||||
|
||||
foreach(temp, ((BoolExpr *) distributable)->args)
|
||||
{
|
||||
Expr *orclause = lfirst(temp);
|
||||
List *newandlist;
|
||||
|
||||
/*
|
||||
* We are going to insert the andlist into multiple places in the
|
||||
* result expression. For most expression types, it'd be OK to
|
||||
* just have multiple links to the same subtree, but this fails
|
||||
* badly for SubLinks (and perhaps other cases?). For safety, we
|
||||
* make a distinct copy for each place the andlist is inserted.
|
||||
*/
|
||||
if (lnext(temp) == NIL)
|
||||
newandlist = andlist; /* can use original tree at the
|
||||
* end */
|
||||
else
|
||||
newandlist = copyObject(andlist);
|
||||
|
||||
/*
|
||||
* pull_ands is needed here in case orclause has a top-level AND.
|
||||
* Then we recursively apply and_normalize, since there might be
|
||||
* an OR subclause in the resulting AND-list.
|
||||
*/
|
||||
orclause = and_normalize(pull_ands(lcons(orclause, newandlist)));
|
||||
orclauses = lappend(orclauses, orclause);
|
||||
}
|
||||
|
||||
/* pull_ors is needed in case any sub-and_normalize succeeded */
|
||||
return make_orclause(pull_ors(orclauses));
|
||||
}
|
||||
|
||||
/*
|
||||
* qual_cleanup
|
||||
* Fix up a qualification by removing duplicate entries (which could be
|
||||
* created during normalization, if identical subexpressions from different
|
||||
* parts of the tree are brought together). Also, check for AND and OR
|
||||
* clauses with only one remaining subexpression, and simplify.
|
||||
*
|
||||
* Returns the modified qualification.
|
||||
*/
|
||||
static Expr *
|
||||
qual_cleanup(Expr *qual)
|
||||
{
|
||||
if (qual == NULL)
|
||||
return NULL;
|
||||
|
||||
if (and_clause((Node *) qual))
|
||||
{
|
||||
List *andlist = NIL;
|
||||
List *temp;
|
||||
|
||||
foreach(temp, ((BoolExpr *) qual)->args)
|
||||
andlist = lappend(andlist, qual_cleanup(lfirst(temp)));
|
||||
|
||||
andlist = remove_duplicates(pull_ands(andlist));
|
||||
|
||||
if (length(andlist) > 1)
|
||||
return make_andclause(andlist);
|
||||
else
|
||||
return lfirst(andlist);
|
||||
}
|
||||
else if (or_clause((Node *) qual))
|
||||
{
|
||||
List *orlist = NIL;
|
||||
List *temp;
|
||||
|
||||
foreach(temp, ((BoolExpr *) qual)->args)
|
||||
orlist = lappend(orlist, qual_cleanup(lfirst(temp)));
|
||||
|
||||
orlist = remove_duplicates(pull_ors(orlist));
|
||||
|
||||
if (length(orlist) > 1)
|
||||
return make_orclause(orlist);
|
||||
else
|
||||
return lfirst(orlist);
|
||||
}
|
||||
if (lnext(winners) == NIL)
|
||||
return (Expr *) lfirst(winners);
|
||||
else
|
||||
return qual;
|
||||
}
|
||||
|
||||
/*
|
||||
* remove_duplicates
|
||||
*/
|
||||
static List *
|
||||
remove_duplicates(List *list)
|
||||
{
|
||||
List *result = NIL;
|
||||
List *i;
|
||||
|
||||
if (length(list) <= 1)
|
||||
return list;
|
||||
|
||||
foreach(i, list)
|
||||
{
|
||||
if (!member(lfirst(i), result))
|
||||
result = lappend(result, lfirst(i));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* count_bool_nodes
|
||||
* Support for heuristics in canonicalize_qual(): count the
|
||||
* number of nodes that are inputs to the top level AND/OR
|
||||
* part of a qual tree, and estimate how many nodes will appear
|
||||
* in the CNF'ified or DNF'ified equivalent of the expression.
|
||||
*
|
||||
* This is just an approximate calculation; it cannot detect possible
|
||||
* simplifications from eliminating duplicate subclauses. The idea is just to
|
||||
* cheaply determine whether CNF will be markedly worse than DNF or vice
|
||||
* versa.
|
||||
*
|
||||
* The counts/estimates are represented as doubles to avoid risk of overflow.
|
||||
*/
|
||||
static void
|
||||
count_bool_nodes(Expr *qual,
|
||||
double *nodes,
|
||||
double *cnfnodes,
|
||||
double *dnfnodes)
|
||||
{
|
||||
List *temp;
|
||||
double subnodes,
|
||||
subcnfnodes,
|
||||
subdnfnodes;
|
||||
|
||||
if (and_clause((Node *) qual))
|
||||
{
|
||||
*nodes = *cnfnodes = 0.0;
|
||||
*dnfnodes = 1.0; /* DNF nodes will be product of sub-counts */
|
||||
|
||||
foreach(temp, ((BoolExpr *) qual)->args)
|
||||
{
|
||||
count_bool_nodes(lfirst(temp),
|
||||
&subnodes, &subcnfnodes, &subdnfnodes);
|
||||
*nodes += subnodes;
|
||||
*cnfnodes += subcnfnodes;
|
||||
*dnfnodes *= subdnfnodes;
|
||||
}
|
||||
|
||||
/*
|
||||
* we could get dnfnodes < cnfnodes here, if all the sub-nodes are
|
||||
* simple ones with count 1. Make sure dnfnodes isn't too small.
|
||||
*/
|
||||
if (*dnfnodes < *cnfnodes)
|
||||
*dnfnodes = *cnfnodes;
|
||||
}
|
||||
else if (or_clause((Node *) qual))
|
||||
{
|
||||
*nodes = *dnfnodes = 0.0;
|
||||
*cnfnodes = 1.0; /* CNF nodes will be product of sub-counts */
|
||||
|
||||
foreach(temp, ((BoolExpr *) qual)->args)
|
||||
{
|
||||
count_bool_nodes(lfirst(temp),
|
||||
&subnodes, &subcnfnodes, &subdnfnodes);
|
||||
*nodes += subnodes;
|
||||
*cnfnodes *= subcnfnodes;
|
||||
*dnfnodes += subdnfnodes;
|
||||
}
|
||||
|
||||
/*
|
||||
* we could get cnfnodes < dnfnodes here, if all the sub-nodes are
|
||||
* simple ones with count 1. Make sure cnfnodes isn't too small.
|
||||
*/
|
||||
if (*cnfnodes < *dnfnodes)
|
||||
*cnfnodes = *dnfnodes;
|
||||
}
|
||||
else if (contain_subplans((Node *) qual))
|
||||
{
|
||||
/*
|
||||
* charge extra for subexpressions containing sub-SELECTs, to
|
||||
* discourage us from rearranging them in a way that might
|
||||
* generate N copies of a subselect rather than one. The magic
|
||||
* constant here interacts with the "4x maximum growth" heuristic
|
||||
* in canonicalize_qual().
|
||||
*/
|
||||
*nodes = 1.0;
|
||||
*cnfnodes = *dnfnodes = 25.0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* anything else counts 1 for my purposes */
|
||||
*nodes = *cnfnodes = *dnfnodes = 1.0;
|
||||
}
|
||||
return make_andclause(pull_ands(winners));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user