Apply the heuristic proposed by Taral (see pgsql-general archives for
2-Oct-98 or TODO.detail/cnfify) to decide whether we want to reduce WHERE clause to CNF form, DNF form, or neither. This is a HUGE win. The heuristic conditions could probably still use a little tweaking to make sure we don't pick CNF when DNF would be better, or vice versa, but the risk of exponential explosion in cnfify() is gone. I was able to run ten-thousand-AND-subclause queries through the planner in a reasonable amount of time.
This commit is contained in:
parent
b53955f38a
commit
003dd965d2
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.21 2000/01/26 05:56:39 momjian Exp $
|
* $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.22 2000/01/28 03:22:36 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -32,7 +32,8 @@ static Expr *find_ands(Expr *qual);
|
|||||||
static Expr *and_normalize(List *andlist);
|
static Expr *and_normalize(List *andlist);
|
||||||
static Expr *qual_cleanup(Expr *qual);
|
static Expr *qual_cleanup(Expr *qual);
|
||||||
static List *remove_duplicates(List *list);
|
static List *remove_duplicates(List *list);
|
||||||
static int count_bool_nodes(Expr *qual);
|
static void count_bool_nodes(Expr *qual, double *nodes,
|
||||||
|
double *cnfnodes, double *dnfnodes);
|
||||||
|
|
||||||
/*****************************************************************************
|
/*****************************************************************************
|
||||||
*
|
*
|
||||||
@ -84,12 +85,12 @@ static int count_bool_nodes(Expr *qual);
|
|||||||
List *
|
List *
|
||||||
canonicalize_qual(Expr *qual, bool removeAndFlag)
|
canonicalize_qual(Expr *qual, bool removeAndFlag)
|
||||||
{
|
{
|
||||||
Expr *newqual,
|
Expr *newqual;
|
||||||
*cnfqual,
|
double nodes,
|
||||||
*dnfqual;
|
cnfnodes,
|
||||||
int qualcnt,
|
dnfnodes;
|
||||||
cnfcnt,
|
bool cnfok,
|
||||||
dnfcnt;
|
dnfok;
|
||||||
|
|
||||||
if (qual == NULL)
|
if (qual == NULL)
|
||||||
return NIL;
|
return NIL;
|
||||||
@ -98,57 +99,64 @@ canonicalize_qual(Expr *qual, bool removeAndFlag)
|
|||||||
* This improvement is always worthwhile, so do it unconditionally.
|
* This improvement is always worthwhile, so do it unconditionally.
|
||||||
*/
|
*/
|
||||||
qual = flatten_andors(qual);
|
qual = flatten_andors(qual);
|
||||||
|
|
||||||
/* Push down NOTs. We do this only in the top-level boolean
|
/* Push down NOTs. We do this only in the top-level boolean
|
||||||
* expression, without examining arguments of operators/functions.
|
* expression, without examining arguments of operators/functions.
|
||||||
* Even so, it might not be a win if we are unable to find negators
|
* Even so, it might not be a win if we are unable to find negators
|
||||||
* for all the operators involved; so we keep the flattened-but-not-
|
* for all the operators involved; perhaps we should compare before-
|
||||||
* NOT-pushed qual as the reference point for comparsions.
|
* and-after tree sizes?
|
||||||
*/
|
*/
|
||||||
newqual = find_nots(qual);
|
newqual = find_nots(qual);
|
||||||
/*
|
|
||||||
* Generate both CNF and DNF forms from newqual.
|
|
||||||
*/
|
|
||||||
/* Normalize into conjunctive normal form, and clean up the result. */
|
|
||||||
cnfqual = qual_cleanup(find_ors(newqual));
|
|
||||||
/* Likewise for DNF */
|
|
||||||
dnfqual = qual_cleanup(find_ands(newqual));
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now, choose whether to return qual, cnfqual, or dnfqual.
|
* Choose whether to convert to CNF, or DNF, or leave well enough alone.
|
||||||
*
|
*
|
||||||
* First heuristic is to forget about either CNF or DNF if it shows
|
* We make an approximate estimate of the number of bottom-level nodes
|
||||||
|
* that will appear in the CNF and DNF forms of the query.
|
||||||
|
*/
|
||||||
|
count_bool_nodes(newqual, &nodes, &cnfnodes, &dnfnodes);
|
||||||
|
/*
|
||||||
|
* First heuristic is to forget about *both* normal forms if there are
|
||||||
|
* a huge number of terms in the qual clause. This would only happen
|
||||||
|
* with machine-generated queries, presumably; and most likely such
|
||||||
|
* a query is already in either CNF or DNF.
|
||||||
|
*/
|
||||||
|
cnfok = dnfok = true;
|
||||||
|
if (nodes >= 500.0)
|
||||||
|
cnfok = dnfok = false;
|
||||||
|
/*
|
||||||
|
* Second heuristic is to forget about either CNF or DNF if it shows
|
||||||
* unreasonable growth compared to the original form of the qual,
|
* unreasonable growth compared to the original form of the qual,
|
||||||
* where we define "unreasonable" a tad arbitrarily as 4x more
|
* where we define "unreasonable" a tad arbitrarily as 4x more
|
||||||
* operators.
|
* operators.
|
||||||
*/
|
*/
|
||||||
qualcnt = count_bool_nodes(qual);
|
if (cnfnodes >= 4.0 * nodes)
|
||||||
cnfcnt = count_bool_nodes(cnfqual);
|
cnfok = false;
|
||||||
dnfcnt = count_bool_nodes(dnfqual);
|
if (dnfnodes >= 4.0 * nodes)
|
||||||
if (cnfcnt >= 4 * qualcnt)
|
dnfok = false;
|
||||||
cnfqual = NULL; /* mark CNF not usable */
|
|
||||||
if (dnfcnt >= 4 * qualcnt)
|
|
||||||
dnfqual = NULL; /* mark DNF not usable */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Second heuristic is to prefer DNF if only one relation is mentioned
|
* Third heuristic is to prefer DNF if top level is already an OR,
|
||||||
* and it is smaller than the CNF representation.
|
* and only one relation is mentioned, and DNF is no larger than
|
||||||
|
* the CNF representation. (Pretty shaky; can we improve on this?)
|
||||||
*/
|
*/
|
||||||
if (dnfqual && dnfcnt < cnfcnt && NumRelids((Node *) dnfqual) == 1)
|
if (dnfok && dnfnodes <= cnfnodes && or_clause((Node *) newqual) &&
|
||||||
cnfqual = NULL;
|
NumRelids((Node *) newqual) == 1)
|
||||||
|
cnfok = false;
|
||||||
/*
|
/*
|
||||||
* Otherwise, we prefer CNF.
|
* Otherwise, we prefer CNF.
|
||||||
*
|
*
|
||||||
* XXX obviously, these rules could be improved upon.
|
* XXX obviously, these rules could be improved upon.
|
||||||
*/
|
*/
|
||||||
|
if (cnfok)
|
||||||
/* pick preferred survivor */
|
{
|
||||||
if (cnfqual)
|
/* Normalize into conjunctive normal form, and clean up the result. */
|
||||||
newqual = cnfqual;
|
newqual = qual_cleanup(find_ors(newqual));
|
||||||
else if (dnfqual)
|
}
|
||||||
newqual = dnfqual;
|
else if (dnfok)
|
||||||
else
|
{
|
||||||
newqual = qual;
|
/* Normalize into disjunctive normal form, and clean up the result. */
|
||||||
|
newqual = qual_cleanup(find_ands(newqual));
|
||||||
|
}
|
||||||
|
|
||||||
/* Convert to implicit-AND list if requested */
|
/* Convert to implicit-AND list if requested */
|
||||||
if (removeAndFlag)
|
if (removeAndFlag)
|
||||||
@ -828,27 +836,72 @@ remove_duplicates(List *list)
|
|||||||
/*
|
/*
|
||||||
* count_bool_nodes
|
* count_bool_nodes
|
||||||
* Support for heuristics in canonicalize_qual(): count the
|
* Support for heuristics in canonicalize_qual(): count the
|
||||||
* number of nodes in the top level AND/OR/NOT part of a qual tree
|
* number of nodes that are inputs to the top level AND/OR/NOT
|
||||||
|
* part of a qual tree, and estimate how many nodes will appear
|
||||||
|
* in the CNF'ified or DNF'ified equivalent of the expression.
|
||||||
|
*
|
||||||
|
* This is just an approximate calculation; it doesn't deal with NOTs
|
||||||
|
* very well, and of course it cannot detect possible simplifications
|
||||||
|
* from eliminating duplicate subclauses. The idea is just to cheaply
|
||||||
|
* determine whether CNF will be markedly worse than DNF or vice versa.
|
||||||
|
*
|
||||||
|
* The counts/estimates are represented as doubles to avoid risk of overflow.
|
||||||
*/
|
*/
|
||||||
static int
|
static void
|
||||||
count_bool_nodes(Expr *qual)
|
count_bool_nodes(Expr *qual,
|
||||||
|
double *nodes,
|
||||||
|
double *cnfnodes,
|
||||||
|
double *dnfnodes)
|
||||||
{
|
{
|
||||||
if (qual == NULL)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
if (and_clause((Node *) qual) ||
|
|
||||||
or_clause((Node *) qual))
|
|
||||||
{
|
|
||||||
int sum = 1; /* for the and/or itself */
|
|
||||||
List *temp;
|
List *temp;
|
||||||
|
double subnodes, subcnfnodes, subdnfnodes;
|
||||||
|
|
||||||
|
if (and_clause((Node *) qual))
|
||||||
|
{
|
||||||
|
*nodes = *cnfnodes = 0.0;
|
||||||
|
*dnfnodes = 1.0; /* DNF nodes will be product of sub-counts */
|
||||||
|
|
||||||
foreach(temp, qual->args)
|
foreach(temp, qual->args)
|
||||||
sum += count_bool_nodes(lfirst(temp));
|
{
|
||||||
|
count_bool_nodes(lfirst(temp),
|
||||||
|
&subnodes, &subcnfnodes, &subdnfnodes);
|
||||||
|
*nodes += subnodes;
|
||||||
|
*cnfnodes += subcnfnodes;
|
||||||
|
*dnfnodes *= subdnfnodes;
|
||||||
|
}
|
||||||
|
/* we could get dnfnodes < cnfnodes here, if all the sub-nodes are
|
||||||
|
* simple ones with count 1. Make sure dnfnodes isn't too small.
|
||||||
|
*/
|
||||||
|
if (*dnfnodes < *cnfnodes)
|
||||||
|
*dnfnodes = *cnfnodes;
|
||||||
|
}
|
||||||
|
else if (or_clause((Node *) qual))
|
||||||
|
{
|
||||||
|
*nodes = *dnfnodes = 0.0;
|
||||||
|
*cnfnodes = 1.0; /* CNF nodes will be product of sub-counts */
|
||||||
|
|
||||||
return sum;
|
foreach(temp, qual->args)
|
||||||
|
{
|
||||||
|
count_bool_nodes(lfirst(temp),
|
||||||
|
&subnodes, &subcnfnodes, &subdnfnodes);
|
||||||
|
*nodes += subnodes;
|
||||||
|
*cnfnodes *= subcnfnodes;
|
||||||
|
*dnfnodes += subdnfnodes;
|
||||||
|
}
|
||||||
|
/* we could get cnfnodes < dnfnodes here, if all the sub-nodes are
|
||||||
|
* simple ones with count 1. Make sure cnfnodes isn't too small.
|
||||||
|
*/
|
||||||
|
if (*cnfnodes < *dnfnodes)
|
||||||
|
*cnfnodes = *dnfnodes;
|
||||||
}
|
}
|
||||||
else if (not_clause((Node *) qual))
|
else if (not_clause((Node *) qual))
|
||||||
return count_bool_nodes(get_notclausearg(qual)) + 1;
|
{
|
||||||
|
count_bool_nodes(get_notclausearg(qual),
|
||||||
|
nodes, cnfnodes, dnfnodes);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
return 1; /* anything else counts 1 for my purposes */
|
{
|
||||||
|
/* anything else counts 1 for my purposes */
|
||||||
|
*nodes = *cnfnodes = *dnfnodes = 1.0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user