diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 6af480629e..68fb4eda07 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.70 1999/08/21 03:49:00 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.71 1999/09/13 00:17:19 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -48,6 +48,9 @@ static void match_index_orclauses(RelOptInfo *rel, RelOptInfo *index, int indexk int xclass, List *restrictinfo_list); static List *match_index_orclause(RelOptInfo *rel, RelOptInfo *index, int indexkey, int xclass, List *or_clauses, List *other_matching_indices); +static bool match_or_subclause_to_indexkey(RelOptInfo *rel, RelOptInfo *index, + int indexkey, int xclass, + Expr *clause); static List *group_clauses_by_indexkey(RelOptInfo *rel, RelOptInfo *index, int *indexkeys, Oid *classes, List *restrictinfo_list); static List *group_clauses_by_ikey_for_joins(RelOptInfo *rel, RelOptInfo *index, @@ -327,8 +330,8 @@ match_index_orclause(RelOptInfo *rel, { Expr *clause = lfirst(clist); - if (match_clause_to_indexkey(rel, index, indexkey, xclass, - clause, false)) + if (match_or_subclause_to_indexkey(rel, index, indexkey, xclass, + clause)) { /* OK to add this index to sublist for this subclause */ lfirst(matching_indices) = lcons(index, @@ -341,6 +344,38 @@ match_index_orclause(RelOptInfo *rel, return index_list; } +/* + * See if a subclause of an OR clause matches an index. + * + * We accept the subclause if it is an operator clause that matches the + * index, or if it is an AND clause all of whose members are operators + * that match the index. (XXX Would accepting a single match be useful?) + */ +static bool +match_or_subclause_to_indexkey(RelOptInfo *rel, + RelOptInfo *index, + int indexkey, + int xclass, + Expr *clause) +{ + if (and_clause((Node *) clause)) + { + List *item; + + foreach(item, clause->args) + { + if (! match_clause_to_indexkey(rel, index, indexkey, xclass, + lfirst(item), false)) + return false; + } + return true; + } + else + return match_clause_to_indexkey(rel, index, indexkey, xclass, + clause, false); +} + + /**************************************************************************** * ---- ROUTINES TO CHECK RESTRICTIONS ---- ****************************************************************************/ @@ -559,7 +594,8 @@ group_clauses_by_ikey_for_joins(RelOptInfo *rel, * * Returns true if the clause can be used with this index key. * - * NOTE: returns false if clause is an or_clause; that's handled elsewhere. + * NOTE: returns false if clause is an OR or AND clause; to the extent + * we cope with those at all, it is done by higher-level routines. */ static bool match_clause_to_indexkey(RelOptInfo *rel, diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 995d2865b0..cab9efc152 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.43 1999/08/22 23:56:45 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.44 1999/09/13 00:17:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -75,9 +75,9 @@ query_planner(Query *root, if (root->hasSubLinks) qual = (List *) SS_process_sublinks((Node *) qual); - qual = cnfify((Expr *) qual, true); + qual = canonicalize_qual((Expr *) qual, true); #ifdef OPTIMIZER_DEBUG - printf("After cnfify()\n"); + printf("After canonicalize_qual()\n"); pprint(qual); #endif diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index b5c8d7d3de..a22d3ed43a 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.66 1999/08/26 05:07:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.67 1999/09/13 00:17:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -324,8 +324,9 @@ union_planner(Query *parse) elog(ERROR, "Sub-SELECT in HAVING clause must use only GROUPed attributes from outer SELECT"); } - /* convert the havingQual to conjunctive normal form (cnf) */ - parse->havingQual = (Node *) cnfify((Expr *) parse->havingQual, true); + /* convert the havingQual to implicit-AND normal form */ + parse->havingQual = (Node *) + canonicalize_qual((Expr *) parse->havingQual, true); /* * Require an aggregate function to appear in each clause of the diff --git a/src/backend/optimizer/prep/prepqual.c b/src/backend/optimizer/prep/prepqual.c index 72e26f3507..41288bd368 100644 --- a/src/backend/optimizer/prep/prepqual.c +++ b/src/backend/optimizer/prep/prepqual.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.19 1999/09/12 18:08:17 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.20 1999/09/13 00:17:13 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -29,6 +29,9 @@ static Expr *find_ors(Expr *qual); static Expr *or_normalize(List *orlist); static Expr *find_ands(Expr *qual); static Expr *and_normalize(List *andlist); +static Expr *qual_cleanup(Expr *qual); +static List *remove_duplicates(List *list); +static int count_bool_nodes(Expr *qual); /***************************************************************************** * @@ -37,20 +40,124 @@ static Expr *and_normalize(List *andlist); * These routines convert an arbitrary boolean expression into * conjunctive normal form or disjunctive normal form. * - * The result of these routines differs from a "true" CNF/DNF in that - * we do not bother to detect common subexpressions; e.g., ("AND" A A) - * does not get simplified to A. Testing for identical subexpressions - * is a waste of time if the query is written intelligently, and it - * takes an unreasonable amount of time if there are many subexpressions - * (since it's roughly O(N^2) in the number of subexpressions). + * Normalization is only carried out in the top AND/OR/NOT portion + * of the given tree; we do not attempt to normalize boolean expressions + * that may appear as arguments of operators or functions in the tree. * - * Because of that restriction, it would be unwise to apply dnfify() - * to the result of cnfify() or vice versa. Instead apply both to - * the original user-written qual expression. + * Query qualifications (WHERE clauses) are ordinarily transformed into + * CNF, ie, AND-of-ORs form, because then the optimizer can use any one + * of the independent AND clauses as a filtering qualification. However, + * quals that are naturally expressed as OR-of-ANDs can suffer an + * exponential growth in size in this transformation, so we also consider + * converting to DNF (OR-of-ANDs), and we may also leave well enough alone + * if both transforms cause unreasonable growth. The OR-of-ANDs format + * is useful for indexscan implementation, so we prefer that format when + * there is just one relation involved. * + * canonicalize_qual() does "smart" conversion to either CNF or DNF, per + * the above considerations, while cnfify() and dnfify() simply perform + * the demanded transformation. The latter two may become dead code + * eventually. *****************************************************************************/ +/* + * canonicalize_qual + * Convert a qualification to the most useful normalized form. + * + * Returns the modified qualification. + * + * If 'removeAndFlag' is true then it removes explicit AND at the top level, + * producing a list of implicitly-ANDed conditions. Otherwise, a regular + * boolean expression is returned. Since most callers pass 'true', we + * prefer to declare the result as List *, not Expr *. + * + * XXX This code could be much smarter, at the cost of also being slower, + * if we tried to compute selectivities and/or see whether there are + * actually indexes to support an indexscan implementation of a DNF qual. + * We could even try converting the CNF clauses that mention a single + * relation into a single DNF clause to see if that looks cheaper to + * implement. For now, though, we just try to avoid doing anything + * quite as stupid as unconditionally converting to CNF was... + */ +List * +canonicalize_qual(Expr *qual, bool removeAndFlag) +{ + Expr *newqual, + *cnfqual, + *dnfqual; + int qualcnt, + cnfcnt, + dnfcnt; + + if (qual == NULL) + return NIL; + + /* Flatten AND and OR groups throughout the tree. + * This improvement is always worthwhile, so do it unconditionally. + */ + qual = flatten_andors(qual); + /* Push down NOTs. We do this only in the top-level boolean + * expression, without examining arguments of operators/functions. + * Even so, it might not be a win if we are unable to find negators + * for all the operators involved; so we keep the flattened-but-not- + * NOT-pushed qual as the reference point for comparsions. + */ + newqual = find_nots(qual); + /* + * Generate both CNF and DNF forms from newqual. + */ + /* Normalize into conjunctive normal form, and clean up the result. */ + cnfqual = qual_cleanup(find_ors(newqual)); + /* Likewise for DNF */ + dnfqual = qual_cleanup(find_ands(newqual)); + + /* + * Now, choose whether to return qual, cnfqual, or dnfqual. + * + * First heuristic is to forget about either CNF or DNF if it shows + * unreasonable growth compared to the original form of the qual, + * where we define "unreasonable" a tad arbitrarily as 4x more + * operators. + */ + qualcnt = count_bool_nodes(qual); + cnfcnt = count_bool_nodes(cnfqual); + dnfcnt = count_bool_nodes(dnfqual); + if (cnfcnt >= 4 * qualcnt) + cnfqual = NULL; /* mark CNF not usable */ + if (dnfcnt >= 4 * qualcnt) + dnfqual = NULL; /* mark DNF not usable */ + + /* + * Second heuristic is to prefer DNF if only one relation is mentioned + * and it is smaller than the CNF representation. + */ + if (dnfqual && dnfcnt < cnfcnt && NumRelids((Node *) dnfqual) == 1) + cnfqual = NULL; + + /* + * Otherwise, we prefer CNF. + * + * XXX obviously, these rules could be improved upon. + */ + + /* pick preferred survivor */ + if (cnfqual) + newqual = cnfqual; + else if (dnfqual) + newqual = dnfqual; + else + newqual = qual; + + /* Convert to implicit-AND list if requested */ + if (removeAndFlag) + { + newqual = (Expr *) make_ands_implicit(newqual); + } + + return (List *) newqual; +} + /* * cnfify * Convert a qualification to conjunctive normal form by applying @@ -81,6 +188,8 @@ cnfify(Expr *qual, bool removeAndFlag) newqual = find_nots(newqual); /* Normalize into conjunctive normal form. */ newqual = find_ors(newqual); + /* Clean up the result. */ + newqual = qual_cleanup(newqual); if (removeAndFlag) { @@ -118,6 +227,8 @@ dnfify(Expr *qual) newqual = find_nots(newqual); /* Normalize into disjunctive normal form. */ newqual = find_ands(newqual); + /* Clean up the result. */ + newqual = qual_cleanup(newqual); return newqual; } @@ -641,3 +752,102 @@ and_normalize(List *andlist) /* pull_ors is needed in case any sub-and_normalize succeeded */ return make_orclause(pull_ors(orclauses)); } + +/* + * qual_cleanup + * Fix up a qualification by removing duplicate entries (which could be + * created during normalization, if identical subexpressions from different + * parts of the tree are brought together). Also, check for AND and OR + * clauses with only one remaining subexpression, and simplify. + * + * Returns the modified qualification. + */ +static Expr * +qual_cleanup(Expr *qual) +{ + if (qual == NULL) + return NULL; + + if (and_clause((Node *) qual)) + { + List *andlist = NIL; + List *temp; + + foreach(temp, qual->args) + andlist = lappend(andlist, qual_cleanup(lfirst(temp))); + + andlist = remove_duplicates(pull_ands(andlist)); + + if (length(andlist) > 1) + return make_andclause(andlist); + else + return lfirst(andlist); + } + else if (or_clause((Node *) qual)) + { + List *orlist = NIL; + List *temp; + + foreach(temp, qual->args) + orlist = lappend(orlist, qual_cleanup(lfirst(temp))); + + orlist = remove_duplicates(pull_ors(orlist)); + + if (length(orlist) > 1) + return make_orclause(orlist); + else + return lfirst(orlist); + } + else if (not_clause((Node *) qual)) + return make_notclause(qual_cleanup(get_notclausearg(qual))); + else + return qual; +} + +/* + * remove_duplicates + */ +static List * +remove_duplicates(List *list) +{ + List *result = NIL; + List *i; + + if (length(list) <= 1) + return list; + + foreach(i, list) + { + if (! member(lfirst(i), result)) + result = lappend(result, lfirst(i)); + } + return result; +} + +/* + * count_bool_nodes + * Support for heuristics in canonicalize_qual(): count the + * number of nodes in the top level AND/OR/NOT part of a qual tree + */ +static int +count_bool_nodes(Expr *qual) +{ + if (qual == NULL) + return 0; + + if (and_clause((Node *) qual) || + or_clause((Node *) qual)) + { + int sum = 1; /* for the and/or itself */ + List *temp; + + foreach(temp, qual->args) + sum += count_bool_nodes(lfirst(temp)); + + return sum; + } + else if (not_clause((Node *) qual)) + return count_bool_nodes(get_notclausearg(qual)) + 1; + else + return 1; /* anything else counts 1 for my purposes */ +} diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h index 161419f29d..66da07f51d 100644 --- a/src/include/optimizer/prep.h +++ b/src/include/optimizer/prep.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: prep.h,v 1.18 1999/09/12 18:08:10 tgl Exp $ + * $Id: prep.h,v 1.19 1999/09/13 00:17:08 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,6 +19,7 @@ /* * prototypes for prepqual.c */ +extern List *canonicalize_qual(Expr *qual, bool removeAndFlag); extern List *cnfify(Expr *qual, bool removeAndFlag); extern Expr *dnfify(Expr *qual);