Improve UniquePath logic to detect the case where the input is already
known to be unique (e.g., it is a SELECT DISTINCT ... subquery), and skip the redundant unique-ification step in that case.
This commit is contained in:
parent
cce442da6d
commit
5c74ce23db
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.226 2004/01/05 05:07:35 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.227 2004/01/05 18:04:38 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* Every node type that can appear in stored rules' parsetrees *must*
|
||||
@ -1023,7 +1023,7 @@ _outUniquePath(StringInfo str, UniquePath *node)
|
||||
_outPathInfo(str, (Path *) node);
|
||||
|
||||
WRITE_NODE_FIELD(subpath);
|
||||
WRITE_BOOL_FIELD(use_hash);
|
||||
WRITE_ENUM_FIELD(umethod, UniquePathMethod);
|
||||
WRITE_FLOAT_FIELD(rows, "%.0f");
|
||||
}
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.162 2004/01/05 05:07:35 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.163 2004/01/05 18:04:38 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -605,10 +605,14 @@ create_unique_plan(Query *root, UniquePath *best_path)
|
||||
subplan->targetlist = newtlist;
|
||||
}
|
||||
|
||||
/* Done if we don't need to do any actual unique-ifying */
|
||||
if (best_path->umethod == UNIQUE_PATH_NOOP)
|
||||
return subplan;
|
||||
|
||||
/* Copy tlist again to make one we can put sorting labels on */
|
||||
my_tlist = copyObject(subplan->targetlist);
|
||||
|
||||
if (best_path->use_hash)
|
||||
if (best_path->umethod == UNIQUE_PATH_HASH)
|
||||
{
|
||||
long numGroups;
|
||||
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.159 2004/01/04 03:51:52 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.160 2004/01/05 18:04:39 tgl Exp $
|
||||
*
|
||||
* HISTORY
|
||||
* AUTHOR DATE MAJOR EVENT
|
||||
@ -921,6 +921,21 @@ has_distinct_on_clause(Query *query)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Test whether a query uses simple DISTINCT, ie, has a distinct-list that
|
||||
* is the same as the set of output columns.
|
||||
*/
|
||||
bool
|
||||
has_distinct_clause(Query *query)
|
||||
{
|
||||
/* Is there a DISTINCT clause at all? */
|
||||
if (query->distinctClause == NIL)
|
||||
return false;
|
||||
|
||||
/* It's DISTINCT if it's not DISTINCT ON */
|
||||
return !has_distinct_on_clause(query);
|
||||
}
|
||||
|
||||
|
||||
/*****************************************************************************
|
||||
* *
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.97 2004/01/05 05:07:35 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.98 2004/01/05 18:04:39 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -20,12 +20,14 @@
|
||||
#include "executor/executor.h"
|
||||
#include "miscadmin.h"
|
||||
#include "nodes/plannodes.h"
|
||||
#include "optimizer/clauses.h"
|
||||
#include "optimizer/cost.h"
|
||||
#include "optimizer/pathnode.h"
|
||||
#include "optimizer/paths.h"
|
||||
#include "optimizer/restrictinfo.h"
|
||||
#include "parser/parse_expr.h"
|
||||
#include "parser/parse_oper.h"
|
||||
#include "parser/parsetree.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/selfuncs.h"
|
||||
#include "utils/syscache.h"
|
||||
@ -546,6 +548,30 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
|
||||
|
||||
pathnode->subpath = subpath;
|
||||
|
||||
/*
|
||||
* If the input is a subquery that uses DISTINCT, we don't need to do
|
||||
* anything; its output is already unique. (Are there any other cases
|
||||
* in which we can easily prove the input must be distinct?)
|
||||
*/
|
||||
if (rel->rtekind == RTE_SUBQUERY)
|
||||
{
|
||||
RangeTblEntry *rte = rt_fetch(rel->relid, root->rtable);
|
||||
Query *subquery = rte->subquery;
|
||||
|
||||
if (has_distinct_clause(subquery))
|
||||
{
|
||||
pathnode->umethod = UNIQUE_PATH_NOOP;
|
||||
pathnode->rows = rel->rows;
|
||||
pathnode->path.startup_cost = subpath->startup_cost;
|
||||
pathnode->path.total_cost = subpath->total_cost;
|
||||
pathnode->path.pathkeys = subpath->pathkeys;
|
||||
|
||||
rel->cheapest_unique_path = (Path *) pathnode;
|
||||
|
||||
return pathnode;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to identify the targetlist that will actually be unique-ified.
|
||||
* In current usage, this routine is only used for sub-selects of IN
|
||||
@ -599,7 +625,7 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
|
||||
* compare costs. We only try this if we know the targetlist for sure
|
||||
* (else we can't be sure about the datatypes involved).
|
||||
*/
|
||||
pathnode->use_hash = false;
|
||||
pathnode->umethod = UNIQUE_PATH_SORT;
|
||||
if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist))
|
||||
{
|
||||
/*
|
||||
@ -617,11 +643,11 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
|
||||
subpath->total_cost,
|
||||
rel->rows);
|
||||
if (agg_path.total_cost < sort_path.total_cost)
|
||||
pathnode->use_hash = true;
|
||||
pathnode->umethod = UNIQUE_PATH_HASH;
|
||||
}
|
||||
}
|
||||
|
||||
if (pathnode->use_hash)
|
||||
if (pathnode->umethod == UNIQUE_PATH_HASH)
|
||||
{
|
||||
pathnode->path.startup_cost = agg_path.startup_cost;
|
||||
pathnode->path.total_cost = agg_path.total_cost;
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.91 2004/01/05 05:07:36 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.92 2004/01/05 18:04:39 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -442,15 +442,26 @@ typedef struct MaterialPath
|
||||
* its subpath.
|
||||
*
|
||||
* This is unlike the other Path nodes in that it can actually generate
|
||||
* two different plans: either hash-based or sort-based implementation.
|
||||
* The decision is sufficiently localized that it's not worth having two
|
||||
* separate Path node types.
|
||||
* different plans: either hash-based or sort-based implementation, or a
|
||||
* no-op if the input path can be proven distinct already. The decision
|
||||
* is sufficiently localized that it's not worth having separate Path node
|
||||
* types. (Note: in the no-op case, we could eliminate the UniquePath node
|
||||
* entirely and just return the subpath; but it's convenient to have a
|
||||
* UniquePath in the path tree to signal upper-level routines that the input
|
||||
* is known distinct.)
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
UNIQUE_PATH_NOOP, /* input is known unique already */
|
||||
UNIQUE_PATH_HASH, /* use hashing */
|
||||
UNIQUE_PATH_SORT /* use sorting */
|
||||
} UniquePathMethod;
|
||||
|
||||
typedef struct UniquePath
|
||||
{
|
||||
Path path;
|
||||
Path *subpath;
|
||||
bool use_hash;
|
||||
UniquePathMethod umethod;
|
||||
double rows; /* estimated number of result tuples */
|
||||
} UniquePath;
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.71 2004/01/04 03:51:52 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.72 2004/01/05 18:04:39 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -57,6 +57,7 @@ extern bool is_pseudo_constant_clause(Node *clause);
|
||||
extern bool is_pseudo_constant_clause_relids(Node *clause, Relids relids);
|
||||
extern List *pull_constant_clauses(List *quals, List **constantQual);
|
||||
|
||||
extern bool has_distinct_clause(Query *query);
|
||||
extern bool has_distinct_on_clause(Query *query);
|
||||
|
||||
extern int NumRelids(Node *clause);
|
||||
|
Loading…
x
Reference in New Issue
Block a user