Improve planner's choices about when to use hashing vs sorting for DISTINCT.
The previous coding missed a bet by sometimes picking the "sorted" path from query_planner even though hashing would be preferable. To fix, we have to be willing to make the hash-versus-sort choice sooner, that is, before deciding whether to use the cheapest path or the sorted path. This contorts things a little bit, but I thought of a factorization that makes it not too awful.
This commit is contained in:
parent
cbe9d6beb4
commit
76b6ee3f38
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.263 2010/01/02 16:57:47 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.264 2010/02/10 03:38:35 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -71,11 +71,15 @@ static double preprocess_limit(PlannerInfo *root,
|
|||||||
static void preprocess_groupclause(PlannerInfo *root);
|
static void preprocess_groupclause(PlannerInfo *root);
|
||||||
static bool choose_hashed_grouping(PlannerInfo *root,
|
static bool choose_hashed_grouping(PlannerInfo *root,
|
||||||
double tuple_fraction, double limit_tuples,
|
double tuple_fraction, double limit_tuples,
|
||||||
|
double path_rows, int path_width,
|
||||||
Path *cheapest_path, Path *sorted_path,
|
Path *cheapest_path, Path *sorted_path,
|
||||||
double dNumGroups, AggClauseCounts *agg_counts);
|
double dNumGroups, AggClauseCounts *agg_counts);
|
||||||
static bool choose_hashed_distinct(PlannerInfo *root,
|
static bool choose_hashed_distinct(PlannerInfo *root,
|
||||||
Plan *input_plan, List *input_pathkeys,
|
|
||||||
double tuple_fraction, double limit_tuples,
|
double tuple_fraction, double limit_tuples,
|
||||||
|
double path_rows, int path_width,
|
||||||
|
Cost cheapest_startup_cost, Cost cheapest_total_cost,
|
||||||
|
Cost sorted_startup_cost, Cost sorted_total_cost,
|
||||||
|
List *sorted_pathkeys,
|
||||||
double dNumDistinctRows);
|
double dNumDistinctRows);
|
||||||
static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
|
static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
|
||||||
AttrNumber **groupColIdx, bool *need_tlist_eval);
|
AttrNumber **groupColIdx, bool *need_tlist_eval);
|
||||||
@ -855,6 +859,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
|||||||
Plan *result_plan;
|
Plan *result_plan;
|
||||||
List *current_pathkeys;
|
List *current_pathkeys;
|
||||||
double dNumGroups = 0;
|
double dNumGroups = 0;
|
||||||
|
bool use_hashed_distinct = false;
|
||||||
|
bool tested_hashed_distinct = false;
|
||||||
|
|
||||||
/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
|
/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
|
||||||
if (parse->limitCount || parse->limitOffset)
|
if (parse->limitCount || parse->limitOffset)
|
||||||
@ -945,6 +951,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
|||||||
long numGroups = 0;
|
long numGroups = 0;
|
||||||
AggClauseCounts agg_counts;
|
AggClauseCounts agg_counts;
|
||||||
int numGroupCols;
|
int numGroupCols;
|
||||||
|
double path_rows;
|
||||||
|
int path_width;
|
||||||
bool use_hashed_grouping = false;
|
bool use_hashed_grouping = false;
|
||||||
WindowFuncLists *wflists = NULL;
|
WindowFuncLists *wflists = NULL;
|
||||||
List *activeWindows = NIL;
|
List *activeWindows = NIL;
|
||||||
@ -1088,51 +1096,62 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
|||||||
&cheapest_path, &sorted_path, &dNumGroups);
|
&cheapest_path, &sorted_path, &dNumGroups);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If grouping, decide whether to use sorted or hashed grouping.
|
* Extract rowcount and width estimates for possible use in grouping
|
||||||
|
* decisions. Beware here of the possibility that
|
||||||
|
* cheapest_path->parent is NULL (ie, there is no FROM clause).
|
||||||
*/
|
*/
|
||||||
|
if (cheapest_path->parent)
|
||||||
|
{
|
||||||
|
path_rows = cheapest_path->parent->rows;
|
||||||
|
path_width = cheapest_path->parent->width;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
path_rows = 1; /* assume non-set result */
|
||||||
|
path_width = 100; /* arbitrary */
|
||||||
|
}
|
||||||
|
|
||||||
if (parse->groupClause)
|
if (parse->groupClause)
|
||||||
{
|
{
|
||||||
bool can_hash;
|
|
||||||
bool can_sort;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Executor doesn't support hashed aggregation with DISTINCT or
|
* If grouping, decide whether to use sorted or hashed grouping.
|
||||||
* ORDER BY aggregates. (Doing so would imply storing *all* the
|
|
||||||
* input values in the hash table, and/or running many sorts in
|
|
||||||
* parallel, either of which seems like a certain loser.)
|
|
||||||
*/
|
*/
|
||||||
can_hash = (agg_counts.numOrderedAggs == 0 &&
|
|
||||||
grouping_is_hashable(parse->groupClause));
|
|
||||||
can_sort = grouping_is_sortable(parse->groupClause);
|
|
||||||
if (can_hash && can_sort)
|
|
||||||
{
|
|
||||||
/* we have a meaningful choice to make ... */
|
|
||||||
use_hashed_grouping =
|
use_hashed_grouping =
|
||||||
choose_hashed_grouping(root,
|
choose_hashed_grouping(root,
|
||||||
tuple_fraction, limit_tuples,
|
tuple_fraction, limit_tuples,
|
||||||
|
path_rows, path_width,
|
||||||
cheapest_path, sorted_path,
|
cheapest_path, sorted_path,
|
||||||
dNumGroups, &agg_counts);
|
dNumGroups, &agg_counts);
|
||||||
}
|
|
||||||
else if (can_hash)
|
|
||||||
use_hashed_grouping = true;
|
|
||||||
else if (can_sort)
|
|
||||||
use_hashed_grouping = false;
|
|
||||||
else
|
|
||||||
ereport(ERROR,
|
|
||||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
||||||
errmsg("could not implement GROUP BY"),
|
|
||||||
errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
|
|
||||||
|
|
||||||
/* Also convert # groups to long int --- but 'ware overflow! */
|
/* Also convert # groups to long int --- but 'ware overflow! */
|
||||||
numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
|
numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
|
||||||
}
|
}
|
||||||
|
else if (parse->distinctClause && sorted_path &&
|
||||||
|
!root->hasHavingQual && !parse->hasAggs && !activeWindows)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* We'll reach the DISTINCT stage without any intermediate
|
||||||
|
* processing, so figure out whether we will want to hash or not
|
||||||
|
* so we can choose whether to use cheapest or sorted path.
|
||||||
|
*/
|
||||||
|
use_hashed_distinct =
|
||||||
|
choose_hashed_distinct(root,
|
||||||
|
tuple_fraction, limit_tuples,
|
||||||
|
path_rows, path_width,
|
||||||
|
cheapest_path->startup_cost,
|
||||||
|
cheapest_path->total_cost,
|
||||||
|
sorted_path->startup_cost,
|
||||||
|
sorted_path->total_cost,
|
||||||
|
sorted_path->pathkeys,
|
||||||
|
dNumGroups);
|
||||||
|
tested_hashed_distinct = true;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Select the best path. If we are doing hashed grouping, we will
|
* Select the best path. If we are doing hashed grouping, we will
|
||||||
* always read all the input tuples, so use the cheapest-total path.
|
* always read all the input tuples, so use the cheapest-total path.
|
||||||
* Otherwise, trust query_planner's decision about which to use.
|
* Otherwise, trust query_planner's decision about which to use.
|
||||||
*/
|
*/
|
||||||
if (use_hashed_grouping || !sorted_path)
|
if (use_hashed_grouping || use_hashed_distinct || !sorted_path)
|
||||||
best_path = cheapest_path;
|
best_path = cheapest_path;
|
||||||
else
|
else
|
||||||
best_path = sorted_path;
|
best_path = sorted_path;
|
||||||
@ -1506,9 +1525,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
|||||||
{
|
{
|
||||||
double dNumDistinctRows;
|
double dNumDistinctRows;
|
||||||
long numDistinctRows;
|
long numDistinctRows;
|
||||||
bool use_hashed_distinct;
|
|
||||||
bool can_sort;
|
|
||||||
bool can_hash;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If there was grouping or aggregation, use the current number of
|
* If there was grouping or aggregation, use the current number of
|
||||||
@ -1524,38 +1540,26 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
|||||||
/* Also convert to long int --- but 'ware overflow! */
|
/* Also convert to long int --- but 'ware overflow! */
|
||||||
numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);
|
numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);
|
||||||
|
|
||||||
|
/* Choose implementation method if we didn't already */
|
||||||
|
if (!tested_hashed_distinct)
|
||||||
|
{
|
||||||
/*
|
/*
|
||||||
* If we have a sortable DISTINCT ON clause, we always use sorting.
|
* At this point, either hashed or sorted grouping will have to
|
||||||
* This enforces the expected behavior of DISTINCT ON.
|
* work from result_plan, so we pass that as both "cheapest" and
|
||||||
|
* "sorted".
|
||||||
*/
|
*/
|
||||||
can_sort = grouping_is_sortable(parse->distinctClause);
|
|
||||||
if (can_sort && parse->hasDistinctOn)
|
|
||||||
use_hashed_distinct = false;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
can_hash = grouping_is_hashable(parse->distinctClause);
|
|
||||||
if (can_hash && can_sort)
|
|
||||||
{
|
|
||||||
/* we have a meaningful choice to make ... */
|
|
||||||
use_hashed_distinct =
|
use_hashed_distinct =
|
||||||
choose_hashed_distinct(root,
|
choose_hashed_distinct(root,
|
||||||
result_plan, current_pathkeys,
|
|
||||||
tuple_fraction, limit_tuples,
|
tuple_fraction, limit_tuples,
|
||||||
|
result_plan->plan_rows,
|
||||||
|
result_plan->plan_width,
|
||||||
|
result_plan->startup_cost,
|
||||||
|
result_plan->total_cost,
|
||||||
|
result_plan->startup_cost,
|
||||||
|
result_plan->total_cost,
|
||||||
|
current_pathkeys,
|
||||||
dNumDistinctRows);
|
dNumDistinctRows);
|
||||||
}
|
}
|
||||||
else if (can_hash)
|
|
||||||
use_hashed_distinct = true;
|
|
||||||
else if (can_sort)
|
|
||||||
use_hashed_distinct = false;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ereport(ERROR,
|
|
||||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
||||||
errmsg("could not implement DISTINCT"),
|
|
||||||
errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
|
|
||||||
use_hashed_distinct = false; /* keep compiler quiet */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (use_hashed_distinct)
|
if (use_hashed_distinct)
|
||||||
{
|
{
|
||||||
@ -2155,23 +2159,49 @@ preprocess_groupclause(PlannerInfo *root)
|
|||||||
/*
|
/*
|
||||||
* choose_hashed_grouping - should we use hashed grouping?
|
* choose_hashed_grouping - should we use hashed grouping?
|
||||||
*
|
*
|
||||||
* Note: this is only applied when both alternatives are actually feasible.
|
* Returns TRUE to select hashing, FALSE to select sorting.
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
choose_hashed_grouping(PlannerInfo *root,
|
choose_hashed_grouping(PlannerInfo *root,
|
||||||
double tuple_fraction, double limit_tuples,
|
double tuple_fraction, double limit_tuples,
|
||||||
|
double path_rows, int path_width,
|
||||||
Path *cheapest_path, Path *sorted_path,
|
Path *cheapest_path, Path *sorted_path,
|
||||||
double dNumGroups, AggClauseCounts *agg_counts)
|
double dNumGroups, AggClauseCounts *agg_counts)
|
||||||
{
|
{
|
||||||
int numGroupCols = list_length(root->parse->groupClause);
|
Query *parse = root->parse;
|
||||||
double cheapest_path_rows;
|
int numGroupCols = list_length(parse->groupClause);
|
||||||
int cheapest_path_width;
|
bool can_hash;
|
||||||
|
bool can_sort;
|
||||||
Size hashentrysize;
|
Size hashentrysize;
|
||||||
List *target_pathkeys;
|
List *target_pathkeys;
|
||||||
List *current_pathkeys;
|
List *current_pathkeys;
|
||||||
Path hashed_p;
|
Path hashed_p;
|
||||||
Path sorted_p;
|
Path sorted_p;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
|
||||||
|
* aggregates. (Doing so would imply storing *all* the input values in
|
||||||
|
* the hash table, and/or running many sorts in parallel, either of which
|
||||||
|
* seems like a certain loser.)
|
||||||
|
*/
|
||||||
|
can_hash = (agg_counts->numOrderedAggs == 0 &&
|
||||||
|
grouping_is_hashable(parse->groupClause));
|
||||||
|
can_sort = grouping_is_sortable(parse->groupClause);
|
||||||
|
|
||||||
|
/* Quick out if only one choice is workable */
|
||||||
|
if (!(can_hash && can_sort))
|
||||||
|
{
|
||||||
|
if (can_hash)
|
||||||
|
return true;
|
||||||
|
else if (can_sort)
|
||||||
|
return false;
|
||||||
|
else
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
|
errmsg("could not implement GROUP BY"),
|
||||||
|
errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
|
||||||
|
}
|
||||||
|
|
||||||
/* Prefer sorting when enable_hashagg is off */
|
/* Prefer sorting when enable_hashagg is off */
|
||||||
if (!enable_hashagg)
|
if (!enable_hashagg)
|
||||||
return false;
|
return false;
|
||||||
@ -2179,23 +2209,10 @@ choose_hashed_grouping(PlannerInfo *root,
|
|||||||
/*
|
/*
|
||||||
* Don't do it if it doesn't look like the hashtable will fit into
|
* Don't do it if it doesn't look like the hashtable will fit into
|
||||||
* work_mem.
|
* work_mem.
|
||||||
*
|
|
||||||
* Beware here of the possibility that cheapest_path->parent is NULL. This
|
|
||||||
* could happen if user does something silly like SELECT 'foo' GROUP BY 1;
|
|
||||||
*/
|
*/
|
||||||
if (cheapest_path->parent)
|
|
||||||
{
|
|
||||||
cheapest_path_rows = cheapest_path->parent->rows;
|
|
||||||
cheapest_path_width = cheapest_path->parent->width;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
cheapest_path_rows = 1; /* assume non-set result */
|
|
||||||
cheapest_path_width = 100; /* arbitrary */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Estimate per-hash-entry space at tuple width... */
|
/* Estimate per-hash-entry space at tuple width... */
|
||||||
hashentrysize = MAXALIGN(cheapest_path_width) + MAXALIGN(sizeof(MinimalTupleData));
|
hashentrysize = MAXALIGN(path_width) + MAXALIGN(sizeof(MinimalTupleData));
|
||||||
/* plus space for pass-by-ref transition values... */
|
/* plus space for pass-by-ref transition values... */
|
||||||
hashentrysize += agg_counts->transitionSpace;
|
hashentrysize += agg_counts->transitionSpace;
|
||||||
/* plus the per-hash-entry overhead */
|
/* plus the per-hash-entry overhead */
|
||||||
@ -2236,11 +2253,11 @@ choose_hashed_grouping(PlannerInfo *root,
|
|||||||
cost_agg(&hashed_p, root, AGG_HASHED, agg_counts->numAggs,
|
cost_agg(&hashed_p, root, AGG_HASHED, agg_counts->numAggs,
|
||||||
numGroupCols, dNumGroups,
|
numGroupCols, dNumGroups,
|
||||||
cheapest_path->startup_cost, cheapest_path->total_cost,
|
cheapest_path->startup_cost, cheapest_path->total_cost,
|
||||||
cheapest_path_rows);
|
path_rows);
|
||||||
/* Result of hashed agg is always unsorted */
|
/* Result of hashed agg is always unsorted */
|
||||||
if (target_pathkeys)
|
if (target_pathkeys)
|
||||||
cost_sort(&hashed_p, root, target_pathkeys, hashed_p.total_cost,
|
cost_sort(&hashed_p, root, target_pathkeys, hashed_p.total_cost,
|
||||||
dNumGroups, cheapest_path_width, limit_tuples);
|
dNumGroups, path_width, limit_tuples);
|
||||||
|
|
||||||
if (sorted_path)
|
if (sorted_path)
|
||||||
{
|
{
|
||||||
@ -2257,24 +2274,24 @@ choose_hashed_grouping(PlannerInfo *root,
|
|||||||
if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
|
if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
|
||||||
{
|
{
|
||||||
cost_sort(&sorted_p, root, root->group_pathkeys, sorted_p.total_cost,
|
cost_sort(&sorted_p, root, root->group_pathkeys, sorted_p.total_cost,
|
||||||
cheapest_path_rows, cheapest_path_width, -1.0);
|
path_rows, path_width, -1.0);
|
||||||
current_pathkeys = root->group_pathkeys;
|
current_pathkeys = root->group_pathkeys;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (root->parse->hasAggs)
|
if (parse->hasAggs)
|
||||||
cost_agg(&sorted_p, root, AGG_SORTED, agg_counts->numAggs,
|
cost_agg(&sorted_p, root, AGG_SORTED, agg_counts->numAggs,
|
||||||
numGroupCols, dNumGroups,
|
numGroupCols, dNumGroups,
|
||||||
sorted_p.startup_cost, sorted_p.total_cost,
|
sorted_p.startup_cost, sorted_p.total_cost,
|
||||||
cheapest_path_rows);
|
path_rows);
|
||||||
else
|
else
|
||||||
cost_group(&sorted_p, root, numGroupCols, dNumGroups,
|
cost_group(&sorted_p, root, numGroupCols, dNumGroups,
|
||||||
sorted_p.startup_cost, sorted_p.total_cost,
|
sorted_p.startup_cost, sorted_p.total_cost,
|
||||||
cheapest_path_rows);
|
path_rows);
|
||||||
/* The Agg or Group node will preserve ordering */
|
/* The Agg or Group node will preserve ordering */
|
||||||
if (target_pathkeys &&
|
if (target_pathkeys &&
|
||||||
!pathkeys_contained_in(target_pathkeys, current_pathkeys))
|
!pathkeys_contained_in(target_pathkeys, current_pathkeys))
|
||||||
cost_sort(&sorted_p, root, target_pathkeys, sorted_p.total_cost,
|
cost_sort(&sorted_p, root, target_pathkeys, sorted_p.total_cost,
|
||||||
dNumGroups, cheapest_path_width, limit_tuples);
|
dNumGroups, path_width, limit_tuples);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now make the decision using the top-level tuple fraction. First we
|
* Now make the decision using the top-level tuple fraction. First we
|
||||||
@ -2297,6 +2314,9 @@ choose_hashed_grouping(PlannerInfo *root,
|
|||||||
*
|
*
|
||||||
* This is fairly similar to choose_hashed_grouping, but there are enough
|
* This is fairly similar to choose_hashed_grouping, but there are enough
|
||||||
* differences that it doesn't seem worth trying to unify the two functions.
|
* differences that it doesn't seem worth trying to unify the two functions.
|
||||||
|
* (One difference is that we sometimes apply this after forming a Plan,
|
||||||
|
* so the input alternatives can't be represented as Paths --- instead we
|
||||||
|
* pass in the costs as individual variables.)
|
||||||
*
|
*
|
||||||
* But note that making the two choices independently is a bit bogus in
|
* But note that making the two choices independently is a bit bogus in
|
||||||
* itself. If the two could be combined into a single choice operation
|
* itself. If the two could be combined into a single choice operation
|
||||||
@ -2306,21 +2326,51 @@ choose_hashed_grouping(PlannerInfo *root,
|
|||||||
* extra preference to using a sorting implementation when a common sort key
|
* extra preference to using a sorting implementation when a common sort key
|
||||||
* is available ... and that's not necessarily wrong anyway.
|
* is available ... and that's not necessarily wrong anyway.
|
||||||
*
|
*
|
||||||
* Note: this is only applied when both alternatives are actually feasible.
|
* Returns TRUE to select hashing, FALSE to select sorting.
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
choose_hashed_distinct(PlannerInfo *root,
|
choose_hashed_distinct(PlannerInfo *root,
|
||||||
Plan *input_plan, List *input_pathkeys,
|
|
||||||
double tuple_fraction, double limit_tuples,
|
double tuple_fraction, double limit_tuples,
|
||||||
|
double path_rows, int path_width,
|
||||||
|
Cost cheapest_startup_cost, Cost cheapest_total_cost,
|
||||||
|
Cost sorted_startup_cost, Cost sorted_total_cost,
|
||||||
|
List *sorted_pathkeys,
|
||||||
double dNumDistinctRows)
|
double dNumDistinctRows)
|
||||||
{
|
{
|
||||||
int numDistinctCols = list_length(root->parse->distinctClause);
|
Query *parse = root->parse;
|
||||||
|
int numDistinctCols = list_length(parse->distinctClause);
|
||||||
|
bool can_sort;
|
||||||
|
bool can_hash;
|
||||||
Size hashentrysize;
|
Size hashentrysize;
|
||||||
List *current_pathkeys;
|
List *current_pathkeys;
|
||||||
List *needed_pathkeys;
|
List *needed_pathkeys;
|
||||||
Path hashed_p;
|
Path hashed_p;
|
||||||
Path sorted_p;
|
Path sorted_p;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we have a sortable DISTINCT ON clause, we always use sorting.
|
||||||
|
* This enforces the expected behavior of DISTINCT ON.
|
||||||
|
*/
|
||||||
|
can_sort = grouping_is_sortable(parse->distinctClause);
|
||||||
|
if (can_sort && parse->hasDistinctOn)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
can_hash = grouping_is_hashable(parse->distinctClause);
|
||||||
|
|
||||||
|
/* Quick out if only one choice is workable */
|
||||||
|
if (!(can_hash && can_sort))
|
||||||
|
{
|
||||||
|
if (can_hash)
|
||||||
|
return true;
|
||||||
|
else if (can_sort)
|
||||||
|
return false;
|
||||||
|
else
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
|
errmsg("could not implement DISTINCT"),
|
||||||
|
errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
|
||||||
|
}
|
||||||
|
|
||||||
/* Prefer sorting when enable_hashagg is off */
|
/* Prefer sorting when enable_hashagg is off */
|
||||||
if (!enable_hashagg)
|
if (!enable_hashagg)
|
||||||
return false;
|
return false;
|
||||||
@ -2329,7 +2379,7 @@ choose_hashed_distinct(PlannerInfo *root,
|
|||||||
* Don't do it if it doesn't look like the hashtable will fit into
|
* Don't do it if it doesn't look like the hashtable will fit into
|
||||||
* work_mem.
|
* work_mem.
|
||||||
*/
|
*/
|
||||||
hashentrysize = MAXALIGN(input_plan->plan_width) + MAXALIGN(sizeof(MinimalTupleData));
|
hashentrysize = MAXALIGN(path_width) + MAXALIGN(sizeof(MinimalTupleData));
|
||||||
|
|
||||||
if (hashentrysize * dNumDistinctRows > work_mem * 1024L)
|
if (hashentrysize * dNumDistinctRows > work_mem * 1024L)
|
||||||
return false;
|
return false;
|
||||||
@ -2340,8 +2390,8 @@ choose_hashed_distinct(PlannerInfo *root,
|
|||||||
* output won't be sorted may be a loss; so we need to do an actual cost
|
* output won't be sorted may be a loss; so we need to do an actual cost
|
||||||
* comparison.
|
* comparison.
|
||||||
*
|
*
|
||||||
* We need to consider input_plan + hashagg [+ final sort] versus
|
* We need to consider cheapest_path + hashagg [+ final sort] versus
|
||||||
* input_plan [+ sort] + group [+ final sort] where brackets indicate a
|
* sorted_path [+ sort] + group [+ final sort] where brackets indicate a
|
||||||
* step that may not be needed.
|
* step that may not be needed.
|
||||||
*
|
*
|
||||||
* These path variables are dummies that just hold cost fields; we don't
|
* These path variables are dummies that just hold cost fields; we don't
|
||||||
@ -2349,25 +2399,25 @@ choose_hashed_distinct(PlannerInfo *root,
|
|||||||
*/
|
*/
|
||||||
cost_agg(&hashed_p, root, AGG_HASHED, 0,
|
cost_agg(&hashed_p, root, AGG_HASHED, 0,
|
||||||
numDistinctCols, dNumDistinctRows,
|
numDistinctCols, dNumDistinctRows,
|
||||||
input_plan->startup_cost, input_plan->total_cost,
|
cheapest_startup_cost, cheapest_total_cost,
|
||||||
input_plan->plan_rows);
|
path_rows);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Result of hashed agg is always unsorted, so if ORDER BY is present we
|
* Result of hashed agg is always unsorted, so if ORDER BY is present we
|
||||||
* need to charge for the final sort.
|
* need to charge for the final sort.
|
||||||
*/
|
*/
|
||||||
if (root->parse->sortClause)
|
if (parse->sortClause)
|
||||||
cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost,
|
cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost,
|
||||||
dNumDistinctRows, input_plan->plan_width, limit_tuples);
|
dNumDistinctRows, path_width, limit_tuples);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now for the GROUP case. See comments in grouping_planner about the
|
* Now for the GROUP case. See comments in grouping_planner about the
|
||||||
* sorting choices here --- this code should match that code.
|
* sorting choices here --- this code should match that code.
|
||||||
*/
|
*/
|
||||||
sorted_p.startup_cost = input_plan->startup_cost;
|
sorted_p.startup_cost = sorted_startup_cost;
|
||||||
sorted_p.total_cost = input_plan->total_cost;
|
sorted_p.total_cost = sorted_total_cost;
|
||||||
current_pathkeys = input_pathkeys;
|
current_pathkeys = sorted_pathkeys;
|
||||||
if (root->parse->hasDistinctOn &&
|
if (parse->hasDistinctOn &&
|
||||||
list_length(root->distinct_pathkeys) <
|
list_length(root->distinct_pathkeys) <
|
||||||
list_length(root->sort_pathkeys))
|
list_length(root->sort_pathkeys))
|
||||||
needed_pathkeys = root->sort_pathkeys;
|
needed_pathkeys = root->sort_pathkeys;
|
||||||
@ -2381,15 +2431,15 @@ choose_hashed_distinct(PlannerInfo *root,
|
|||||||
else
|
else
|
||||||
current_pathkeys = root->sort_pathkeys;
|
current_pathkeys = root->sort_pathkeys;
|
||||||
cost_sort(&sorted_p, root, current_pathkeys, sorted_p.total_cost,
|
cost_sort(&sorted_p, root, current_pathkeys, sorted_p.total_cost,
|
||||||
input_plan->plan_rows, input_plan->plan_width, -1.0);
|
path_rows, path_width, -1.0);
|
||||||
}
|
}
|
||||||
cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows,
|
cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows,
|
||||||
sorted_p.startup_cost, sorted_p.total_cost,
|
sorted_p.startup_cost, sorted_p.total_cost,
|
||||||
input_plan->plan_rows);
|
path_rows);
|
||||||
if (root->parse->sortClause &&
|
if (parse->sortClause &&
|
||||||
!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
|
!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
|
||||||
cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost,
|
cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost,
|
||||||
dNumDistinctRows, input_plan->plan_width, limit_tuples);
|
dNumDistinctRows, path_width, limit_tuples);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now make the decision using the top-level tuple fraction. First we
|
* Now make the decision using the top-level tuple fraction. First we
|
||||||
@ -2407,7 +2457,7 @@ choose_hashed_distinct(PlannerInfo *root,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*---------------
|
/*
|
||||||
* make_subplanTargetList
|
* make_subplanTargetList
|
||||||
* Generate appropriate target list when grouping is required.
|
* Generate appropriate target list when grouping is required.
|
||||||
*
|
*
|
||||||
@ -2446,7 +2496,6 @@ choose_hashed_distinct(PlannerInfo *root,
|
|||||||
* result tlist.
|
* result tlist.
|
||||||
*
|
*
|
||||||
* The result is the targetlist to be passed to the subplan.
|
* The result is the targetlist to be passed to the subplan.
|
||||||
*---------------
|
|
||||||
*/
|
*/
|
||||||
static List *
|
static List *
|
||||||
make_subplanTargetList(PlannerInfo *root,
|
make_subplanTargetList(PlannerInfo *root,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user