Have the planner consider Incremental Sort for DISTINCT
Prior to this, we only considered a full sort on the cheapest input path and uniquifying any path which was already sorted in the required sort order. Here we adjust create_final_distinct_paths() so that it also adds an Incremental Sort path on any path which has presorted keys. Additionally, this adjusts the parallel distinct code so that we now consider sorting the cheapest partial path and incrementally sorting any partial paths with presorted keys. Previously we didn't consider any sorting for parallel distinct and only added a unique path atop any path which had the required pathkeys already. Author: David Rowley Reviewed-by: Richard Guo Discussion: https://postgr.es/m/CAApHDvo8Lz2H=42urBbfP65LTcEUOh288MT7DsG2_EWtW1AXHQ@mail.gmail.com
This commit is contained in:
parent
e5b8a4c098
commit
3c6fc58209
@ -4654,22 +4654,63 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
|
|||||||
cheapest_partial_path->rows,
|
cheapest_partial_path->rows,
|
||||||
NULL, NULL);
|
NULL, NULL);
|
||||||
|
|
||||||
/* first try adding unique paths atop of sorted paths */
|
/*
|
||||||
|
* Try sorting the cheapest path and incrementally sorting any paths with
|
||||||
|
* presorted keys and put a unique path atop of those.
|
||||||
|
*/
|
||||||
if (grouping_is_sortable(parse->distinctClause))
|
if (grouping_is_sortable(parse->distinctClause))
|
||||||
{
|
{
|
||||||
foreach(lc, input_rel->partial_pathlist)
|
foreach(lc, input_rel->partial_pathlist)
|
||||||
{
|
{
|
||||||
Path *path = (Path *) lfirst(lc);
|
Path *input_path = (Path *) lfirst(lc);
|
||||||
|
Path *sorted_path;
|
||||||
|
bool is_sorted;
|
||||||
|
int presorted_keys;
|
||||||
|
|
||||||
if (pathkeys_contained_in(root->distinct_pathkeys, path->pathkeys))
|
is_sorted = pathkeys_count_contained_in(root->distinct_pathkeys,
|
||||||
|
input_path->pathkeys,
|
||||||
|
&presorted_keys);
|
||||||
|
|
||||||
|
if (is_sorted)
|
||||||
|
sorted_path = input_path;
|
||||||
|
else
|
||||||
{
|
{
|
||||||
add_partial_path(partial_distinct_rel, (Path *)
|
/*
|
||||||
create_upper_unique_path(root,
|
* Try at least sorting the cheapest path and also try
|
||||||
partial_distinct_rel,
|
* incrementally sorting any path which is partially sorted
|
||||||
path,
|
* already (no need to deal with paths which have presorted
|
||||||
list_length(root->distinct_pathkeys),
|
* keys when incremental sort is disabled unless it's the
|
||||||
numDistinctRows));
|
* cheapest partial path).
|
||||||
|
*/
|
||||||
|
if (input_path != cheapest_partial_path &&
|
||||||
|
(presorted_keys == 0 || !enable_incremental_sort))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We've no need to consider both a sort and incremental sort.
|
||||||
|
* We'll just do a sort if there are no presorted keys and an
|
||||||
|
* incremental sort when there are presorted keys.
|
||||||
|
*/
|
||||||
|
if (presorted_keys == 0 || !enable_incremental_sort)
|
||||||
|
sorted_path = (Path *) create_sort_path(root,
|
||||||
|
partial_distinct_rel,
|
||||||
|
input_path,
|
||||||
|
root->distinct_pathkeys,
|
||||||
|
-1.0);
|
||||||
|
else
|
||||||
|
sorted_path = (Path *) create_incremental_sort_path(root,
|
||||||
|
partial_distinct_rel,
|
||||||
|
input_path,
|
||||||
|
root->distinct_pathkeys,
|
||||||
|
presorted_keys,
|
||||||
|
-1.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
add_partial_path(partial_distinct_rel, (Path *)
|
||||||
|
create_upper_unique_path(root, partial_distinct_rel,
|
||||||
|
sorted_path,
|
||||||
|
list_length(root->distinct_pathkeys),
|
||||||
|
numDistinctRows));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4773,9 +4814,11 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
|
|||||||
if (grouping_is_sortable(parse->distinctClause))
|
if (grouping_is_sortable(parse->distinctClause))
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* First, if we have any adequately-presorted paths, just stick a
|
* Firstly, if we have any adequately-presorted paths, just stick a
|
||||||
* Unique node on those. Then consider doing an explicit sort of the
|
* Unique node on those. We also consider doing an explicit sort of
|
||||||
* cheapest input path and Unique'ing that.
|
* the cheapest input path and Unique'ing that. If any paths have
|
||||||
|
* presorted keys then we'll create an incremental sort atop of those
|
||||||
|
* before adding a unique node on the top.
|
||||||
*
|
*
|
||||||
* When we have DISTINCT ON, we must sort by the more rigorous of
|
* When we have DISTINCT ON, we must sort by the more rigorous of
|
||||||
* DISTINCT and ORDER BY, else it won't have the desired behavior.
|
* DISTINCT and ORDER BY, else it won't have the desired behavior.
|
||||||
@ -4785,8 +4828,8 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
|
|||||||
* the other.)
|
* the other.)
|
||||||
*/
|
*/
|
||||||
List *needed_pathkeys;
|
List *needed_pathkeys;
|
||||||
Path *path;
|
|
||||||
ListCell *lc;
|
ListCell *lc;
|
||||||
|
double limittuples = root->distinct_pathkeys == NIL ? 1.0 : -1.0;
|
||||||
|
|
||||||
if (parse->hasDistinctOn &&
|
if (parse->hasDistinctOn &&
|
||||||
list_length(root->distinct_pathkeys) <
|
list_length(root->distinct_pathkeys) <
|
||||||
@ -4797,96 +4840,89 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
|
|||||||
|
|
||||||
foreach(lc, input_rel->pathlist)
|
foreach(lc, input_rel->pathlist)
|
||||||
{
|
{
|
||||||
path = (Path *) lfirst(lc);
|
Path *input_path = (Path *) lfirst(lc);
|
||||||
|
Path *sorted_path;
|
||||||
|
bool is_sorted;
|
||||||
|
int presorted_keys;
|
||||||
|
|
||||||
if (pathkeys_contained_in(needed_pathkeys, path->pathkeys))
|
is_sorted = pathkeys_count_contained_in(needed_pathkeys,
|
||||||
|
input_path->pathkeys,
|
||||||
|
&presorted_keys);
|
||||||
|
|
||||||
|
if (is_sorted)
|
||||||
|
sorted_path = input_path;
|
||||||
|
else
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* distinct_pathkeys may have become empty if all of the
|
* Try at least sorting the cheapest path and also try
|
||||||
* pathkeys were determined to be redundant. If all of the
|
* incrementally sorting any path which is partially sorted
|
||||||
* pathkeys are redundant then each DISTINCT target must only
|
* already (no need to deal with paths which have presorted
|
||||||
* allow a single value, therefore all resulting tuples must
|
* keys when incremental sort is disabled unless it's the
|
||||||
* be identical (or at least indistinguishable by an equality
|
* cheapest input path).
|
||||||
* check). We can uniquify these tuples simply by just taking
|
|
||||||
* the first tuple. All we do here is add a path to do "LIMIT
|
|
||||||
* 1" atop of 'path'. When doing a DISTINCT ON we may still
|
|
||||||
* have a non-NIL sort_pathkeys list, so we must still only do
|
|
||||||
* this with paths which are correctly sorted by
|
|
||||||
* sort_pathkeys.
|
|
||||||
*/
|
*/
|
||||||
if (root->distinct_pathkeys == NIL)
|
if (input_path != cheapest_input_path &&
|
||||||
{
|
(presorted_keys == 0 || !enable_incremental_sort))
|
||||||
Node *limitCount;
|
continue;
|
||||||
|
|
||||||
limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
|
/*
|
||||||
sizeof(int64),
|
* We've no need to consider both a sort and incremental sort.
|
||||||
Int64GetDatum(1), false,
|
* We'll just do a sort if there are no presorted keys and an
|
||||||
FLOAT8PASSBYVAL);
|
* incremental sort when there are presorted keys.
|
||||||
|
*/
|
||||||
/*
|
if (presorted_keys == 0 || !enable_incremental_sort)
|
||||||
* If the query already has a LIMIT clause, then we could
|
sorted_path = (Path *) create_sort_path(root,
|
||||||
* end up with a duplicate LimitPath in the final plan.
|
distinct_rel,
|
||||||
* That does not seem worth troubling over too much.
|
input_path,
|
||||||
*/
|
needed_pathkeys,
|
||||||
add_path(distinct_rel, (Path *)
|
limittuples);
|
||||||
create_limit_path(root, distinct_rel, path, NULL,
|
|
||||||
limitCount, LIMIT_OPTION_COUNT,
|
|
||||||
0, 1));
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
sorted_path = (Path *) create_incremental_sort_path(root,
|
||||||
add_path(distinct_rel, (Path *)
|
distinct_rel,
|
||||||
create_upper_unique_path(root, distinct_rel,
|
input_path,
|
||||||
path,
|
needed_pathkeys,
|
||||||
list_length(root->distinct_pathkeys),
|
presorted_keys,
|
||||||
numDistinctRows));
|
limittuples);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/* For explicit-sort case, always use the more rigorous clause */
|
/*
|
||||||
if (list_length(root->distinct_pathkeys) <
|
* distinct_pathkeys may have become empty if all of the pathkeys
|
||||||
list_length(root->sort_pathkeys))
|
* were determined to be redundant. If all of the pathkeys are
|
||||||
{
|
* redundant then each DISTINCT target must only allow a single
|
||||||
needed_pathkeys = root->sort_pathkeys;
|
* value, therefore all resulting tuples must be identical (or at
|
||||||
/* Assert checks that parser didn't mess up... */
|
* least indistinguishable by an equality check). We can uniquify
|
||||||
Assert(pathkeys_contained_in(root->distinct_pathkeys,
|
* these tuples simply by just taking the first tuple. All we do
|
||||||
needed_pathkeys));
|
* here is add a path to do "LIMIT 1" atop of 'sorted_path'. When
|
||||||
}
|
* doing a DISTINCT ON we may still have a non-NIL sort_pathkeys
|
||||||
else
|
* list, so we must still only do this with paths which are
|
||||||
needed_pathkeys = root->distinct_pathkeys;
|
* correctly sorted by sort_pathkeys.
|
||||||
|
*/
|
||||||
|
if (root->distinct_pathkeys == NIL)
|
||||||
|
{
|
||||||
|
Node *limitCount;
|
||||||
|
|
||||||
path = cheapest_input_path;
|
limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
|
||||||
if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys))
|
sizeof(int64),
|
||||||
path = (Path *) create_sort_path(root, distinct_rel,
|
Int64GetDatum(1), false,
|
||||||
path,
|
FLOAT8PASSBYVAL);
|
||||||
needed_pathkeys,
|
|
||||||
root->distinct_pathkeys == NIL ?
|
|
||||||
1.0 : -1.0);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* As above, use a LimitPath instead of a UniquePath when all of the
|
* If the query already has a LIMIT clause, then we could end
|
||||||
* distinct_pathkeys are redundant and we're only going to get a
|
* up with a duplicate LimitPath in the final plan. That does
|
||||||
* series of tuples all with the same values anyway.
|
* not seem worth troubling over too much.
|
||||||
*/
|
*/
|
||||||
if (root->distinct_pathkeys == NIL)
|
add_path(distinct_rel, (Path *)
|
||||||
{
|
create_limit_path(root, distinct_rel, sorted_path,
|
||||||
Node *limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
|
NULL, limitCount,
|
||||||
sizeof(int64),
|
LIMIT_OPTION_COUNT, 0, 1));
|
||||||
Int64GetDatum(1), false,
|
}
|
||||||
FLOAT8PASSBYVAL);
|
else
|
||||||
|
{
|
||||||
add_path(distinct_rel, (Path *)
|
add_path(distinct_rel, (Path *)
|
||||||
create_limit_path(root, distinct_rel, path, NULL,
|
create_upper_unique_path(root, distinct_rel,
|
||||||
limitCount, LIMIT_OPTION_COUNT, 0, 1));
|
sorted_path,
|
||||||
}
|
list_length(root->distinct_pathkeys),
|
||||||
else
|
numDistinctRows));
|
||||||
{
|
}
|
||||||
add_path(distinct_rel, (Path *)
|
|
||||||
create_upper_unique_path(root, distinct_rel,
|
|
||||||
path,
|
|
||||||
list_length(root->distinct_pathkeys),
|
|
||||||
numDistinctRows));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1484,15 +1484,16 @@ explain (costs off) select * from t union select * from t order by 1,3;
|
|||||||
-- Full sort, not just incremental sort can be pushed below a gather merge path
|
-- Full sort, not just incremental sort can be pushed below a gather merge path
|
||||||
-- by generate_useful_gather_paths.
|
-- by generate_useful_gather_paths.
|
||||||
explain (costs off) select distinct a,b from t;
|
explain (costs off) select distinct a,b from t;
|
||||||
QUERY PLAN
|
QUERY PLAN
|
||||||
------------------------------------------
|
------------------------------------------------
|
||||||
Unique
|
Unique
|
||||||
-> Gather Merge
|
-> Gather Merge
|
||||||
Workers Planned: 2
|
Workers Planned: 2
|
||||||
-> Sort
|
-> Unique
|
||||||
Sort Key: a, b
|
-> Sort
|
||||||
-> Parallel Seq Scan on t
|
Sort Key: a, b
|
||||||
(6 rows)
|
-> Parallel Seq Scan on t
|
||||||
|
(7 rows)
|
||||||
|
|
||||||
drop table t;
|
drop table t;
|
||||||
-- Sort pushdown can't go below where expressions are part of the rel target.
|
-- Sort pushdown can't go below where expressions are part of the rel target.
|
||||||
|
@ -171,6 +171,20 @@ SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
|
|||||||
SET jit_above_cost TO DEFAULT;
|
SET jit_above_cost TO DEFAULT;
|
||||||
CREATE TABLE distinct_group_2 AS
|
CREATE TABLE distinct_group_2 AS
|
||||||
SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
|
SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
|
||||||
|
SET enable_seqscan = 0;
|
||||||
|
-- Check to see we get an incremental sort plan
|
||||||
|
EXPLAIN (costs off)
|
||||||
|
SELECT DISTINCT hundred, two FROM tenk1;
|
||||||
|
QUERY PLAN
|
||||||
|
-----------------------------------------------------
|
||||||
|
Unique
|
||||||
|
-> Incremental Sort
|
||||||
|
Sort Key: hundred, two
|
||||||
|
Presorted Key: hundred
|
||||||
|
-> Index Scan using tenk1_hundred on tenk1
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
RESET enable_seqscan;
|
||||||
SET enable_hashagg=TRUE;
|
SET enable_hashagg=TRUE;
|
||||||
-- Produce results with hash aggregation.
|
-- Produce results with hash aggregation.
|
||||||
SET enable_sort=FALSE;
|
SET enable_sort=FALSE;
|
||||||
@ -265,15 +279,16 @@ $$ LANGUAGE plpgsql PARALLEL SAFE;
|
|||||||
-- Ensure we do parallel distinct now that the function is parallel safe
|
-- Ensure we do parallel distinct now that the function is parallel safe
|
||||||
EXPLAIN (COSTS OFF)
|
EXPLAIN (COSTS OFF)
|
||||||
SELECT DISTINCT distinct_func(1) FROM tenk1;
|
SELECT DISTINCT distinct_func(1) FROM tenk1;
|
||||||
QUERY PLAN
|
QUERY PLAN
|
||||||
----------------------------------------------
|
----------------------------------------------------
|
||||||
Unique
|
Unique
|
||||||
-> Sort
|
-> Gather Merge
|
||||||
Sort Key: (distinct_func(1))
|
Workers Planned: 2
|
||||||
-> Gather
|
-> Unique
|
||||||
Workers Planned: 2
|
-> Sort
|
||||||
-> Parallel Seq Scan on tenk1
|
Sort Key: (distinct_func(1))
|
||||||
(6 rows)
|
-> Parallel Seq Scan on tenk1
|
||||||
|
(7 rows)
|
||||||
|
|
||||||
RESET max_parallel_workers_per_gather;
|
RESET max_parallel_workers_per_gather;
|
||||||
RESET min_parallel_table_scan_size;
|
RESET min_parallel_table_scan_size;
|
||||||
|
@ -3944,8 +3944,9 @@ ORDER BY depname, enroll_date;
|
|||||||
QUERY PLAN
|
QUERY PLAN
|
||||||
-----------------------------------------------------------------------------------------------
|
-----------------------------------------------------------------------------------------------
|
||||||
Unique
|
Unique
|
||||||
-> Sort
|
-> Incremental Sort
|
||||||
Sort Key: depname, enroll_date, empno, (sum(salary) OVER (?)), (min(salary) OVER (?))
|
Sort Key: depname, enroll_date, empno, (sum(salary) OVER (?)), (min(salary) OVER (?))
|
||||||
|
Presorted Key: depname, enroll_date
|
||||||
-> WindowAgg
|
-> WindowAgg
|
||||||
-> Incremental Sort
|
-> Incremental Sort
|
||||||
Sort Key: depname, enroll_date
|
Sort Key: depname, enroll_date
|
||||||
@ -3954,7 +3955,7 @@ ORDER BY depname, enroll_date;
|
|||||||
-> Sort
|
-> Sort
|
||||||
Sort Key: depname, empno
|
Sort Key: depname, empno
|
||||||
-> Seq Scan on empsalary
|
-> Seq Scan on empsalary
|
||||||
(11 rows)
|
(12 rows)
|
||||||
|
|
||||||
-- As above but adjust the ORDER BY clause to help ensure the plan with the
|
-- As above but adjust the ORDER BY clause to help ensure the plan with the
|
||||||
-- minimum amount of sorting wasn't a fluke.
|
-- minimum amount of sorting wasn't a fluke.
|
||||||
@ -3970,8 +3971,9 @@ ORDER BY depname, empno;
|
|||||||
QUERY PLAN
|
QUERY PLAN
|
||||||
-----------------------------------------------------------------------------------------------
|
-----------------------------------------------------------------------------------------------
|
||||||
Unique
|
Unique
|
||||||
-> Sort
|
-> Incremental Sort
|
||||||
Sort Key: depname, empno, enroll_date, (sum(salary) OVER (?)), (min(salary) OVER (?))
|
Sort Key: depname, empno, enroll_date, (sum(salary) OVER (?)), (min(salary) OVER (?))
|
||||||
|
Presorted Key: depname, empno
|
||||||
-> WindowAgg
|
-> WindowAgg
|
||||||
-> Incremental Sort
|
-> Incremental Sort
|
||||||
Sort Key: depname, empno
|
Sort Key: depname, empno
|
||||||
@ -3980,7 +3982,7 @@ ORDER BY depname, empno;
|
|||||||
-> Sort
|
-> Sort
|
||||||
Sort Key: depname, enroll_date
|
Sort Key: depname, enroll_date
|
||||||
-> Seq Scan on empsalary
|
-> Seq Scan on empsalary
|
||||||
(11 rows)
|
(12 rows)
|
||||||
|
|
||||||
RESET enable_hashagg;
|
RESET enable_hashagg;
|
||||||
-- Test Sort node reordering
|
-- Test Sort node reordering
|
||||||
|
@ -69,6 +69,14 @@ SET jit_above_cost TO DEFAULT;
|
|||||||
CREATE TABLE distinct_group_2 AS
|
CREATE TABLE distinct_group_2 AS
|
||||||
SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
|
SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
|
||||||
|
|
||||||
|
SET enable_seqscan = 0;
|
||||||
|
|
||||||
|
-- Check to see we get an incremental sort plan
|
||||||
|
EXPLAIN (costs off)
|
||||||
|
SELECT DISTINCT hundred, two FROM tenk1;
|
||||||
|
|
||||||
|
RESET enable_seqscan;
|
||||||
|
|
||||||
SET enable_hashagg=TRUE;
|
SET enable_hashagg=TRUE;
|
||||||
|
|
||||||
-- Produce results with hash aggregation.
|
-- Produce results with hash aggregation.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user