diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index e86dfeaecd..4bd60a09c6 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -652,7 +652,18 @@ get_eclass_for_sort_expr(PlannerInfo *root, if (opcintype == cur_em->em_datatype && equal(expr, cur_em->em_expr)) - return cur_ec; /* Match! */ + { + /* + * Match! + * + * Copy the sortref if it wasn't set yet. That may happen if + * the ec was constructed from a WHERE clause, i.e. it doesn't + * have a target reference at all. + */ + if (cur_ec->ec_sortref == 0 && sortref > 0) + cur_ec->ec_sortref = sortref; + return cur_ec; + } } } diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index ca94a31f71..82ff31273b 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -22,12 +22,15 @@ #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "nodes/plannodes.h" +#include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "partitioning/partbounds.h" #include "utils/lsyscache.h" +/* Consider reordering of GROUP BY keys? */ +bool enable_group_by_reordering = true; static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); static bool matches_boolean_partition_clause(RestrictInfo *rinfo, @@ -350,6 +353,202 @@ pathkeys_contained_in(List *keys1, List *keys2) return false; } +/* + * group_keys_reorder_by_pathkeys + * Reorder GROUP BY keys to match the input pathkeys. + * + * Function returns new lists (pathkeys and clauses), original GROUP BY lists + * stay untouched. + * + * Returns the number of GROUP BY keys with a matching pathkey. 
+ */ +static int +group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys, + List **group_clauses, + int num_groupby_pathkeys) +{ + List *new_group_pathkeys = NIL, + *new_group_clauses = NIL; + ListCell *lc; + int n; + + if (pathkeys == NIL || *group_pathkeys == NIL) + return 0; + + /* + * Walk the pathkeys (determining ordering of the input path) and see if + * there's a matching GROUP BY key. If we find one, we append it to the + * list, and do the same for the clauses. + * + * Once we find the first pathkey without a matching GROUP BY key, the + * rest of the pathkeys are useless and can't be used to evaluate the + * grouping, so we abort the loop and ignore the remaining pathkeys. + */ + foreach(lc, pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(lc); + SortGroupClause *sgc; + + /* + * Pathkeys are built in a way that allows simply comparing pointers. + * Give up if we can't find the matching pointer. Also give up if + * there is no sortclause reference for some reason. + */ + if (foreach_current_index(lc) >= num_groupby_pathkeys || + !list_member_ptr(*group_pathkeys, pathkey) || + pathkey->pk_eclass->ec_sortref == 0) + break; + + /* + * Since commit 1349d27, a pathkey coming from the underlying node + * may be in root->group_pathkeys but not in processed_groupClause, + * so the lookup below must tolerate a miss. + */ + sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref, + *group_clauses); + if (!sgc) + /* The grouping clause does not cover this pathkey */ + break; + + /* + * Sort group clause should have an ordering operator as long as there + * is an associated pathkey. 
+ */ + Assert(OidIsValid(sgc->sortop)); + + new_group_pathkeys = lappend(new_group_pathkeys, pathkey); + new_group_clauses = lappend(new_group_clauses, sgc); + } + + /* remember the number of pathkeys with a matching GROUP BY key */ + n = list_length(new_group_pathkeys); + + /* append the remaining group pathkeys (will be treated as not sorted) */ + *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys, + *group_pathkeys); + *group_clauses = list_concat_unique_ptr(new_group_clauses, + *group_clauses); + + return n; +} + +/* + * pathkeys_are_duplicate + * Check if the given pathkeys are already contained in the list of + * PathKeyInfo items. + */ +static bool +pathkeys_are_duplicate(List *infos, List *pathkeys) +{ + ListCell *lc; + + foreach(lc, infos) + { + PathKeyInfo *info = lfirst_node(PathKeyInfo, lc); + + if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL) + return true; + } + return false; +} + +/* + * get_useful_group_keys_orderings + * Determine which orderings of GROUP BY keys are potentially interesting. + * + * Returns a list of PathKeyInfo items, each representing an interesting + * ordering of GROUP BY keys. Each item stores pathkeys and clauses in the + * matching order. + * + * The function considers (and keeps) multiple GROUP BY orderings: + * + * - the original ordering, as specified by the GROUP BY clause, + * - GROUP BY keys reordered to match 'path' ordering (as much as possible), + * - GROUP BY keys to match target ORDER BY clause (as much as possible). 
+ */ +List * +get_useful_group_keys_orderings(PlannerInfo *root, Path *path) +{ + Query *parse = root->parse; + List *infos = NIL; + PathKeyInfo *info; + + List *pathkeys = root->group_pathkeys; + List *clauses = root->processed_groupClause; + + /* always return at least the original pathkeys/clauses */ + info = makeNode(PathKeyInfo); + info->pathkeys = pathkeys; + info->clauses = clauses; + infos = lappend(infos, info); + + /* + * Should we try generating alternative orderings of the group keys? If + * not, we produce only the order specified in the query, i.e. the + * optimization is effectively disabled. + */ + if (!enable_group_by_reordering) + return infos; + + /* + * Grouping sets have their own, more complex logic to decide the ordering. + */ + if (parse->groupingSets) + return infos; + + /* + * If the path is sorted in some way, try reordering the group keys to + * match as much of the path ordering as possible. Then, thanks to + * incremental sort, we would get this sort as cheaply as possible. + */ + if (path->pathkeys && + !pathkeys_contained_in(path->pathkeys, root->group_pathkeys)) + { + int n; + + n = group_keys_reorder_by_pathkeys(path->pathkeys, &pathkeys, &clauses, + root->num_groupby_pathkeys); + + if (n > 0 && + (enable_incremental_sort || n == root->num_groupby_pathkeys) && + !pathkeys_are_duplicate(infos, pathkeys)) + { + info = makeNode(PathKeyInfo); + info->pathkeys = pathkeys; + info->clauses = clauses; + + infos = lappend(infos, info); + } + } + + /* + * Try reordering pathkeys to minimize the sort cost (this time consider + * the ORDER BY clause). 
+ */ + if (root->sort_pathkeys && + !pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys)) + { + int n; + + n = group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys, + &clauses, + root->num_groupby_pathkeys); + + if (n > 0 && + (enable_incremental_sort || n == list_length(root->sort_pathkeys)) && + !pathkeys_are_duplicate(infos, pathkeys)) + { + info = makeNode(PathKeyInfo); + info->pathkeys = pathkeys; + info->clauses = clauses; + + infos = lappend(infos, info); + } + } + + return infos; +} + /* * pathkeys_count_contained_in * Same as pathkeys_contained_in, but also sets length of longest @@ -1939,6 +2138,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) return n_common_pathkeys; } +/* + * pathkeys_useful_for_grouping + * Count the number of pathkeys that are useful for grouping (instead of + * explicit sort) + * + * Group pathkeys could be reordered to benefit from the ordering. The + * ordering may not be "complete" and may require incremental sort, but that's + * fine. So we simply count prefix pathkeys with a matching group key, and + * stop once we find the first pathkey without a match. + * + * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b) + * pathkeys are useful for grouping, and we might do incremental sort to get + * path ordered by (a,b,e). + * + * This logic is necessary to retain paths with ordering not matching grouping + * keys directly, without the reordering. + * + * Returns the length of pathkey prefix with matching group keys. 
+ */ +static int +pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys) +{ + ListCell *key; + int n = 0; + + /* no special ordering requested for grouping */ + if (root->group_pathkeys == NIL) + return 0; + + /* unordered path */ + if (pathkeys == NIL) + return 0; + + /* walk the pathkeys and search for matching group key */ + foreach(key, pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(key); + + /* no matching group key, we're done */ + if (!list_member_ptr(root->group_pathkeys, pathkey)) + break; + + n++; + } + + return n; +} + /* * truncate_useless_pathkeys * Shorten the given pathkey list to just the useful pathkeys. @@ -1953,6 +2200,9 @@ truncate_useless_pathkeys(PlannerInfo *root, nuseful = pathkeys_useful_for_merging(root, rel, pathkeys); nuseful2 = pathkeys_useful_for_ordering(root, pathkeys); + if (nuseful2 > nuseful) + nuseful = nuseful2; + nuseful2 = pathkeys_useful_for_grouping(root, pathkeys); if (nuseful2 > nuseful) nuseful = nuseful2; @@ -1988,6 +2238,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel) { if (rel->joininfo != NIL || rel->has_eclass_joins) return true; /* might be able to use pathkeys for merging */ + if (root->group_pathkeys != NIL) + return true; /* might be able to use pathkeys for grouping */ if (root->query_pathkeys != NIL) return true; /* might be able to use them for ordering */ return false; /* definitely useless */ diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 014b179c3f..2e2458b128 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -140,7 +140,7 @@ static double preprocess_limit(PlannerInfo *root, double tuple_fraction, int64 *offset_est, int64 *count_est); static void remove_useless_groupby_columns(PlannerInfo *root); -static List *preprocess_groupclause(PlannerInfo *root, List *force); +static List *groupclause_apply_groupingset(PlannerInfo *root, List *force); static List *extract_rollup_sets(List 
*groupingSets); static List *reorder_grouping_sets(List *groupingSets, List *sortclause); static void standard_qp_callback(PlannerInfo *root, void *extra); @@ -1423,7 +1423,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) else if (parse->groupClause) { /* Preprocess regular GROUP BY clause, if any */ - root->processed_groupClause = preprocess_groupclause(root, NIL); + root->processed_groupClause = list_copy(parse->groupClause); /* Remove any redundant GROUP BY columns */ remove_useless_groupby_columns(root); } @@ -2144,7 +2144,7 @@ preprocess_grouping_sets(PlannerInfo *root) * The groupClauses for hashed grouping sets are built later on.) */ if (gs->set) - rollup->groupClause = preprocess_groupclause(root, gs->set); + rollup->groupClause = groupclause_apply_groupingset(root, gs->set); else rollup->groupClause = NIL; @@ -2796,111 +2796,24 @@ remove_useless_groupby_columns(PlannerInfo *root) } /* - * preprocess_groupclause - do preparatory work on GROUP BY clause - * - * The idea here is to adjust the ordering of the GROUP BY elements - * (which in itself is semantically insignificant) to match ORDER BY, - * thereby allowing a single sort operation to both implement the ORDER BY - * requirement and set up for a Unique step that implements GROUP BY. - * - * In principle it might be interesting to consider other orderings of the - * GROUP BY elements, which could match the sort ordering of other - * possible plans (eg an indexscan) and thereby reduce cost. We don't - * bother with that, though. Hashed grouping will frequently win anyway. - * - * Note: we need no comparable processing of the distinctClause because - * the parser already enforced that that matches ORDER BY. - * - * Note: we return a fresh List, but its elements are the same - * SortGroupClauses appearing in parse->groupClause. This is important - * because later processing may modify the processed_groupClause list. 
- * - * For grouping sets, the order of items is instead forced to agree with that - * of the grouping set (and items not in the grouping set are skipped). The - * work of sorting the order of grouping set elements to match the ORDER BY if - * possible is done elsewhere. + * groupclause_apply_groupingset + * Apply the order of GROUP BY clauses defined by grouping sets. Items + * not in the grouping set are skipped. */ static List * -preprocess_groupclause(PlannerInfo *root, List *force) +groupclause_apply_groupingset(PlannerInfo *root, List *gset) { Query *parse = root->parse; List *new_groupclause = NIL; - bool partial_match; ListCell *sl; - ListCell *gl; - /* For grouping sets, we need to force the ordering */ - if (force) + foreach(sl, gset) { - foreach(sl, force) - { - Index ref = lfirst_int(sl); - SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause); + Index ref = lfirst_int(sl); + SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause); - new_groupclause = lappend(new_groupclause, cl); - } - - return new_groupclause; + new_groupclause = lappend(new_groupclause, cl); } - - /* If no ORDER BY, nothing useful to do here */ - if (parse->sortClause == NIL) - return list_copy(parse->groupClause); - - /* - * Scan the ORDER BY clause and construct a list of matching GROUP BY - * items, but only as far as we can make a matching prefix. - * - * This code assumes that the sortClause contains no duplicate items. - */ - foreach(sl, parse->sortClause) - { - SortGroupClause *sc = lfirst_node(SortGroupClause, sl); - - foreach(gl, parse->groupClause) - { - SortGroupClause *gc = lfirst_node(SortGroupClause, gl); - - if (equal(gc, sc)) - { - new_groupclause = lappend(new_groupclause, gc); - break; - } - } - if (gl == NULL) - break; /* no match, so stop scanning */ - } - - /* Did we match all of the ORDER BY list, or just some of it? 
*/ - partial_match = (sl != NULL); - - /* If no match at all, no point in reordering GROUP BY */ - if (new_groupclause == NIL) - return list_copy(parse->groupClause); - - /* - * Add any remaining GROUP BY items to the new list, but only if we were - * able to make a complete match. In other words, we only rearrange the - * GROUP BY list if the result is that one list is a prefix of the other - * --- otherwise there's no possibility of a common sort. Also, give up - * if there are any non-sortable GROUP BY items, since then there's no - * hope anyway. - */ - foreach(gl, parse->groupClause) - { - SortGroupClause *gc = lfirst_node(SortGroupClause, gl); - - if (list_member_ptr(new_groupclause, gc)) - continue; /* it matched an ORDER BY item */ - if (partial_match) /* give up, no common sort possible */ - return list_copy(parse->groupClause); - if (!OidIsValid(gc->sortop)) /* give up, GROUP BY can't be sorted */ - return list_copy(parse->groupClause); - new_groupclause = lappend(new_groupclause, gc); - } - - /* Success --- install the rearranged GROUP BY list */ - Assert(list_length(parse->groupClause) == list_length(new_groupclause)); return new_groupclause; } @@ -4200,7 +4113,7 @@ consider_groupingsets_paths(PlannerInfo *root, { rollup = makeNode(RollupData); - rollup->groupClause = preprocess_groupclause(root, gset); + rollup->groupClause = groupclause_apply_groupingset(root, gset); rollup->gsets_data = list_make1(gs); rollup->gsets = remap_to_groupclause_idx(rollup->groupClause, rollup->gsets_data, @@ -4389,7 +4302,7 @@ consider_groupingsets_paths(PlannerInfo *root, Assert(gs->set != NIL); - rollup->groupClause = preprocess_groupclause(root, gs->set); + rollup->groupClause = groupclause_apply_groupingset(root, gs->set); rollup->gsets_data = list_make1(gs); rollup->gsets = remap_to_groupclause_idx(rollup->groupClause, rollup->gsets_data, @@ -6891,60 +6804,75 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, */ foreach(lc, input_rel->pathlist) { + 
ListCell *lc2; Path *path = (Path *) lfirst(lc); + Path *path_save = path; + List *pathkey_orderings = NIL; - path = make_ordered_path(root, - grouped_rel, - path, - cheapest_path, - root->group_pathkeys); + /* generate alternative group orderings that might be useful */ + pathkey_orderings = get_useful_group_keys_orderings(root, path); - if (path == NULL) - continue; + Assert(list_length(pathkey_orderings) > 0); - /* Now decide what to stick atop it */ - if (parse->groupingSets) + foreach(lc2, pathkey_orderings) { - consider_groupingsets_paths(root, grouped_rel, - path, true, can_hash, - gd, agg_costs, dNumGroups); - } - else if (parse->hasAggs) - { - /* - * We have aggregation, possibly with plain GROUP BY. Make an - * AggPath. - */ - add_path(grouped_rel, (Path *) - create_agg_path(root, + PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); + + /* restore the path (we replace it in the loop) */ + path = path_save; + + path = make_ordered_path(root, grouped_rel, path, - grouped_rel->reltarget, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_SIMPLE, - root->processed_groupClause, - havingQual, - agg_costs, - dNumGroups)); - } - else if (parse->groupClause) - { - /* - * We have GROUP BY without aggregation or grouping sets. Make - * a GroupPath. - */ - add_path(grouped_rel, (Path *) - create_group_path(root, - grouped_rel, - path, - root->processed_groupClause, - havingQual, - dNumGroups)); - } - else - { - /* Other cases should have been handled above */ - Assert(false); + cheapest_path, + info->pathkeys); + if (path == NULL) + continue; + + /* Now decide what to stick atop it */ + if (parse->groupingSets) + { + consider_groupingsets_paths(root, grouped_rel, + path, true, can_hash, + gd, agg_costs, dNumGroups); + } + else if (parse->hasAggs) + { + /* + * We have aggregation, possibly with plain GROUP BY. Make + * an AggPath. + */ + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? 
AGG_SORTED : AGG_PLAIN, + AGGSPLIT_SIMPLE, + info->clauses, + havingQual, + agg_costs, + dNumGroups)); + } + else if (parse->groupClause) + { + /* + * We have GROUP BY without aggregation or grouping sets. + * Make a GroupPath. + */ + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + info->clauses, + havingQual, + dNumGroups)); + } + else + { + /* Other cases should have been handled above */ + Assert(false); + } } } @@ -6956,38 +6884,55 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, { foreach(lc, partially_grouped_rel->pathlist) { + ListCell *lc2; Path *path = (Path *) lfirst(lc); + Path *path_save = path; + List *pathkey_orderings = NIL; - path = make_ordered_path(root, - grouped_rel, - path, - partially_grouped_rel->cheapest_total_path, - root->group_pathkeys); + /* generate alternative group orderings that might be useful */ + pathkey_orderings = get_useful_group_keys_orderings(root, path); - if (path == NULL) - continue; + Assert(list_length(pathkey_orderings) > 0); - if (parse->hasAggs) - add_path(grouped_rel, (Path *) - create_agg_path(root, + /* process all potentially interesting grouping reorderings */ + foreach(lc2, pathkey_orderings) + { + PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); + + /* restore the path (we replace it in the loop) */ + path = path_save; + + path = make_ordered_path(root, grouped_rel, path, - grouped_rel->reltarget, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_FINAL_DESERIAL, - root->processed_groupClause, - havingQual, - agg_final_costs, - dNumGroups)); - else - add_path(grouped_rel, (Path *) - create_group_path(root, - grouped_rel, - path, - root->processed_groupClause, - havingQual, - dNumGroups)); + partially_grouped_rel->cheapest_total_path, + info->pathkeys); + if (path == NULL) + continue; + + if (parse->hasAggs) + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? 
AGG_SORTED : AGG_PLAIN, + AGGSPLIT_FINAL_DESERIAL, + info->clauses, + havingQual, + agg_final_costs, + dNumGroups)); + else + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + info->clauses, + havingQual, + dNumGroups)); + + } } } } @@ -7190,37 +7135,54 @@ create_partial_grouping_paths(PlannerInfo *root, */ foreach(lc, input_rel->pathlist) { + ListCell *lc2; Path *path = (Path *) lfirst(lc); + Path *path_save = path; + List *pathkey_orderings = NIL; - path = make_ordered_path(root, - partially_grouped_rel, - path, - cheapest_total_path, - root->group_pathkeys); + /* generate alternative group orderings that might be useful */ + pathkey_orderings = get_useful_group_keys_orderings(root, path); - if (path == NULL) - continue; + Assert(list_length(pathkey_orderings) > 0); - if (parse->hasAggs) - add_path(partially_grouped_rel, (Path *) - create_agg_path(root, + /* process all potentially interesting grouping reorderings */ + foreach(lc2, pathkey_orderings) + { + PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); + + /* restore the path (we replace it in the loop) */ + path = path_save; + + path = make_ordered_path(root, partially_grouped_rel, path, - partially_grouped_rel->reltarget, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_INITIAL_SERIAL, - root->processed_groupClause, - NIL, - agg_partial_costs, - dNumPartialGroups)); - else - add_path(partially_grouped_rel, (Path *) - create_group_path(root, - partially_grouped_rel, - path, - root->processed_groupClause, - NIL, - dNumPartialGroups)); + cheapest_total_path, + info->pathkeys); + + if (path == NULL) + continue; + + if (parse->hasAggs) + add_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + parse->groupClause ? 
AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + info->clauses, + NIL, + agg_partial_costs, + dNumPartialGroups)); + else + add_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + info->clauses, + NIL, + dNumPartialGroups)); + } } } @@ -7229,37 +7191,55 @@ create_partial_grouping_paths(PlannerInfo *root, /* Similar to above logic, but for partial paths. */ foreach(lc, input_rel->partial_pathlist) { + ListCell *lc2; Path *path = (Path *) lfirst(lc); + Path *path_save = path; + List *pathkey_orderings = NIL; - path = make_ordered_path(root, - partially_grouped_rel, - path, - cheapest_partial_path, - root->group_pathkeys); + /* generate alternative group orderings that might be useful */ + pathkey_orderings = get_useful_group_keys_orderings(root, path); - if (path == NULL) - continue; + Assert(list_length(pathkey_orderings) > 0); - if (parse->hasAggs) - add_partial_path(partially_grouped_rel, (Path *) - create_agg_path(root, - partially_grouped_rel, - path, - partially_grouped_rel->reltarget, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_INITIAL_SERIAL, - root->processed_groupClause, - NIL, - agg_partial_costs, - dNumPartialPartialGroups)); - else - add_partial_path(partially_grouped_rel, (Path *) - create_group_path(root, - partially_grouped_rel, - path, - root->processed_groupClause, - NIL, - dNumPartialPartialGroups)); + /* process all potentially interesting grouping reorderings */ + foreach(lc2, pathkey_orderings) + { + PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); + + + /* restore the path (we replace it in the loop) */ + path = path_save; + + path = make_ordered_path(root, + partially_grouped_rel, + path, + cheapest_partial_path, + info->pathkeys); + + if (path == NULL) + continue; + + if (parse->hasAggs) + add_partial_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + parse->groupClause ? 
AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + info->clauses, + NIL, + agg_partial_costs, + dNumPartialPartialGroups)); + else + add_partial_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + info->clauses, + NIL, + dNumPartialPartialGroups)); + } } } @@ -7373,6 +7353,8 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel) * We can also skip the entire loop when we only have a single-item * group_pathkeys because then we can't possibly have a presorted prefix * of the list without having the list be fully sorted. + * + * XXX Shouldn't this also consider the group-key-reordering? */ if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1) return; diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index e53ebc6dc2..7fe58518d7 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -1050,6 +1050,16 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enables reordering of GROUP BY keys."), + NULL, + GUC_EXPLAIN + }, + &enable_group_by_reordering, + true, + NULL, NULL, NULL + }, { {"geqo", PGC_USERSET, QUERY_TUNING_GEQO, gettext_noop("Enables genetic query optimization."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 835b0e9ba8..da10b43dac 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -399,6 +399,7 @@ #enable_seqscan = on #enable_sort = on #enable_tidscan = on +#enable_group_by_reordering = on # - Planner Cost Constants - diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index b9713ec9aa..137da178dc 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1456,6 +1456,16 @@ typedef struct PathKey bool pk_nulls_first; /* do NULLs come 
before normal values? */ } PathKey; +/* + * Combines the information about pathkeys and the associated clauses. + */ +typedef struct PathKeyInfo +{ + NodeTag type; + List *pathkeys; + List *clauses; +} PathKeyInfo; + /* * VolatileFunctionStatus -- allows nodes to cache their * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index efd4abc28f..0e8a9c94ba 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -24,6 +24,7 @@ extern PGDLLIMPORT bool enable_geqo; extern PGDLLIMPORT int geqo_threshold; extern PGDLLIMPORT int min_parallel_table_scan_size; extern PGDLLIMPORT int min_parallel_index_scan_size; +extern PGDLLIMPORT bool enable_group_by_reordering; /* Hook for plugins to get control in set_rel_pathlist() */ typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root, @@ -204,6 +205,7 @@ typedef enum extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2); extern bool pathkeys_contained_in(List *keys1, List *keys2); extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common); +extern List *get_useful_group_keys_orderings(PlannerInfo *root, Path *path); extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index f635c5a1af..67dd20f375 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -2728,6 +2728,208 @@ SELECT balk(hundred) FROM tenk1; (1 row) ROLLBACK; +-- GROUP BY optimization by reorder columns +CREATE TABLE btg AS SELECT + i % 100 AS x, + i % 100 AS y, + 'abc' || i % 10 AS z, + i AS w +FROM generate_series(1,10000) AS i; +CREATE INDEX abc ON btg(x,y); +ANALYZE btg; +-- GROUP BY optimization by reorder columns by frequency +SET enable_hashagg=off; +SET max_parallel_workers= 0; +SET 
max_parallel_workers_per_gather = 0; +-- Utilize index scan ordering to avoid a Sort operation +EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y; + QUERY PLAN +---------------------------------------- + GroupAggregate + Group Key: x, y + -> Index Only Scan using abc on btg +(3 rows) + +EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x; + QUERY PLAN +---------------------------------------- + GroupAggregate + Group Key: x, y + -> Index Only Scan using abc on btg +(3 rows) + +-- Engage incremental sort +explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w; + QUERY PLAN +----------------------------------------- + Group + Group Key: x, y, z, w + -> Incremental Sort + Sort Key: x, y, z, w + Presorted Key: x, y + -> Index Scan using abc on btg +(6 rows) + +explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x; + QUERY PLAN +----------------------------------------- + Group + Group Key: x, y, z, w + -> Incremental Sort + Sort Key: x, y, z, w + Presorted Key: x, y + -> Index Scan using abc on btg +(6 rows) + +explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y; + QUERY PLAN +----------------------------------------- + Group + Group Key: x, y, w, z + -> Incremental Sort + Sort Key: x, y, w, z + Presorted Key: x, y + -> Index Scan using abc on btg +(6 rows) + +explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y; + QUERY PLAN +----------------------------------------- + Group + Group Key: x, y, w, z + -> Incremental Sort + Sort Key: x, y, w, z + Presorted Key: x, y + -> Index Scan using abc on btg +(6 rows) + +-- Subqueries +explain (COSTS OFF) SELECT x,y +FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1 +GROUP BY (w,x,z,y); + QUERY PLAN +---------------------------------------------- + Group + Group Key: btg.x, btg.y, btg.w, btg.z + -> Incremental Sort + Sort Key: btg.x, btg.y, btg.w, btg.z + Presorted Key: btg.x, btg.y + -> Index Scan using abc on btg +(6 rows) + +explain (COSTS OFF) SELECT x,y +FROM (SELECT * FROM btg ORDER BY x,y,w,z 
LIMIT 100) AS q1 +GROUP BY (w,x,z,y); + QUERY PLAN +---------------------------------------------------- + Group + Group Key: btg.x, btg.y, btg.w, btg.z + -> Limit + -> Incremental Sort + Sort Key: btg.x, btg.y, btg.w, btg.z + Presorted Key: btg.x, btg.y + -> Index Scan using abc on btg +(7 rows) + +-- Should work with and without GROUP-BY optimization +explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w; + QUERY PLAN +------------------------------ + Group + Group Key: y, x, z, w + -> Sort + Sort Key: y, x, z, w + -> Seq Scan on btg +(5 rows) + +-- Utilize incremental sort to make the ORDER BY rule a bit cheaper +explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z; + QUERY PLAN +----------------------------------------------- + Sort + Sort Key: ((x * x)), z + -> Group + Group Key: x, y, w, z + -> Incremental Sort + Sort Key: x, y, w, z + Presorted Key: x, y + -> Index Scan using abc on btg +(8 rows) + +SET enable_incremental_sort = off; +-- The case when the number of incoming subtree path keys is more than +-- the number of grouping keys. 
+CREATE INDEX idx_y_x_z ON btg(y,x,w); +EXPLAIN (VERBOSE, COSTS OFF) +SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y; + QUERY PLAN +----------------------------------------------------- + GroupAggregate + Output: y, x, array_agg(DISTINCT w) + Group Key: btg.y, btg.x + -> Index Only Scan using idx_y_x_z on public.btg + Output: y, x, w + Index Cond: (btg.y < 0) +(6 rows) + +RESET enable_incremental_sort; +DROP TABLE btg; +-- The case when the scan sort order corresponds to the aggregate sort order but +-- cannot be found in the group-by list +CREATE TABLE t1 (c1 int PRIMARY KEY, c2 int); +CREATE UNIQUE INDEX ON t1(c2); +explain (costs off) +SELECT array_agg(c1 ORDER BY c2),c2 +FROM t1 WHERE c2 < 100 GROUP BY c1 ORDER BY 2; + QUERY PLAN +-------------------------------------------------------- + Sort + Sort Key: c2 + -> GroupAggregate + Group Key: c1 + -> Sort + Sort Key: c1, c2 + -> Bitmap Heap Scan on t1 + Recheck Cond: (c2 < 100) + -> Bitmap Index Scan on t1_c2_idx + Index Cond: (c2 < 100) +(10 rows) + +DROP TABLE t1 CASCADE; +-- Check that the GROUP BY reordering optimization can operate with pathkeys built +-- by the planner itself, for example by a MergeJoin. 
+SET enable_hashjoin = off; +SET enable_nestloop = off; +explain (COSTS OFF) +SELECT c1.relname,c1.relpages +FROM pg_class c1 JOIN pg_class c2 ON (c1.relname=c2.relname AND c1.relpages=c2.relpages) +GROUP BY c1.reltuples,c1.relpages,c1.relname +ORDER BY c1.relpages, c1.relname, c1.relpages*c1.relpages; + QUERY PLAN +--------------------------------------------------------------------------------------------- + Incremental Sort + Sort Key: c1.relpages, c1.relname, ((c1.relpages * c1.relpages)) + Presorted Key: c1.relpages, c1.relname + -> Group + Group Key: c1.relpages, c1.relname, c1.reltuples + -> Incremental Sort + Sort Key: c1.relpages, c1.relname, c1.reltuples + Presorted Key: c1.relpages, c1.relname + -> Merge Join + Merge Cond: ((c1.relpages = c2.relpages) AND (c1.relname = c2.relname)) + -> Sort + Sort Key: c1.relpages, c1.relname + -> Seq Scan on pg_class c1 + -> Sort + Sort Key: c2.relpages, c2.relname + -> Seq Scan on pg_class c2 +(16 rows) + +RESET enable_hashjoin; +RESET enable_nestloop; +RESET enable_hashagg; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; -- Secondly test the case of a parallel aggregate combiner function -- returning NULL. For that use normal transition function, but a -- combiner function returning NULL. diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 271313ebf8..9be7aca2b8 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -114,6 +114,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_async_append | on enable_bitmapscan | on enable_gathermerge | on + enable_group_by_reordering | on enable_hashagg | on enable_hashjoin | on enable_incremental_sort | on @@ -133,7 +134,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_seqscan | on enable_sort | on enable_tidscan | on -(22 rows) +(23 rows) -- There are always wait event descriptions for various types. 
select type, count(*) > 0 as ok FROM pg_wait_events diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index cc8f0efad5..524bdfa67d 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -1181,6 +1181,81 @@ SELECT balk(hundred) FROM tenk1; ROLLBACK; +-- GROUP BY optimization by reorder columns +CREATE TABLE btg AS SELECT + i % 100 AS x, + i % 100 AS y, + 'abc' || i % 10 AS z, + i AS w +FROM generate_series(1,10000) AS i; +CREATE INDEX abc ON btg(x,y); +ANALYZE btg; + +-- GROUP BY optimization by reorder columns by frequency + +SET enable_hashagg=off; +SET max_parallel_workers= 0; +SET max_parallel_workers_per_gather = 0; + +-- Utilize index scan ordering to avoid a Sort operation +EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y; +EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x; + +-- Engage incremental sort +explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w; +explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x; +explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y; +explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y; + +-- Subqueries +explain (COSTS OFF) SELECT x,y +FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1 +GROUP BY (w,x,z,y); +explain (COSTS OFF) SELECT x,y +FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1 +GROUP BY (w,x,z,y); + +-- Should work with and without GROUP-BY optimization +explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w; + +-- Utilize incremental sort to make the ORDER BY rule a bit cheaper +explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z; + +SET enable_incremental_sort = off; +-- The case when the number of incoming subtree path keys is more than +-- the number of grouping keys. 
+CREATE INDEX idx_y_x_z ON btg(y,x,w); +EXPLAIN (VERBOSE, COSTS OFF) +SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y; +RESET enable_incremental_sort; + +DROP TABLE btg; + +-- The case when the scan sort order corresponds to the aggregate sort order but +-- cannot be found in the group-by list +CREATE TABLE t1 (c1 int PRIMARY KEY, c2 int); +CREATE UNIQUE INDEX ON t1(c2); +explain (costs off) +SELECT array_agg(c1 ORDER BY c2),c2 +FROM t1 WHERE c2 < 100 GROUP BY c1 ORDER BY 2; +DROP TABLE t1 CASCADE; + +-- Check that the GROUP BY reordering optimization can operate with pathkeys built +-- by the planner itself, for example by a MergeJoin. +SET enable_hashjoin = off; +SET enable_nestloop = off; +explain (COSTS OFF) +SELECT c1.relname,c1.relpages +FROM pg_class c1 JOIN pg_class c2 ON (c1.relname=c2.relname AND c1.relpages=c2.relpages) +GROUP BY c1.reltuples,c1.relpages,c1.relname +ORDER BY c1.relpages, c1.relname, c1.relpages*c1.relpages; +RESET enable_hashjoin; +RESET enable_nestloop; + +RESET enable_hashagg; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; + -- Secondly test the case of a parallel aggregate combiner function -- returning NULL. For that use normal transition function, but a -- combiner function returning NULL. diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 7e228c5aea..a200e5eb12 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -4045,3 +4045,4 @@ manifest_writer rfile ws_options ws_file_info +PathKeyInfo