diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index f210f91188..2ffc836824 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -2741,16 +2741,13 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i -- GROUP BY clause in various forms, cardinal, alias and constant expression explain (verbose, costs off) select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2; - QUERY PLAN ---------------------------------------------------------------------------------------- - Sort + QUERY PLAN +------------------------------------------------------------------------------------------------------------ + Foreign Scan Output: (count(c2)), c2, 5, 7.0, 9 - Sort Key: ft1.c2 - -> Foreign Scan - Output: (count(c2)), c2, 5, 7.0, 9 - Relations: Aggregate on (public.ft1) - Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 -(7 rows) + Relations: Aggregate on (public.ft1) + Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST +(4 rows) select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2; w | x | y | z diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 9788e831bc..43e4ade83e 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -4967,6 +4967,20 @@ ANY num_sync ( + enable_group_by_reordering (boolean) + + enable_group_by_reordering configuration parameter + + + + + Enables or disables reodering of keys in GROUP BY + clause. The default is on. + + + + enable_hashagg (boolean) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 679d8ed597..ec3c23013a 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1755,6 +1755,322 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm) rterm->pathtarget->width); } +/* + * is_fake_var + * Workaround for generate_append_tlist() which generates fake Vars with + * varno == 0, that will cause a fail of estimate_num_group() call + * + * XXX Ummm, why would estimate_num_group fail with this? + */ +static bool +is_fake_var(Expr *expr) +{ + if (IsA(expr, RelabelType)) + expr = (Expr *) ((RelabelType *) expr)->arg; + + return (IsA(expr, Var) && ((Var *) expr)->varno == 0); +} + +/* + * get_width_cost_multiplier + * Returns relative complexity of comparing two values based on its width. + * The idea behind is that the comparison becomes more expensive the longer the + * value is. Return value is in cpu_operator_cost units. + */ +static double +get_width_cost_multiplier(PlannerInfo *root, Expr *expr) +{ + double width = -1.0; /* fake value */ + + if (IsA(expr, RelabelType)) + expr = (Expr *) ((RelabelType *) expr)->arg; + + /* Try to find actual stat in corresponding relation */ + if (IsA(expr, Var)) + { + Var *var = (Var *) expr; + + if (var->varno > 0 && var->varno < root->simple_rel_array_size) + { + RelOptInfo *rel = root->simple_rel_array[var->varno]; + + if (rel != NULL && + var->varattno >= rel->min_attr && + var->varattno <= rel->max_attr) + { + int ndx = var->varattno - rel->min_attr; + + if (rel->attr_widths[ndx] > 0) + width = rel->attr_widths[ndx]; + } + } + } + + /* Didn't find any actual stats, try using type width instead. */ + if (width < 0.0) + { + Node *node = (Node*) expr; + + width = get_typavgwidth(exprType(node), exprTypmod(node)); + } + + /* + * Values are passed as Datum type, so comparisons can't be cheaper than + * comparing a Datum value. + * + * FIXME I find this reasoning questionable. We may pass int2, and comparing + * it is probably a bit cheaper than comparing a bigint. + */ + if (width <= sizeof(Datum)) + return 1.0; + + /* + * We consider the cost of a comparison not to be directly proportional to + * width of the argument, because widths of the arguments could be slightly + * different (we only know the average width for the whole column). So we + * use log16(width) as an estimate. + */ + return 1.0 + 0.125 * LOG2(width / sizeof(Datum)); +} + +/* + * compute_cpu_sort_cost + * compute CPU cost of sort (i.e. in-memory) + * + * The main thing we need to calculate to estimate sort CPU costs is the number + * of calls to the comparator functions. The difficulty is that for multi-column + * sorts there may be different data types involved (for some of which the calls + * may be much more expensive). Furthermore, columns may have a very different + * number of distinct values - the higher the number, the fewer comparisons will + * be needed for the following columns. + * + * The algorithm is incremental - we add pathkeys one by one, and at each step we + * estimate the number of necessary comparisons (based on the number of distinct + * groups in the current pathkey prefix and the new pathkey), and the comparison + * costs (which is data type specific). + * + * Estimation of the number of comparisons is based on ideas from: + * + * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002 + * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf] + * + * In term of that paper, let N - number of tuples, Xi - number of identical + * tuples with value Ki, then the estimate of number of comparisons is: + * + * log(N! / (X1! * X2! * ..)) ~ sum(Xi * log(N/Xi)) + * + * We assume all Xi the same because now we don't have any estimation of + * group sizes, we have only know the estimate of number of groups (distinct + * values). In that case, formula becomes: + * + * N * log(NumberOfGroups) + * + * For multi-column sorts we need to estimate the number of comparisons for + * each individual column - for example with columns (c1, c2, ..., ck) we + * can estimate that number of comparisons on ck is roughly + * + * ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1)) + * + * Let k be a column number, Gk - number of groups defined by k columns, and Fk + * the cost of the comparison is + * + * N * sum( Fk * log(Gk) ) + * + * Note: We also consider column width, not just the comparator cost. + * + * NOTE: some callers currently pass NIL for pathkeys because they + * can't conveniently supply the sort keys. In this case, it will fallback to + * simple comparison cost estimate. + */ +static Cost +compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, + Cost comparison_cost, double tuples, double output_tuples, + bool heapSort) +{ + Cost per_tuple_cost = 0.0; + ListCell *lc; + List *pathkeyExprs = NIL; + double tuplesPerPrevGroup = tuples; + double totalFuncCost = 1.0; + bool has_fake_var = false; + int i = 0; + Oid prev_datatype = InvalidOid; + List *cache_varinfos = NIL; + + /* fallback if pathkeys is unknown */ + if (list_length(pathkeys) == 0) + { + /* + * If we'll use a bounded heap-sort keeping just K tuples in memory, for + * a total number of tuple comparisons of N log2 K; but the constant + * factor is a bit higher than for quicksort. Tweak it so that the cost + * curve is continuous at the crossover point. + */ + output_tuples = (heapSort) ? 2.0 * output_tuples : tuples; + per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples); + + /* add cost provided by caller */ + per_tuple_cost += comparison_cost; + + return per_tuple_cost * tuples; + } + + /* + * Computing total cost of sorting takes into account: + * - per column comparison function cost + * - we try to compute needed number of comparison per column + */ + foreach(lc, pathkeys) + { + PathKey *pathkey = (PathKey*) lfirst(lc); + EquivalenceMember *em; + double nGroups, + correctedNGroups; + Cost funcCost = 1.0; + + /* + * We believe that equivalence members aren't very different, so, to + * estimate cost we consider just the first member. + */ + em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members); + + if (em->em_datatype != InvalidOid) + { + /* do not lookup funcCost if the data type is the same */ + if (prev_datatype != em->em_datatype) + { + Oid sortop; + QualCost cost; + + sortop = get_opfamily_member(pathkey->pk_opfamily, + em->em_datatype, em->em_datatype, + pathkey->pk_strategy); + + cost.startup = 0; + cost.per_tuple = 0; + add_function_cost(root, get_opcode(sortop), NULL, &cost); + + /* + * add_function_cost returns the product of cpu_operator_cost + * and procost, but we need just procost, co undo that. + */ + funcCost = cost.per_tuple / cpu_operator_cost; + + prev_datatype = em->em_datatype; + } + } + + /* factor in the width of the values in this column */ + funcCost *= get_width_cost_multiplier(root, em->em_expr); + + /* now we have per-key cost, so add to the running total */ + totalFuncCost += funcCost; + + /* remember if we have found a fake Var in pathkeys */ + has_fake_var |= is_fake_var(em->em_expr); + pathkeyExprs = lappend(pathkeyExprs, em->em_expr); + + /* + * We need to calculate the number of comparisons for this column, which + * requires knowing the group size. So we estimate the number of groups + * by calling estimate_num_groups_incremental(), which estimates the + * group size for "new" pathkeys. + * + * Note: estimate_num_groups_incremntal does not handle fake Vars, so use + * a default estimate otherwise. + */ + if (!has_fake_var) + nGroups = estimate_num_groups_incremental(root, pathkeyExprs, + tuplesPerPrevGroup, NULL, NULL, + &cache_varinfos, + list_length(pathkeyExprs) - 1); + else if (tuples > 4.0) + /* + * Use geometric mean as estimation if there are no stats. + * + * We don't use DEFAULT_NUM_DISTINCT here, because that’s used for + * a single column, but here we’re dealing with multiple columns. + */ + nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys)); + else + nGroups = tuples; + + /* + * Presorted keys are not considered in the cost above, but we still do + * have to compare them in the qsort comparator. So make sure to factor + * in the cost in that case. + */ + if (i >= nPresortedKeys) + { + if (heapSort) + { + /* have to keep at least one group, and a multiple of group size */ + correctedNGroups = ceil(output_tuples / tuplesPerPrevGroup); + } + else + /* all groups in the input */ + correctedNGroups = nGroups; + + correctedNGroups = Max(1.0, ceil(correctedNGroups)); + + per_tuple_cost += totalFuncCost * LOG2(correctedNGroups); + } + + i++; + + /* + * Uniform distributions with all groups being of the same size are the + * best case, with nice smooth behavior. Real-world distributions tend + * not to be uniform, though, and we don’t have any reliable easy-to-use + * information. As a basic defense against skewed distributions, we use + * a 1.5 factor to make the expected group a bit larger, but we need to + * be careful not to make the group larger than in the preceding step. + */ + tuplesPerPrevGroup = Min(tuplesPerPrevGroup, + ceil(1.5 * tuplesPerPrevGroup / nGroups)); + + /* + * Once we get single-row group, it means tuples in the group are unique + * and we can skip all remaining columns. + */ + if (tuplesPerPrevGroup <= 1.0) + break; + } + + list_free(pathkeyExprs); + + /* per_tuple_cost is in cpu_operator_cost units */ + per_tuple_cost *= cpu_operator_cost; + + /* + * Accordingly to "Introduction to algorithms", Thomas H. Cormen, Charles E. + * Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0, quicksort estimation + * formula has additional term proportional to number of tuples (See Chapter + * 8.2 and Theorem 4.1). That affects cases with a low number of tuples, + * approximately less than 1e4. We could implement it as an additional + * multiplier under the logarithm, but we use a bit more complex formula + * which takes into account the number of unique tuples and it’s not clear + * how to combine the multiplier with the number of groups. Estimate it as + * 10 in cpu_operator_cost unit. + */ + per_tuple_cost += 10 * cpu_operator_cost; + + per_tuple_cost += comparison_cost; + + return tuples * per_tuple_cost; +} + +/* + * simple wrapper just to estimate best sort path + */ +Cost +cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys, + double tuples) +{ + return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys, + 0, tuples, tuples, false); +} + /* * cost_tuplesort * Determines and returns the cost of sorting a relation using tuplesort, @@ -1771,7 +2087,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm) * number of initial runs formed and M is the merge order used by tuplesort.c. * Since the average initial run should be about sort_mem, we have * disk traffic = 2 * relsize * ceil(logM(p / sort_mem)) - * cpu = comparison_cost * t * log2(t) + * and cpu cost (computed by compute_cpu_sort_cost()). * * If the sort is bounded (i.e., only the first k result tuples are needed) * and k tuples can fit into sort_mem, we use a heap method that keeps only @@ -1790,9 +2106,11 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm) * 'comparison_cost' is the extra cost per comparison, if any * 'sort_mem' is the number of kilobytes of work memory allowed for the sort * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound + * 'startup_cost' is expected to be 0 at input. If there is "input cost" it should + * be added by caller later */ static void -cost_tuplesort(Cost *startup_cost, Cost *run_cost, +cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost, double tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples) @@ -1809,9 +2127,6 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost, if (tuples < 2.0) tuples = 2.0; - /* Include the default cost-per-comparison */ - comparison_cost += 2.0 * cpu_operator_cost; - /* Do we have a useful LIMIT? */ if (limit_tuples > 0 && limit_tuples < tuples) { @@ -1835,12 +2150,10 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost, double log_runs; double npageaccesses; - /* - * CPU costs - * - * Assume about N log2 N comparisons - */ - *startup_cost = comparison_cost * tuples * LOG2(tuples); + /* CPU costs */ + *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0, + comparison_cost, tuples, + tuples, false); /* Disk costs */ @@ -1856,18 +2169,17 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost, } else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes) { - /* - * We'll use a bounded heap-sort keeping just K tuples in memory, for - * a total number of tuple comparisons of N log2 K; but the constant - * factor is a bit higher than for quicksort. Tweak it so that the - * cost curve is continuous at the crossover point. - */ - *startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples); + /* We'll use a bounded heap-sort keeping just K tuples in memory. */ + *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0, + comparison_cost, tuples, + output_tuples, true); } else { /* We'll use plain quicksort on all the input tuples */ - *startup_cost = comparison_cost * tuples * LOG2(tuples); + *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0, + comparison_cost, tuples, + tuples, false); } /* @@ -1900,8 +2212,8 @@ cost_incremental_sort(Path *path, double input_tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples) { - Cost startup_cost = 0, - run_cost = 0, + Cost startup_cost, + run_cost, input_run_cost = input_total_cost - input_startup_cost; double group_tuples, input_groups; @@ -1986,7 +2298,7 @@ cost_incremental_sort(Path *path, * pessimistic about incremental sort performance and increase its average * group size by half. */ - cost_tuplesort(&group_startup_cost, &group_run_cost, + cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost, 1.5 * group_tuples, width, comparison_cost, sort_mem, limit_tuples); @@ -1994,7 +2306,7 @@ cost_incremental_sort(Path *path, * Startup cost of incremental sort is the startup cost of its first group * plus the cost of its input. */ - startup_cost += group_startup_cost + startup_cost = group_startup_cost + input_startup_cost + group_input_run_cost; /* @@ -2003,7 +2315,7 @@ cost_incremental_sort(Path *path, * group, plus the total cost to process the remaining groups, plus the * remaining cost of input. */ - run_cost += group_run_cost + run_cost = group_run_cost + (group_run_cost + group_startup_cost) * (input_groups - 1) + group_input_run_cost * (input_groups - 1); @@ -2043,7 +2355,7 @@ cost_sort(Path *path, PlannerInfo *root, Cost startup_cost; Cost run_cost; - cost_tuplesort(&startup_cost, &run_cost, + cost_tuplesort(root, pathkeys, &startup_cost, &run_cost, tuples, width, comparison_cost, sort_mem, limit_tuples); @@ -2141,7 +2453,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers) * Determines and returns the cost of an Append node. */ void -cost_append(AppendPath *apath) +cost_append(AppendPath *apath, PlannerInfo *root) { ListCell *l; @@ -2209,7 +2521,7 @@ cost_append(AppendPath *apath) * any child. */ cost_sort(&sort_path, - NULL, /* doesn't currently need root */ + root, pathkeys, subpath->total_cost, subpath->rows, diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 8c6770de97..258302840f 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -681,7 +681,18 @@ get_eclass_for_sort_expr(PlannerInfo *root, if (opcintype == cur_em->em_datatype && equal(expr, cur_em->em_expr)) - return cur_ec; /* Match! */ + { + /* + * Match! + * + * Copy the sortref if it wasn't set yet. That may happen if the + * ec was constructed from WHERE clause, i.e. it doesn't have a + * target reference at all. + */ + if (cur_ec->ec_sortref == 0 && sortref > 0) + cur_ec->ec_sortref = sortref; + return cur_ec; + } } } diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 86a35cdef1..75fe03fd04 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -17,17 +17,22 @@ */ #include "postgres.h" +#include "miscadmin.h" #include "access/stratnum.h" #include "catalog/pg_opfamily.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "nodes/plannodes.h" +#include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "partitioning/partbounds.h" #include "utils/lsyscache.h" +#include "utils/selfuncs.h" +/* Consider reordering of GROUP BY keys? */ +bool enable_group_by_reordering = true; static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); static bool matches_boolean_partition_clause(RestrictInfo *rinfo, @@ -334,6 +339,517 @@ pathkeys_contained_in(List *keys1, List *keys2) return false; } +/* + * group_keys_reorder_by_pathkeys + * Reorder GROUP BY keys to match pathkeys of input path. + * + * Function returns new lists (pathkeys and clauses), original GROUP BY lists + * stay untouched. + * + * Returns the number of GROUP BY keys with a matching pathkey. + */ +int +group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys, + List **group_clauses) +{ + List *new_group_pathkeys= NIL, + *new_group_clauses = NIL; + ListCell *lc; + int n; + + if (pathkeys == NIL || *group_pathkeys == NIL) + return 0; + + /* + * Walk the pathkeys (determining ordering of the input path) and see if + * there's a matching GROUP BY key. If we find one, we append it to the + * list, and do the same for the clauses. + * + * Once we find the first pathkey without a matching GROUP BY key, the rest + * of the pathkeys are useless and can't be used to evaluate the grouping, + * so we abort the loop and ignore the remaining pathkeys. + * + * XXX Pathkeys are built in a way to allow simply comparing pointers. + */ + foreach(lc, pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(lc); + SortGroupClause *sgc; + + /* abort on first mismatch */ + if (!list_member_ptr(*group_pathkeys, pathkey)) + break; + + new_group_pathkeys = lappend(new_group_pathkeys, pathkey); + + sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref, + *group_clauses); + + new_group_clauses = lappend(new_group_clauses, sgc); + } + + /* remember the number of pathkeys with a matching GROUP BY key */ + n = list_length(new_group_pathkeys); + + /* append the remaining group pathkeys (will be treated as not sorted) */ + *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys, + *group_pathkeys); + *group_clauses = list_concat_unique_ptr(new_group_clauses, + *group_clauses); + + return n; +} + +/* + * Used to generate all permutations of a pathkey list. + */ +typedef struct PathkeyMutatorState { + List *elemsList; + ListCell **elemCells; + void **elems; + int *positions; + int mutatorNColumns; + int count; +} PathkeyMutatorState; + + +/* + * PathkeyMutatorInit + * Initialize state of the permutation generator. + * + * We want to generate permutations of elements in the "elems" list. We may want + * to skip some number of elements at the beginning (when treating as presorted) + * or at the end (we only permute a limited number of group keys). + * + * The list is decomposed into elements, and we also keep pointers to individual + * cells. This allows us to build the permuted list quickly and cheaply, without + * creating any copies. + */ +static void +PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end) +{ + int i; + int n = end - start; + ListCell *lc; + + memset(state, 0, sizeof(*state)); + + state->mutatorNColumns = n; + + state->elemsList = list_copy(elems); + + state->elems = palloc(sizeof(void*) * n); + state->elemCells = palloc(sizeof(ListCell*) * n); + state->positions = palloc(sizeof(int) * n); + + i = 0; + for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start)) + { + state->elemCells[i] = lc; + state->elems[i] = lfirst(lc); + state->positions[i] = i + 1; + i++; + + if (i >= n) + break; + } +} + +/* Swap two elements of an array. */ +static void +PathkeyMutatorSwap(int *a, int i, int j) +{ + int s = a[i]; + + a[i] = a[j]; + a[j] = s; +} + +/* + * Generate the next permutation of elements. + */ +static bool +PathkeyMutatorNextSet(int *a, int n) +{ + int j, k, l, r; + + j = n - 2; + + while (j >= 0 && a[j] >= a[j + 1]) + j--; + + if (j < 0) + return false; + + k = n - 1; + + while (k >= 0 && a[j] >= a[k]) + k--; + + PathkeyMutatorSwap(a, j, k); + + l = j + 1; + r = n - 1; + + while (l < r) + PathkeyMutatorSwap(a, l++, r--); + + return true; +} + +/* + * PathkeyMutatorNext + * Generate the next permutation of list of elements. + * + * Returns the next permutation (as a list of elements) or NIL if there are no + * more permutations. + */ +static List * +PathkeyMutatorNext(PathkeyMutatorState *state) +{ + int i; + + state->count++; + + /* first permutation is original list */ + if (state->count == 1) + return state->elemsList; + + /* when there are no more permutations, return NIL */ + if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns)) + { + pfree(state->elems); + pfree(state->elemCells); + pfree(state->positions); + + list_free(state->elemsList); + + return NIL; + } + + /* update the list cells to point to the right elements */ + for(i = 0; i < state->mutatorNColumns; i++) + lfirst(state->elemCells[i]) = + (void *) state->elems[ state->positions[i] - 1 ]; + + return state->elemsList; +} + +/* + * Cost of comparing pathkeys. + */ +typedef struct PathkeySortCost +{ + Cost cost; + PathKey *pathkey; +} PathkeySortCost; + +static int +pathkey_sort_cost_comparator(const void *_a, const void *_b) +{ + const PathkeySortCost *a = (PathkeySortCost *) _a; + const PathkeySortCost *b = (PathkeySortCost *) _b; + + if (a->cost < b->cost) + return -1; + else if (a->cost == b->cost) + return 0; + return 1; +} + +/* + * get_cheapest_group_keys_order + * Reorders the group pathkeys/clauses to minimize the comparison cost. + * + * Given a list of pathkeys, we try to reorder them in a way that minimizes + * the CPU cost of sorting. This depends mainly on the cost of comparator + * function (the pathkeys may use different data types) and the number of + * distinct values in each column (which affects the number of comparator + * calls for the following pathkeys). + * + * In case the input is partially sorted, only the remaining pathkeys are + * considered. + * + * Returns true if the keys/clauses have been reordered (or might have been), + * and a new list is returned through an argument. The list is a new copy + * and may be freed using list_free. + * + * Returns false if no reordering was possible. + */ +static bool +get_cheapest_group_keys_order(PlannerInfo *root, double nrows, + List **group_pathkeys, List **group_clauses, + int n_preordered) +{ + List *new_group_pathkeys = NIL, + *new_group_clauses = NIL, + *var_group_pathkeys; + + ListCell *cell; + PathkeyMutatorState mstate; + double cheapest_sort_cost = -1.0; + + int nFreeKeys; + int nToPermute; + + /* If there are less than 2 unsorted pathkeys, we're done. */ + if (list_length(*group_pathkeys) - n_preordered < 2) + return false; + + /* + * We could exhaustively cost all possible orderings of the pathkeys, but for + * a large number of pathkeys it might be prohibitively expensive. So we try + * to apply simple cheap heuristics first - we sort the pathkeys by sort cost + * (as if the pathkey was sorted independently) and then check only the four + * cheapest pathkeys. The remaining pathkeys are kept ordered by cost. + * + * XXX This is a very simple heuristics, but likely to work fine for most + * cases (because the number of GROUP BY clauses tends to be lower than 4). + * But it ignores how the number of distinct values in each pathkey affects + * the following steps. It might be better to use "more expensive" pathkey + * first if it has many distinct values, because it then limits the number + * of comparisons for the remaining pathkeys. But evaluating that is likely + * quite the expensive. + */ + nFreeKeys = list_length(*group_pathkeys) - n_preordered; + nToPermute = 4; + if (nFreeKeys > nToPermute) + { + int i; + PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys); + + /* skip the pre-ordered pathkeys */ + cell = list_nth_cell(*group_pathkeys, n_preordered); + + /* estimate cost for sorting individual pathkeys */ + for (i = 0; cell != NULL; i++, (cell = lnext(*group_pathkeys, cell))) + { + List *to_cost = list_make1(lfirst(cell)); + + Assert(i < nFreeKeys); + + costs[i].pathkey = lfirst(cell); + costs[i].cost = cost_sort_estimate(root, to_cost, 0, nrows); + + pfree(to_cost); + } + + /* sort the pathkeys by sort cost in ascending order */ + qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator); + + /* + * Rebuild the list of pathkeys - first the preordered ones, then the + * rest ordered by cost. + */ + new_group_pathkeys = list_truncate(list_copy(*group_pathkeys), n_preordered); + + for (i = 0; i < nFreeKeys; i++) + new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey); + + pfree(costs); + } + else + { + /* Copy the list, so that we can free the new list by list_free. */ + new_group_pathkeys = list_copy(*group_pathkeys); + nToPermute = nFreeKeys; + } + + Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys)); + + /* + * Generate pathkey lists with permutations of the first nToPermute pathkeys. + * + * XXX We simply calculate sort cost for each individual pathkey list, but + * there's room for two dynamic programming optimizations here. Firstly, we + * may pass the current "best" cost to cost_sort_estimate so that it can + * "abort" if the estimated pathkeys list exceeds it. Secondly, it could pass + * the return information about the position when it exceeded the cost, and + * we could skip all permutations with the same prefix. + * + * Imagine we've already found ordering with cost C1, and we're evaluating + * another ordering - cost_sort_estimate() calculates cost by adding the + * pathkeys one by one (more or less), and the cost only grows. If at any + * point it exceeds C1, it can't possibly be "better" so we can discard it. + * But we also know that we can discard all ordering with the same prefix, + * because if we're estimating (a,b,c,d) and we exceed C1 at (a,b) then the + * same thing will happen for any ordering with this prefix. + */ + PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute); + + while((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL) + { + Cost cost; + + cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows); + + if (cost < cheapest_sort_cost || cheapest_sort_cost < 0) + { + list_free(new_group_pathkeys); + new_group_pathkeys = list_copy(var_group_pathkeys); + cheapest_sort_cost = cost; + } + } + + /* Reorder the group clauses according to the reordered pathkeys. */ + foreach(cell, new_group_pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(cell); + + new_group_clauses = lappend(new_group_clauses, + get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref, + *group_clauses)); + } + + /* Just append the rest GROUP BY clauses */ + new_group_clauses = list_concat_unique_ptr(new_group_clauses, + *group_clauses); + + *group_pathkeys = new_group_pathkeys; + *group_clauses = new_group_clauses; + + return true; +} + +/* + * get_useful_group_keys_orderings + * Determine which orderings of GROUP BY keys are potentially interesting. + * + * Returns list of PathKeyInfo items, each representing an interesting ordering + * of GROUP BY keys. Each item stores pathkeys and clauses in matching order. + * + * The function considers (and keeps) multiple group by orderings: + * + * - the original ordering, as specified by the GROUP BY clause + * + * - GROUP BY keys reordered to minimize the sort cost + * + * - GROUP BY keys reordered to match path ordering (as much as possible), with + * the tail reordered to minimize the sort cost + * + * - GROUP BY keys to match target ORDER BY clause (as much as possible), with + * the tail reordered to minimize the sort cost + * + * There are other potentially interesting orderings (e.g. it might be best to + * match the first ORDER BY key, order the remaining keys differently and then + * rely on the incremental sort to fix this), but we ignore those for now. To + * make this work we'd have to pretty much generate all possible permutations. + */ +List * +get_useful_group_keys_orderings(PlannerInfo *root, double nrows, + List *path_pathkeys, + List *group_pathkeys, List *group_clauses) +{ + Query *parse = root->parse; + List *infos = NIL; + PathKeyInfo *info; + int n_preordered = 0; + + List *pathkeys = group_pathkeys; + List *clauses = group_clauses; + + /* always return at least the original pathkeys/clauses */ + info = makeNode(PathKeyInfo); + info->pathkeys = pathkeys; + info->clauses = clauses; + + infos = lappend(infos, info); + + /* + * Should we try generating alternative orderings of the group keys? If not, + * we produce only the order specified in the query, i.e. the optimization + * is effectively disabled. + */ + if (!enable_group_by_reordering) + return infos; + + /* for grouping sets we can't do any reordering */ + if (parse->groupingSets) + return infos; + + /* + * Try reordering pathkeys to minimize the sort cost, ignoring both the + * target ordering (ORDER BY) and ordering of the input path. + */ + if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses, + n_preordered)) + { + info = makeNode(PathKeyInfo); + info->pathkeys = pathkeys; + info->clauses = clauses; + + infos = lappend(infos, info); + } + + /* + * If the path is sorted in some way, try reordering the group keys to match + * as much of the ordering as possible - we get this sort for free (mostly). + * + * We must not do this when there are no grouping sets, because those use + * more complex logic to decide the ordering. + * + * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's + * more a complement, because it allows benefiting from incremental sort + * as much as possible. + * + * XXX This does nothing if (n_preordered == 0). We shouldn't create the + * info in this case. + */ + if (path_pathkeys) + { + n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys, + &pathkeys, + &clauses); + + /* reorder the tail to minimize sort cost */ + get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses, + n_preordered); + + /* + * reorder the tail to minimize sort cost + * + * XXX Ignore the return value - there may be nothing to reorder, in + * which case get_cheapest_group_keys_order returns false. But we + * still want to keep the keys reordered to path_pathkeys. + */ + info = makeNode(PathKeyInfo); + info->pathkeys = pathkeys; + info->clauses = clauses; + + infos = lappend(infos, info); + } + + /* + * Try reordering pathkeys to minimize the sort cost (this time consider + * the ORDER BY clause, but only if set debug_group_by_match_order_by). + */ + if (root->sort_pathkeys) + { + n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys, + &pathkeys, + &clauses); + + /* + * reorder the tail to minimize sort cost + * + * XXX Ignore the return value - there may be nothing to reorder, in + * which case get_cheapest_group_keys_order returns false. But we + * still want to keep the keys reordered to sort_pathkeys. + */ + get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses, + n_preordered); + + /* keep the group keys reordered to match ordering of input path */ + info = makeNode(PathKeyInfo); + info->pathkeys = pathkeys; + info->clauses = clauses; + + infos = lappend(infos, info); + } + + return infos; +} + /* * pathkeys_count_contained_in * Same as pathkeys_contained_in, but also sets length of longest @@ -1862,6 +2378,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) return n_common_pathkeys; } +/* + * pathkeys_useful_for_grouping + * Count the number of pathkeys that are useful for grouping (instead of + * explicit sort) + * + * Group pathkeys could be reordered to benefit from the odering. The ordering + * may not be "complete" and may require incremental sort, but that's fine. So + * we simply count prefix pathkeys with a matching group key, and stop once we + * find the first pathkey without a match. + * + * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b) + * pathkeys are useful for grouping, and we might do incremental sort to get + * path ordered by (a,b,e). + * + * This logic is necessary to retain paths with ordeding not matching grouping + * keys directly, without the reordering. + * + * Returns the length of pathkey prefix with matching group keys. + */ +static int +pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys) +{ + ListCell *key; + int n = 0; + + /* no special ordering requested for grouping */ + if (root->group_pathkeys == NIL) + return 0; + + /* unordered path */ + if (pathkeys == NIL) + return 0; + + /* walk the pathkeys and search for matching group key */ + foreach(key, pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(key); + + /* no matching group key, we're done */ + if (!list_member_ptr(root->group_pathkeys, pathkey)) + break; + + n++; + } + + return n; +} + /* * truncate_useless_pathkeys * Shorten the given pathkey list to just the useful pathkeys. @@ -1876,6 +2440,9 @@ truncate_useless_pathkeys(PlannerInfo *root, nuseful = pathkeys_useful_for_merging(root, rel, pathkeys); nuseful2 = pathkeys_useful_for_ordering(root, pathkeys); + if (nuseful2 > nuseful) + nuseful = nuseful2; + nuseful2 = pathkeys_useful_for_grouping(root, pathkeys); if (nuseful2 > nuseful) nuseful = nuseful2; @@ -1911,6 +2478,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel) { if (rel->joininfo != NIL || rel->has_eclass_joins) return true; /* might be able to use pathkeys for merging */ + if (root->group_pathkeys != NIL) + return true; /* might be able to use pathkeys for grouping */ if (root->query_pathkeys != NIL) return true; /* might be able to use them for ordering */ return false; /* definitely useless */ diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 547fda20a2..b2569c5d0c 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -6218,24 +6218,121 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, */ foreach(lc, input_rel->pathlist) { + ListCell *lc2; Path *path = (Path *) lfirst(lc); Path *path_original = path; - bool is_sorted; - int presorted_keys; - is_sorted = pathkeys_count_contained_in(root->group_pathkeys, - path->pathkeys, - &presorted_keys); + List *pathkey_orderings = NIL; - if (path == cheapest_path || is_sorted) + List *group_pathkeys = root->group_pathkeys; + List *group_clauses = parse->groupClause; + + /* generate alternative group orderings that might be useful */ + pathkey_orderings = get_useful_group_keys_orderings(root, + path->rows, + path->pathkeys, + group_pathkeys, + group_clauses); + + Assert(list_length(pathkey_orderings) > 0); + + /* process all potentially interesting grouping reorderings */ + foreach (lc2, pathkey_orderings) { - /* Sort the cheapest-total path if it isn't already sorted */ - if (!is_sorted) - path = (Path *) create_sort_path(root, - grouped_rel, - path, - root->group_pathkeys, - -1.0); + bool is_sorted; + int presorted_keys = 0; + PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); + + /* restore the path (we replace it in the loop) */ + path = path_original; + + is_sorted = pathkeys_count_contained_in(info->pathkeys, + path->pathkeys, + &presorted_keys); + + if (path == cheapest_path || is_sorted) + { + /* Sort the cheapest-total path if it isn't already sorted */ + if (!is_sorted) + path = (Path *) create_sort_path(root, + grouped_rel, + path, + info->pathkeys, + -1.0); + + /* Now decide what to stick atop it */ + if (parse->groupingSets) + { + consider_groupingsets_paths(root, grouped_rel, + path, true, can_hash, + gd, agg_costs, dNumGroups); + } + else if (parse->hasAggs) + { + /* + * We have aggregation, possibly with plain GROUP BY. Make + * an AggPath. + */ + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + info->clauses ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_SIMPLE, + info->clauses, + havingQual, + agg_costs, + dNumGroups)); + } + else if (group_clauses) + { + /* + * We have GROUP BY without aggregation or grouping sets. + * Make a GroupPath. + */ + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + info->clauses, + havingQual, + dNumGroups)); + } + else + { + /* Other cases should have been handled above */ + Assert(false); + } + } + + /* + * Now we may consider incremental sort on this path, but only + * when the path is not already sorted and when incremental sort + * is enabled. + */ + if (is_sorted || !enable_incremental_sort) + continue; + + /* Restore the input path (we might have added Sort on top). */ + path = path_original; + + /* no shared prefix, no point in building incremental sort */ + if (presorted_keys == 0) + continue; + + /* + * We should have already excluded pathkeys of length 1 because + * then presorted_keys > 0 would imply is_sorted was true. + */ + Assert(list_length(root->group_pathkeys) != 1); + + path = (Path *) create_incremental_sort_path(root, + grouped_rel, + path, + info->pathkeys, + presorted_keys, + -1.0); /* Now decide what to stick atop it */ if (parse->groupingSets) @@ -6247,17 +6344,17 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, else if (parse->hasAggs) { /* - * We have aggregation, possibly with plain GROUP BY. Make - * an AggPath. + * We have aggregation, possibly with plain GROUP BY. Make an + * AggPath. */ add_path(grouped_rel, (Path *) create_agg_path(root, grouped_rel, path, grouped_rel->reltarget, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, + info->clauses ? AGG_SORTED : AGG_PLAIN, AGGSPLIT_SIMPLE, - parse->groupClause, + info->clauses, havingQual, agg_costs, dNumGroups)); @@ -6265,14 +6362,14 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, else if (parse->groupClause) { /* - * We have GROUP BY without aggregation or grouping sets. - * Make a GroupPath. + * We have GROUP BY without aggregation or grouping sets. Make + * a GroupPath. */ add_path(grouped_rel, (Path *) create_group_path(root, grouped_rel, path, - parse->groupClause, + info->clauses, havingQual, dNumGroups)); } @@ -6282,79 +6379,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, Assert(false); } } - - /* - * Now we may consider incremental sort on this path, but only - * when the path is not already sorted and when incremental sort - * is enabled. - */ - if (is_sorted || !enable_incremental_sort) - continue; - - /* Restore the input path (we might have added Sort on top). */ - path = path_original; - - /* no shared prefix, no point in building incremental sort */ - if (presorted_keys == 0) - continue; - - /* - * We should have already excluded pathkeys of length 1 because - * then presorted_keys > 0 would imply is_sorted was true. - */ - Assert(list_length(root->group_pathkeys) != 1); - - path = (Path *) create_incremental_sort_path(root, - grouped_rel, - path, - root->group_pathkeys, - presorted_keys, - -1.0); - - /* Now decide what to stick atop it */ - if (parse->groupingSets) - { - consider_groupingsets_paths(root, grouped_rel, - path, true, can_hash, - gd, agg_costs, dNumGroups); - } - else if (parse->hasAggs) - { - /* - * We have aggregation, possibly with plain GROUP BY. Make an - * AggPath. - */ - add_path(grouped_rel, (Path *) - create_agg_path(root, - grouped_rel, - path, - grouped_rel->reltarget, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_SIMPLE, - parse->groupClause, - havingQual, - agg_costs, - dNumGroups)); - } - else if (parse->groupClause) - { - /* - * We have GROUP BY without aggregation or grouping sets. Make - * a GroupPath. - */ - add_path(grouped_rel, (Path *) - create_group_path(root, - grouped_rel, - path, - parse->groupClause, - havingQual, - dNumGroups)); - } - else - { - /* Other cases should have been handled above */ - Assert(false); - } } /* @@ -6365,100 +6389,124 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, { foreach(lc, partially_grouped_rel->pathlist) { + ListCell *lc2; Path *path = (Path *) lfirst(lc); Path *path_original = path; - bool is_sorted; - int presorted_keys; - is_sorted = pathkeys_count_contained_in(root->group_pathkeys, - path->pathkeys, - &presorted_keys); + List *pathkey_orderings = NIL; - /* - * Insert a Sort node, if required. But there's no point in - * sorting anything but the cheapest path. - */ - if (!is_sorted) + List *group_pathkeys = root->group_pathkeys; + List *group_clauses = parse->groupClause; + + /* generate alternative group orderings that might be useful */ + pathkey_orderings = get_useful_group_keys_orderings(root, + path->rows, + path->pathkeys, + group_pathkeys, + group_clauses); + + Assert(list_length(pathkey_orderings) > 0); + + /* process all potentially interesting grouping reorderings */ + foreach (lc2, pathkey_orderings) { - if (path != partially_grouped_rel->cheapest_total_path) + bool is_sorted; + int presorted_keys = 0; + PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); + + /* restore the path (we replace it in the loop) */ + path = path_original; + + is_sorted = pathkeys_count_contained_in(info->pathkeys, + path->pathkeys, + &presorted_keys); + + /* + * Insert a Sort node, if required. But there's no point in + * sorting anything but the cheapest path. + */ + if (!is_sorted) + { + if (path != partially_grouped_rel->cheapest_total_path) + continue; + path = (Path *) create_sort_path(root, + grouped_rel, + path, + info->pathkeys, + -1.0); + } + + if (parse->hasAggs) + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + info->clauses ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_FINAL_DESERIAL, + info->clauses, + havingQual, + agg_final_costs, + dNumGroups)); + else + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + info->clauses, + havingQual, + dNumGroups)); + + /* + * Now we may consider incremental sort on this path, but only + * when the path is not already sorted and when incremental + * sort is enabled. + */ + if (is_sorted || !enable_incremental_sort) continue; - path = (Path *) create_sort_path(root, - grouped_rel, - path, - root->group_pathkeys, - -1.0); + + /* Restore the input path (we might have added Sort on top). */ + path = path_original; + + /* no shared prefix, not point in building incremental sort */ + if (presorted_keys == 0) + continue; + + /* + * We should have already excluded pathkeys of length 1 + * because then presorted_keys > 0 would imply is_sorted was + * true. + */ + Assert(list_length(root->group_pathkeys) != 1); + + path = (Path *) create_incremental_sort_path(root, + grouped_rel, + path, + info->pathkeys, + presorted_keys, + -1.0); + + if (parse->hasAggs) + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + info->clauses ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_FINAL_DESERIAL, + info->clauses, + havingQual, + agg_final_costs, + dNumGroups)); + else + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + info->clauses, + havingQual, + dNumGroups)); } - - if (parse->hasAggs) - add_path(grouped_rel, (Path *) - create_agg_path(root, - grouped_rel, - path, - grouped_rel->reltarget, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_FINAL_DESERIAL, - parse->groupClause, - havingQual, - agg_final_costs, - dNumGroups)); - else - add_path(grouped_rel, (Path *) - create_group_path(root, - grouped_rel, - path, - parse->groupClause, - havingQual, - dNumGroups)); - - /* - * Now we may consider incremental sort on this path, but only - * when the path is not already sorted and when incremental - * sort is enabled. - */ - if (is_sorted || !enable_incremental_sort) - continue; - - /* Restore the input path (we might have added Sort on top). */ - path = path_original; - - /* no shared prefix, not point in building incremental sort */ - if (presorted_keys == 0) - continue; - - /* - * We should have already excluded pathkeys of length 1 - * because then presorted_keys > 0 would imply is_sorted was - * true. - */ - Assert(list_length(root->group_pathkeys) != 1); - - path = (Path *) create_incremental_sort_path(root, - grouped_rel, - path, - root->group_pathkeys, - presorted_keys, - -1.0); - - if (parse->hasAggs) - add_path(grouped_rel, (Path *) - create_agg_path(root, - grouped_rel, - path, - grouped_rel->reltarget, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_FINAL_DESERIAL, - parse->groupClause, - havingQual, - agg_final_costs, - dNumGroups)); - else - add_path(grouped_rel, (Path *) - create_group_path(root, - grouped_rel, - path, - parse->groupClause, - havingQual, - dNumGroups)); } } } @@ -6661,41 +6709,71 @@ create_partial_grouping_paths(PlannerInfo *root, */ foreach(lc, input_rel->pathlist) { + ListCell *lc2; Path *path = (Path *) lfirst(lc); - bool is_sorted; + Path *path_save = path; - is_sorted = pathkeys_contained_in(root->group_pathkeys, - path->pathkeys); - if (path == cheapest_total_path || is_sorted) + List *pathkey_orderings = NIL; + + List *group_pathkeys = root->group_pathkeys; + List *group_clauses = parse->groupClause; + + /* generate alternative group orderings that might be useful */ + pathkey_orderings = get_useful_group_keys_orderings(root, + path->rows, + path->pathkeys, + group_pathkeys, + group_clauses); + + Assert(list_length(pathkey_orderings) > 0); + + /* process all potentially interesting grouping reorderings */ + foreach (lc2, pathkey_orderings) { - /* Sort the cheapest partial path, if it isn't already */ - if (!is_sorted) - path = (Path *) create_sort_path(root, - partially_grouped_rel, - path, - root->group_pathkeys, - -1.0); + bool is_sorted; + int presorted_keys = 0; + PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); - if (parse->hasAggs) - add_path(partially_grouped_rel, (Path *) - create_agg_path(root, - partially_grouped_rel, - path, - partially_grouped_rel->reltarget, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_INITIAL_SERIAL, - parse->groupClause, - NIL, - agg_partial_costs, - dNumPartialGroups)); - else - add_path(partially_grouped_rel, (Path *) - create_group_path(root, - partially_grouped_rel, - path, - parse->groupClause, - NIL, - dNumPartialGroups)); + /* restore the path (we replace it in the loop) */ + path = path_save; + + is_sorted = pathkeys_count_contained_in(info->pathkeys, + path->pathkeys, + &presorted_keys); + + if (path == cheapest_total_path || is_sorted) + { + /* Sort the cheapest partial path, if it isn't already */ + if (!is_sorted) + { + path = (Path *) create_sort_path(root, + partially_grouped_rel, + path, + info->pathkeys, + -1.0); + } + + if (parse->hasAggs) + add_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + info->clauses ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + info->clauses, + NIL, + agg_partial_costs, + dNumPartialGroups)); + else + add_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + info->clauses, + NIL, + dNumPartialGroups)); + } } } @@ -6705,6 +6783,8 @@ create_partial_grouping_paths(PlannerInfo *root, * We can also skip the entire loop when we only have a single-item * group_pathkeys because then we can't possibly have a presorted * prefix of the list without having the list be fully sorted. + * + * XXX Shouldn't this also consider the group-key-reordering? */ if (enable_incremental_sort && list_length(root->group_pathkeys) > 1) { @@ -6763,24 +6843,100 @@ create_partial_grouping_paths(PlannerInfo *root, /* Similar to above logic, but for partial paths. */ foreach(lc, input_rel->partial_pathlist) { + ListCell *lc2; Path *path = (Path *) lfirst(lc); Path *path_original = path; - bool is_sorted; - int presorted_keys; - is_sorted = pathkeys_count_contained_in(root->group_pathkeys, - path->pathkeys, - &presorted_keys); + List *pathkey_orderings = NIL; - if (path == cheapest_partial_path || is_sorted) + List *group_pathkeys = root->group_pathkeys; + List *group_clauses = parse->groupClause; + + /* generate alternative group orderings that might be useful */ + pathkey_orderings = get_useful_group_keys_orderings(root, + path->rows, + path->pathkeys, + group_pathkeys, + group_clauses); + + Assert(list_length(pathkey_orderings) > 0); + + /* process all potentially interesting grouping reorderings */ + foreach (lc2, pathkey_orderings) { - /* Sort the cheapest partial path, if it isn't already */ - if (!is_sorted) - path = (Path *) create_sort_path(root, - partially_grouped_rel, - path, - root->group_pathkeys, - -1.0); + bool is_sorted; + int presorted_keys = 0; + PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); + + /* restore the path (we replace it in the loop) */ + path = path_original; + + is_sorted = pathkeys_count_contained_in(info->pathkeys, + path->pathkeys, + &presorted_keys); + + if (path == cheapest_partial_path || is_sorted) + { + + /* Sort the cheapest partial path, if it isn't already */ + if (!is_sorted) + { + path = (Path *) create_sort_path(root, + partially_grouped_rel, + path, + info->pathkeys, + -1.0); + } + + if (parse->hasAggs) + add_partial_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + info->clauses ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + info->clauses, + NIL, + agg_partial_costs, + dNumPartialPartialGroups)); + else + add_partial_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + info->clauses, + NIL, + dNumPartialPartialGroups)); + } + + /* + * Now we may consider incremental sort on this path, but only + * when the path is not already sorted and when incremental sort + * is enabled. + */ + if (is_sorted || !enable_incremental_sort) + continue; + + /* Restore the input path (we might have added Sort on top). */ + path = path_original; + + /* no shared prefix, not point in building incremental sort */ + if (presorted_keys == 0) + continue; + + /* + * We should have already excluded pathkeys of length 1 because + * then presorted_keys > 0 would imply is_sorted was true. + */ + Assert(list_length(root->group_pathkeys) != 1); + + path = (Path *) create_incremental_sort_path(root, + partially_grouped_rel, + path, + info->pathkeys, + presorted_keys, + -1.0); if (parse->hasAggs) add_partial_path(partially_grouped_rel, (Path *) @@ -6788,9 +6944,9 @@ create_partial_grouping_paths(PlannerInfo *root, partially_grouped_rel, path, partially_grouped_rel->reltarget, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, + info->clauses ? AGG_SORTED : AGG_PLAIN, AGGSPLIT_INITIAL_SERIAL, - parse->groupClause, + info->clauses, NIL, agg_partial_costs, dNumPartialPartialGroups)); @@ -6799,59 +6955,10 @@ create_partial_grouping_paths(PlannerInfo *root, create_group_path(root, partially_grouped_rel, path, - parse->groupClause, + info->clauses, NIL, dNumPartialPartialGroups)); } - - /* - * Now we may consider incremental sort on this path, but only - * when the path is not already sorted and when incremental sort - * is enabled. - */ - if (is_sorted || !enable_incremental_sort) - continue; - - /* Restore the input path (we might have added Sort on top). */ - path = path_original; - - /* no shared prefix, not point in building incremental sort */ - if (presorted_keys == 0) - continue; - - /* - * We should have already excluded pathkeys of length 1 because - * then presorted_keys > 0 would imply is_sorted was true. - */ - Assert(list_length(root->group_pathkeys) != 1); - - path = (Path *) create_incremental_sort_path(root, - partially_grouped_rel, - path, - root->group_pathkeys, - presorted_keys, - -1.0); - - if (parse->hasAggs) - add_partial_path(partially_grouped_rel, (Path *) - create_agg_path(root, - partially_grouped_rel, - path, - partially_grouped_rel->reltarget, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_INITIAL_SERIAL, - parse->groupClause, - NIL, - agg_partial_costs, - dNumPartialPartialGroups)); - else - add_partial_path(partially_grouped_rel, (Path *) - create_group_path(root, - partially_grouped_rel, - path, - parse->groupClause, - NIL, - dNumPartialPartialGroups)); } } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 99df76b6b7..1670e54644 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1342,7 +1342,7 @@ create_append_path(PlannerInfo *root, pathnode->path.pathkeys = child->pathkeys; } else - cost_append(pathnode); + cost_append(pathnode, root); /* If the caller provided a row estimate, override the computed value. */ if (rows >= 0) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 1fbb0b28c3..fb4fb987e7 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -3368,11 +3368,28 @@ double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo) { - List *varinfos = NIL; + return estimate_num_groups_incremental(root, groupExprs, + input_rows, pgset, estinfo, + NULL, 0); +} + +/* + * estimate_num_groups_incremental + * An estimate_num_groups variant, optimized for cases that are adding the + * expressions incrementally (e.g. one by one). + */ +double +estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs, + double input_rows, + List **pgset, EstimationInfo *estinfo, + List **cache_varinfos, int prevNExprs) +{ + List *varinfos = (cache_varinfos) ? *cache_varinfos : NIL; double srf_multiplier = 1.0; double numdistinct; ListCell *l; - int i; + int i, + j; /* Zero the estinfo output parameter, if non-NULL */ if (estinfo != NULL) @@ -3403,7 +3420,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, */ numdistinct = 1.0; - i = 0; + i = j = 0; foreach(l, groupExprs) { Node *groupexpr = (Node *) lfirst(l); @@ -3412,6 +3429,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List *varshere; ListCell *l2; + /* was done on previous call */ + if (cache_varinfos && j++ < prevNExprs) + { + if (pgset) + i++; /* to keep in sync with lines below */ + continue; + } + /* is expression in this grouping set? */ if (pgset && !list_member_int(*pgset, i++)) continue; @@ -3481,7 +3506,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, if (varshere == NIL) { if (contain_volatile_functions(groupexpr)) + { + if (cache_varinfos) + *cache_varinfos = varinfos; return input_rows; + } continue; } @@ -3498,6 +3527,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, } } + if (cache_varinfos) + *cache_varinfos = varinfos; + /* * If now no Vars, we must have an all-constant or all-boolean GROUP BY * list. diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index eb3a03b976..9e8ab1420d 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1182,6 +1182,16 @@ static struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("enable reordering of GROUP BY key"), + NULL, + GUC_EXPLAIN + }, + &enable_group_by_reordering, + true, + NULL, NULL, NULL + }, { {"geqo", PGC_USERSET, QUERY_TUNING_GEQO, gettext_noop("Enables genetic query optimization."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index b933fade8c..93d221a37b 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -382,6 +382,7 @@ #enable_seqscan = on #enable_sort = on #enable_tidscan = on +#enable_group_by_reordering = on # - Planner Cost Constants - diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index a56b85fe74..53f6b05a3f 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -277,6 +277,7 @@ typedef enum NodeTag T_EquivalenceClass, T_EquivalenceMember, T_PathKey, + T_PathKeyInfo, T_PathTarget, T_RestrictInfo, T_IndexClause, diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 365000bdcd..6cbcb67bdf 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1070,6 +1070,16 @@ typedef struct PathKey bool pk_nulls_first; /* do NULLs come before normal values? */ } PathKey; +/* + * Combines information about pathkeys and the associated clauses. + */ +typedef struct PathKeyInfo +{ + NodeTag type; + List *pathkeys; + List *clauses; +} PathKeyInfo; + /* * VolatileFunctionStatus -- allows nodes to cache their * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index bc12071af6..dc7fc17411 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -114,7 +114,9 @@ extern void cost_incremental_sort(Path *path, Cost input_startup_cost, Cost input_total_cost, double input_tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples); -extern void cost_append(AppendPath *path); +extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys, + int nPresortedKeys, double tuples); +extern void cost_append(AppendPath *path, PlannerInfo *root); extern void cost_merge_append(Path *path, PlannerInfo *root, List *pathkeys, int n_streams, Cost input_startup_cost, Cost input_total_cost, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 0c3a0b90c8..2ffa24aa89 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -24,6 +24,7 @@ extern PGDLLIMPORT bool enable_geqo; extern PGDLLIMPORT int geqo_threshold; extern PGDLLIMPORT int min_parallel_table_scan_size; extern PGDLLIMPORT int min_parallel_index_scan_size; +extern PGDLLIMPORT bool enable_group_by_reordering; /* Hook for plugins to get control in set_rel_pathlist() */ typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root, @@ -203,6 +204,12 @@ typedef enum extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2); extern bool pathkeys_contained_in(List *keys1, List *keys2); extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common); +extern int group_keys_reorder_by_pathkeys(List *pathkeys, + List **group_pathkeys, + List **group_clauses); +extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows, + List *path_pathkeys, + List *pathkeys, List *clauses); extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index 8f3d73edfb..c313a08d54 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -214,6 +214,11 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo); +extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs, + double input_rows, List **pgset, + EstimationInfo *estinfo, + List **cache_varinfos, int prevNExprs); + extern void estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets, Selectivity *mcv_freq, diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 0a23a39aa2..601047fa3d 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1210,7 +1210,8 @@ explain (costs off) select distinct min(f1), max(f1) from minmaxtest; QUERY PLAN --------------------------------------------------------------------------------------------- - Unique + HashAggregate + Group Key: $0, $1 InitPlan 1 (returns $0) -> Limit -> Merge Append @@ -1233,10 +1234,8 @@ explain (costs off) -> Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8 Index Cond: (f1 IS NOT NULL) -> Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9 - -> Sort - Sort Key: ($0), ($1) - -> Result -(26 rows) + -> Result +(25 rows) select distinct min(f1), max(f1) from minmaxtest; min | max @@ -2448,6 +2447,241 @@ SELECT balk(hundred) FROM tenk1; (1 row) ROLLBACK; +-- GROUP BY optimization by reorder columns +SELECT + i AS id, + i/2 AS p, + format('%60s', i%2) AS v, + i/4 AS c, + i/8 AS d, + (random() * (10000/8))::int as e --the same as d but no correlation with p + INTO btg +FROM + generate_series(1, 10000) i; +VACUUM btg; +ANALYZE btg; +-- GROUP BY optimization by reorder columns by frequency +SET enable_hashagg=off; +SET max_parallel_workers= 0; +SET max_parallel_workers_per_gather = 0; +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, v; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, v + -> Sort + Sort Key: p, v + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, v + -> Sort + Sort Key: p, v + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, c, v + -> Sort + Sort Key: p, c, v + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: v, p, c + -> Sort + Sort Key: v, p, c + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, d, c; + QUERY PLAN +------------------------------ + GroupAggregate + Group Key: p, d, c, v + -> Sort + Sort Key: p, d, c, v + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c; + QUERY PLAN +------------------------------ + GroupAggregate + Group Key: v, p, d, c + -> Sort + Sort Key: v, p, d, c + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c; + QUERY PLAN +------------------------------ + GroupAggregate + Group Key: p, v, d, c + -> Sort + Sort Key: p, v, d, c + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, d, e; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, d, e + -> Sort + Sort Key: p, d, e + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, e, d; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, e, d + -> Sort + Sort Key: p, e, d + -> Seq Scan on btg +(5 rows) + +CREATE STATISTICS btg_dep ON d, e, p FROM btg; +ANALYZE btg; +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, d, e; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, d, e + -> Sort + Sort Key: p, d, e + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, e, d; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, e, d + -> Sort + Sort Key: p, e, d + -> Seq Scan on btg +(5 rows) + +-- GROUP BY optimization by reorder columns by index scan +CREATE INDEX ON btg(p, v); +SET enable_seqscan=off; +SET enable_bitmapscan=off; +VACUUM btg; +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, v; + QUERY PLAN +------------------------------------------------ + GroupAggregate + Group Key: p, v + -> Index Only Scan using btg_p_v_idx on btg +(3 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v; + QUERY PLAN +------------------------------------------------ + GroupAggregate + Group Key: p, v + -> Index Only Scan using btg_p_v_idx on btg +(3 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p; + QUERY PLAN +------------------------------------------------ + GroupAggregate + Group Key: p, v + -> Index Only Scan using btg_p_v_idx on btg +(3 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v; + QUERY PLAN +------------------------------------------------ + GroupAggregate + Group Key: p, v + -> Index Only Scan using btg_p_v_idx on btg +(3 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c; + QUERY PLAN +------------------------------------------------- + GroupAggregate + Group Key: p, c, v + -> Incremental Sort + Sort Key: p, c, v + Presorted Key: p + -> Index Scan using btg_p_v_idx on btg +(6 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v; + QUERY PLAN +------------------------------------------------- + GroupAggregate + Group Key: p, v, c + -> Incremental Sort + Sort Key: p, v, c + Presorted Key: p, v + -> Index Scan using btg_p_v_idx on btg +(6 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, c, p, d; + QUERY PLAN +------------------------------------------------- + GroupAggregate + Group Key: p, c, d, v + -> Incremental Sort + Sort Key: p, c, d, v + Presorted Key: p + -> Index Scan using btg_p_v_idx on btg +(6 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v; + QUERY PLAN +------------------------------------------------- + GroupAggregate + Group Key: p, v, c, d + -> Incremental Sort + Sort Key: p, v, c, d + Presorted Key: p, v + -> Index Scan using btg_p_v_idx on btg +(6 rows) + +DROP TABLE btg; +RESET enable_hashagg; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +RESET enable_seqscan; +RESET enable_bitmapscan; -- Secondly test the case of a parallel aggregate combiner function -- returning NULL. For that use normal transition function, but a -- combiner function returning NULL. diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index 545e301e48..21c429226f 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1439,7 +1439,7 @@ set parallel_setup_cost = 0; set parallel_tuple_cost = 0; set max_parallel_workers_per_gather = 2; create table t (a int, b int, c int); -insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i); +insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i); create index on t (a); analyze t; set enable_incremental_sort = off; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 19caebabd0..bf1a2db2cf 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -1984,8 +1984,8 @@ USING (name); ------+----+---- bb | 12 | 13 cc | 22 | 23 - dd | | 33 ee | 42 | + dd | | 33 (4 rows) -- Cases with non-nullable expressions in subquery results; @@ -2019,8 +2019,8 @@ NATURAL FULL JOIN ------+------+------+------+------ bb | 12 | 2 | 13 | 3 cc | 22 | 2 | 23 | 3 - dd | | | 33 | 3 ee | 42 | 2 | | + dd | | | 33 | 3 (4 rows) SELECT * FROM @@ -4618,18 +4618,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s explain (costs off) select d.* from d left join (select distinct * from b) s on d.a = s.id; - QUERY PLAN --------------------------------------- - Merge Right Join - Merge Cond: (b.id = d.a) - -> Unique - -> Sort - Sort Key: b.id, b.c_id - -> Seq Scan on b + QUERY PLAN +--------------------------------------------- + Merge Left Join + Merge Cond: (d.a = s.id) -> Sort Sort Key: d.a -> Seq Scan on d -(9 rows) + -> Sort + Sort Key: s.id + -> Subquery Scan on s + -> HashAggregate + Group Key: b.id, b.c_id + -> Seq Scan on b +(11 rows) -- check join removal works when uniqueness of the join condition is enforced -- by a UNION @@ -6336,44 +6338,39 @@ select * from j1 natural join j2; explain (verbose, costs off) select * from j1 inner join (select distinct id from j3) j3 on j1.id = j3.id; - QUERY PLAN ------------------------------------------ + QUERY PLAN +----------------------------------- Nested Loop Output: j1.id, j3.id Inner Unique: true Join Filter: (j1.id = j3.id) - -> Unique + -> HashAggregate Output: j3.id - -> Sort + Group Key: j3.id + -> Seq Scan on public.j3 Output: j3.id - Sort Key: j3.id - -> Seq Scan on public.j3 - Output: j3.id -> Seq Scan on public.j1 Output: j1.id -(13 rows) +(11 rows) -- ensure group by clause allows the inner to become unique explain (verbose, costs off) select * from j1 inner join (select id from j3 group by id) j3 on j1.id = j3.id; - QUERY PLAN ------------------------------------------ + QUERY PLAN +----------------------------------- Nested Loop Output: j1.id, j3.id Inner Unique: true Join Filter: (j1.id = j3.id) - -> Group + -> HashAggregate Output: j3.id Group Key: j3.id - -> Sort + -> Seq Scan on public.j3 Output: j3.id - Sort Key: j3.id - -> Seq Scan on public.j3 - Output: j3.id -> Seq Scan on public.j1 Output: j1.id -(14 rows) +(11 rows) drop table j1; drop table j2; diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out index dfa4b036b5..a08a3825ff 100644 --- a/src/test/regress/expected/partition_aggregate.out +++ b/src/test/regress/expected/partition_aggregate.out @@ -952,32 +952,30 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA -------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c)) - -> Gather - Workers Planned: 2 - -> Parallel Append - -> GroupAggregate - Group Key: pagg_tab_ml.a - Filter: (avg(pagg_tab_ml.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml.a - -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml - -> GroupAggregate - Group Key: pagg_tab_ml_5.a - Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml_5.a - -> Append - -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 - -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 - -> GroupAggregate - Group Key: pagg_tab_ml_2.a - Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml_2.a - -> Append - -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 - -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 -(27 rows) + -> Append + -> GroupAggregate + Group Key: pagg_tab_ml.a + Filter: (avg(pagg_tab_ml.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml.a + -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml + -> GroupAggregate + Group Key: pagg_tab_ml_2.a + Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_2.a + -> Append + -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 + -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 + -> GroupAggregate + Group Key: pagg_tab_ml_5.a + Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_5.a + -> Append + -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 + -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 +(25 rows) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; a | sum | array_agg | count @@ -996,34 +994,32 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA -- Without ORDER BY clause, to test Gather at top-most path EXPLAIN (COSTS OFF) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3; - QUERY PLAN ---------------------------------------------------------------------------- - Gather - Workers Planned: 2 - -> Parallel Append - -> GroupAggregate - Group Key: pagg_tab_ml.a - Filter: (avg(pagg_tab_ml.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml.a - -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml - -> GroupAggregate - Group Key: pagg_tab_ml_5.a - Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml_5.a - -> Append - -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 - -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 - -> GroupAggregate - Group Key: pagg_tab_ml_2.a - Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml_2.a - -> Append - -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 - -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 -(25 rows) + QUERY PLAN +--------------------------------------------------------------------- + Append + -> GroupAggregate + Group Key: pagg_tab_ml.a + Filter: (avg(pagg_tab_ml.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml.a + -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml + -> GroupAggregate + Group Key: pagg_tab_ml_2.a + Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_2.a + -> Append + -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 + -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 + -> GroupAggregate + Group Key: pagg_tab_ml_5.a + Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_5.a + -> Append + -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 + -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 +(23 rows) -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY -- for level 1 only. For subpartitions, GROUP BY clause does not match with @@ -1379,28 +1375,26 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < -- When GROUP BY clause does not match; partial aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; - QUERY PLAN -------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x)) - -> Finalize GroupAggregate + -> Finalize HashAggregate Group Key: pagg_tab_para.y Filter: (avg(pagg_tab_para.x) < '12'::numeric) - -> Gather Merge + -> Gather Workers Planned: 2 - -> Sort - Sort Key: pagg_tab_para.y - -> Parallel Append - -> Partial HashAggregate - Group Key: pagg_tab_para.y - -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para - -> Partial HashAggregate - Group Key: pagg_tab_para_1.y - -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 - -> Partial HashAggregate - Group Key: pagg_tab_para_2.y - -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 -(19 rows) + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_para.y + -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para + -> Partial HashAggregate + Group Key: pagg_tab_para_1.y + -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 + -> Partial HashAggregate + Group Key: pagg_tab_para_2.y + -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 +(17 rows) SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; y | sum | avg | count diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index bb5b7c47a4..03926a8413 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -466,52 +466,41 @@ EXPLAIN (COSTS OFF) SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) WHERE a BETWEEN 490 AND 510 GROUP BY 1, 2 ORDER BY 1, 2; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------ + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Group Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) - -> Merge Append + -> Sort Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) - -> Group - Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) - -> Sort - Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) - -> Merge Full Join - Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b)) - Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510)) - -> Sort - Sort Key: prt1.a, prt1.b - -> Seq Scan on prt1_p1 prt1 - -> Sort - Sort Key: p2.a, p2.b - -> Seq Scan on prt2_p1 p2 - -> Group - Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b)) - -> Sort - Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b)) - -> Merge Full Join - Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b)) - Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510)) - -> Sort - Sort Key: prt1_1.a, prt1_1.b - -> Seq Scan on prt1_p2 prt1_1 - -> Sort - Sort Key: p2_1.a, p2_1.b - -> Seq Scan on prt2_p2 p2_1 - -> Group - Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b)) - -> Sort - Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b)) - -> Merge Full Join - Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b)) - Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510)) - -> Sort - Sort Key: prt1_2.a, prt1_2.b - -> Seq Scan on prt1_p3 prt1_2 - -> Sort - Sort Key: p2_2.a, p2_2.b - -> Seq Scan on prt2_p3 p2_2 -(43 rows) + -> Append + -> Merge Full Join + Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b)) + Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510)) + -> Sort + Sort Key: prt1_1.a, prt1_1.b + -> Seq Scan on prt1_p1 prt1_1 + -> Sort + Sort Key: p2_1.a, p2_1.b + -> Seq Scan on prt2_p1 p2_1 + -> Merge Full Join + Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b)) + Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510)) + -> Sort + Sort Key: prt1_2.a, prt1_2.b + -> Seq Scan on prt1_p2 prt1_2 + -> Sort + Sort Key: p2_2.a, p2_2.b + -> Seq Scan on prt2_p2 p2_2 + -> Merge Full Join + Merge Cond: ((prt1_3.b = p2_3.b) AND (prt1_3.a = p2_3.a)) + Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510)) + -> Sort + Sort Key: prt1_3.b, prt1_3.a + -> Seq Scan on prt1_p3 prt1_3 + -> Sort + Sort Key: p2_3.b, p2_3.a + -> Seq Scan on prt2_p3 p2_3 +(32 rows) SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) WHERE a BETWEEN 490 AND 510 diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 579b861d84..4e775af175 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -114,6 +114,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_async_append | on enable_bitmapscan | on enable_gathermerge | on + enable_group_by_reordering | on enable_hashagg | on enable_hashjoin | on enable_incremental_sort | on @@ -131,7 +132,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_seqscan | on enable_sort | on enable_tidscan | on -(20 rows) +(21 rows) -- Test that the pg_timezone_names and pg_timezone_abbrevs views are -- more-or-less working. We can't test their contents in any great detail diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index dece7310cf..7ac4a9380e 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -1303,24 +1303,22 @@ select distinct q1 from union all select distinct * from int8_tbl i82) ss where q2 = q2; - QUERY PLAN ----------------------------------------------------------- - Unique - -> Merge Append - Sort Key: "*SELECT* 1".q1 + QUERY PLAN +---------------------------------------------------- + HashAggregate + Group Key: "*SELECT* 1".q1 + -> Append -> Subquery Scan on "*SELECT* 1" - -> Unique - -> Sort - Sort Key: i81.q1, i81.q2 - -> Seq Scan on int8_tbl i81 - Filter: (q2 IS NOT NULL) + -> HashAggregate + Group Key: i81.q1, i81.q2 + -> Seq Scan on int8_tbl i81 + Filter: (q2 IS NOT NULL) -> Subquery Scan on "*SELECT* 2" - -> Unique - -> Sort - Sort Key: i82.q1, i82.q2 - -> Seq Scan on int8_tbl i82 - Filter: (q2 IS NOT NULL) -(15 rows) + -> HashAggregate + Group Key: i82.q1, i82.q2 + -> Seq Scan on int8_tbl i82 + Filter: (q2 IS NOT NULL) +(13 rows) select distinct q1 from (select distinct * from int8_tbl i81 @@ -1339,24 +1337,22 @@ select distinct q1 from union all select distinct * from int8_tbl i82) ss where -q1 = q2; - QUERY PLAN --------------------------------------------------------- - Unique - -> Merge Append - Sort Key: "*SELECT* 1".q1 + QUERY PLAN +-------------------------------------------------- + HashAggregate + Group Key: "*SELECT* 1".q1 + -> Append -> Subquery Scan on "*SELECT* 1" - -> Unique - -> Sort - Sort Key: i81.q1, i81.q2 - -> Seq Scan on int8_tbl i81 - Filter: ((- q1) = q2) + -> HashAggregate + Group Key: i81.q1, i81.q2 + -> Seq Scan on int8_tbl i81 + Filter: ((- q1) = q2) -> Subquery Scan on "*SELECT* 2" - -> Unique - -> Sort - Sort Key: i82.q1, i82.q2 - -> Seq Scan on int8_tbl i82 - Filter: ((- q1) = q2) -(15 rows) + -> HashAggregate + Group Key: i82.q1, i82.q2 + -> Seq Scan on int8_tbl i82 + Filter: ((- q1) = q2) +(13 rows) select distinct q1 from (select distinct * from int8_tbl i81 diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 2f5d0e00f3..c6e0d7ba2b 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -1025,6 +1025,105 @@ SELECT balk(hundred) FROM tenk1; ROLLBACK; +-- GROUP BY optimization by reorder columns + +SELECT + i AS id, + i/2 AS p, + format('%60s', i%2) AS v, + i/4 AS c, + i/8 AS d, + (random() * (10000/8))::int as e --the same as d but no correlation with p + INTO btg +FROM + generate_series(1, 10000) i; + +VACUUM btg; +ANALYZE btg; + +-- GROUP BY optimization by reorder columns by frequency + +SET enable_hashagg=off; +SET max_parallel_workers= 0; +SET max_parallel_workers_per_gather = 0; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, v; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, d, c; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, d, e; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, e, d; + +CREATE STATISTICS btg_dep ON d, e, p FROM btg; +ANALYZE btg; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, d, e; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, e, d; + + +-- GROUP BY optimization by reorder columns by index scan + +CREATE INDEX ON btg(p, v); +SET enable_seqscan=off; +SET enable_bitmapscan=off; +VACUUM btg; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, v; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, c, p, d; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v; + +DROP TABLE btg; + +RESET enable_hashagg; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +RESET enable_seqscan; +RESET enable_bitmapscan; + + -- Secondly test the case of a parallel aggregate combiner function -- returning NULL. For that use normal transition function, but a -- combiner function returning NULL. diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql index d8768a6b54..1de163e039 100644 --- a/src/test/regress/sql/incremental_sort.sql +++ b/src/test/regress/sql/incremental_sort.sql @@ -213,7 +213,7 @@ set parallel_tuple_cost = 0; set max_parallel_workers_per_gather = 2; create table t (a int, b int, c int); -insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i); +insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i); create index on t (a); analyze t;