Support hashed aggregation with grouping sets.
This extends the Aggregate node with two new features: HashAggregate can now run multiple hashtables concurrently, and a new strategy MixedAggregate populates hashtables while doing sorted grouping. The planner will now attempt to save as many sorts as possible when planning grouping sets queries, while not exceeding work_mem for the estimated combined sizes of all hashtables used. No SQL-level changes are required. There should be no user-visible impact other than the new EXPLAIN output and possible changes to result ordering when ORDER BY was not used (which affected a few regression tests). The enable_hashagg option is respected. Author: Andrew Gierth Reviewers: Mark Dilger, Andres Freund Discussion: https://postgr.es/m/87vatszyhj.fsf@news-spur.riddles.org.uk
This commit is contained in:
parent
f0a6046bcb
commit
b5635948ab
@ -3277,15 +3277,18 @@ select sum(q.a), count(q.b) from ft4 left join (select 13, avg(ft1.c1), sum(ft2.
|
||||
explain (verbose, costs off)
|
||||
select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------------------------------------
|
||||
GroupAggregate
|
||||
------------------------------------------------------------------------------
|
||||
Sort
|
||||
Output: c2, (sum(c1))
|
||||
Sort Key: ft1.c2
|
||||
-> MixedAggregate
|
||||
Output: c2, sum(c1)
|
||||
Group Key: ft1.c2
|
||||
Hash Key: ft1.c2
|
||||
Group Key: ()
|
||||
-> Foreign Scan on public.ft1
|
||||
Output: c2, c1
|
||||
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
|
||||
(7 rows)
|
||||
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
|
||||
(10 rows)
|
||||
|
||||
select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
|
||||
c2 | sum
|
||||
@ -3299,15 +3302,18 @@ select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls la
|
||||
explain (verbose, costs off)
|
||||
select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------------------------------------
|
||||
GroupAggregate
|
||||
------------------------------------------------------------------------------
|
||||
Sort
|
||||
Output: c2, (sum(c1))
|
||||
Sort Key: ft1.c2
|
||||
-> MixedAggregate
|
||||
Output: c2, sum(c1)
|
||||
Group Key: ft1.c2
|
||||
Hash Key: ft1.c2
|
||||
Group Key: ()
|
||||
-> Foreign Scan on public.ft1
|
||||
Output: c2, c1
|
||||
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
|
||||
(7 rows)
|
||||
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
|
||||
(10 rows)
|
||||
|
||||
select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
|
||||
c2 | sum
|
||||
@ -3321,19 +3327,18 @@ select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last
|
||||
explain (verbose, costs off)
|
||||
select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
|
||||
QUERY PLAN
|
||||
-------------------------------------------------------------------------------------------------------------
|
||||
----------------------------------------------------------------------------------
|
||||
Sort
|
||||
Output: c2, c6, (sum(c1))
|
||||
Sort Key: ft1.c2, ft1.c6
|
||||
-> GroupAggregate
|
||||
-> HashAggregate
|
||||
Output: c2, c6, sum(c1)
|
||||
Group Key: ft1.c2
|
||||
Sort Key: ft1.c6
|
||||
Group Key: ft1.c6
|
||||
Hash Key: ft1.c2
|
||||
Hash Key: ft1.c6
|
||||
-> Foreign Scan on public.ft1
|
||||
Output: c2, c6, c1
|
||||
Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
|
||||
(11 rows)
|
||||
Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3))
|
||||
(10 rows)
|
||||
|
||||
select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
|
||||
c2 | c6 | sum
|
||||
|
@ -1015,6 +1015,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
|
||||
pname = "HashAggregate";
|
||||
strategy = "Hashed";
|
||||
break;
|
||||
case AGG_MIXED:
|
||||
pname = "MixedAggregate";
|
||||
strategy = "Mixed";
|
||||
break;
|
||||
default:
|
||||
pname = "Aggregate ???";
|
||||
strategy = "???";
|
||||
@ -1978,6 +1982,19 @@ show_grouping_set_keys(PlanState *planstate,
|
||||
ListCell *lc;
|
||||
List *gsets = aggnode->groupingSets;
|
||||
AttrNumber *keycols = aggnode->grpColIdx;
|
||||
const char *keyname;
|
||||
const char *keysetname;
|
||||
|
||||
if (aggnode->aggstrategy == AGG_HASHED || aggnode->aggstrategy == AGG_MIXED)
|
||||
{
|
||||
keyname = "Hash Key";
|
||||
keysetname = "Hash Keys";
|
||||
}
|
||||
else
|
||||
{
|
||||
keyname = "Group Key";
|
||||
keysetname = "Group Keys";
|
||||
}
|
||||
|
||||
ExplainOpenGroup("Grouping Set", NULL, true, es);
|
||||
|
||||
@ -1992,7 +2009,7 @@ show_grouping_set_keys(PlanState *planstate,
|
||||
es->indent++;
|
||||
}
|
||||
|
||||
ExplainOpenGroup("Group Keys", "Group Keys", false, es);
|
||||
ExplainOpenGroup(keysetname, keysetname, false, es);
|
||||
|
||||
foreach(lc, gsets)
|
||||
{
|
||||
@ -2016,12 +2033,12 @@ show_grouping_set_keys(PlanState *planstate,
|
||||
}
|
||||
|
||||
if (!result && es->format == EXPLAIN_FORMAT_TEXT)
|
||||
ExplainPropertyText("Group Key", "()", es);
|
||||
ExplainPropertyText(keyname, "()", es);
|
||||
else
|
||||
ExplainPropertyListNested("Group Key", result, es);
|
||||
ExplainPropertyListNested(keyname, result, es);
|
||||
}
|
||||
|
||||
ExplainCloseGroup("Group Keys", "Group Keys", false, es);
|
||||
ExplainCloseGroup(keysetname, keysetname, false, es);
|
||||
|
||||
if (sortnode && es->format == EXPLAIN_FORMAT_TEXT)
|
||||
es->indent--;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -12,7 +12,7 @@ subdir = src/backend/lib
|
||||
top_builddir = ../../..
|
||||
include $(top_builddir)/src/Makefile.global
|
||||
|
||||
OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o pairingheap.o \
|
||||
rbtree.o stringinfo.o
|
||||
OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o knapsack.o \
|
||||
pairingheap.o rbtree.o stringinfo.o
|
||||
|
||||
include $(top_srcdir)/src/backend/common.mk
|
||||
|
114
src/backend/lib/knapsack.c
Normal file
114
src/backend/lib/knapsack.c
Normal file
@ -0,0 +1,114 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* knapsack.c
|
||||
* Knapsack problem solver
|
||||
*
|
||||
* Given input vectors of integral item weights (must be >= 0) and values
|
||||
* (double >= 0), compute the set of items which produces the greatest total
|
||||
* value without exceeding a specified total weight; each item is included at
|
||||
* most once (this is the 0/1 knapsack problem). Weight 0 items will always be
|
||||
* included.
|
||||
*
|
||||
* The performance of this algorithm is pseudo-polynomial, O(nW) where W is the
|
||||
* weight limit. To use with non-integral weights or approximate solutions,
|
||||
* the caller should pre-scale the input weights to a suitable range. This
|
||||
* allows approximate solutions in polynomial time (the general case of the
|
||||
* exact problem is NP-hard).
|
||||
*
|
||||
* Copyright (c) 2017, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* src/backend/lib/knapsack.c
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "lib/knapsack.h"
|
||||
#include "miscadmin.h"
|
||||
#include "nodes/bitmapset.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/palloc.h"
|
||||
|
||||
/*
|
||||
* DiscreteKnapsack
|
||||
*
|
||||
* The item_values input is optional; if omitted, all the items are assumed to
|
||||
* have value 1.
|
||||
*
|
||||
* Returns a Bitmapset of the 0..(n-1) indexes of the items chosen for
|
||||
* inclusion in the solution.
|
||||
*
|
||||
* This uses the usual dynamic-programming algorithm, adapted to reuse the
|
||||
* memory on each pass (by working from larger weights to smaller). At the
|
||||
* start of pass number i, the values[w] array contains the largest value
|
||||
* computed with total weight <= w, using only items with indices < i; and
|
||||
* sets[w] contains the bitmap of items actually used for that value. (The
|
||||
* bitmapsets are all pre-initialized with an unused high bit so that memory
|
||||
* allocation is done only once.)
|
||||
*/
|
||||
Bitmapset *
|
||||
DiscreteKnapsack(int max_weight, int num_items,
|
||||
int *item_weights, double *item_values)
|
||||
{
|
||||
MemoryContext local_ctx = AllocSetContextCreate(CurrentMemoryContext,
|
||||
"Knapsack",
|
||||
ALLOCSET_SMALL_MINSIZE,
|
||||
ALLOCSET_SMALL_INITSIZE,
|
||||
ALLOCSET_SMALL_MAXSIZE);
|
||||
MemoryContext oldctx = MemoryContextSwitchTo(local_ctx);
|
||||
double *values;
|
||||
Bitmapset **sets;
|
||||
Bitmapset *result;
|
||||
int i,
|
||||
j;
|
||||
|
||||
Assert(max_weight >= 0);
|
||||
Assert(num_items > 0 && item_weights);
|
||||
|
||||
values = palloc((1 + max_weight) * sizeof(double));
|
||||
sets = palloc((1 + max_weight) * sizeof(Bitmapset *));
|
||||
|
||||
for (i = 0; i <= max_weight; ++i)
|
||||
{
|
||||
values[i] = 0;
|
||||
sets[i] = bms_make_singleton(num_items);
|
||||
}
|
||||
|
||||
for (i = 0; i < num_items; ++i)
|
||||
{
|
||||
int iw = item_weights[i];
|
||||
double iv = item_values ? item_values[i] : 1;
|
||||
|
||||
for (j = max_weight; j >= iw; --j)
|
||||
{
|
||||
int ow = j - iw;
|
||||
|
||||
if (values[j] <= values[ow] + iv)
|
||||
{
|
||||
/* copy sets[ow] to sets[j] without realloc */
|
||||
if (j != ow)
|
||||
{
|
||||
sets[j] = bms_del_members(sets[j], sets[j]);
|
||||
sets[j] = bms_add_members(sets[j], sets[ow]);
|
||||
}
|
||||
|
||||
sets[j] = bms_add_member(sets[j], i);
|
||||
|
||||
values[j] = values[ow] + iv;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MemoryContextSwitchTo(oldctx);
|
||||
|
||||
result = bms_del_member(bms_copy(sets[max_weight]), num_items);
|
||||
|
||||
MemoryContextDelete(local_ctx);
|
||||
|
||||
return result;
|
||||
}
|
@ -21,6 +21,7 @@
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/hash.h"
|
||||
#include "nodes/pg_list.h"
|
||||
|
||||
|
||||
#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
|
||||
@ -457,6 +458,35 @@ bms_overlap(const Bitmapset *a, const Bitmapset *b)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* bms_overlap_list - does a set overlap an integer list?
|
||||
*/
|
||||
bool
|
||||
bms_overlap_list(const Bitmapset *a, const List *b)
|
||||
{
|
||||
ListCell *lc;
|
||||
int wordnum,
|
||||
bitnum;
|
||||
|
||||
if (a == NULL || b == NIL)
|
||||
return false;
|
||||
|
||||
foreach(lc, b)
|
||||
{
|
||||
int x = lfirst_int(lc);
|
||||
|
||||
if (x < 0)
|
||||
elog(ERROR, "negative bitmapset member not allowed");
|
||||
wordnum = WORDNUM(x);
|
||||
bitnum = BITNUM(x);
|
||||
if (wordnum < a->nwords)
|
||||
if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* bms_nonempty_difference - do sets have a nonempty difference?
|
||||
*/
|
||||
|
@ -1941,6 +1941,28 @@ _outAggPath(StringInfo str, const AggPath *node)
|
||||
WRITE_NODE_FIELD(qual);
|
||||
}
|
||||
|
||||
static void
|
||||
_outRollupData(StringInfo str, const RollupData *node)
|
||||
{
|
||||
WRITE_NODE_TYPE("ROLLUP");
|
||||
|
||||
WRITE_NODE_FIELD(groupClause);
|
||||
WRITE_NODE_FIELD(gsets);
|
||||
WRITE_NODE_FIELD(gsets_data);
|
||||
WRITE_FLOAT_FIELD(numGroups, "%.0f");
|
||||
WRITE_BOOL_FIELD(hashable);
|
||||
WRITE_BOOL_FIELD(is_hashed);
|
||||
}
|
||||
|
||||
static void
|
||||
_outGroupingSetData(StringInfo str, const GroupingSetData *node)
|
||||
{
|
||||
WRITE_NODE_TYPE("GSDATA");
|
||||
|
||||
WRITE_NODE_FIELD(set);
|
||||
WRITE_FLOAT_FIELD(numGroups, "%.0f");
|
||||
}
|
||||
|
||||
static void
|
||||
_outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
|
||||
{
|
||||
@ -1949,8 +1971,8 @@ _outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
|
||||
_outPathInfo(str, (const Path *) node);
|
||||
|
||||
WRITE_NODE_FIELD(subpath);
|
||||
WRITE_NODE_FIELD(rollup_groupclauses);
|
||||
WRITE_NODE_FIELD(rollup_lists);
|
||||
WRITE_ENUM_FIELD(aggstrategy, AggStrategy);
|
||||
WRITE_NODE_FIELD(rollups);
|
||||
WRITE_NODE_FIELD(qual);
|
||||
}
|
||||
|
||||
@ -3961,14 +3983,18 @@ outNode(StringInfo str, const void *obj)
|
||||
case T_PlannerParamItem:
|
||||
_outPlannerParamItem(str, obj);
|
||||
break;
|
||||
case T_RollupData:
|
||||
_outRollupData(str, obj);
|
||||
break;
|
||||
case T_GroupingSetData:
|
||||
_outGroupingSetData(str, obj);
|
||||
break;
|
||||
case T_StatisticExtInfo:
|
||||
_outStatisticExtInfo(str, obj);
|
||||
break;
|
||||
|
||||
case T_ExtensibleNode:
|
||||
_outExtensibleNode(str, obj);
|
||||
break;
|
||||
|
||||
case T_CreateStmt:
|
||||
_outCreateStmt(str, obj);
|
||||
break;
|
||||
|
@ -1884,11 +1884,16 @@ cost_agg(Path *path, PlannerInfo *root,
|
||||
total_cost = startup_cost + cpu_tuple_cost;
|
||||
output_tuples = 1;
|
||||
}
|
||||
else if (aggstrategy == AGG_SORTED)
|
||||
else if (aggstrategy == AGG_SORTED || aggstrategy == AGG_MIXED)
|
||||
{
|
||||
/* Here we are able to deliver output on-the-fly */
|
||||
startup_cost = input_startup_cost;
|
||||
total_cost = input_total_cost;
|
||||
if (aggstrategy == AGG_MIXED && !enable_hashagg)
|
||||
{
|
||||
startup_cost += disable_cost;
|
||||
total_cost += disable_cost;
|
||||
}
|
||||
/* calcs phrased this way to match HASHED case, see note above */
|
||||
total_cost += aggcosts->transCost.startup;
|
||||
total_cost += aggcosts->transCost.per_tuple * input_tuples;
|
||||
|
@ -1783,18 +1783,15 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
|
||||
{
|
||||
Agg *plan;
|
||||
Plan *subplan;
|
||||
List *rollup_groupclauses = best_path->rollup_groupclauses;
|
||||
List *rollup_lists = best_path->rollup_lists;
|
||||
List *rollups = best_path->rollups;
|
||||
AttrNumber *grouping_map;
|
||||
int maxref;
|
||||
List *chain;
|
||||
ListCell *lc,
|
||||
*lc2;
|
||||
ListCell *lc;
|
||||
|
||||
/* Shouldn't get here without grouping sets */
|
||||
Assert(root->parse->groupingSets);
|
||||
Assert(rollup_lists != NIL);
|
||||
Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
|
||||
Assert(rollups != NIL);
|
||||
|
||||
/*
|
||||
* Agg can project, so no need to be terribly picky about child tlist, but
|
||||
@ -1846,72 +1843,86 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
|
||||
* costs will be shown by EXPLAIN.
|
||||
*/
|
||||
chain = NIL;
|
||||
if (list_length(rollup_groupclauses) > 1)
|
||||
if (list_length(rollups) > 1)
|
||||
{
|
||||
forboth(lc, rollup_groupclauses, lc2, rollup_lists)
|
||||
ListCell *lc2 = lnext(list_head(rollups));
|
||||
bool is_first_sort = ((RollupData *) linitial(rollups))->is_hashed;
|
||||
|
||||
for_each_cell(lc, lc2)
|
||||
{
|
||||
List *groupClause = (List *) lfirst(lc);
|
||||
List *gsets = (List *) lfirst(lc2);
|
||||
RollupData *rollup = lfirst(lc);
|
||||
AttrNumber *new_grpColIdx;
|
||||
Plan *sort_plan;
|
||||
Plan *sort_plan = NULL;
|
||||
Plan *agg_plan;
|
||||
AggStrategy strat;
|
||||
|
||||
/* We want to iterate over all but the last rollup list elements */
|
||||
if (lnext(lc) == NULL)
|
||||
break;
|
||||
|
||||
new_grpColIdx = remap_groupColIdx(root, groupClause);
|
||||
new_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
|
||||
|
||||
if (!rollup->is_hashed && !is_first_sort)
|
||||
{
|
||||
sort_plan = (Plan *)
|
||||
make_sort_from_groupcols(groupClause,
|
||||
make_sort_from_groupcols(rollup->groupClause,
|
||||
new_grpColIdx,
|
||||
subplan);
|
||||
}
|
||||
|
||||
if (!rollup->is_hashed)
|
||||
is_first_sort = false;
|
||||
|
||||
if (rollup->is_hashed)
|
||||
strat = AGG_HASHED;
|
||||
else if (list_length(linitial(rollup->gsets)) == 0)
|
||||
strat = AGG_PLAIN;
|
||||
else
|
||||
strat = AGG_SORTED;
|
||||
|
||||
agg_plan = (Plan *) make_agg(NIL,
|
||||
NIL,
|
||||
AGG_SORTED,
|
||||
strat,
|
||||
AGGSPLIT_SIMPLE,
|
||||
list_length((List *) linitial(gsets)),
|
||||
list_length((List *) linitial(rollup->gsets)),
|
||||
new_grpColIdx,
|
||||
extract_grouping_ops(groupClause),
|
||||
gsets,
|
||||
extract_grouping_ops(rollup->groupClause),
|
||||
rollup->gsets,
|
||||
NIL,
|
||||
0, /* numGroups not needed */
|
||||
rollup->numGroups,
|
||||
sort_plan);
|
||||
|
||||
/*
|
||||
* Nuke stuff we don't need to avoid bloating debug output.
|
||||
* Remove stuff we don't need to avoid bloating debug output.
|
||||
*/
|
||||
if (sort_plan)
|
||||
{
|
||||
sort_plan->targetlist = NIL;
|
||||
sort_plan->lefttree = NULL;
|
||||
}
|
||||
|
||||
chain = lappend(chain, agg_plan);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Now make the final Agg node
|
||||
* Now make the real Agg node
|
||||
*/
|
||||
{
|
||||
List *groupClause = (List *) llast(rollup_groupclauses);
|
||||
List *gsets = (List *) llast(rollup_lists);
|
||||
RollupData *rollup = linitial(rollups);
|
||||
AttrNumber *top_grpColIdx;
|
||||
int numGroupCols;
|
||||
|
||||
top_grpColIdx = remap_groupColIdx(root, groupClause);
|
||||
top_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
|
||||
|
||||
numGroupCols = list_length((List *) linitial(gsets));
|
||||
numGroupCols = list_length((List *) linitial(rollup->gsets));
|
||||
|
||||
plan = make_agg(build_path_tlist(root, &best_path->path),
|
||||
best_path->qual,
|
||||
(numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
|
||||
best_path->aggstrategy,
|
||||
AGGSPLIT_SIMPLE,
|
||||
numGroupCols,
|
||||
top_grpColIdx,
|
||||
extract_grouping_ops(groupClause),
|
||||
gsets,
|
||||
extract_grouping_ops(rollup->groupClause),
|
||||
rollup->gsets,
|
||||
chain,
|
||||
0, /* numGroups not needed */
|
||||
rollup->numGroups,
|
||||
subplan);
|
||||
|
||||
/* Copy cost data from Path to Plan */
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -2697,10 +2697,9 @@ create_agg_path(PlannerInfo *root,
|
||||
* 'subpath' is the path representing the source of data
|
||||
* 'target' is the PathTarget to be computed
|
||||
* 'having_qual' is the HAVING quals if any
|
||||
* 'rollup_lists' is a list of grouping sets
|
||||
* 'rollup_groupclauses' is a list of grouping clauses for grouping sets
|
||||
* 'rollups' is a list of RollupData nodes
|
||||
* 'agg_costs' contains cost info about the aggregate functions to be computed
|
||||
* 'numGroups' is the estimated number of groups
|
||||
* 'numGroups' is the estimated total number of groups
|
||||
*/
|
||||
GroupingSetsPath *
|
||||
create_groupingsets_path(PlannerInfo *root,
|
||||
@ -2708,13 +2707,15 @@ create_groupingsets_path(PlannerInfo *root,
|
||||
Path *subpath,
|
||||
PathTarget *target,
|
||||
List *having_qual,
|
||||
List *rollup_lists,
|
||||
List *rollup_groupclauses,
|
||||
AggStrategy aggstrategy,
|
||||
List *rollups,
|
||||
const AggClauseCosts *agg_costs,
|
||||
double numGroups)
|
||||
{
|
||||
GroupingSetsPath *pathnode = makeNode(GroupingSetsPath);
|
||||
int numGroupCols;
|
||||
ListCell *lc;
|
||||
bool is_first = true;
|
||||
bool is_first_sort = true;
|
||||
|
||||
/* The topmost generated Plan node will be an Agg */
|
||||
pathnode->path.pathtype = T_Agg;
|
||||
@ -2727,55 +2728,90 @@ create_groupingsets_path(PlannerInfo *root,
|
||||
pathnode->path.parallel_workers = subpath->parallel_workers;
|
||||
pathnode->subpath = subpath;
|
||||
|
||||
/*
|
||||
* Simplify callers by downgrading AGG_SORTED to AGG_PLAIN, and AGG_MIXED
|
||||
* to AGG_HASHED, here if possible.
|
||||
*/
|
||||
if (aggstrategy == AGG_SORTED &&
|
||||
list_length(rollups) == 1 &&
|
||||
((RollupData *) linitial(rollups))->groupClause == NIL)
|
||||
aggstrategy = AGG_PLAIN;
|
||||
|
||||
if (aggstrategy == AGG_MIXED &&
|
||||
list_length(rollups) == 1)
|
||||
aggstrategy = AGG_HASHED;
|
||||
|
||||
/*
|
||||
* Output will be in sorted order by group_pathkeys if, and only if, there
|
||||
* is a single rollup operation on a non-empty list of grouping
|
||||
* expressions.
|
||||
*/
|
||||
if (list_length(rollup_groupclauses) == 1 &&
|
||||
((List *) linitial(rollup_groupclauses)) != NIL)
|
||||
if (aggstrategy == AGG_SORTED && list_length(rollups) == 1)
|
||||
pathnode->path.pathkeys = root->group_pathkeys;
|
||||
else
|
||||
pathnode->path.pathkeys = NIL;
|
||||
|
||||
pathnode->rollup_groupclauses = rollup_groupclauses;
|
||||
pathnode->rollup_lists = rollup_lists;
|
||||
pathnode->aggstrategy = aggstrategy;
|
||||
pathnode->rollups = rollups;
|
||||
pathnode->qual = having_qual;
|
||||
|
||||
Assert(rollup_lists != NIL);
|
||||
Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
|
||||
Assert(rollups != NIL);
|
||||
Assert(aggstrategy != AGG_PLAIN || list_length(rollups) == 1);
|
||||
Assert(aggstrategy != AGG_MIXED || list_length(rollups) > 1);
|
||||
|
||||
/* Account for cost of the topmost Agg node */
|
||||
numGroupCols = list_length((List *) linitial((List *) llast(rollup_lists)));
|
||||
foreach(lc, rollups)
|
||||
{
|
||||
RollupData *rollup = lfirst(lc);
|
||||
List *gsets = rollup->gsets;
|
||||
int numGroupCols = list_length(linitial(gsets));
|
||||
|
||||
/*
|
||||
* In AGG_SORTED or AGG_PLAIN mode, the first rollup takes the
|
||||
* (already-sorted) input, and following ones do their own sort.
|
||||
*
|
||||
* In AGG_HASHED mode, there is one rollup for each grouping set.
|
||||
*
|
||||
* In AGG_MIXED mode, the first rollups are hashed, the first
|
||||
* non-hashed one takes the (already-sorted) input, and following ones
|
||||
* do their own sort.
|
||||
*/
|
||||
if (is_first)
|
||||
{
|
||||
cost_agg(&pathnode->path, root,
|
||||
(numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
|
||||
aggstrategy,
|
||||
agg_costs,
|
||||
numGroupCols,
|
||||
numGroups,
|
||||
rollup->numGroups,
|
||||
subpath->startup_cost,
|
||||
subpath->total_cost,
|
||||
subpath->rows);
|
||||
|
||||
/*
|
||||
* Add in the costs and output rows of the additional sorting/aggregation
|
||||
* steps, if any. Only total costs count, since the extra sorts aren't
|
||||
* run on startup.
|
||||
*/
|
||||
if (list_length(rollup_lists) > 1)
|
||||
is_first = false;
|
||||
if (!rollup->is_hashed)
|
||||
is_first_sort = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
ListCell *lc;
|
||||
|
||||
foreach(lc, rollup_lists)
|
||||
{
|
||||
List *gsets = (List *) lfirst(lc);
|
||||
Path sort_path; /* dummy for result of cost_sort */
|
||||
Path agg_path; /* dummy for result of cost_agg */
|
||||
|
||||
/* We must iterate over all but the last rollup_lists element */
|
||||
if (lnext(lc) == NULL)
|
||||
break;
|
||||
|
||||
if (rollup->is_hashed || is_first_sort)
|
||||
{
|
||||
/*
|
||||
* Account for cost of aggregation, but don't charge input
|
||||
* cost again
|
||||
*/
|
||||
cost_agg(&agg_path, root,
|
||||
rollup->is_hashed ? AGG_HASHED : AGG_SORTED,
|
||||
agg_costs,
|
||||
numGroupCols,
|
||||
rollup->numGroups,
|
||||
0.0, 0.0,
|
||||
subpath->rows);
|
||||
if (!rollup->is_hashed)
|
||||
is_first_sort = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Account for cost of sort, but don't charge input cost again */
|
||||
cost_sort(&sort_path, root, NIL,
|
||||
0.0,
|
||||
@ -2786,16 +2822,16 @@ create_groupingsets_path(PlannerInfo *root,
|
||||
-1.0);
|
||||
|
||||
/* Account for cost of aggregation */
|
||||
numGroupCols = list_length((List *) linitial(gsets));
|
||||
|
||||
cost_agg(&agg_path, root,
|
||||
AGG_SORTED,
|
||||
agg_costs,
|
||||
numGroupCols,
|
||||
numGroups, /* XXX surely not right for all steps? */
|
||||
rollup->numGroups,
|
||||
sort_path.startup_cost,
|
||||
sort_path.total_cost,
|
||||
sort_path.rows);
|
||||
}
|
||||
|
||||
pathnode->path.total_cost += agg_path.total_cost;
|
||||
pathnode->path.rows += agg_path.rows;
|
||||
|
17
src/include/lib/knapsack.h
Normal file
17
src/include/lib/knapsack.h
Normal file
@ -0,0 +1,17 @@
|
||||
/*
|
||||
* knapsack.h
|
||||
*
|
||||
* Copyright (c) 2017, PostgreSQL Global Development Group
|
||||
*
|
||||
* src/include/lib/knapsack.h
|
||||
*/
|
||||
#ifndef KNAPSACK_H
|
||||
#define KNAPSACK_H
|
||||
|
||||
#include "postgres.h"
|
||||
#include "nodes/bitmapset.h"
|
||||
|
||||
extern Bitmapset *DiscreteKnapsack(int max_weight, int num_items,
|
||||
int *item_weights, double *item_values);
|
||||
|
||||
#endif /* KNAPSACK_H */
|
@ -20,6 +20,11 @@
|
||||
#ifndef BITMAPSET_H
|
||||
#define BITMAPSET_H
|
||||
|
||||
/*
|
||||
* Forward decl to save including pg_list.h
|
||||
*/
|
||||
struct List;
|
||||
|
||||
/*
|
||||
* Data representation
|
||||
*/
|
||||
@ -70,6 +75,7 @@ extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b);
|
||||
extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b);
|
||||
extern bool bms_is_member(int x, const Bitmapset *a);
|
||||
extern bool bms_overlap(const Bitmapset *a, const Bitmapset *b);
|
||||
extern bool bms_overlap_list(const Bitmapset *a, const struct List *b);
|
||||
extern bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b);
|
||||
extern int bms_singleton_member(const Bitmapset *a);
|
||||
extern bool bms_get_singleton_member(const Bitmapset *a, int *member);
|
||||
|
@ -1699,6 +1699,7 @@ typedef struct AggStatePerAggData *AggStatePerAgg;
|
||||
typedef struct AggStatePerTransData *AggStatePerTrans;
|
||||
typedef struct AggStatePerGroupData *AggStatePerGroup;
|
||||
typedef struct AggStatePerPhaseData *AggStatePerPhase;
|
||||
typedef struct AggStatePerHashData *AggStatePerHash;
|
||||
|
||||
typedef struct AggState
|
||||
{
|
||||
@ -1706,15 +1707,17 @@ typedef struct AggState
|
||||
List *aggs; /* all Aggref nodes in targetlist & quals */
|
||||
int numaggs; /* length of list (could be zero!) */
|
||||
int numtrans; /* number of pertrans items */
|
||||
AggStrategy aggstrategy; /* strategy mode */
|
||||
AggSplit aggsplit; /* agg-splitting mode, see nodes.h */
|
||||
AggStatePerPhase phase; /* pointer to current phase data */
|
||||
int numphases; /* number of phases */
|
||||
int numphases; /* number of phases (including phase 0) */
|
||||
int current_phase; /* current phase number */
|
||||
FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
|
||||
AggStatePerAgg peragg; /* per-Aggref information */
|
||||
AggStatePerTrans pertrans; /* per-Trans state information */
|
||||
ExprContext *hashcontext; /* econtexts for long-lived data (hashtable) */
|
||||
ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */
|
||||
ExprContext *tmpcontext; /* econtext for input expressions */
|
||||
ExprContext *curaggcontext; /* currently active aggcontext */
|
||||
AggStatePerTrans curpertrans; /* currently active trans state */
|
||||
bool input_done; /* indicates end of input */
|
||||
bool agg_done; /* indicates completion of Agg scan */
|
||||
@ -1726,21 +1729,17 @@ typedef struct AggState
|
||||
/* These fields are for grouping set phase data */
|
||||
int maxsets; /* The max number of sets in any phase */
|
||||
AggStatePerPhase phases; /* array of all phases */
|
||||
Tuplesortstate *sort_in; /* sorted input to phases > 0 */
|
||||
Tuplesortstate *sort_in; /* sorted input to phases > 1 */
|
||||
Tuplesortstate *sort_out; /* input is copied here for next phase */
|
||||
TupleTableSlot *sort_slot; /* slot for sort results */
|
||||
/* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
|
||||
AggStatePerGroup pergroup; /* per-Aggref-per-group working state */
|
||||
HeapTuple grp_firstTuple; /* copy of first tuple of current group */
|
||||
/* these fields are used in AGG_HASHED mode: */
|
||||
TupleHashTable hashtable; /* hash table with one entry per group */
|
||||
TupleTableSlot *hashslot; /* slot for loading hash table */
|
||||
int numhashGrpCols; /* number of columns in hash table */
|
||||
int largestGrpColIdx; /* largest column required for hashing */
|
||||
AttrNumber *hashGrpColIdxInput; /* and their indices in input slot */
|
||||
AttrNumber *hashGrpColIdxHash; /* indices for execGrouping in hashtbl */
|
||||
/* these fields are used in AGG_HASHED and AGG_MIXED modes: */
|
||||
bool table_filled; /* hash table filled yet? */
|
||||
TupleHashIterator hashiter; /* for iterating through hash table */
|
||||
int num_hashes;
|
||||
AggStatePerHash perhash;
|
||||
AggStatePerGroup *hash_pergroup; /* array of per-group pointers */
|
||||
/* support for evaluation of agg inputs */
|
||||
TupleTableSlot *evalslot; /* slot for agg inputs */
|
||||
ProjectionInfo *evalproj; /* projection machinery */
|
||||
|
@ -261,6 +261,8 @@ typedef enum NodeTag
|
||||
T_PlaceHolderInfo,
|
||||
T_MinMaxAggInfo,
|
||||
T_PlannerParamItem,
|
||||
T_RollupData,
|
||||
T_GroupingSetData,
|
||||
T_StatisticExtInfo,
|
||||
|
||||
/*
|
||||
@ -724,7 +726,8 @@ typedef enum AggStrategy
|
||||
{
|
||||
AGG_PLAIN, /* simple agg across all input rows */
|
||||
AGG_SORTED, /* grouped agg, input must be sorted */
|
||||
AGG_HASHED /* grouped agg, use internal hashtable */
|
||||
AGG_HASHED, /* grouped agg, use internal hashtable */
|
||||
AGG_MIXED /* grouped agg, hash and sort both used */
|
||||
} AggStrategy;
|
||||
|
||||
/*
|
||||
|
@ -758,7 +758,7 @@ typedef struct Agg
|
||||
Oid *grpOperators; /* equality operators to compare with */
|
||||
long numGroups; /* estimated number of groups in input */
|
||||
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
|
||||
/* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
|
||||
/* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */
|
||||
List *groupingSets; /* grouping sets to use */
|
||||
List *chain; /* chained Agg/Sort nodes */
|
||||
} Agg;
|
||||
|
@ -1418,17 +1418,37 @@ typedef struct AggPath
|
||||
} AggPath;
|
||||
|
||||
/*
|
||||
* GroupingSetsPath represents a GROUPING SETS aggregation
|
||||
*
|
||||
* Currently we only support this in sorted not hashed form, so the input
|
||||
* must always be appropriately presorted.
|
||||
* Various annotations used for grouping sets in the planner.
|
||||
*/
|
||||
|
||||
typedef struct GroupingSetData
|
||||
{
|
||||
NodeTag type;
|
||||
List *set; /* grouping set as list of sortgrouprefs */
|
||||
double numGroups; /* est. number of result groups */
|
||||
} GroupingSetData;
|
||||
|
||||
typedef struct RollupData
|
||||
{
|
||||
NodeTag type;
|
||||
List *groupClause; /* applicable subset of parse->groupClause */
|
||||
List *gsets; /* lists of integer indexes into groupClause */
|
||||
List *gsets_data; /* list of GroupingSetData */
|
||||
double numGroups; /* est. number of result groups */
|
||||
bool hashable; /* can be hashed */
|
||||
bool is_hashed; /* to be implemented as a hashagg */
|
||||
} RollupData;
|
||||
|
||||
/*
|
||||
* GroupingSetsPath represents a GROUPING SETS aggregation
|
||||
*/
|
||||
|
||||
typedef struct GroupingSetsPath
|
||||
{
|
||||
Path path;
|
||||
Path *subpath; /* path representing input source */
|
||||
List *rollup_groupclauses; /* list of lists of SortGroupClause's */
|
||||
List *rollup_lists; /* parallel list of lists of grouping sets */
|
||||
AggStrategy aggstrategy; /* basic strategy */
|
||||
List *rollups; /* list of RollupData */
|
||||
List *qual; /* quals (HAVING quals), if any */
|
||||
} GroupingSetsPath;
|
||||
|
||||
|
@ -195,8 +195,8 @@ extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root,
|
||||
Path *subpath,
|
||||
PathTarget *target,
|
||||
List *having_qual,
|
||||
List *rollup_lists,
|
||||
List *rollup_groupclauses,
|
||||
AggStrategy aggstrategy,
|
||||
List *rollups,
|
||||
const AggClauseCosts *agg_costs,
|
||||
double numGroups);
|
||||
extern MinMaxAggPath *create_minmaxagg_path(PlannerInfo *root,
|
||||
|
@ -13,6 +13,13 @@ copy gstest2 from stdin;
|
||||
create temp table gstest3 (a integer, b integer, c integer, d integer);
|
||||
copy gstest3 from stdin;
|
||||
alter table gstest3 add primary key (a);
|
||||
create temp table gstest4(id integer, v integer,
|
||||
unhashable_col bit(4), unsortable_col xid);
|
||||
insert into gstest4
|
||||
values (1,1,b'0000','1'), (2,2,b'0001','1'),
|
||||
(3,4,b'0010','2'), (4,8,b'0011','2'),
|
||||
(5,16,b'0000','2'), (6,32,b'0001','2'),
|
||||
(7,64,b'0010','1'), (8,128,b'0011','1');
|
||||
create temp table gstest_empty (a integer, b integer, v integer);
|
||||
create function gstest_data(v integer, out a integer, out b integer)
|
||||
returns setof record
|
||||
@ -22,6 +29,7 @@ create function gstest_data(v integer, out a integer, out b integer)
|
||||
end;
|
||||
$f$ language plpgsql;
|
||||
-- basic functionality
|
||||
set enable_hashagg = false; -- test hashing explicitly later
|
||||
-- simple rollup with multiple plain aggregates, with and without ordering
|
||||
-- (and with ordering differing from grouping)
|
||||
select a, b, grouping(a,b), sum(v), count(*), max(v)
|
||||
@ -462,7 +470,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a);
|
||||
|
||||
-- Tests for chained aggregates
|
||||
select a, b, grouping(a,b), sum(v), count(*), max(v)
|
||||
from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2));
|
||||
from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
|
||||
a | b | grouping | sum | count | max
|
||||
---+---+----------+-----+-------+-----
|
||||
1 | 1 | 0 | 21 | 2 | 11
|
||||
@ -473,18 +481,18 @@ select a, b, grouping(a,b), sum(v), count(*), max(v)
|
||||
3 | 4 | 0 | 17 | 1 | 17
|
||||
4 | 1 | 0 | 37 | 2 | 19
|
||||
| | 3 | 21 | 2 | 11
|
||||
| | 3 | 25 | 2 | 13
|
||||
| | 3 | 14 | 1 | 14
|
||||
| | 3 | 15 | 1 | 15
|
||||
| | 3 | 16 | 1 | 16
|
||||
| | 3 | 17 | 1 | 17
|
||||
| | 3 | 37 | 2 | 19
|
||||
| | 3 | 21 | 2 | 11
|
||||
| | 3 | 25 | 2 | 13
|
||||
| | 3 | 25 | 2 | 13
|
||||
| | 3 | 14 | 1 | 14
|
||||
| | 3 | 14 | 1 | 14
|
||||
| | 3 | 15 | 1 | 15
|
||||
| | 3 | 15 | 1 | 15
|
||||
| | 3 | 16 | 1 | 16
|
||||
| | 3 | 16 | 1 | 16
|
||||
| | 3 | 17 | 1 | 17
|
||||
| | 3 | 17 | 1 | 17
|
||||
| | 3 | 37 | 2 | 19
|
||||
| | 3 | 37 | 2 | 19
|
||||
(21 rows)
|
||||
|
||||
@ -847,4 +855,598 @@ select sum(ten) from onek group by rollup(four::text), two order by 1;
|
||||
2500
|
||||
(6 rows)
|
||||
|
||||
-- hashing support
|
||||
set enable_hashagg = true;
|
||||
-- failure cases
|
||||
select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col);
|
||||
ERROR: could not implement GROUP BY
|
||||
DETAIL: Some of the datatypes only support hashing, while others only support sorting.
|
||||
select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id));
|
||||
ERROR: could not implement GROUP BY
|
||||
DETAIL: Some of the datatypes only support hashing, while others only support sorting.
|
||||
-- simple cases
|
||||
select a, b, grouping(a,b), sum(v), count(*), max(v)
|
||||
from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
|
||||
a | b | grouping | sum | count | max
|
||||
---+---+----------+-----+-------+-----
|
||||
1 | | 1 | 60 | 5 | 14
|
||||
2 | | 1 | 15 | 1 | 15
|
||||
3 | | 1 | 33 | 2 | 17
|
||||
4 | | 1 | 37 | 2 | 19
|
||||
| 1 | 2 | 58 | 4 | 19
|
||||
| 2 | 2 | 25 | 2 | 13
|
||||
| 3 | 2 | 45 | 3 | 16
|
||||
| 4 | 2 | 17 | 1 | 17
|
||||
(8 rows)
|
||||
|
||||
explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
|
||||
from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------------------------------------------------------
|
||||
Sort
|
||||
Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2
|
||||
-> HashAggregate
|
||||
Hash Key: "*VALUES*".column1
|
||||
Hash Key: "*VALUES*".column2
|
||||
-> Values Scan on "*VALUES*"
|
||||
(6 rows)
|
||||
|
||||
select a, b, grouping(a,b), sum(v), count(*), max(v)
|
||||
from gstest1 group by cube(a,b) order by 3,1,2;
|
||||
a | b | grouping | sum | count | max
|
||||
---+---+----------+-----+-------+-----
|
||||
1 | 1 | 0 | 21 | 2 | 11
|
||||
1 | 2 | 0 | 25 | 2 | 13
|
||||
1 | 3 | 0 | 14 | 1 | 14
|
||||
2 | 3 | 0 | 15 | 1 | 15
|
||||
3 | 3 | 0 | 16 | 1 | 16
|
||||
3 | 4 | 0 | 17 | 1 | 17
|
||||
4 | 1 | 0 | 37 | 2 | 19
|
||||
1 | | 1 | 60 | 5 | 14
|
||||
2 | | 1 | 15 | 1 | 15
|
||||
3 | | 1 | 33 | 2 | 17
|
||||
4 | | 1 | 37 | 2 | 19
|
||||
| 1 | 2 | 58 | 4 | 19
|
||||
| 2 | 2 | 25 | 2 | 13
|
||||
| 3 | 2 | 45 | 3 | 16
|
||||
| 4 | 2 | 17 | 1 | 17
|
||||
| | 3 | 145 | 10 | 19
|
||||
(16 rows)
|
||||
|
||||
explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
|
||||
from gstest1 group by cube(a,b) order by 3,1,2;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------------------------------------------------------
|
||||
Sort
|
||||
Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2
|
||||
-> MixedAggregate
|
||||
Hash Key: "*VALUES*".column1, "*VALUES*".column2
|
||||
Hash Key: "*VALUES*".column1
|
||||
Hash Key: "*VALUES*".column2
|
||||
Group Key: ()
|
||||
-> Values Scan on "*VALUES*"
|
||||
(8 rows)
|
||||
|
||||
-- shouldn't try and hash
|
||||
explain (costs off)
|
||||
select a, b, grouping(a,b), array_agg(v order by v)
|
||||
from gstest1 group by cube(a,b);
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------
|
||||
GroupAggregate
|
||||
Group Key: "*VALUES*".column1, "*VALUES*".column2
|
||||
Group Key: "*VALUES*".column1
|
||||
Group Key: ()
|
||||
Sort Key: "*VALUES*".column2
|
||||
Group Key: "*VALUES*".column2
|
||||
-> Sort
|
||||
Sort Key: "*VALUES*".column1, "*VALUES*".column2
|
||||
-> Values Scan on "*VALUES*"
|
||||
(9 rows)
|
||||
|
||||
-- mixed hashable/sortable cases
|
||||
select unhashable_col, unsortable_col,
|
||||
grouping(unhashable_col, unsortable_col),
|
||||
count(*), sum(v)
|
||||
from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
|
||||
order by 3, 5;
|
||||
unhashable_col | unsortable_col | grouping | count | sum
|
||||
----------------+----------------+----------+-------+-----
|
||||
0000 | | 1 | 2 | 17
|
||||
0001 | | 1 | 2 | 34
|
||||
0010 | | 1 | 2 | 68
|
||||
0011 | | 1 | 2 | 136
|
||||
| 2 | 2 | 4 | 60
|
||||
| 1 | 2 | 4 | 195
|
||||
(6 rows)
|
||||
|
||||
explain (costs off)
|
||||
select unhashable_col, unsortable_col,
|
||||
grouping(unhashable_col, unsortable_col),
|
||||
count(*), sum(v)
|
||||
from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
|
||||
order by 3,5;
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------
|
||||
Sort
|
||||
Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v))
|
||||
-> MixedAggregate
|
||||
Hash Key: unsortable_col
|
||||
Group Key: unhashable_col
|
||||
-> Sort
|
||||
Sort Key: unhashable_col
|
||||
-> Seq Scan on gstest4
|
||||
(8 rows)
|
||||
|
||||
select unhashable_col, unsortable_col,
|
||||
grouping(unhashable_col, unsortable_col),
|
||||
count(*), sum(v)
|
||||
from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
|
||||
order by 3,5;
|
||||
unhashable_col | unsortable_col | grouping | count | sum
|
||||
----------------+----------------+----------+-------+-----
|
||||
0000 | | 1 | 1 | 1
|
||||
0001 | | 1 | 1 | 2
|
||||
0010 | | 1 | 1 | 4
|
||||
0011 | | 1 | 1 | 8
|
||||
0000 | | 1 | 1 | 16
|
||||
0001 | | 1 | 1 | 32
|
||||
0010 | | 1 | 1 | 64
|
||||
0011 | | 1 | 1 | 128
|
||||
| 1 | 2 | 1 | 1
|
||||
| 1 | 2 | 1 | 2
|
||||
| 2 | 2 | 1 | 4
|
||||
| 2 | 2 | 1 | 8
|
||||
| 2 | 2 | 1 | 16
|
||||
| 2 | 2 | 1 | 32
|
||||
| 1 | 2 | 1 | 64
|
||||
| 1 | 2 | 1 | 128
|
||||
(16 rows)
|
||||
|
||||
explain (costs off)
|
||||
select unhashable_col, unsortable_col,
|
||||
grouping(unhashable_col, unsortable_col),
|
||||
count(*), sum(v)
|
||||
from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
|
||||
order by 3,5;
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------
|
||||
Sort
|
||||
Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v))
|
||||
-> MixedAggregate
|
||||
Hash Key: v, unsortable_col
|
||||
Group Key: v, unhashable_col
|
||||
-> Sort
|
||||
Sort Key: v, unhashable_col
|
||||
-> Seq Scan on gstest4
|
||||
(8 rows)
|
||||
|
||||
-- empty input: first is 0 rows, second 1, third 3 etc.
|
||||
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
|
||||
a | b | sum | count
|
||||
---+---+-----+-------
|
||||
(0 rows)
|
||||
|
||||
explain (costs off)
|
||||
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
|
||||
QUERY PLAN
|
||||
--------------------------------
|
||||
HashAggregate
|
||||
Hash Key: a, b
|
||||
Hash Key: a
|
||||
-> Seq Scan on gstest_empty
|
||||
(4 rows)
|
||||
|
||||
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),());
|
||||
a | b | sum | count
|
||||
---+---+-----+-------
|
||||
| | | 0
|
||||
(1 row)
|
||||
|
||||
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
|
||||
a | b | sum | count
|
||||
---+---+-----+-------
|
||||
| | | 0
|
||||
| | | 0
|
||||
| | | 0
|
||||
(3 rows)
|
||||
|
||||
explain (costs off)
|
||||
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
|
||||
QUERY PLAN
|
||||
--------------------------------
|
||||
MixedAggregate
|
||||
Hash Key: a, b
|
||||
Group Key: ()
|
||||
Group Key: ()
|
||||
Group Key: ()
|
||||
-> Seq Scan on gstest_empty
|
||||
(6 rows)
|
||||
|
||||
select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
|
||||
sum | count
|
||||
-----+-------
|
||||
| 0
|
||||
| 0
|
||||
| 0
|
||||
(3 rows)
|
||||
|
||||
explain (costs off)
|
||||
select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
|
||||
QUERY PLAN
|
||||
--------------------------------
|
||||
Aggregate
|
||||
Group Key: ()
|
||||
Group Key: ()
|
||||
Group Key: ()
|
||||
-> Seq Scan on gstest_empty
|
||||
(5 rows)
|
||||
|
||||
-- check that functionally dependent cols are not nulled
|
||||
select a, d, grouping(a,b,c)
|
||||
from gstest3
|
||||
group by grouping sets ((a,b), (a,c));
|
||||
a | d | grouping
|
||||
---+---+----------
|
||||
1 | 1 | 1
|
||||
2 | 2 | 1
|
||||
1 | 1 | 2
|
||||
2 | 2 | 2
|
||||
(4 rows)
|
||||
|
||||
explain (costs off)
|
||||
select a, d, grouping(a,b,c)
|
||||
from gstest3
|
||||
group by grouping sets ((a,b), (a,c));
|
||||
QUERY PLAN
|
||||
---------------------------
|
||||
HashAggregate
|
||||
Hash Key: a, b
|
||||
Hash Key: a, c
|
||||
-> Seq Scan on gstest3
|
||||
(4 rows)
|
||||
|
||||
-- simple rescan tests
|
||||
select a, b, sum(v.x)
|
||||
from (values (1),(2)) v(x), gstest_data(v.x)
|
||||
group by grouping sets (a,b);
|
||||
a | b | sum
|
||||
---+---+-----
|
||||
2 | | 6
|
||||
1 | | 3
|
||||
| 2 | 3
|
||||
| 3 | 3
|
||||
| 1 | 3
|
||||
(5 rows)
|
||||
|
||||
explain (costs off)
|
||||
select a, b, sum(v.x)
|
||||
from (values (1),(2)) v(x), gstest_data(v.x)
|
||||
group by grouping sets (a,b);
|
||||
QUERY PLAN
|
||||
------------------------------------------
|
||||
HashAggregate
|
||||
Hash Key: gstest_data.a
|
||||
Hash Key: gstest_data.b
|
||||
-> Nested Loop
|
||||
-> Values Scan on "*VALUES*"
|
||||
-> Function Scan on gstest_data
|
||||
(6 rows)
|
||||
|
||||
select *
|
||||
from (values (1),(2)) v(x),
|
||||
lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
|
||||
ERROR: aggregate functions are not allowed in FROM clause of their own query level
|
||||
LINE 3: lateral (select a, b, sum(v.x) from gstest_data(v.x) ...
|
||||
^
|
||||
explain (costs off)
|
||||
select *
|
||||
from (values (1),(2)) v(x),
|
||||
lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
|
||||
ERROR: aggregate functions are not allowed in FROM clause of their own query level
|
||||
LINE 4: lateral (select a, b, sum(v.x) from gstest_data(v.x...
|
||||
^
|
||||
-- Tests for chained aggregates
|
||||
select a, b, grouping(a,b), sum(v), count(*), max(v)
|
||||
from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
|
||||
a | b | grouping | sum | count | max
|
||||
---+---+----------+-----+-------+-----
|
||||
1 | 1 | 0 | 21 | 2 | 11
|
||||
1 | 2 | 0 | 25 | 2 | 13
|
||||
1 | 3 | 0 | 14 | 1 | 14
|
||||
2 | 3 | 0 | 15 | 1 | 15
|
||||
3 | 3 | 0 | 16 | 1 | 16
|
||||
3 | 4 | 0 | 17 | 1 | 17
|
||||
4 | 1 | 0 | 37 | 2 | 19
|
||||
| | 3 | 21 | 2 | 11
|
||||
| | 3 | 21 | 2 | 11
|
||||
| | 3 | 25 | 2 | 13
|
||||
| | 3 | 25 | 2 | 13
|
||||
| | 3 | 14 | 1 | 14
|
||||
| | 3 | 14 | 1 | 14
|
||||
| | 3 | 15 | 1 | 15
|
||||
| | 3 | 15 | 1 | 15
|
||||
| | 3 | 16 | 1 | 16
|
||||
| | 3 | 16 | 1 | 16
|
||||
| | 3 | 17 | 1 | 17
|
||||
| | 3 | 17 | 1 | 17
|
||||
| | 3 | 37 | 2 | 19
|
||||
| | 3 | 37 | 2 | 19
|
||||
(21 rows)
|
||||
|
||||
explain (costs off)
|
||||
select a, b, grouping(a,b), sum(v), count(*), max(v)
|
||||
from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
|
||||
QUERY PLAN
|
||||
-------------------------------------------------------------------------------------------
|
||||
Sort
|
||||
Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), (max("*VALUES*".column3))
|
||||
-> HashAggregate
|
||||
Hash Key: "*VALUES*".column1, "*VALUES*".column2
|
||||
Hash Key: ("*VALUES*".column1 + 1), ("*VALUES*".column2 + 1)
|
||||
Hash Key: ("*VALUES*".column1 + 2), ("*VALUES*".column2 + 2)
|
||||
-> Values Scan on "*VALUES*"
|
||||
(7 rows)
|
||||
|
||||
select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
|
||||
from gstest2 group by cube (a,b) order by rsum, a, b;
|
||||
a | b | sum | rsum
|
||||
---+---+-----+------
|
||||
1 | 1 | 8 | 8
|
||||
1 | 2 | 2 | 10
|
||||
1 | | 10 | 20
|
||||
2 | 2 | 2 | 22
|
||||
2 | | 2 | 24
|
||||
| 1 | 8 | 32
|
||||
| 2 | 4 | 36
|
||||
| | 12 | 48
|
||||
(8 rows)
|
||||
|
||||
explain (costs off)
|
||||
select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
|
||||
from gstest2 group by cube (a,b) order by rsum, a, b;
|
||||
QUERY PLAN
|
||||
---------------------------------------------
|
||||
Sort
|
||||
Sort Key: (sum((sum(c))) OVER (?)), a, b
|
||||
-> WindowAgg
|
||||
-> Sort
|
||||
Sort Key: a, b
|
||||
-> MixedAggregate
|
||||
Hash Key: a, b
|
||||
Hash Key: a
|
||||
Hash Key: b
|
||||
Group Key: ()
|
||||
-> Seq Scan on gstest2
|
||||
(11 rows)
|
||||
|
||||
select a, b, sum(v.x)
|
||||
from (values (1),(2)) v(x), gstest_data(v.x)
|
||||
group by cube (a,b) order by a,b;
|
||||
a | b | sum
|
||||
---+---+-----
|
||||
1 | 1 | 1
|
||||
1 | 2 | 1
|
||||
1 | 3 | 1
|
||||
1 | | 3
|
||||
2 | 1 | 2
|
||||
2 | 2 | 2
|
||||
2 | 3 | 2
|
||||
2 | | 6
|
||||
| 1 | 3
|
||||
| 2 | 3
|
||||
| 3 | 3
|
||||
| | 9
|
||||
(12 rows)
|
||||
|
||||
explain (costs off)
|
||||
select a, b, sum(v.x)
|
||||
from (values (1),(2)) v(x), gstest_data(v.x)
|
||||
group by cube (a,b) order by a,b;
|
||||
QUERY PLAN
|
||||
------------------------------------------------
|
||||
Sort
|
||||
Sort Key: gstest_data.a, gstest_data.b
|
||||
-> MixedAggregate
|
||||
Hash Key: gstest_data.a, gstest_data.b
|
||||
Hash Key: gstest_data.a
|
||||
Hash Key: gstest_data.b
|
||||
Group Key: ()
|
||||
-> Nested Loop
|
||||
-> Values Scan on "*VALUES*"
|
||||
-> Function Scan on gstest_data
|
||||
(10 rows)
|
||||
|
||||
-- More rescan tests
|
||||
select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten;
|
||||
a | a | four | ten | count
|
||||
---+---+------+-----+-------
|
||||
1 | 1 | 0 | 0 | 50
|
||||
1 | 1 | 0 | 2 | 50
|
||||
1 | 1 | 0 | 4 | 50
|
||||
1 | 1 | 0 | 6 | 50
|
||||
1 | 1 | 0 | 8 | 50
|
||||
1 | 1 | 0 | | 250
|
||||
1 | 1 | 1 | 1 | 50
|
||||
1 | 1 | 1 | 3 | 50
|
||||
1 | 1 | 1 | 5 | 50
|
||||
1 | 1 | 1 | 7 | 50
|
||||
1 | 1 | 1 | 9 | 50
|
||||
1 | 1 | 1 | | 250
|
||||
1 | 1 | 2 | 0 | 50
|
||||
1 | 1 | 2 | 2 | 50
|
||||
1 | 1 | 2 | 4 | 50
|
||||
1 | 1 | 2 | 6 | 50
|
||||
1 | 1 | 2 | 8 | 50
|
||||
1 | 1 | 2 | | 250
|
||||
1 | 1 | 3 | 1 | 50
|
||||
1 | 1 | 3 | 3 | 50
|
||||
1 | 1 | 3 | 5 | 50
|
||||
1 | 1 | 3 | 7 | 50
|
||||
1 | 1 | 3 | 9 | 50
|
||||
1 | 1 | 3 | | 250
|
||||
1 | 1 | | 0 | 100
|
||||
1 | 1 | | 1 | 100
|
||||
1 | 1 | | 2 | 100
|
||||
1 | 1 | | 3 | 100
|
||||
1 | 1 | | 4 | 100
|
||||
1 | 1 | | 5 | 100
|
||||
1 | 1 | | 6 | 100
|
||||
1 | 1 | | 7 | 100
|
||||
1 | 1 | | 8 | 100
|
||||
1 | 1 | | 9 | 100
|
||||
1 | 1 | | | 1000
|
||||
2 | 2 | 0 | 0 | 50
|
||||
2 | 2 | 0 | 2 | 50
|
||||
2 | 2 | 0 | 4 | 50
|
||||
2 | 2 | 0 | 6 | 50
|
||||
2 | 2 | 0 | 8 | 50
|
||||
2 | 2 | 0 | | 250
|
||||
2 | 2 | 1 | 1 | 50
|
||||
2 | 2 | 1 | 3 | 50
|
||||
2 | 2 | 1 | 5 | 50
|
||||
2 | 2 | 1 | 7 | 50
|
||||
2 | 2 | 1 | 9 | 50
|
||||
2 | 2 | 1 | | 250
|
||||
2 | 2 | 2 | 0 | 50
|
||||
2 | 2 | 2 | 2 | 50
|
||||
2 | 2 | 2 | 4 | 50
|
||||
2 | 2 | 2 | 6 | 50
|
||||
2 | 2 | 2 | 8 | 50
|
||||
2 | 2 | 2 | | 250
|
||||
2 | 2 | 3 | 1 | 50
|
||||
2 | 2 | 3 | 3 | 50
|
||||
2 | 2 | 3 | 5 | 50
|
||||
2 | 2 | 3 | 7 | 50
|
||||
2 | 2 | 3 | 9 | 50
|
||||
2 | 2 | 3 | | 250
|
||||
2 | 2 | | 0 | 100
|
||||
2 | 2 | | 1 | 100
|
||||
2 | 2 | | 2 | 100
|
||||
2 | 2 | | 3 | 100
|
||||
2 | 2 | | 4 | 100
|
||||
2 | 2 | | 5 | 100
|
||||
2 | 2 | | 6 | 100
|
||||
2 | 2 | | 7 | 100
|
||||
2 | 2 | | 8 | 100
|
||||
2 | 2 | | 9 | 100
|
||||
2 | 2 | | | 1000
|
||||
(70 rows)
|
||||
|
||||
select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a);
|
||||
array
|
||||
------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
{"(1,0,0,250)","(1,0,2,250)","(1,0,,500)","(1,1,1,250)","(1,1,3,250)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)","(1,,,1000)"}
|
||||
{"(2,0,0,250)","(2,0,2,250)","(2,0,,500)","(2,1,1,250)","(2,1,3,250)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)","(2,,,1000)"}
|
||||
(2 rows)
|
||||
|
||||
-- Rescan logic changes when there are no empty grouping sets, so test
|
||||
-- that too:
|
||||
select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten;
|
||||
a | a | four | ten | count
|
||||
---+---+------+-----+-------
|
||||
1 | 1 | 0 | | 250
|
||||
1 | 1 | 1 | | 250
|
||||
1 | 1 | 2 | | 250
|
||||
1 | 1 | 3 | | 250
|
||||
1 | 1 | | 0 | 100
|
||||
1 | 1 | | 1 | 100
|
||||
1 | 1 | | 2 | 100
|
||||
1 | 1 | | 3 | 100
|
||||
1 | 1 | | 4 | 100
|
||||
1 | 1 | | 5 | 100
|
||||
1 | 1 | | 6 | 100
|
||||
1 | 1 | | 7 | 100
|
||||
1 | 1 | | 8 | 100
|
||||
1 | 1 | | 9 | 100
|
||||
2 | 2 | 0 | | 250
|
||||
2 | 2 | 1 | | 250
|
||||
2 | 2 | 2 | | 250
|
||||
2 | 2 | 3 | | 250
|
||||
2 | 2 | | 0 | 100
|
||||
2 | 2 | | 1 | 100
|
||||
2 | 2 | | 2 | 100
|
||||
2 | 2 | | 3 | 100
|
||||
2 | 2 | | 4 | 100
|
||||
2 | 2 | | 5 | 100
|
||||
2 | 2 | | 6 | 100
|
||||
2 | 2 | | 7 | 100
|
||||
2 | 2 | | 8 | 100
|
||||
2 | 2 | | 9 | 100
|
||||
(28 rows)
|
||||
|
||||
select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a);
|
||||
array
|
||||
---------------------------------------------------------------------------------
|
||||
{"(1,0,,500)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)"}
|
||||
{"(2,0,,500)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)"}
|
||||
(2 rows)
|
||||
|
||||
-- test the knapsack
|
||||
set work_mem = '64kB';
|
||||
explain (costs off)
|
||||
select unique1,
|
||||
count(two), count(four), count(ten),
|
||||
count(hundred), count(thousand), count(twothousand),
|
||||
count(*)
|
||||
from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
|
||||
QUERY PLAN
|
||||
-------------------------------
|
||||
MixedAggregate
|
||||
Hash Key: two
|
||||
Hash Key: four
|
||||
Hash Key: ten
|
||||
Hash Key: hundred
|
||||
Group Key: unique1
|
||||
Sort Key: twothousand
|
||||
Group Key: twothousand
|
||||
Sort Key: thousand
|
||||
Group Key: thousand
|
||||
-> Sort
|
||||
Sort Key: unique1
|
||||
-> Seq Scan on tenk1
|
||||
(13 rows)
|
||||
|
||||
explain (costs off)
|
||||
select unique1,
|
||||
count(two), count(four), count(ten),
|
||||
count(hundred), count(thousand), count(twothousand),
|
||||
count(*)
|
||||
from tenk1 group by grouping sets (unique1,hundred,ten,four,two);
|
||||
QUERY PLAN
|
||||
-------------------------------
|
||||
MixedAggregate
|
||||
Hash Key: two
|
||||
Hash Key: four
|
||||
Hash Key: ten
|
||||
Hash Key: hundred
|
||||
Group Key: unique1
|
||||
-> Sort
|
||||
Sort Key: unique1
|
||||
-> Seq Scan on tenk1
|
||||
(9 rows)
|
||||
|
||||
set work_mem = '384kB';
|
||||
explain (costs off)
|
||||
select unique1,
|
||||
count(two), count(four), count(ten),
|
||||
count(hundred), count(thousand), count(twothousand),
|
||||
count(*)
|
||||
from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
|
||||
QUERY PLAN
|
||||
-------------------------------
|
||||
MixedAggregate
|
||||
Hash Key: two
|
||||
Hash Key: four
|
||||
Hash Key: ten
|
||||
Hash Key: hundred
|
||||
Hash Key: thousand
|
||||
Group Key: unique1
|
||||
Sort Key: twothousand
|
||||
Group Key: twothousand
|
||||
-> Sort
|
||||
Sort Key: unique1
|
||||
-> Seq Scan on tenk1
|
||||
(12 rows)
|
||||
|
||||
-- end
|
||||
|
@ -233,6 +233,7 @@ SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROU
|
||||
(6 rows)
|
||||
|
||||
-- grouping sets are a bit special, they produce NULLs in columns not actually NULL
|
||||
set enable_hashagg = false;
|
||||
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab);
|
||||
dataa | b | g | count
|
||||
-------+-----+---+-------
|
||||
@ -311,46 +312,46 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d
|
||||
b | bar | | 2
|
||||
b | | | 2
|
||||
| | | 6
|
||||
| bar | 1 | 2
|
||||
| bar | 2 | 2
|
||||
| bar | | 4
|
||||
| foo | 1 | 1
|
||||
| foo | 2 | 1
|
||||
| foo | | 2
|
||||
a | | 1 | 2
|
||||
b | | 1 | 1
|
||||
| | 1 | 3
|
||||
a | | 2 | 2
|
||||
b | | 2 | 1
|
||||
| | 2 | 3
|
||||
| bar | 1 | 2
|
||||
| bar | 2 | 2
|
||||
| bar | | 4
|
||||
| foo | 1 | 1
|
||||
| foo | 2 | 1
|
||||
| foo | | 2
|
||||
(24 rows)
|
||||
|
||||
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa;
|
||||
dataa | b | g | count
|
||||
-------+-----+---+-------
|
||||
a | foo | | 2
|
||||
a | | | 4
|
||||
a | | 2 | 2
|
||||
a | bar | 1 | 1
|
||||
a | bar | 2 | 1
|
||||
a | bar | | 2
|
||||
a | foo | 1 | 1
|
||||
a | foo | 2 | 1
|
||||
a | foo | | 2
|
||||
a | | | 4
|
||||
a | | 1 | 2
|
||||
a | | 2 | 2
|
||||
b | bar | 2 | 1
|
||||
b | bar | 1 | 1
|
||||
b | | | 2
|
||||
b | | 1 | 1
|
||||
b | | 2 | 1
|
||||
b | bar | 1 | 1
|
||||
b | bar | 2 | 1
|
||||
b | bar | | 2
|
||||
| foo | | 2
|
||||
| foo | 1 | 1
|
||||
b | | 2 | 1
|
||||
| | 2 | 3
|
||||
| | | 6
|
||||
| bar | 1 | 2
|
||||
| bar | 2 | 2
|
||||
| | | 6
|
||||
| foo | 2 | 1
|
||||
| bar | | 4
|
||||
| foo | 1 | 1
|
||||
| foo | 2 | 1
|
||||
| foo | | 2
|
||||
| | 1 | 3
|
||||
(24 rows)
|
||||
|
||||
@ -360,29 +361,30 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d
|
||||
a | bar | 1 | 1
|
||||
a | foo | 1 | 1
|
||||
b | bar | 1 | 1
|
||||
| bar | 1 | 2
|
||||
| foo | 1 | 1
|
||||
a | | 1 | 2
|
||||
b | | 1 | 1
|
||||
| | 1 | 3
|
||||
| bar | 1 | 2
|
||||
| foo | 1 | 1
|
||||
| foo | 2 | 1
|
||||
| bar | 2 | 2
|
||||
a | | 2 | 2
|
||||
b | | 2 | 1
|
||||
a | bar | 2 | 1
|
||||
| bar | 2 | 2
|
||||
| | 2 | 3
|
||||
| foo | 2 | 1
|
||||
a | bar | 2 | 1
|
||||
a | foo | 2 | 1
|
||||
b | bar | 2 | 1
|
||||
a | foo | | 2
|
||||
a | | | 4
|
||||
b | bar | | 2
|
||||
b | | | 2
|
||||
| | | 6
|
||||
a | | | 4
|
||||
a | foo | | 2
|
||||
a | bar | | 2
|
||||
| bar | | 4
|
||||
| foo | | 2
|
||||
a | bar | | 2
|
||||
(24 rows)
|
||||
|
||||
reset enable_hashagg;
|
||||
-- data modification
|
||||
CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data;
|
||||
INSERT INTO fewmore VALUES(generate_series(4,5));
|
||||
|
@ -31,6 +31,14 @@ copy gstest3 from stdin;
|
||||
\.
|
||||
alter table gstest3 add primary key (a);
|
||||
|
||||
create temp table gstest4(id integer, v integer,
|
||||
unhashable_col bit(4), unsortable_col xid);
|
||||
insert into gstest4
|
||||
values (1,1,b'0000','1'), (2,2,b'0001','1'),
|
||||
(3,4,b'0010','2'), (4,8,b'0011','2'),
|
||||
(5,16,b'0000','2'), (6,32,b'0001','2'),
|
||||
(7,64,b'0010','1'), (8,128,b'0011','1');
|
||||
|
||||
create temp table gstest_empty (a integer, b integer, v integer);
|
||||
|
||||
create function gstest_data(v integer, out a integer, out b integer)
|
||||
@ -43,8 +51,11 @@ create function gstest_data(v integer, out a integer, out b integer)
|
||||
|
||||
-- basic functionality
|
||||
|
||||
set enable_hashagg = false; -- test hashing explicitly later
|
||||
|
||||
-- simple rollup with multiple plain aggregates, with and without ordering
|
||||
-- (and with ordering differing from grouping)
|
||||
|
||||
select a, b, grouping(a,b), sum(v), count(*), max(v)
|
||||
from gstest1 group by rollup (a,b);
|
||||
select a, b, grouping(a,b), sum(v), count(*), max(v)
|
||||
@ -161,7 +172,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a);
|
||||
|
||||
-- Tests for chained aggregates
|
||||
select a, b, grouping(a,b), sum(v), count(*), max(v)
|
||||
from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2));
|
||||
from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
|
||||
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1));
|
||||
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY CUBE((e+1),(f+1)) ORDER BY (e+1),(f+1);
|
||||
select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
|
||||
@ -224,4 +235,146 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou
|
||||
select sum(ten) from onek group by two, rollup(four::text) order by 1;
|
||||
select sum(ten) from onek group by rollup(four::text), two order by 1;
|
||||
|
||||
-- hashing support
|
||||
|
||||
set enable_hashagg = true;
|
||||
|
||||
-- failure cases
|
||||
|
||||
select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col);
|
||||
select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id));
|
||||
|
||||
-- simple cases
|
||||
|
||||
select a, b, grouping(a,b), sum(v), count(*), max(v)
|
||||
from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
|
||||
explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
|
||||
from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
|
||||
|
||||
select a, b, grouping(a,b), sum(v), count(*), max(v)
|
||||
from gstest1 group by cube(a,b) order by 3,1,2;
|
||||
explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
|
||||
from gstest1 group by cube(a,b) order by 3,1,2;
|
||||
|
||||
-- shouldn't try and hash
|
||||
explain (costs off)
|
||||
select a, b, grouping(a,b), array_agg(v order by v)
|
||||
from gstest1 group by cube(a,b);
|
||||
|
||||
-- mixed hashable/sortable cases
|
||||
select unhashable_col, unsortable_col,
|
||||
grouping(unhashable_col, unsortable_col),
|
||||
count(*), sum(v)
|
||||
from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
|
||||
order by 3, 5;
|
||||
explain (costs off)
|
||||
select unhashable_col, unsortable_col,
|
||||
grouping(unhashable_col, unsortable_col),
|
||||
count(*), sum(v)
|
||||
from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
|
||||
order by 3,5;
|
||||
|
||||
select unhashable_col, unsortable_col,
|
||||
grouping(unhashable_col, unsortable_col),
|
||||
count(*), sum(v)
|
||||
from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
|
||||
order by 3,5;
|
||||
explain (costs off)
|
||||
select unhashable_col, unsortable_col,
|
||||
grouping(unhashable_col, unsortable_col),
|
||||
count(*), sum(v)
|
||||
from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
|
||||
order by 3,5;
|
||||
|
||||
-- empty input: first is 0 rows, second 1, third 3 etc.
|
||||
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
|
||||
explain (costs off)
|
||||
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
|
||||
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),());
|
||||
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
|
||||
explain (costs off)
|
||||
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
|
||||
select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
|
||||
explain (costs off)
|
||||
select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
|
||||
|
||||
-- check that functionally dependent cols are not nulled
|
||||
select a, d, grouping(a,b,c)
|
||||
from gstest3
|
||||
group by grouping sets ((a,b), (a,c));
|
||||
explain (costs off)
|
||||
select a, d, grouping(a,b,c)
|
||||
from gstest3
|
||||
group by grouping sets ((a,b), (a,c));
|
||||
|
||||
-- simple rescan tests
|
||||
|
||||
select a, b, sum(v.x)
|
||||
from (values (1),(2)) v(x), gstest_data(v.x)
|
||||
group by grouping sets (a,b);
|
||||
explain (costs off)
|
||||
select a, b, sum(v.x)
|
||||
from (values (1),(2)) v(x), gstest_data(v.x)
|
||||
group by grouping sets (a,b);
|
||||
|
||||
select *
|
||||
from (values (1),(2)) v(x),
|
||||
lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
|
||||
explain (costs off)
|
||||
select *
|
||||
from (values (1),(2)) v(x),
|
||||
lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
|
||||
|
||||
-- Tests for chained aggregates
|
||||
select a, b, grouping(a,b), sum(v), count(*), max(v)
|
||||
from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
|
||||
explain (costs off)
|
||||
select a, b, grouping(a,b), sum(v), count(*), max(v)
|
||||
from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
|
||||
select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
|
||||
from gstest2 group by cube (a,b) order by rsum, a, b;
|
||||
explain (costs off)
|
||||
select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
|
||||
from gstest2 group by cube (a,b) order by rsum, a, b;
|
||||
select a, b, sum(v.x)
|
||||
from (values (1),(2)) v(x), gstest_data(v.x)
|
||||
group by cube (a,b) order by a,b;
|
||||
explain (costs off)
|
||||
select a, b, sum(v.x)
|
||||
from (values (1),(2)) v(x), gstest_data(v.x)
|
||||
group by cube (a,b) order by a,b;
|
||||
|
||||
-- More rescan tests
|
||||
select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten;
|
||||
select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a);
|
||||
|
||||
-- Rescan logic changes when there are no empty grouping sets, so test
|
||||
-- that too:
|
||||
select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten;
|
||||
select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a);
|
||||
|
||||
-- test the knapsack
|
||||
|
||||
set work_mem = '64kB';
|
||||
explain (costs off)
|
||||
select unique1,
|
||||
count(two), count(four), count(ten),
|
||||
count(hundred), count(thousand), count(twothousand),
|
||||
count(*)
|
||||
from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
|
||||
explain (costs off)
|
||||
select unique1,
|
||||
count(two), count(four), count(ten),
|
||||
count(hundred), count(thousand), count(twothousand),
|
||||
count(*)
|
||||
from tenk1 group by grouping sets (unique1,hundred,ten,four,two);
|
||||
|
||||
set work_mem = '384kB';
|
||||
explain (costs off)
|
||||
select unique1,
|
||||
count(two), count(four), count(ten),
|
||||
count(hundred), count(thousand), count(twothousand),
|
||||
count(*)
|
||||
from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
|
||||
|
||||
-- end
|
||||
|
@ -66,12 +66,14 @@ SELECT SUM(count(*)) OVER(PARTITION BY generate_series(1,3) ORDER BY generate_se
|
||||
SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROUP BY few.dataa ORDER BY 5, 1;
|
||||
|
||||
-- grouping sets are a bit special, they produce NULLs in columns not actually NULL
|
||||
set enable_hashagg = false;
|
||||
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab);
|
||||
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY dataa;
|
||||
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY g;
|
||||
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g);
|
||||
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa;
|
||||
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY g;
|
||||
reset enable_hashagg;
|
||||
|
||||
-- data modification
|
||||
CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data;
|
||||
|
Loading…
x
Reference in New Issue
Block a user