Rework the stats_ext test
As suggested by Tom Lane, avoid printing specific estimated cost values, because they vary across architectures; instead, verify plan shapes (in this case, HashAggregate vs. GroupAggregate), as we do in other planner tests. We can now remove expected/stats_ext_1.out. Author: Tomas Vondra
This commit is contained in:
parent
70ec3f1f8f
commit
bed9ef5a16
@ -1,4 +1,10 @@
|
||||
-- Generic extended statistics support
|
||||
-- We will be checking execution plans without/with statistics, so
|
||||
-- let's make sure we get simple non-parallel plans. Also set the
|
||||
-- work_mem low so that we can use small amounts of data.
|
||||
SET max_parallel_workers = 0;
|
||||
SET max_parallel_workers_per_gather = 0;
|
||||
SET work_mem = '128kB';
|
||||
-- Ensure stats are dropped sanely
|
||||
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
|
||||
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
|
||||
@ -49,6 +55,67 @@ CREATE TABLE ndistinct (
|
||||
c INT,
|
||||
d INT
|
||||
);
|
||||
-- over-estimates when using only per-column statistics
|
||||
INSERT INTO ndistinct (a, b, c, filler1)
|
||||
SELECT i/100, i/100, i/100, cash_words((i/100)::money)
|
||||
FROM generate_series(1,30000) s(i);
|
||||
ANALYZE ndistinct;
|
||||
-- Group Aggregate, due to over-estimate of the number of groups
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
||||
QUERY PLAN
|
||||
-----------------------------------
|
||||
GroupAggregate
|
||||
Group Key: a, b
|
||||
-> Sort
|
||||
Sort Key: a, b
|
||||
-> Seq Scan on ndistinct
|
||||
(5 rows)
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
|
||||
QUERY PLAN
|
||||
-----------------------------------
|
||||
GroupAggregate
|
||||
Group Key: b, c
|
||||
-> Sort
|
||||
Sort Key: b, c
|
||||
-> Seq Scan on ndistinct
|
||||
(5 rows)
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
||||
QUERY PLAN
|
||||
-----------------------------------
|
||||
GroupAggregate
|
||||
Group Key: a, b, c
|
||||
-> Sort
|
||||
Sort Key: a, b, c
|
||||
-> Seq Scan on ndistinct
|
||||
(5 rows)
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||
QUERY PLAN
|
||||
-----------------------------------
|
||||
GroupAggregate
|
||||
Group Key: a, b, c, d
|
||||
-> Sort
|
||||
Sort Key: a, b, c, d
|
||||
-> Seq Scan on ndistinct
|
||||
(5 rows)
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
||||
QUERY PLAN
|
||||
-----------------------------------
|
||||
GroupAggregate
|
||||
Group Key: b, c, d
|
||||
-> Sort
|
||||
Sort Key: b, c, d
|
||||
-> Seq Scan on ndistinct
|
||||
(5 rows)
|
||||
|
||||
-- unknown column
|
||||
CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
|
||||
ERROR: column "unknown_column" referenced in statistics does not exist
|
||||
@ -63,100 +130,184 @@ CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
|
||||
ERROR: duplicate column name in statistics definition
|
||||
-- correct command
|
||||
CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
|
||||
-- perfectly correlated groups
|
||||
ANALYZE ndistinct;
|
||||
SELECT staenabled, standistinct
|
||||
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
||||
staenabled | standistinct
|
||||
------------+------------------------------------------------------------------------------------------------
|
||||
{d} | [{(b 3 4), 301.000000}, {(b 3 6), 301.000000}, {(b 4 6), 301.000000}, {(b 3 4 6), 301.000000}]
|
||||
(1 row)
|
||||
|
||||
-- Hash Aggregate, thanks to estimates improved by the statistic
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
||||
QUERY PLAN
|
||||
-----------------------------
|
||||
HashAggregate
|
||||
Group Key: a, b
|
||||
-> Seq Scan on ndistinct
|
||||
(3 rows)
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
|
||||
QUERY PLAN
|
||||
-----------------------------
|
||||
HashAggregate
|
||||
Group Key: b, c
|
||||
-> Seq Scan on ndistinct
|
||||
(3 rows)
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
||||
QUERY PLAN
|
||||
-----------------------------
|
||||
HashAggregate
|
||||
Group Key: a, b, c
|
||||
-> Seq Scan on ndistinct
|
||||
(3 rows)
|
||||
|
||||
-- last two plans keep using Group Aggregate, because 'd' is not covered
|
||||
-- by the statistic and while it's NULL-only we assume 200 values for it
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||
QUERY PLAN
|
||||
-----------------------------------
|
||||
GroupAggregate
|
||||
Group Key: a, b, c, d
|
||||
-> Sort
|
||||
Sort Key: a, b, c, d
|
||||
-> Seq Scan on ndistinct
|
||||
(5 rows)
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
||||
QUERY PLAN
|
||||
-----------------------------------
|
||||
GroupAggregate
|
||||
Group Key: b, c, d
|
||||
-> Sort
|
||||
Sort Key: b, c, d
|
||||
-> Seq Scan on ndistinct
|
||||
(5 rows)
|
||||
|
||||
TRUNCATE TABLE ndistinct;
|
||||
-- under-estimates when using only per-column statistics
|
||||
INSERT INTO ndistinct (a, b, c, filler1)
|
||||
SELECT i/100, i/100, i/100, cash_words(i::money)
|
||||
SELECT mod(i,50), mod(i,51), mod(i,32),
|
||||
cash_words(mod(i,33)::int::money)
|
||||
FROM generate_series(1,10000) s(i);
|
||||
ANALYZE ndistinct;
|
||||
SELECT staenabled, standistinct
|
||||
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
||||
staenabled | standistinct
|
||||
------------+------------------------------------------------------------------------------------------------
|
||||
{d} | [{(b 3 4), 101.000000}, {(b 3 6), 101.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 101.000000}]
|
||||
staenabled | standistinct
|
||||
------------+----------------------------------------------------------------------------------------------------
|
||||
{d} | [{(b 3 4), 2550.000000}, {(b 3 6), 800.000000}, {(b 4 6), 1632.000000}, {(b 3 4 6), 10000.000000}]
|
||||
(1 row)
|
||||
|
||||
-- plans using Group Aggregate, thanks to using correct estimates
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
||||
QUERY PLAN
|
||||
-----------------------------
|
||||
HashAggregate
|
||||
QUERY PLAN
|
||||
-----------------------------------
|
||||
GroupAggregate
|
||||
Group Key: a, b
|
||||
-> Seq Scan on ndistinct
|
||||
(3 rows)
|
||||
-> Sort
|
||||
Sort Key: a, b
|
||||
-> Seq Scan on ndistinct
|
||||
(5 rows)
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
||||
QUERY PLAN
|
||||
-----------------------------------
|
||||
GroupAggregate
|
||||
Group Key: a, b, c
|
||||
-> Sort
|
||||
Sort Key: a, b, c
|
||||
-> Seq Scan on ndistinct
|
||||
(5 rows)
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||
QUERY PLAN
|
||||
-----------------------------------
|
||||
GroupAggregate
|
||||
Group Key: a, b, c, d
|
||||
-> Sort
|
||||
Sort Key: a, b, c, d
|
||||
-> Seq Scan on ndistinct
|
||||
(5 rows)
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
||||
QUERY PLAN
|
||||
-----------------------------
|
||||
HashAggregate
|
||||
Group Key: a, b, c
|
||||
Group Key: b, c, d
|
||||
-> Seq Scan on ndistinct
|
||||
(3 rows)
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
|
||||
QUERY PLAN
|
||||
-----------------------------
|
||||
HashAggregate
|
||||
Group Key: a, b, c, d
|
||||
Group Key: a, d
|
||||
-> Seq Scan on ndistinct
|
||||
(3 rows)
|
||||
|
||||
TRUNCATE TABLE ndistinct;
|
||||
-- partially correlated groups
|
||||
INSERT INTO ndistinct (a, b, c)
|
||||
SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
|
||||
ANALYZE ndistinct;
|
||||
DROP STATISTICS s10;
|
||||
SELECT staenabled, standistinct
|
||||
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
||||
staenabled | standistinct
|
||||
------------+------------------------------------------------------------------------------------------------
|
||||
{d} | [{(b 3 4), 201.000000}, {(b 3 6), 201.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 201.000000}]
|
||||
(1 row)
|
||||
staenabled | standistinct
|
||||
------------+--------------
|
||||
(0 rows)
|
||||
|
||||
EXPLAIN
|
||||
-- dropping the statistics switches the plans to Hash Aggregate,
|
||||
-- due to under-estimates
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
HashAggregate (cost=230.00..232.01 rows=201 width=16)
|
||||
QUERY PLAN
|
||||
-----------------------------
|
||||
HashAggregate
|
||||
Group Key: a, b
|
||||
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8)
|
||||
-> Seq Scan on ndistinct
|
||||
(3 rows)
|
||||
|
||||
EXPLAIN
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------------
|
||||
HashAggregate (cost=255.00..257.01 rows=201 width=20)
|
||||
QUERY PLAN
|
||||
-----------------------------
|
||||
HashAggregate
|
||||
Group Key: a, b, c
|
||||
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12)
|
||||
-> Seq Scan on ndistinct
|
||||
(3 rows)
|
||||
|
||||
EXPLAIN
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------------
|
||||
HashAggregate (cost=280.00..290.00 rows=1000 width=24)
|
||||
QUERY PLAN
|
||||
-----------------------------
|
||||
HashAggregate
|
||||
Group Key: a, b, c, d
|
||||
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=16)
|
||||
-> Seq Scan on ndistinct
|
||||
(3 rows)
|
||||
|
||||
EXPLAIN
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------------
|
||||
HashAggregate (cost=255.00..265.00 rows=1000 width=20)
|
||||
QUERY PLAN
|
||||
-----------------------------
|
||||
HashAggregate
|
||||
Group Key: b, c, d
|
||||
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12)
|
||||
-> Seq Scan on ndistinct
|
||||
(3 rows)
|
||||
|
||||
EXPLAIN
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
HashAggregate (cost=230.00..240.00 rows=1000 width=16)
|
||||
QUERY PLAN
|
||||
-----------------------------
|
||||
HashAggregate
|
||||
Group Key: a, d
|
||||
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8)
|
||||
-> Seq Scan on ndistinct
|
||||
(3 rows)
|
||||
|
||||
DROP TABLE ndistinct;
|
||||
|
@ -1,155 +0,0 @@
|
||||
-- Generic extended statistics support
|
||||
-- Ensure stats are dropped sanely
|
||||
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
|
||||
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
|
||||
DROP STATISTICS ab1_a_b_stats;
|
||||
CREATE SCHEMA regress_schema_2;
|
||||
CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1;
|
||||
DROP STATISTICS regress_schema_2.ab1_a_b_stats;
|
||||
-- Ensure statistics are dropped when columns are dropped
|
||||
CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1;
|
||||
CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1;
|
||||
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
|
||||
ALTER TABLE ab1 DROP COLUMN a;
|
||||
\d ab1
|
||||
Table "public.ab1"
|
||||
Column | Type | Collation | Nullable | Default
|
||||
--------+---------+-----------+----------+---------
|
||||
b | integer | | |
|
||||
c | integer | | |
|
||||
Statistics:
|
||||
"public.ab1_b_c_stats" WITH (ndistinct) ON (b, c)
|
||||
|
||||
DROP TABLE ab1;
|
||||
-- Ensure things work sanely with SET STATISTICS 0
|
||||
CREATE TABLE ab1 (a INTEGER, b INTEGER);
|
||||
ALTER TABLE ab1 ALTER a SET STATISTICS 0;
|
||||
INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
|
||||
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
|
||||
ANALYZE ab1;
|
||||
ERROR: extended statistics could not be collected for column "a" of relation public.ab1
|
||||
HINT: Consider ALTER TABLE "public"."ab1" ALTER "a" SET STATISTICS -1
|
||||
ALTER TABLE ab1 ALTER a SET STATISTICS -1;
|
||||
ANALYZE ab1;
|
||||
DROP TABLE ab1;
|
||||
-- n-distinct tests
|
||||
CREATE TABLE ndistinct (
|
||||
filler1 TEXT,
|
||||
filler2 NUMERIC,
|
||||
a INT,
|
||||
b INT,
|
||||
filler3 DATE,
|
||||
c INT,
|
||||
d INT
|
||||
);
|
||||
-- unknown column
|
||||
CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
|
||||
ERROR: column "unknown_column" referenced in statistics does not exist
|
||||
-- single column
|
||||
CREATE STATISTICS s10 ON (a) FROM ndistinct;
|
||||
ERROR: statistics require at least 2 columns
|
||||
-- single column, duplicated
|
||||
CREATE STATISTICS s10 ON (a,a) FROM ndistinct;
|
||||
ERROR: duplicate column name in statistics definition
|
||||
-- two columns, one duplicated
|
||||
CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
|
||||
ERROR: duplicate column name in statistics definition
|
||||
-- correct command
|
||||
CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
|
||||
-- perfectly correlated groups
|
||||
INSERT INTO ndistinct (a, b, c, filler1)
|
||||
SELECT i/100, i/100, i/100, cash_words(i::money)
|
||||
FROM generate_series(1,10000) s(i);
|
||||
ANALYZE ndistinct;
|
||||
SELECT staenabled, standistinct
|
||||
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
||||
staenabled | standistinct
|
||||
------------+------------------------------------------------------------------------------------------------
|
||||
{d} | [{(b 3 4), 101.000000}, {(b 3 6), 101.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 101.000000}]
|
||||
(1 row)
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
||||
QUERY PLAN
|
||||
-----------------------------
|
||||
HashAggregate
|
||||
Group Key: a, b
|
||||
-> Seq Scan on ndistinct
|
||||
(3 rows)
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
||||
QUERY PLAN
|
||||
-----------------------------
|
||||
HashAggregate
|
||||
Group Key: a, b, c
|
||||
-> Seq Scan on ndistinct
|
||||
(3 rows)
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||
QUERY PLAN
|
||||
-----------------------------
|
||||
HashAggregate
|
||||
Group Key: a, b, c, d
|
||||
-> Seq Scan on ndistinct
|
||||
(3 rows)
|
||||
|
||||
TRUNCATE TABLE ndistinct;
|
||||
-- partially correlated groups
|
||||
INSERT INTO ndistinct (a, b, c)
|
||||
SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
|
||||
ANALYZE ndistinct;
|
||||
SELECT staenabled, standistinct
|
||||
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
||||
staenabled | standistinct
|
||||
------------+------------------------------------------------------------------------------------------------
|
||||
{d} | [{(b 3 4), 201.000000}, {(b 3 6), 201.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 201.000000}]
|
||||
(1 row)
|
||||
|
||||
EXPLAIN
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
HashAggregate (cost=225.00..227.01 rows=201 width=16)
|
||||
Group Key: a, b
|
||||
-> Seq Scan on ndistinct (cost=0.00..150.00 rows=10000 width=8)
|
||||
(3 rows)
|
||||
|
||||
EXPLAIN
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------------
|
||||
HashAggregate (cost=250.00..252.01 rows=201 width=20)
|
||||
Group Key: a, b, c
|
||||
-> Seq Scan on ndistinct (cost=0.00..150.00 rows=10000 width=12)
|
||||
(3 rows)
|
||||
|
||||
EXPLAIN
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------------
|
||||
HashAggregate (cost=275.00..285.00 rows=1000 width=24)
|
||||
Group Key: a, b, c, d
|
||||
-> Seq Scan on ndistinct (cost=0.00..150.00 rows=10000 width=16)
|
||||
(3 rows)
|
||||
|
||||
EXPLAIN
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------------
|
||||
HashAggregate (cost=250.00..260.00 rows=1000 width=20)
|
||||
Group Key: b, c, d
|
||||
-> Seq Scan on ndistinct (cost=0.00..150.00 rows=10000 width=12)
|
||||
(3 rows)
|
||||
|
||||
EXPLAIN
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
HashAggregate (cost=225.00..235.00 rows=1000 width=16)
|
||||
Group Key: a, d
|
||||
-> Seq Scan on ndistinct (cost=0.00..150.00 rows=10000 width=8)
|
||||
(3 rows)
|
||||
|
||||
DROP TABLE ndistinct;
|
@ -1,5 +1,12 @@
|
||||
-- Generic extended statistics support
|
||||
|
||||
-- We will be checking execution plans without/with statistics, so
|
||||
-- let's make sure we get simple non-parallel plans. Also set the
|
||||
-- work_mem low so that we can use small amounts of data.
|
||||
SET max_parallel_workers = 0;
|
||||
SET max_parallel_workers_per_gather = 0;
|
||||
SET work_mem = '128kB';
|
||||
|
||||
-- Ensure stats are dropped sanely
|
||||
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
|
||||
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
|
||||
@ -43,6 +50,29 @@ CREATE TABLE ndistinct (
|
||||
d INT
|
||||
);
|
||||
|
||||
-- over-estimates when using only per-column statistics
|
||||
INSERT INTO ndistinct (a, b, c, filler1)
|
||||
SELECT i/100, i/100, i/100, cash_words((i/100)::money)
|
||||
FROM generate_series(1,30000) s(i);
|
||||
|
||||
ANALYZE ndistinct;
|
||||
|
||||
-- Group Aggregate, due to over-estimate of the number of groups
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
||||
|
||||
-- unknown column
|
||||
CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
|
||||
|
||||
@ -58,9 +88,35 @@ CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
|
||||
-- correct command
|
||||
CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
|
||||
|
||||
-- perfectly correlated groups
|
||||
ANALYZE ndistinct;
|
||||
|
||||
SELECT staenabled, standistinct
|
||||
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
||||
|
||||
-- Hash Aggregate, thanks to estimates improved by the statistic
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
||||
|
||||
-- last two plans keep using Group Aggregate, because 'd' is not covered
|
||||
-- by the statistic and while it's NULL-only we assume 200 values for it
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
||||
|
||||
TRUNCATE TABLE ndistinct;
|
||||
|
||||
-- under-estimates when using only per-column statistics
|
||||
INSERT INTO ndistinct (a, b, c, filler1)
|
||||
SELECT i/100, i/100, i/100, cash_words(i::money)
|
||||
SELECT mod(i,50), mod(i,51), mod(i,32),
|
||||
cash_words(mod(i,33)::int::money)
|
||||
FROM generate_series(1,10000) s(i);
|
||||
|
||||
ANALYZE ndistinct;
|
||||
@ -68,6 +124,7 @@ ANALYZE ndistinct;
|
||||
SELECT staenabled, standistinct
|
||||
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
||||
|
||||
-- plans using Group Aggregate, thanks to using correct estimates
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
||||
|
||||
@ -77,30 +134,32 @@ EXPLAIN (COSTS off)
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||
|
||||
TRUNCATE TABLE ndistinct;
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
||||
|
||||
-- partially correlated groups
|
||||
INSERT INTO ndistinct (a, b, c)
|
||||
SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
|
||||
|
||||
ANALYZE ndistinct;
|
||||
DROP STATISTICS s10;
|
||||
|
||||
SELECT staenabled, standistinct
|
||||
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
|
||||
|
||||
EXPLAIN
|
||||
-- dropping the statistics switches the plans to Hash Aggregate,
|
||||
-- due to under-estimates
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
|
||||
|
||||
EXPLAIN
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
|
||||
|
||||
EXPLAIN
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
|
||||
|
||||
EXPLAIN
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
|
||||
|
||||
EXPLAIN
|
||||
EXPLAIN (COSTS off)
|
||||
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
|
||||
|
||||
DROP TABLE ndistinct;
|
||||
|
Loading…
x
Reference in New Issue
Block a user