diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 35bb21c43e..0be53a89f9 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -1532,13 +1532,13 @@ pg_mcv_list_send(PG_FUNCTION_ARGS)
 /*
  * match the attribute/expression to a dimension of the statistic
  *
- * Match the attribute/expression to statistics dimension. Optionally
- * determine the collation.
+ * Returns the zero-based index of the matching statistics dimension.
+ * Optionally determines the collation.
  */
 static int
 mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
 {
-	int			idx = -1;
+	int			idx;
 
 	if (IsA(expr, Var))
 	{
@@ -1550,20 +1550,19 @@ mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
 
 		idx = bms_member_index(keys, var->varattno);
 
-		/* make sure the index is valid */
-		Assert((idx >= 0) && (idx <= bms_num_members(keys)));
+		if (idx < 0)
+			elog(ERROR, "variable not found in statistics object");
 	}
 	else
 	{
+		/* expression - lookup in stats expressions */
 		ListCell   *lc;
 
-		/* expressions are stored after the simple columns */
-		idx = bms_num_members(keys);
-
 		if (collid)
 			*collid = exprCollation(expr);
 
-		/* expression - lookup in stats expressions */
+		/* expressions are stored after the simple columns */
+		idx = bms_num_members(keys);
 		foreach(lc, exprs)
 		{
 			Node	   *stat_expr = (Node *) lfirst(lc);
@@ -1574,13 +1573,10 @@ mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
 			idx++;
 		}
 
-		/* make sure the index is valid */
-		Assert((idx >= bms_num_members(keys)) &&
-			   (idx <= bms_num_members(keys) + list_length(exprs)));
+		if (lc == NULL)
+			elog(ERROR, "expression not found in statistics object");
 	}
 
-	Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
-
 	return idx;
 }
 
@@ -1659,8 +1655,6 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 			/* match the attribute/expression to a dimension of the statistic */
 			idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
 
-			Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
-
 			/*
 			 * Walk through the MCV items and evaluate the current clause. We
 			 * can skip items that were already ruled out, and terminate if
@@ -1944,7 +1938,30 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 			}
 		}
 		else
-			elog(ERROR, "unknown clause type: %d", clause->type);
+		{
+			/* Otherwise, it must be a bare boolean-returning expression */
+			int			idx;
+
+			/* match the expression to a dimension of the statistic */
+			idx = mcv_match_expression(clause, keys, exprs, NULL);
+
+			/*
+			 * Walk through the MCV items and evaluate the current clause. We
+			 * can skip items that were already ruled out, and terminate if
+			 * there are no remaining MCV items that might possibly match.
+			 */
+			for (i = 0; i < mcvlist->nitems; i++)
+			{
+				bool		match;
+				MCVItem    *item = &mcvlist->items[i];
+
+				/* "match" just means it's bool TRUE */
+				match = !item->isnull[idx] && DatumGetBool(item->values[idx]);
+
+				/* now, update the match bitmap, depending on OR/AND type */
+				matches[i] = RESULT_MERGE(matches[i], is_or, match);
+			}
+		}
 	}
 
 	return matches;
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index bedc703ac7..a2bc409e06 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -271,14 +271,23 @@ SELECT stxkind FROM pg_statistic_ext WHERE stxname = 'ab1_exprstat_3';
 CREATE STATISTICS ab1_exprstat_4 ON date_trunc('day', d) FROM ab1;
 -- date_trunc on timestamp is immutable
 CREATE STATISTICS ab1_exprstat_5 ON date_trunc('day', c) FROM ab1;
+-- check use of a boolean-returning expression
+CREATE STATISTICS ab1_exprstat_6 ON
+  (case a when 1 then true else false end), b FROM ab1;
 -- insert some data and run analyze, to test that these cases build properly
 INSERT INTO ab1
-SELECT
-    generate_series(1,10),
-    generate_series(1,10),
-    generate_series('2020-10-01'::timestamp, '2020-10-10'::timestamp, interval '1 day'),
-    generate_series('2020-10-01'::timestamptz, '2020-10-10'::timestamptz, interval '1 day');
+SELECT x / 10, x / 3,
+    '2020-10-01'::timestamp + x * interval '1 day',
+    '2020-10-01'::timestamptz + x * interval '1 day'
+FROM generate_series(1, 100) x;
 ANALYZE ab1;
+-- apply some stats
+SELECT * FROM check_estimated_rows('SELECT * FROM ab1 WHERE (case a when 1 then true else false end) AND b=2');
+ estimated | actual 
+-----------+--------
+         1 |      0
+(1 row)
+
 DROP TABLE ab1;
 -- Verify supported object types for extended statistics
 CREATE schema tststats;
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index bc229e9363..19417561bd 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -165,14 +165,21 @@ CREATE STATISTICS ab1_exprstat_4 ON date_trunc('day', d) FROM ab1;
 -- date_trunc on timestamp is immutable
 CREATE STATISTICS ab1_exprstat_5 ON date_trunc('day', c) FROM ab1;
 
+-- check use of a boolean-returning expression
+CREATE STATISTICS ab1_exprstat_6 ON
+  (case a when 1 then true else false end), b FROM ab1;
+
 -- insert some data and run analyze, to test that these cases build properly
 INSERT INTO ab1
-SELECT
-    generate_series(1,10),
-    generate_series(1,10),
-    generate_series('2020-10-01'::timestamp, '2020-10-10'::timestamp, interval '1 day'),
-    generate_series('2020-10-01'::timestamptz, '2020-10-10'::timestamptz, interval '1 day');
+SELECT x / 10, x / 3,
+    '2020-10-01'::timestamp + x * interval '1 day',
+    '2020-10-01'::timestamptz + x * interval '1 day'
+FROM generate_series(1, 100) x;
 ANALYZE ab1;
+
+-- apply some stats
+SELECT * FROM check_estimated_rows('SELECT * FROM ab1 WHERE (case a when 1 then true else false end) AND b=2');
+
 DROP TABLE ab1;
 
 -- Verify supported object types for extended statistics