diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index dd3c84a91c..463d44a68a 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -1255,6 +1255,10 @@ choose_best_statistics(List *stats, char requiredkind, /* * Collect attributes and expressions in remaining (unestimated) * clauses fully covered by this statistic object. + * + * We know already estimated clauses have both clause_attnums and + * clause_exprs set to NULL. We leave the pointers NULL if already + * estimated, or we reset them to NULL after estimating the clause. */ for (i = 0; i < nclauses; i++) { @@ -1758,39 +1762,65 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli /* record which clauses are simple (single column or expression) */ simple_clauses = NULL; - listidx = 0; + listidx = -1; foreach(l, clauses) { - /* - * If the clause is not already estimated and is compatible with - * the selected statistics object (all attributes and expressions - * covered), mark it as estimated and add it to the list to - * estimate. - */ - if (!bms_is_member(listidx, *estimatedclauses) && - bms_is_subset(list_attnums[listidx], stat->keys) && - stat_covers_expressions(stat, list_exprs[listidx], NULL)) - { - /* record simple clauses (single column or expression) */ - if ((list_attnums[listidx] == NULL && - list_length(list_exprs[listidx]) == 1) || - (list_exprs[listidx] == NIL && - bms_membership(list_attnums[listidx]) == BMS_SINGLETON)) - simple_clauses = bms_add_member(simple_clauses, - list_length(stat_clauses)); - - /* add clause to list and mark as estimated */ - stat_clauses = lappend(stat_clauses, (Node *) lfirst(l)); - *estimatedclauses = bms_add_member(*estimatedclauses, listidx); - - bms_free(list_attnums[listidx]); - list_attnums[listidx] = NULL; - - list_free(list_exprs[listidx]); - list_exprs[listidx] = NULL; - } - + /* Increment the index before we decide if to skip the clause. */ listidx++; + + /* + * Ignore clauses from which we did not extract any attnums or + * expressions (this needs to be consistent with what we do in + * choose_best_statistics). + * + * This also eliminates already estimated clauses - both those + * estimated before and during applying extended statistics. + * + * XXX This check is needed because both bms_is_subset and + * stat_covers_expressions return true for empty attnums and + * expressions. + */ + if (!list_attnums[listidx] && !list_exprs[listidx]) + continue; + + /* + * The clause was not estimated yet, and we've extracted either + * attnums of expressions from it. Ignore it if it's not fully + * covered by the chosen statistics. + * + * We need to check both attributes and expressions, and reject + * if either is not covered. + */ + if (!bms_is_subset(list_attnums[listidx], stat->keys) || + !stat_covers_expressions(stat, list_exprs[listidx], NULL)) + continue; + + /* + * Now we know the clause is compatible (we have either atttnums + * or expressions extracted from it), and was not estimated yet. + */ + + /* record simple clauses (single column or expression) */ + if ((list_attnums[listidx] == NULL && + list_length(list_exprs[listidx]) == 1) || + (list_exprs[listidx] == NIL && + bms_membership(list_attnums[listidx]) == BMS_SINGLETON)) + simple_clauses = bms_add_member(simple_clauses, + list_length(stat_clauses)); + + /* add clause to list and mark it as estimated */ + stat_clauses = lappend(stat_clauses, (Node *) lfirst(l)); + *estimatedclauses = bms_add_member(*estimatedclauses, listidx); + + /* + * Reset the pointers, so that choose_best_statistics knows this + * clause was estimated and does not consider it again. + */ + bms_free(list_attnums[listidx]); + list_attnums[listidx] = NULL; + + list_free(list_exprs[listidx]); + list_exprs[listidx] = NULL; } if (is_or) diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index 2a00fb4848..9ab3e81a91 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -1575,6 +1575,8 @@ mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid) (idx <= bms_num_members(keys) + list_length(exprs))); } + Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs))); + return idx; } @@ -1654,6 +1656,8 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, /* match the attribute/expression to a dimension of the statistic */ idx = mcv_match_expression(clause_expr, keys, exprs, &collid); + Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs))); + /* * Walk through the MCV items and evaluate the current clause. We * can skip items that were already ruled out, and terminate if diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 679fd2d64d..8c214d8dfc 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -2938,6 +2938,25 @@ SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b (1 row) DROP TABLE expr_stats; +-- test handling of a mix of compatible and incompatible expressions +CREATE TABLE expr_stats_incompatible_test ( + c0 double precision, + c1 boolean NOT NULL +); +CREATE STATISTICS expr_stat_comp_1 ON c0, c1 FROM expr_stats_incompatible_test; +INSERT INTO expr_stats_incompatible_test VALUES (1234,false), (5678,true); +ANALYZE expr_stats_incompatible_test; +SELECT c0 FROM ONLY expr_stats_incompatible_test WHERE +( + upper('x') LIKE ('x'||('[0,1]'::int4range)) + AND + (c0 IN (0, 1) OR c1) +); + c0 +---- +(0 rows) + +DROP TABLE expr_stats_incompatible_test; -- Permission tests. Users should not be able to see specific data values in -- the extended statistics, if they lack permission to see those values in -- the underlying table. diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 7e092571ca..e033080d4f 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -1470,6 +1470,25 @@ SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b DROP TABLE expr_stats; +-- test handling of a mix of compatible and incompatible expressions +CREATE TABLE expr_stats_incompatible_test ( + c0 double precision, + c1 boolean NOT NULL +); + +CREATE STATISTICS expr_stat_comp_1 ON c0, c1 FROM expr_stats_incompatible_test; + +INSERT INTO expr_stats_incompatible_test VALUES (1234,false), (5678,true); +ANALYZE expr_stats_incompatible_test; + +SELECT c0 FROM ONLY expr_stats_incompatible_test WHERE +( + upper('x') LIKE ('x'||('[0,1]'::int4range)) + AND + (c0 IN (0, 1) OR c1) +); + +DROP TABLE expr_stats_incompatible_test; -- Permission tests. Users should not be able to see specific data values in -- the extended statistics, if they lack permission to see those values in