From c102d1106732189de2bfeb93c11b358f9c6b4e1f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 5 Aug 2022 13:58:37 -0400 Subject: [PATCH] Fix non-bulletproof ScalarArrayOpExpr code for extended statistics. statext_is_compatible_clause_internal() checked that the arguments of a ScalarArrayOpExpr are one Var and one Const, but it would allow cases where the Const was on the left. Subsequent uses of the clause are not expecting that and would suffer assertion failures or core dumps. mcv.c also had not bothered to cope with the case of a NULL array constant, which seems really unacceptably sloppy of somebody. (Although our tools failed us there too, since AFAIK neither Coverity nor any compiler warned of the obvious use-of-uninitialized-variable condition.) It seems best to handle that by having statext_is_compatible_clause_internal() reject it. Noted while fixing bug #17570. Back-patch to v13 where the extended stats code grew some awareness of ScalarArrayOpExpr. --- src/backend/statistics/extended_stats.c | 13 +++++++++++-- src/backend/statistics/mcv.c | 22 ++++++++++------------ src/test/regress/expected/stats_ext.out | 22 ++++++++++++++++++---- src/test/regress/sql/stats_ext.sql | 14 ++++++++++---- 4 files changed, 49 insertions(+), 22 deletions(-) diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index c4bfd51491..3c94e3c762 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -1051,13 +1051,19 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, RangeTblEntry *rte = root->simple_rte_array[relid]; ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; Var *var; + Const *cst; + bool expronleft; /* Only expressions with two arguments are considered compatible. */ if (list_length(expr->args) != 2) return false; /* Check if the expression has the right shape (one Var, one Const) */ - if (!examine_clause_args(expr->args, &var, NULL, NULL)) + if (!examine_clause_args(expr->args, &var, &cst, &expronleft)) + return false; + + /* We only support Var on left and non-null array constants */ + if (!expronleft || cst->constisnull) return false; /* @@ -1161,7 +1167,7 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, * statext_is_compatible_clause * Determines if the clause is compatible with MCV lists. * - * Currently, we only support three types of clauses: + * Currently, we only support the following types of clauses: * * (a) OpExprs of the form (Var op Const), or (Const op Var), where the op * is one of ("=", "<", ">", ">=", "<=") @@ -1170,6 +1176,9 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, * * (c) combinations using AND/OR/NOT * + * (d) ScalarArrayOpExprs of the form (Var op ANY (Const)) or + * (Var op ALL (Const)) + * * In the future, the range of supported clauses may be expanded to more * complex cases, for example (Var op Var). */ diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index d3bfeff364..9afd7e551f 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -1679,19 +1679,17 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, Datum *elem_values; bool *elem_nulls; - /* ScalarArrayOpExpr has the Var always on the left */ - Assert(varonleft); + /* We expect Var on left and non-null constant on right */ + if (!varonleft || cst->constisnull) + elog(ERROR, "incompatible clause"); - if (!cst->constisnull) - { - arrayval = DatumGetArrayTypeP(cst->constvalue); - get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), - &elmlen, &elmbyval, &elmalign); - deconstruct_array(arrayval, - ARR_ELEMTYPE(arrayval), - elmlen, elmbyval, elmalign, - &elem_values, &elem_nulls, &num_elems); - } + arrayval = DatumGetArrayTypeP(cst->constvalue); + get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), + &elmlen, &elmbyval, &elmalign); + deconstruct_array(arrayval, + ARR_ELEMTYPE(arrayval), + elmlen, elmbyval, elmalign, + &elem_values, &elem_nulls, &num_elems); /* match the attribute to a dimension of the statistic */ idx = bms_member_index(keys, var->varattno); diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 298e72b5d4..048b0deb88 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -933,7 +933,8 @@ CREATE TABLE mcv_lists ( b VARCHAR, filler3 DATE, c INT, - d TEXT + d TEXT, + ia INT[] ) WITH (autovacuum_enabled = off); -- random data (no MCV list) @@ -970,8 +971,9 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = -- 100 distinct combinations, all in the MCV list TRUNCATE mcv_lists; DROP STATISTICS mcv_lists_stats; -INSERT INTO mcv_lists (a, b, c, filler1) - SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i); +INSERT INTO mcv_lists (a, b, c, ia, filler1) + SELECT mod(i,100), mod(i,50), mod(i,25), array[mod(i,25)], i + FROM generate_series(1,5000) s(i); ANALYZE mcv_lists; SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'''); estimated | actual @@ -1111,8 +1113,14 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY 1 | 100 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)'); + estimated | actual +-----------+-------- + 4 | 50 +(1 row) + -- create statistics -CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; +CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c, ia FROM mcv_lists; ANALYZE mcv_lists; SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'''); estimated | actual @@ -1253,6 +1261,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ' 343 | 200 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)'); + estimated | actual +-----------+-------- + 4 | 50 +(1 row) + -- check change of unrelated column type does not reset the MCV statistics ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64); SELECT d.stxdmcv IS NOT NULL diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 6ff978a4a8..d628ea43d0 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -497,7 +497,8 @@ CREATE TABLE mcv_lists ( b VARCHAR, filler3 DATE, c INT, - d TEXT + d TEXT, + ia INT[] ) WITH (autovacuum_enabled = off); @@ -524,8 +525,9 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = TRUNCATE mcv_lists; DROP STATISTICS mcv_lists_stats; -INSERT INTO mcv_lists (a, b, c, filler1) - SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i); +INSERT INTO mcv_lists (a, b, c, ia, filler1) + SELECT mod(i,100), mod(i,50), mod(i,25), array[mod(i,25)], i + FROM generate_series(1,5000) s(i); ANALYZE mcv_lists; @@ -575,8 +577,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', NULL, ''3'') AND c > ANY (ARRAY[1, 2, NULL, 3])'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)'); + -- create statistics -CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; +CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c, ia FROM mcv_lists; ANALYZE mcv_lists; @@ -627,6 +631,8 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY -- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute) SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)'); + -- check change of unrelated column type does not reset the MCV statistics ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);