From ccaa3569f58796868303629bc2d63ddddb599b38 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Wed, 18 Mar 2020 16:41:45 +0100 Subject: [PATCH] Recognize some OR clauses as compatible with functional dependencies Since commit 8f321bd16c functional dependencies can handle IN clauses, which however introduced a possible (and surprising) inconsistency, because IN clauses may be expressed as an OR clause, which are still considered incompatible. For example a IN (1, 2, 3) may be rewritten as (a = 1 OR a = 2 OR a = 3) The IN clause will work fine with functional dependencies, but the OR clause will force the estimation to fall back to plain per-column estimates, possibly introducing significant estimation errors. This commit recognizes OR clauses equivalent to an IN clause (when all arugments are compatible and reference the same attribute) as a special case, compatible with functional dependencies. This allows applying functional dependencies, just like for IN clauses. This does not eliminate the difference in estimating the clause itself, i.e. IN clause and OR clause still use different formulas. It would be possible to change that (for these special OR clauses), but that's not really about extended statistics - it was always like this. Moreover the errors are usually much smaller compared to ignoring dependencies. Author: Tomas Vondra Reviewed-by: Dean Rasheed Discussion: https://www.postgresql.org/message-id/flat/13902317.Eha0YfKkKy%40pierred-pdoc --- src/backend/statistics/dependencies.c | 65 +++++++++++++++++++------ src/test/regress/expected/stats_ext.out | 52 ++++++++++++++++++++ src/test/regress/sql/stats_ext.sql | 20 ++++++++ 3 files changed, 121 insertions(+), 16 deletions(-) diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index 72dc1cd1bd..5f9b43bf7f 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -753,24 +753,27 @@ pg_dependencies_send(PG_FUNCTION_ARGS) static bool dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum) { - RestrictInfo *rinfo = (RestrictInfo *) clause; Var *var; - if (!IsA(rinfo, RestrictInfo)) - return false; + if (IsA(clause, RestrictInfo)) + { + RestrictInfo *rinfo = (RestrictInfo *) clause; - /* Pseudoconstants are not interesting (they couldn't contain a Var) */ - if (rinfo->pseudoconstant) - return false; + /* Pseudoconstants are not interesting (they couldn't contain a Var) */ + if (rinfo->pseudoconstant) + return false; - /* Clauses referencing multiple, or no, varnos are incompatible */ - if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON) - return false; + /* Clauses referencing multiple, or no, varnos are incompatible */ + if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON) + return false; - if (is_opclause(rinfo->clause)) + clause = (Node *) rinfo->clause; + } + + if (is_opclause(clause)) { /* If it's an opclause, check for Var = Const or Const = Var. */ - OpExpr *expr = (OpExpr *) rinfo->clause; + OpExpr *expr = (OpExpr *) clause; /* Only expressions with two arguments are candidates. */ if (list_length(expr->args) != 2) @@ -801,10 +804,10 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum) /* OK to proceed with checking "var" */ } - else if (IsA(rinfo->clause, ScalarArrayOpExpr)) + else if (IsA(clause, ScalarArrayOpExpr)) { /* If it's an scalar array operator, check for Var IN Const. */ - ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) rinfo->clause; + ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; /* * Reject ALL() variant, we only care about ANY/IN. @@ -839,13 +842,43 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum) /* OK to proceed with checking "var" */ } - else if (is_notclause(rinfo->clause)) + else if (is_orclause(clause)) + { + BoolExpr *expr = (BoolExpr *) clause; + ListCell *lc; + + /* start with no attribute number */ + *attnum = InvalidAttrNumber; + + foreach(lc, expr->args) + { + AttrNumber clause_attnum; + + /* + * Had we found incompatible clause in the arguments, treat the + * whole clause as incompatible. + */ + if (!dependency_is_compatible_clause((Node *) lfirst(lc), + relid, &clause_attnum)) + return false; + + if (*attnum == InvalidAttrNumber) + *attnum = clause_attnum; + + if (*attnum != clause_attnum) + return false; + } + + /* the Var is already checked by the recursive call */ + return true; + } + else if (is_notclause(clause)) { /* * "NOT x" can be interpreted as "x = false", so get the argument and * proceed with seeing if it's a suitable Var. */ - var = (Var *) get_notclausearg(rinfo->clause); + var = (Var *) get_notclausearg(clause); } else { @@ -853,7 +886,7 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum) * A boolean expression "x" can be interpreted as "x = true", so * proceed with seeing if it's a suitable Var. */ - var = (Var *) rinfo->clause; + var = (Var *) clause; } /* diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index dbc2532354..bd6d8be434 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -458,6 +458,32 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE 3 | 400 (1 row) +-- OR clauses referencing the same attribute +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1'''); + estimated | actual +-----------+-------- + 2 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')'); + estimated | actual +-----------+-------- + 4 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')'); + estimated | actual +-----------+-------- + 8 | 200 +(1 row) + +-- OR clauses referencing different attributes +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1'''); + estimated | actual +-----------+-------- + 3 | 100 +(1 row) + -- ANY SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1'''); estimated | actual @@ -592,6 +618,32 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE 400 | 400 (1 row) +-- OR clauses referencing the same attribute +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1'''); + estimated | actual +-----------+-------- + 99 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')'); + estimated | actual +-----------+-------- + 99 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')'); + estimated | actual +-----------+-------- + 197 | 200 +(1 row) + +-- OR clauses referencing different attributes are incompatible +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1'''); + estimated | actual +-----------+-------- + 3 | 100 +(1 row) + -- ANY SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1'''); estimated | actual diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 8beb04278d..897ed3051c 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -286,6 +286,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)'); +-- OR clauses referencing the same attribute +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1'''); + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')'); + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')'); + +-- OR clauses referencing different attributes +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1'''); + -- ANY SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1'''); @@ -338,6 +348,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)'); +-- OR clauses referencing the same attribute +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1'''); + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')'); + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')'); + +-- OR clauses referencing different attributes are incompatible +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1'''); + -- ANY SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1''');