Fix problems with subselects used in GROUP BY expressions, per gripe

from Philip Warner.  Side effect of change is that GROUP BY expressions
will not be re-evaluated at multiple plan levels anymore, whereas this
sometimes happened with old code.
This commit is contained in:
Tom Lane 2001-10-30 19:58:58 +00:00
parent 512a3aef36
commit 96ca8ffebc
4 changed files with 167 additions and 103 deletions
src
backend/optimizer
include/optimizer

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.111 2001/10/28 06:25:45 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.112 2001/10/30 19:58:58 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -173,6 +173,17 @@ subquery_planner(Query *parse, double tuple_fraction)
parse->havingQual = preprocess_expression(parse, parse->havingQual,
EXPRKIND_HAVING);
/*
* Check for ungrouped variables passed to subplans in targetlist and
* HAVING clause (but not in WHERE or JOIN/ON clauses, since those are
* evaluated before grouping). We can't do this any earlier because
* we must use the preprocessed targetlist for comparisons of grouped
* expressions.
*/
if (parse->hasSubLinks &&
(parse->groupClause != NIL || parse->hasAggs))
check_subplans_for_ungrouped_vars(parse);
/*
* A HAVING clause without aggregates is equivalent to a WHERE clause
* (except it can only refer to grouped fields). Transfer any
@ -623,23 +634,10 @@ preprocess_expression(Query *parse, Node *expr, int kind)
#endif
}
/* Expand SubLinks to SubPlans */
if (parse->hasSubLinks)
{
/* Expand SubLinks to SubPlans */
expr = SS_process_sublinks(expr);
if (kind != EXPRKIND_WHERE &&
(parse->groupClause != NIL || parse->hasAggs))
{
/*
* Check for ungrouped variables passed to subplans. Note we
* do NOT do this for subplans in WHERE (or JOIN/ON); it's
* legal there because WHERE is evaluated pre-GROUP.
*/
check_subplans_for_ungrouped_vars(expr, parse);
}
}
/* Replace uplevel vars with Param nodes */
if (PlannerQueryLevel > 1)
expr = SS_replace_correlation_vars(expr);
@ -1122,12 +1120,11 @@ grouping_planner(Query *parse, double tuple_fraction)
/*
* If there are aggregates then the Group node should just return
* the same set of vars as the subplan did (but we can exclude any
* GROUP BY expressions). If there are no aggregates then the
* Group node had better compute the final tlist.
* the same set of vars as the subplan did. If there are no aggs
* then the Group node had better compute the final tlist.
*/
if (parse->hasAggs)
group_tlist = flatten_tlist(result_plan->targetlist);
group_tlist = new_unsorted_tlist(result_plan->targetlist);
else
group_tlist = tlist;
@ -1234,7 +1231,10 @@ grouping_planner(Query *parse, double tuple_fraction)
* where the a+b target will be used by the Sort/Group steps, and the
* other targets will be used for computing the final results. (In the
* above example we could theoretically suppress the a and b targets and
* use only a+b, but it's not really worth the trouble.)
* pass down only c,d,a+b, but it's not really worth the trouble to
* eliminate simple var references from the subplan. We will avoid doing
* the extra computation to recompute a+b at the outer level; see
* replace_vars_with_subplan_refs() in setrefs.c.)
*
* 'parse' is the query being processed.
* 'tlist' is the query's target list.

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.71 2001/03/22 03:59:37 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.72 2001/10/30 19:58:58 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -33,7 +33,8 @@ typedef struct
typedef struct
{
Index subvarno;
List *subplanTargetList;
List *subplan_targetlist;
bool tlist_has_non_vars;
} replace_vars_with_subplan_refs_context;
static void fix_expr_references(Plan *plan, Node *node);
@ -43,7 +44,8 @@ static Node *join_references_mutator(Node *node,
join_references_context *context);
static Node *replace_vars_with_subplan_refs(Node *node,
Index subvarno,
List *subplanTargetList);
List *subplan_targetlist,
bool tlist_has_non_vars);
static Node *replace_vars_with_subplan_refs_mutator(Node *node,
replace_vars_with_subplan_refs_context *context);
static bool fix_opids_walker(Node *node, void *context);
@ -278,75 +280,62 @@ set_join_references(Join *join)
* qual expressions with elements of the subplan's tlist (which was
* generated by flatten_tlist() from these selfsame expressions, so it
* should have all the required variables). There is an important exception,
* however: a GROUP BY expression that is also an output expression will
* have been pushed into the subplan tlist unflattened. We want to detect
* this case and reference the subplan output directly. Therefore, check
* for equality of the whole tlist expression to any subplan element before
* we resort to picking the expression apart for individual Vars.
* however: GROUP BY and ORDER BY expressions will have been pushed into the
* subplan tlist unflattened. If these values are also needed in the output
* then we want to reference the subplan tlist element rather than recomputing
* the expression.
*/
static void
set_uppernode_references(Plan *plan, Index subvarno)
{
Plan *subplan = plan->lefttree;
List *subplanTargetList,
*outputTargetList,
List *subplan_targetlist,
*output_targetlist,
*l;
bool tlist_has_non_vars;
if (subplan != NULL)
subplanTargetList = subplan->targetlist;
subplan_targetlist = subplan->targetlist;
else
subplanTargetList = NIL;
subplan_targetlist = NIL;
outputTargetList = NIL;
/*
* Detect whether subplan tlist has any non-Vars (typically it won't
* because it's been flattened). This allows us to save comparisons
* in common cases.
*/
tlist_has_non_vars = false;
foreach(l, subplan_targetlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
if (tle->expr && ! IsA(tle->expr, Var))
{
tlist_has_non_vars = true;
break;
}
}
output_targetlist = NIL;
foreach(l, plan->targetlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
TargetEntry *subplantle;
Node *newexpr;
subplantle = tlistentry_member(tle->expr, subplanTargetList);
if (subplantle)
{
/* Found a matching subplan output expression */
Resdom *resdom = subplantle->resdom;
Var *newvar;
newvar = makeVar(subvarno,
resdom->resno,
resdom->restype,
resdom->restypmod,
0);
/* If we're just copying a simple Var, copy up original info */
if (subplantle->expr && IsA(subplantle->expr, Var))
{
Var *subvar = (Var *) subplantle->expr;
newvar->varnoold = subvar->varnoold;
newvar->varoattno = subvar->varoattno;
}
else
{
newvar->varnoold = 0;
newvar->varoattno = 0;
}
newexpr = (Node *) newvar;
}
else
{
/* No matching expression, so replace individual Vars */
newexpr = replace_vars_with_subplan_refs(tle->expr,
subvarno,
subplanTargetList);
}
outputTargetList = lappend(outputTargetList,
makeTargetEntry(tle->resdom, newexpr));
newexpr = replace_vars_with_subplan_refs(tle->expr,
subvarno,
subplan_targetlist,
tlist_has_non_vars);
output_targetlist = lappend(output_targetlist,
makeTargetEntry(tle->resdom, newexpr));
}
plan->targetlist = outputTargetList;
plan->targetlist = output_targetlist;
plan->qual = (List *)
replace_vars_with_subplan_refs((Node *) plan->qual,
subvarno,
subplanTargetList);
subplan_targetlist,
tlist_has_non_vars);
}
/*
@ -439,9 +428,16 @@ join_references_mutator(Node *node,
* --- so this routine should only be applied to nodes whose subplans'
* targetlists were generated via flatten_tlist() or some such method.
*
* 'node': the tree to be fixed (a targetlist or qual list)
* If tlist_has_non_vars is true, then we try to match whole subexpressions
* against elements of the subplan tlist, so that we can avoid recomputing
* expressions that were already computed by the subplan. (This is relatively
* expensive, so we don't want to try it in the common case where the
* subplan tlist is just a flattened list of Vars.)
*
* 'node': the tree to be fixed (a target item or qual)
* 'subvarno': varno to be assigned to all Vars
* 'subplanTargetList': target list for subplan
* 'subplan_targetlist': target list for subplan
* 'tlist_has_non_vars': true if subplan_targetlist contains non-Var exprs
*
* The resulting tree is a copy of the original in which all Var nodes have
* varno = subvarno, varattno = resno of corresponding subplan target.
@ -450,12 +446,14 @@ join_references_mutator(Node *node,
static Node *
replace_vars_with_subplan_refs(Node *node,
Index subvarno,
List *subplanTargetList)
List *subplan_targetlist,
bool tlist_has_non_vars)
{
replace_vars_with_subplan_refs_context context;
context.subvarno = subvarno;
context.subplanTargetList = subplanTargetList;
context.subplan_targetlist = subplan_targetlist;
context.tlist_has_non_vars = tlist_has_non_vars;
return replace_vars_with_subplan_refs_mutator(node, &context);
}
@ -468,17 +466,38 @@ replace_vars_with_subplan_refs_mutator(Node *node,
if (IsA(node, Var))
{
Var *var = (Var *) node;
Var *newvar = (Var *) copyObject(var);
Resdom *resdom;
Var *newvar;
resdom = tlist_member((Node *) var, context->subplanTargetList);
resdom = tlist_member((Node *) var, context->subplan_targetlist);
if (!resdom)
elog(ERROR, "replace_vars_with_subplan_refs: variable not in subplan target list");
newvar = (Var *) copyObject(var);
newvar->varno = context->subvarno;
newvar->varattno = resdom->resno;
return (Node *) newvar;
}
/* Try matching more complex expressions too, if tlist has any */
if (context->tlist_has_non_vars)
{
Resdom *resdom;
resdom = tlist_member(node, context->subplan_targetlist);
if (resdom)
{
/* Found a matching subplan output expression */
Var *newvar;
newvar = makeVar(context->subvarno,
resdom->resno,
resdom->restype,
resdom->restypmod,
0);
newvar->varnoold = 0; /* wasn't ever a plain Var */
newvar->varoattno = 0;
return (Node *) newvar;
}
}
return expression_tree_mutator(node,
replace_vars_with_subplan_refs_mutator,
(void *) context);

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.89 2001/10/25 05:49:34 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.90 2001/10/30 19:58:58 tgl Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
@ -39,12 +39,18 @@
((Node *) makeConst(BOOLOID, 1, (Datum) (val), \
(isnull), true, false, false))
typedef struct
{
Query *query;
List *groupClauses;
} check_subplans_for_ungrouped_vars_context;
static bool contain_agg_clause_walker(Node *node, void *context);
static bool pull_agg_clause_walker(Node *node, List **listptr);
static bool contain_subplans_walker(Node *node, void *context);
static bool pull_subplans_walker(Node *node, List **listptr);
static bool check_subplans_for_ungrouped_vars_walker(Node *node,
Query *context);
check_subplans_for_ungrouped_vars_context *context);
static bool contain_noncachable_functions_walker(Node *node, void *context);
static Node *eval_const_expressions_mutator(Node *node, void *context);
static Expr *simplify_op_or_func(Expr *expr, List *args);
@ -517,36 +523,81 @@ pull_subplans_walker(Node *node, List **listptr)
* planner runs. So we do it here, after we have processed sublinks into
* subplans. This ought to be cleaned up someday.
*
* 'clause' is the expression tree to be searched for subplans.
* 'query' provides the GROUP BY list, the target list that the group clauses
* refer to, and the range table.
* A deficiency in this scheme is that any outer reference var must be
* grouped by itself; we don't recognize groupable expressions within
* subselects. For example, consider
* SELECT
* (SELECT x FROM bar where y = (foo.a + foo.b))
* FROM foo
* GROUP BY a + b;
* This query will be rejected although it could be allowed.
*/
void
check_subplans_for_ungrouped_vars(Node *clause,
Query *query)
check_subplans_for_ungrouped_vars(Query *query)
{
check_subplans_for_ungrouped_vars_context context;
List *gl;
context.query = query;
/*
* No special setup needed; context for walker is just the Query
* pointer
* Build a list of the acceptable GROUP BY expressions for use in
* the walker (to avoid repeated scans of the targetlist within the
* recursive routine).
*/
check_subplans_for_ungrouped_vars_walker(clause, query);
context.groupClauses = NIL;
foreach(gl, query->groupClause)
{
GroupClause *grpcl = lfirst(gl);
Node *expr;
expr = get_sortgroupclause_expr(grpcl, query->targetList);
context.groupClauses = lcons(expr, context.groupClauses);
}
/*
* Recursively scan the targetlist and the HAVING clause.
* WHERE and JOIN/ON conditions are not examined, since they are
* evaluated before grouping.
*/
check_subplans_for_ungrouped_vars_walker((Node *) query->targetList,
&context);
check_subplans_for_ungrouped_vars_walker(query->havingQual,
&context);
freeList(context.groupClauses);
}
static bool
check_subplans_for_ungrouped_vars_walker(Node *node,
Query *context)
check_subplans_for_ungrouped_vars_context *context)
{
List *gl;
if (node == NULL)
return false;
if (IsA(node, Const) || IsA(node, Param))
return false; /* constants are always acceptable */
/*
* If we find an aggregate function, do not recurse into its
* arguments. Subplans invoked within aggregate calls are allowed to
* receive ungrouped variables.
* receive ungrouped variables. (This test and the next one should
* match the logic in parse_agg.c's check_ungrouped_columns().)
*/
if (IsA(node, Aggref))
return false;
/*
* Check to see if subexpression as a whole matches any GROUP BY item.
* We need to do this at every recursion level so that we recognize
* GROUPed-BY expressions before reaching variables within them.
*/
foreach(gl, context->groupClauses)
{
if (equal(node, lfirst(gl)))
return false; /* acceptable, do not descend more */
}
/*
* We can ignore Vars other than in subplan args lists, since the
* parser already checked 'em.
@ -564,7 +615,6 @@ check_subplans_for_ungrouped_vars_walker(Node *node,
Node *thisarg = lfirst(t);
Var *var;
bool contained_in_group_clause;
List *gl;
/*
* We do not care about args that are not local variables;
@ -582,14 +632,9 @@ check_subplans_for_ungrouped_vars_walker(Node *node,
* Else, see if it is a grouping column.
*/
contained_in_group_clause = false;
foreach(gl, context->groupClause)
foreach(gl, context->groupClauses)
{
GroupClause *gcl = lfirst(gl);
Node *groupexpr;
groupexpr = get_sortgroupclause_expr(gcl,
context->targetList);
if (equal(thisarg, groupexpr))
if (equal(thisarg, lfirst(gl)))
{
contained_in_group_clause = true;
break;
@ -603,8 +648,8 @@ check_subplans_for_ungrouped_vars_walker(Node *node,
char *attname;
Assert(var->varno > 0 &&
(int) var->varno <= length(context->rtable));
rte = rt_fetch(var->varno, context->rtable);
(int) var->varno <= length(context->query->rtable));
rte = rt_fetch(var->varno, context->query->rtable);
attname = get_rte_attribute_name(rte, var->varattno);
elog(ERROR, "Sub-SELECT uses un-GROUPed attribute %s.%s from outer query",
rte->eref->relname, attname);

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: clauses.h,v 1.47 2001/10/28 06:26:07 momjian Exp $
* $Id: clauses.h,v 1.48 2001/10/30 19:58:58 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -44,7 +44,7 @@ extern List *pull_agg_clause(Node *clause);
extern bool contain_subplans(Node *clause);
extern List *pull_subplans(Node *clause);
extern void check_subplans_for_ungrouped_vars(Node *clause, Query *query);
extern void check_subplans_for_ungrouped_vars(Query *query);
extern bool contain_noncachable_functions(Node *clause);