Improve make_subplanTargetList to avoid including Vars unnecessarily.
If a Var was used only in a GROUP BY expression, the previous implementation would include the Var by itself (as well as the expression) in the generated targetlist. This wouldn't affect the efficiency of the scan/join part of the plan at all, but it could result in passing unnecessarily-wide rows through sorting and grouping steps. It turns out to take only a little more code, and not noticeably more time, to generate a tlist without such redundancy, so let's do that. Per a recent gripe from HarmeekSingh Bedi.
This commit is contained in:
parent
1af37ec96d
commit
1bc16a9460
@ -85,6 +85,7 @@ static bool choose_hashed_distinct(PlannerInfo *root,
|
|||||||
double dNumDistinctRows);
|
double dNumDistinctRows);
|
||||||
static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
|
static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
|
||||||
AttrNumber **groupColIdx, bool *need_tlist_eval);
|
AttrNumber **groupColIdx, bool *need_tlist_eval);
|
||||||
|
static int get_grouping_column_index(Query *parse, TargetEntry *tle);
|
||||||
static void locate_grouping_columns(PlannerInfo *root,
|
static void locate_grouping_columns(PlannerInfo *root,
|
||||||
List *tlist,
|
List *tlist,
|
||||||
List *sub_tlist,
|
List *sub_tlist,
|
||||||
@ -2536,14 +2537,9 @@ choose_hashed_distinct(PlannerInfo *root,
|
|||||||
* For example, given a query like
|
* For example, given a query like
|
||||||
* SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
|
* SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
|
||||||
* we want to pass this targetlist to the subplan:
|
* we want to pass this targetlist to the subplan:
|
||||||
* a,b,c,d,a+b
|
* a+b,c,d
|
||||||
* where the a+b target will be used by the Sort/Group steps, and the
|
* where the a+b target will be used by the Sort/Group steps, and the
|
||||||
* other targets will be used for computing the final results. (In the
|
* other targets will be used for computing the final results.
|
||||||
* above example we could theoretically suppress the a and b targets and
|
|
||||||
* pass down only c,d,a+b, but it's not really worth the trouble to
|
|
||||||
* eliminate simple var references from the subplan. We will avoid doing
|
|
||||||
* the extra computation to recompute a+b at the outer level; see
|
|
||||||
* fix_upper_expr() in setrefs.c.)
|
|
||||||
*
|
*
|
||||||
* If we are grouping or aggregating, *and* there are no non-Var grouping
|
* If we are grouping or aggregating, *and* there are no non-Var grouping
|
||||||
* expressions, then the returned tlist is effectively dummy; we do not
|
* expressions, then the returned tlist is effectively dummy; we do not
|
||||||
@ -2569,7 +2565,8 @@ make_subplanTargetList(PlannerInfo *root,
|
|||||||
{
|
{
|
||||||
Query *parse = root->parse;
|
Query *parse = root->parse;
|
||||||
List *sub_tlist;
|
List *sub_tlist;
|
||||||
List *extravars;
|
List *non_group_cols;
|
||||||
|
List *non_group_vars;
|
||||||
int numCols;
|
int numCols;
|
||||||
|
|
||||||
*groupColIdx = NULL;
|
*groupColIdx = NULL;
|
||||||
@ -2586,71 +2583,132 @@ make_subplanTargetList(PlannerInfo *root,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Otherwise, start with a "flattened" tlist (having just the Vars
|
* Otherwise, we must build a tlist containing all grouping columns,
|
||||||
* mentioned in the targetlist and HAVING qual). Note this includes Vars
|
* plus any other Vars mentioned in the targetlist and HAVING qual.
|
||||||
* used in resjunk items, so we are covering the needs of ORDER BY and
|
|
||||||
* window specifications. Vars used within Aggrefs will be pulled out
|
|
||||||
* here, too.
|
|
||||||
*/
|
*/
|
||||||
sub_tlist = flatten_tlist(tlist,
|
sub_tlist = NIL;
|
||||||
PVC_RECURSE_AGGREGATES,
|
non_group_cols = NIL;
|
||||||
PVC_INCLUDE_PLACEHOLDERS);
|
|
||||||
extravars = pull_var_clause(parse->havingQual,
|
|
||||||
PVC_RECURSE_AGGREGATES,
|
|
||||||
PVC_INCLUDE_PLACEHOLDERS);
|
|
||||||
sub_tlist = add_to_flat_tlist(sub_tlist, extravars);
|
|
||||||
list_free(extravars);
|
|
||||||
*need_tlist_eval = false; /* only eval if not flat tlist */
|
*need_tlist_eval = false; /* only eval if not flat tlist */
|
||||||
|
|
||||||
/*
|
|
||||||
* If grouping, create sub_tlist entries for all GROUP BY expressions
|
|
||||||
* (GROUP BY items that are simple Vars should be in the list already),
|
|
||||||
* and make an array showing where the group columns are in the sub_tlist.
|
|
||||||
*/
|
|
||||||
numCols = list_length(parse->groupClause);
|
numCols = list_length(parse->groupClause);
|
||||||
if (numCols > 0)
|
if (numCols > 0)
|
||||||
{
|
{
|
||||||
int keyno = 0;
|
/*
|
||||||
|
* If grouping, create sub_tlist entries for all GROUP BY columns, and
|
||||||
|
* make an array showing where the group columns are in the sub_tlist.
|
||||||
|
*
|
||||||
|
* Note: with this implementation, the array entries will always be
|
||||||
|
* 1..N, but we don't want callers to assume that.
|
||||||
|
*/
|
||||||
AttrNumber *grpColIdx;
|
AttrNumber *grpColIdx;
|
||||||
ListCell *gl;
|
ListCell *tl;
|
||||||
|
|
||||||
grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
|
grpColIdx = (AttrNumber *) palloc0(sizeof(AttrNumber) * numCols);
|
||||||
*groupColIdx = grpColIdx;
|
*groupColIdx = grpColIdx;
|
||||||
|
|
||||||
foreach(gl, parse->groupClause)
|
foreach(tl, tlist)
|
||||||
{
|
{
|
||||||
SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
|
TargetEntry *tle = (TargetEntry *) lfirst(tl);
|
||||||
Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
|
int colno;
|
||||||
TargetEntry *te;
|
|
||||||
|
|
||||||
/*
|
colno = get_grouping_column_index(parse, tle);
|
||||||
* Find or make a matching sub_tlist entry. If the groupexpr
|
if (colno >= 0)
|
||||||
* isn't a Var, no point in searching. (Note that the parser
|
|
||||||
* won't make multiple groupClause entries for the same TLE.)
|
|
||||||
*/
|
|
||||||
if (groupexpr && IsA(groupexpr, Var))
|
|
||||||
te = tlist_member(groupexpr, sub_tlist);
|
|
||||||
else
|
|
||||||
te = NULL;
|
|
||||||
|
|
||||||
if (!te)
|
|
||||||
{
|
{
|
||||||
te = makeTargetEntry((Expr *) groupexpr,
|
/*
|
||||||
list_length(sub_tlist) + 1,
|
* It's a grouping column, so add it to the result tlist and
|
||||||
NULL,
|
* remember its resno in grpColIdx[].
|
||||||
false);
|
*/
|
||||||
sub_tlist = lappend(sub_tlist, te);
|
TargetEntry *newtle;
|
||||||
*need_tlist_eval = true; /* it's not flat anymore */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* and save its resno */
|
newtle = makeTargetEntry(tle->expr,
|
||||||
grpColIdx[keyno++] = te->resno;
|
list_length(sub_tlist) + 1,
|
||||||
|
NULL,
|
||||||
|
false);
|
||||||
|
sub_tlist = lappend(sub_tlist, newtle);
|
||||||
|
|
||||||
|
Assert(grpColIdx[colno] == 0); /* no dups expected */
|
||||||
|
grpColIdx[colno] = newtle->resno;
|
||||||
|
|
||||||
|
if (!(newtle->expr && IsA(newtle->expr, Var)))
|
||||||
|
*need_tlist_eval = true; /* tlist contains non Vars */
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Non-grouping column, so just remember the expression
|
||||||
|
* for later call to pull_var_clause. There's no need for
|
||||||
|
* pull_var_clause to examine the TargetEntry node itself.
|
||||||
|
*/
|
||||||
|
non_group_cols = lappend(non_group_cols, tle->expr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* With no grouping columns, just pass whole tlist to pull_var_clause.
|
||||||
|
* Need (shallow) copy to avoid damaging input tlist below.
|
||||||
|
*/
|
||||||
|
non_group_cols = list_copy(tlist);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If there's a HAVING clause, we'll need the Vars it uses, too.
|
||||||
|
*/
|
||||||
|
if (parse->havingQual)
|
||||||
|
non_group_cols = lappend(non_group_cols, parse->havingQual);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Pull out all the Vars mentioned in non-group cols (plus HAVING), and
|
||||||
|
* add them to the result tlist if not already present. (A Var used
|
||||||
|
* directly as a GROUP BY item will be present already.) Note this
|
||||||
|
* includes Vars used in resjunk items, so we are covering the needs of
|
||||||
|
* ORDER BY and window specifications. Vars used within Aggrefs will be
|
||||||
|
* pulled out here, too.
|
||||||
|
*/
|
||||||
|
non_group_vars = pull_var_clause((Node *) non_group_cols,
|
||||||
|
PVC_RECURSE_AGGREGATES,
|
||||||
|
PVC_INCLUDE_PLACEHOLDERS);
|
||||||
|
sub_tlist = add_to_flat_tlist(sub_tlist, non_group_vars);
|
||||||
|
|
||||||
|
/* clean up cruft */
|
||||||
|
list_free(non_group_vars);
|
||||||
|
list_free(non_group_cols);
|
||||||
|
|
||||||
return sub_tlist;
|
return sub_tlist;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* get_grouping_column_index
|
||||||
|
* Get the GROUP BY column position, if any, of a targetlist entry.
|
||||||
|
*
|
||||||
|
* Returns the index (counting from 0) of the TLE in the GROUP BY list, or -1
|
||||||
|
* if it's not a grouping column. Note: the result is unique because the
|
||||||
|
* parser won't make multiple groupClause entries for the same TLE.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
get_grouping_column_index(Query *parse, TargetEntry *tle)
|
||||||
|
{
|
||||||
|
int colno = 0;
|
||||||
|
Index ressortgroupref = tle->ressortgroupref;
|
||||||
|
ListCell *gl;
|
||||||
|
|
||||||
|
/* No need to search groupClause if TLE hasn't got a sortgroupref */
|
||||||
|
if (ressortgroupref == 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
foreach(gl, parse->groupClause)
|
||||||
|
{
|
||||||
|
SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
|
||||||
|
|
||||||
|
if (grpcl->tleSortGroupRef == ressortgroupref)
|
||||||
|
return colno;
|
||||||
|
colno++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* locate_grouping_columns
|
* locate_grouping_columns
|
||||||
* Locate grouping columns in the tlist chosen by create_plan.
|
* Locate grouping columns in the tlist chosen by create_plan.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user