From e2114817c7f7c72c5159ed225fe6832505c2bd5f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 22 Jan 2003 00:07:00 +0000 Subject: [PATCH] Implement choice between hash-based and sort-based grouping for doing DISTINCT processing on the output of an IN sub-select. --- src/backend/optimizer/plan/createplan.c | 36 ++++++++--- src/backend/optimizer/util/pathnode.c | 81 +++++++++++++++++++++++-- 2 files changed, 105 insertions(+), 12 deletions(-) diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index b7b1204e76..eb7e922d9a 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,12 +10,13 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.132 2003/01/20 18:54:52 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.133 2003/01/22 00:07:00 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" +#include <limits.h> #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" @@ -418,6 +419,7 @@ create_unique_plan(Query *root, UniquePath *best_path) Plan *plan; Plan *subplan; List *sub_targetlist; + List *my_tlist; List *l; subplan = create_plan(root, best_path->subpath); @@ -474,21 +476,39 @@ create_unique_plan(Query *root, UniquePath *best_path) subplan->targetlist = newtlist; } + my_tlist = new_unsorted_tlist(subplan->targetlist); + if (best_path->use_hash) { - elog(ERROR, "create_unique_plan: hash case not implemented yet"); - plan = NULL; + int numGroupCols = length(my_tlist); + long numGroups; + AttrNumber *groupColIdx; + int i; + + numGroups = (long) Min(best_path->rows, (double) LONG_MAX); + + groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber)); + for (i = 0; i < numGroupCols; i++) + groupColIdx[i] = i+1; + + plan = (Plan *) make_agg(root, + my_tlist, + NIL, + AGG_HASHED, + numGroupCols, + groupColIdx, + numGroups, + 0, + subplan); } 
else { - List *sort_tlist; List *sortList; - sort_tlist = new_unsorted_tlist(subplan->targetlist); - sortList = addAllTargetsToSortList(NIL, sort_tlist); - plan = (Plan *) make_sort_from_sortclauses(root, sort_tlist, + sortList = addAllTargetsToSortList(NIL, my_tlist); + plan = (Plan *) make_sort_from_sortclauses(root, my_tlist, subplan, sortList); - plan = (Plan *) make_unique(sort_tlist, plan, sortList); + plan = (Plan *) make_unique(my_tlist, plan, sortList); } plan->plan_rows = best_path->rows; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index a5cc94e831..3e8d37cb28 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.84 2003/01/20 18:54:56 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.85 2003/01/22 00:07:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,14 +16,22 @@ #include <math.h> +#include "catalog/pg_operator.h" #include "executor/executor.h" +#include "miscadmin.h" #include "nodes/plannodes.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/restrictinfo.h" +#include "parser/parse_expr.h" +#include "parser/parse_oper.h" #include "utils/memutils.h" #include "utils/selfuncs.h" +#include "utils/syscache.h" + + +static bool hash_safe_tlist(List *tlist); /***************************************************************************** @@ -506,6 +514,7 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath) { UniquePath *pathnode; Path sort_path; /* dummy for result of cost_sort */ + Path agg_path; /* dummy for result of cost_agg */ MemoryContext oldcontext; List *sub_targetlist; List *l; @@ -587,16 +596,80 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath) */ sort_path.total_cost += cpu_operator_cost * 
rel->rows * numCols; - pathnode->use_hash = false; /* for now */ + /* + * Is it safe to use a hashed implementation? If so, estimate and + * compare costs. We only try this if we know the targetlist for + * sure (else we can't be sure about the datatypes involved). + */ + pathnode->use_hash = false; + if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist)) + { + /* + * Estimate the overhead per hashtable entry at 64 bytes (same + * as in planner.c). + */ + int hashentrysize = rel->width + 64; - pathnode->path.startup_cost = sort_path.startup_cost; - pathnode->path.total_cost = sort_path.total_cost; + if (hashentrysize * pathnode->rows <= SortMem * 1024L) + { + cost_agg(&agg_path, root, + AGG_HASHED, 0, + numCols, pathnode->rows, + subpath->startup_cost, + subpath->total_cost, + rel->rows); + if (agg_path.total_cost < sort_path.total_cost) + pathnode->use_hash = true; + } + } + + if (pathnode->use_hash) + { + pathnode->path.startup_cost = agg_path.startup_cost; + pathnode->path.total_cost = agg_path.total_cost; + } + else + { + pathnode->path.startup_cost = sort_path.startup_cost; + pathnode->path.total_cost = sort_path.total_cost; + } rel->cheapest_unique_path = (Path *) pathnode; return pathnode; } +/* + * hash_safe_tlist - can datatypes of given tlist be hashed? + * + * We assume hashed aggregation will work if the datatype's equality operator + * is marked hashjoinable. + * + * XXX this probably should be somewhere else. See also hash_safe_grouping + * in plan/planner.c. 
+ */ +static bool +hash_safe_tlist(List *tlist) +{ + List *tl; + + foreach(tl, tlist) + { + Node *expr = (Node *) lfirst(tl); + Operator optup; + bool oprcanhash; + + optup = equality_oper(exprType(expr), true); + if (!optup) + return false; + oprcanhash = ((Form_pg_operator) GETSTRUCT(optup))->oprcanhash; + ReleaseSysCache(optup); + if (!oprcanhash) + return false; + } + return true; +} + /* * create_subqueryscan_path * Creates a path corresponding to a sequential scan of a subquery,