From 1afac12910b82cf68d064284391209f41a6adcf9 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 10 Jan 2003 23:54:24 +0000 Subject: [PATCH] Create a new file executor/execGrouping.c to centralize utility routines shared by nodeGroup, nodeAgg, and soon nodeSubplan. --- src/backend/executor/Makefile | 4 +- src/backend/executor/execGrouping.c | 369 ++++++++++++++++++++++++++++ src/backend/executor/nodeAgg.c | 155 ++++-------- src/backend/executor/nodeGroup.c | 120 +-------- src/backend/executor/nodeHash.c | 74 +----- src/backend/executor/nodeSetOp.c | 4 +- src/backend/executor/nodeUnique.c | 4 +- src/include/executor/executor.h | 27 +- src/include/executor/nodeAgg.h | 4 +- src/include/executor/nodeGroup.h | 13 +- src/include/executor/nodeHash.h | 5 +- src/include/nodes/execnodes.h | 56 ++++- 12 files changed, 498 insertions(+), 337 deletions(-) create mode 100644 src/backend/executor/execGrouping.c diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile index b875259bc1..7e3f5d2d2c 100644 --- a/src/backend/executor/Makefile +++ b/src/backend/executor/Makefile @@ -4,7 +4,7 @@ # Makefile for executor # # IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/executor/Makefile,v 1.19 2002/05/12 23:43:02 tgl Exp $ +# $Header: /cvsroot/pgsql/src/backend/executor/Makefile,v 1.20 2003/01/10 23:54:24 tgl Exp $ # #------------------------------------------------------------------------- @@ -12,7 +12,7 @@ subdir = src/backend/executor top_builddir = ../../.. include $(top_builddir)/src/Makefile.global -OBJS = execAmi.o execJunk.o execMain.o \ +OBJS = execAmi.o execGrouping.o execJunk.o execMain.o \ execProcnode.o execQual.o execScan.o execTuples.o \ execUtils.o functions.o instrument.o nodeAppend.o nodeAgg.o nodeHash.o \ nodeHashjoin.o nodeIndexscan.o nodeMaterial.o nodeMergejoin.o \ diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c new file mode 100644 index 0000000000..e3f7720ca7 --- /dev/null +++ b/src/backend/executor/execGrouping.c @@ -0,0 +1,369 @@ +/*------------------------------------------------------------------------- + * + * execGrouping.c + * executor utility routines for grouping, hashing, and aggregation + * + * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.1 2003/01/10 23:54:24 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/hash.h" +#include "access/heapam.h" +#include "executor/executor.h" +#include "parser/parse_oper.h" +#include "utils/memutils.h" + + +/***************************************************************************** + * Utility routines for grouping tuples together + * + * These routines actually implement SQL's notion of "distinct/not distinct". + * Two tuples match if they are not distinct in all the compared columns, + * i.e., the column values are either both null, or both non-null and equal. + *****************************************************************************/ + +/* + * execTuplesMatch + * Return true if two tuples match in all the indicated fields. + * This is used to detect group boundaries in nodeGroup and nodeAgg, + * and to decide whether two tuples are distinct or not in nodeUnique. + * + * tuple1, tuple2: the tuples to compare + * tupdesc: tuple descriptor applying to both tuples + * numCols: the number of attributes to be examined + * matchColIdx: array of attribute column numbers + * eqFunctions: array of fmgr lookup info for the equality functions to use + * evalContext: short-term memory context for executing the functions + * + * NB: evalContext is reset each time! + */ +bool +execTuplesMatch(HeapTuple tuple1, + HeapTuple tuple2, + TupleDesc tupdesc, + int numCols, + AttrNumber *matchColIdx, + FmgrInfo *eqfunctions, + MemoryContext evalContext) +{ + MemoryContext oldContext; + bool result; + int i; + + /* Reset and switch into the temp context. */ + MemoryContextReset(evalContext); + oldContext = MemoryContextSwitchTo(evalContext); + + /* + * We cannot report a match without checking all the fields, but we + * can report a non-match as soon as we find unequal fields. So, + * start comparing at the last field (least significant sort key). + * That's the most likely to be different if we are dealing with + * sorted input. + */ + result = true; + + for (i = numCols; --i >= 0;) + { + AttrNumber att = matchColIdx[i]; + Datum attr1, + attr2; + bool isNull1, + isNull2; + + attr1 = heap_getattr(tuple1, + att, + tupdesc, + &isNull1); + + attr2 = heap_getattr(tuple2, + att, + tupdesc, + &isNull2); + + if (isNull1 != isNull2) + { + result = false; /* one null and one not; they aren't equal */ + break; + } + + if (isNull1) + continue; /* both are null, treat as equal */ + + /* Apply the type-specific equality function */ + + if (!DatumGetBool(FunctionCall2(&eqfunctions[i], + attr1, attr2))) + { + result = false; /* they aren't equal */ + break; + } + } + + MemoryContextSwitchTo(oldContext); + + return result; +} + + +/* + * execTuplesMatchPrepare + * Look up the equality functions needed for execTuplesMatch. + * The result is a palloc'd array. + */ +FmgrInfo * +execTuplesMatchPrepare(TupleDesc tupdesc, + int numCols, + AttrNumber *matchColIdx) +{ + FmgrInfo *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo)); + int i; + + for (i = 0; i < numCols; i++) + { + AttrNumber att = matchColIdx[i]; + Oid typid = tupdesc->attrs[att - 1]->atttypid; + Oid eq_function; + + eq_function = equality_oper_funcid(typid); + fmgr_info(eq_function, &eqfunctions[i]); + } + + return eqfunctions; +} + + +/***************************************************************************** + * Utility routines for hashing + *****************************************************************************/ + +/* + * ComputeHashFunc + * + * the hash function for hash joins (also used for hash aggregation) + * + * XXX this probably ought to be replaced with datatype-specific + * hash functions, such as those already implemented for hash indexes. + */ +uint32 +ComputeHashFunc(Datum key, int typLen, bool byVal) +{ + unsigned char *k; + + if (byVal) + { + /* + * If it's a by-value data type, just hash the whole Datum value. + * This assumes that datatypes narrower than Datum are + * consistently padded (either zero-extended or sign-extended, but + * not random bits) to fill Datum; see the XXXGetDatum macros in + * postgres.h. NOTE: it would not work to do hash_any(&key, len) + * since this would get the wrong bytes on a big-endian machine. + */ + k = (unsigned char *) &key; + typLen = sizeof(Datum); + } + else + { + if (typLen > 0) + { + /* fixed-width pass-by-reference type */ + k = (unsigned char *) DatumGetPointer(key); + } + else if (typLen == -1) + { + /* + * It's a varlena type, so 'key' points to a "struct varlena". + * NOTE: VARSIZE returns the "real" data length plus the + * sizeof the "vl_len" attribute of varlena (the length + * information). 'key' points to the beginning of the varlena + * struct, so we have to use "VARDATA" to find the beginning + * of the "real" data. Also, we have to be careful to detoast + * the datum if it's toasted. (We don't worry about freeing + * the detoasted copy; that happens for free when the + * per-tuple memory context is reset in ExecHashGetBucket.) + */ + struct varlena *vkey = PG_DETOAST_DATUM(key); + + typLen = VARSIZE(vkey) - VARHDRSZ; + k = (unsigned char *) VARDATA(vkey); + } + else if (typLen == -2) + { + /* It's a null-terminated C string */ + typLen = strlen(DatumGetCString(key)) + 1; + k = (unsigned char *) DatumGetPointer(key); + } + else + { + elog(ERROR, "ComputeHashFunc: Invalid typLen %d", typLen); + k = NULL; /* keep compiler quiet */ + } + } + + return DatumGetUInt32(hash_any(k, typLen)); +} + + +/***************************************************************************** + * Utility routines for all-in-memory hash tables + * + * These routines build hash tables for grouping tuples together (eg, for + * hash aggregation). There is one entry for each not-distinct set of tuples + * presented. + *****************************************************************************/ + +/* + * Construct an empty TupleHashTable + * + * numCols, keyColIdx: identify the tuple fields to use as lookup key + * eqfunctions: equality comparison functions to use + * nbuckets: number of buckets to make + * entrysize: size of each entry (at least sizeof(TupleHashEntryData)) + * tablecxt: memory context in which to store table and table entries + * tempcxt: short-lived context for evaluation hash and comparison functions + * + * The eqfunctions array may be made with execTuplesMatchPrepare(). + * + * Note that keyColIdx and eqfunctions must be allocated in storage that + * will live as long as the hashtable does. + */ +TupleHashTable +BuildTupleHashTable(int numCols, AttrNumber *keyColIdx, + FmgrInfo *eqfunctions, + int nbuckets, Size entrysize, + MemoryContext tablecxt, MemoryContext tempcxt) +{ + TupleHashTable hashtable; + Size tabsize; + + Assert(nbuckets > 0); + Assert(entrysize >= sizeof(TupleHashEntryData)); + + tabsize = sizeof(TupleHashTableData) + + (nbuckets - 1) * sizeof(TupleHashEntry); + hashtable = (TupleHashTable) MemoryContextAllocZero(tablecxt, tabsize); + + hashtable->numCols = numCols; + hashtable->keyColIdx = keyColIdx; + hashtable->eqfunctions = eqfunctions; + hashtable->tablecxt = tablecxt; + hashtable->tempcxt = tempcxt; + hashtable->entrysize = entrysize; + hashtable->nbuckets = nbuckets; + + return hashtable; +} + +/* + * Find or create a hashtable entry for the tuple group containing the + * given tuple. + * + * On return, *isnew is true if the entry is newly created, false if it + * existed already. Any extra space in a new entry has been zeroed. + */ +TupleHashEntry +LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, + bool *isnew) +{ + int numCols = hashtable->numCols; + AttrNumber *keyColIdx = hashtable->keyColIdx; + HeapTuple tuple = slot->val; + TupleDesc tupdesc = slot->ttc_tupleDescriptor; + uint32 hashkey = 0; + int i; + int bucketno; + TupleHashEntry entry; + MemoryContext oldContext; + + /* Need to run the hash function in short-lived context */ + oldContext = MemoryContextSwitchTo(hashtable->tempcxt); + + for (i = 0; i < numCols; i++) + { + AttrNumber att = keyColIdx[i]; + Datum attr; + bool isNull; + + /* rotate hashkey left 1 bit at each step */ + hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); + + attr = heap_getattr(tuple, att, tupdesc, &isNull); + if (isNull) + continue; /* treat nulls as having hash key 0 */ + hashkey ^= ComputeHashFunc(attr, + (int) tupdesc->attrs[att - 1]->attlen, + tupdesc->attrs[att - 1]->attbyval); + } + bucketno = hashkey % (uint32) hashtable->nbuckets; + + for (entry = hashtable->buckets[bucketno]; + entry != NULL; + entry = entry->next) + { + /* Quick check using hashkey */ + if (entry->hashkey != hashkey) + continue; + if (execTuplesMatch(entry->firstTuple, + tuple, + tupdesc, + numCols, keyColIdx, + hashtable->eqfunctions, + hashtable->tempcxt)) + { + MemoryContextSwitchTo(oldContext); + *isnew = false; + return entry; + } + } + + /* Not there, so build a new one */ + MemoryContextSwitchTo(hashtable->tablecxt); + + entry = (TupleHashEntry) palloc0(hashtable->entrysize); + + entry->hashkey = hashkey; + entry->firstTuple = heap_copytuple(tuple); + + entry->next = hashtable->buckets[bucketno]; + hashtable->buckets[bucketno] = entry; + + MemoryContextSwitchTo(oldContext); + + *isnew = true; + + return entry; +} + +/* + * Walk through all the entries of a hash table, in no special order. + * Returns NULL when no more entries remain. + * + * Iterator state must be initialized with ResetTupleHashIterator() macro. + */ +TupleHashEntry +ScanTupleHashTable(TupleHashTable hashtable, TupleHashIterator *state) +{ + TupleHashEntry entry; + + entry = state->next_entry; + while (entry == NULL) + { + if (state->next_bucket >= hashtable->nbuckets) + { + /* No more entries in hashtable, so done */ + return NULL; + } + entry = hashtable->buckets[state->next_bucket++]; + } + state->next_entry = entry->next; + + return entry; +} diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 769e88a839..d8eeae15ad 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -45,7 +45,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.101 2002/12/15 16:17:46 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.102 2003/01/10 23:54:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -57,8 +57,6 @@ #include "catalog/pg_operator.h" #include "executor/executor.h" #include "executor/nodeAgg.h" -#include "executor/nodeGroup.h" -#include "executor/nodeHash.h" #include "miscadmin.h" #include "optimizer/clauses.h" #include "parser/parse_coerce.h" @@ -182,21 +180,15 @@ typedef struct AggStatePerGroupData * distinct set of GROUP BY column values. We compute the hash key from * the GROUP BY columns. */ +typedef struct AggHashEntryData *AggHashEntry; + typedef struct AggHashEntryData { - AggHashEntry next; /* next entry in same hash bucket */ - uint32 hashkey; /* exact hash key of this entry */ - HeapTuple firstTuple; /* copy of first tuple in this group */ + TupleHashEntryData shared; /* common header for hash table entries */ /* per-aggregate transition status array - must be last! */ AggStatePerGroupData pergroup[1]; /* VARIABLE LENGTH ARRAY */ } AggHashEntryData; /* VARIABLE LENGTH STRUCT */ -typedef struct AggHashTableData -{ - int nbuckets; /* number of buckets in hash table */ - AggHashEntry buckets[1]; /* VARIABLE LENGTH ARRAY */ -} AggHashTableData; /* VARIABLE LENGTH STRUCT */ - static void initialize_aggregates(AggState *aggstate, AggStatePerAgg peragg, @@ -578,18 +570,22 @@ static void build_hash_table(AggState *aggstate) { Agg *node = (Agg *) aggstate->ss.ps.plan; - AggHashTable hashtable; - Size tabsize; + MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory; + Size entrysize; Assert(node->aggstrategy == AGG_HASHED); Assert(node->numGroups > 0); - tabsize = sizeof(AggHashTableData) + - (node->numGroups - 1) * sizeof(AggHashEntry); - hashtable = (AggHashTable) MemoryContextAlloc(aggstate->aggcontext, - tabsize); - MemSet(hashtable, 0, tabsize); - hashtable->nbuckets = node->numGroups; - aggstate->hashtable = hashtable; + + entrysize = sizeof(AggHashEntryData) + + (aggstate->numaggs - 1) * sizeof(AggStatePerGroupData); + + aggstate->hashtable = BuildTupleHashTable(node->numCols, + node->grpColIdx, + aggstate->eqfunctions, + node->numGroups, + entrysize, + aggstate->aggcontext, + tmpmem); } /* @@ -601,74 +597,18 @@ build_hash_table(AggState *aggstate) static AggHashEntry lookup_hash_entry(AggState *aggstate, TupleTableSlot *slot) { - Agg *node = (Agg *) aggstate->ss.ps.plan; - AggHashTable hashtable = aggstate->hashtable; - MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory; - HeapTuple tuple = slot->val; - TupleDesc tupdesc = slot->ttc_tupleDescriptor; - uint32 hashkey = 0; - int i; - int bucketno; - AggHashEntry entry; - MemoryContext oldContext; - Size entrysize; + AggHashEntry entry; + bool isnew; - /* Need to run the hash function in short-lived context */ - oldContext = MemoryContextSwitchTo(tmpmem); + entry = (AggHashEntry) LookupTupleHashEntry(aggstate->hashtable, + slot, + &isnew); - for (i = 0; i < node->numCols; i++) + if (isnew) { - AttrNumber att = node->grpColIdx[i]; - Datum attr; - bool isNull; - - /* rotate hashkey left 1 bit at each step */ - hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); - - attr = heap_getattr(tuple, att, tupdesc, &isNull); - if (isNull) - continue; /* treat nulls as having hash key 0 */ - hashkey ^= ComputeHashFunc(attr, - (int) tupdesc->attrs[att - 1]->attlen, - tupdesc->attrs[att - 1]->attbyval); + /* initialize aggregates for new tuple group */ + initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup); } - bucketno = hashkey % (uint32) hashtable->nbuckets; - - for (entry = hashtable->buckets[bucketno]; - entry != NULL; - entry = entry->next) - { - /* Quick check using hashkey */ - if (entry->hashkey != hashkey) - continue; - if (execTuplesMatch(entry->firstTuple, - tuple, - tupdesc, - node->numCols, node->grpColIdx, - aggstate->eqfunctions, - tmpmem)) - { - MemoryContextSwitchTo(oldContext); - return entry; - } - } - - /* Not there, so build a new one */ - MemoryContextSwitchTo(aggstate->aggcontext); - entrysize = sizeof(AggHashEntryData) + - (aggstate->numaggs - 1) * sizeof(AggStatePerGroupData); - entry = (AggHashEntry) palloc0(entrysize); - - entry->hashkey = hashkey; - entry->firstTuple = heap_copytuple(tuple); - - entry->next = hashtable->buckets[bucketno]; - hashtable->buckets[bucketno] = entry; - - MemoryContextSwitchTo(oldContext); - - /* initialize aggregates for new tuple group */ - initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup); return entry; } @@ -964,8 +904,7 @@ agg_fill_hash_table(AggState *aggstate) aggstate->table_filled = true; /* Initialize to walk the hash table */ - aggstate->next_hash_entry = NULL; - aggstate->next_hash_bucket = 0; + ResetTupleHashIterator(&aggstate->hashiter); } /* @@ -980,7 +919,7 @@ agg_retrieve_hash_table(AggState *aggstate) bool *aggnulls; AggStatePerAgg peragg; AggStatePerGroup pergroup; - AggHashTable hashtable; + TupleHashTable hashtable; AggHashEntry entry; TupleTableSlot *firstSlot; TupleTableSlot *resultSlot; @@ -1010,18 +949,14 @@ agg_retrieve_hash_table(AggState *aggstate) /* * Find the next entry in the hash table */ - entry = aggstate->next_hash_entry; - while (entry == NULL) + entry = (AggHashEntry) ScanTupleHashTable(hashtable, + &aggstate->hashiter); + if (entry == NULL) { - if (aggstate->next_hash_bucket >= hashtable->nbuckets) - { - /* No more entries in hashtable, so done */ - aggstate->agg_done = TRUE; - return NULL; - } - entry = hashtable->buckets[aggstate->next_hash_bucket++]; + /* No more entries in hashtable, so done */ + aggstate->agg_done = TRUE; + return NULL; } - aggstate->next_hash_entry = entry->next; /* * Clear the per-output-tuple context for each group @@ -1032,7 +967,7 @@ agg_retrieve_hash_table(AggState *aggstate) * Store the copied first input tuple in the tuple table slot * reserved for it, so that it can be used in ExecProject. */ - ExecStoreTuple(entry->firstTuple, + ExecStoreTuple(entry->shared.firstTuple, firstSlot, InvalidBuffer, false); @@ -1187,6 +1122,17 @@ ExecInitAgg(Agg *node, EState *estate) numaggs = 1; } + /* + * If we are grouping, precompute fmgr lookup data for inner loop + */ + if (node->numCols > 0) + { + aggstate->eqfunctions = + execTuplesMatchPrepare(ExecGetScanType(&aggstate->ss), + node->numCols, + node->grpColIdx); + } + /* * Set up aggregate-result storage in the output expr context, and also * allocate my private per-agg working storage @@ -1211,17 +1157,6 @@ ExecInitAgg(Agg *node, EState *estate) aggstate->pergroup = pergroup; } - /* - * If we are grouping, precompute fmgr lookup data for inner loop - */ - if (node->numCols > 0) - { - aggstate->eqfunctions = - execTuplesMatchPrepare(ExecGetScanType(&aggstate->ss), - node->numCols, - node->grpColIdx); - } - /* * Perform lookups of aggregate function info, and initialize the * unchanging fields of the per-agg data diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c index 58f6c1b34e..b480e388a2 100644 --- a/src/backend/executor/nodeGroup.c +++ b/src/backend/executor/nodeGroup.c @@ -15,7 +15,7 @@ * locate group boundaries. * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.53 2002/12/15 16:17:46 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.54 2003/01/10 23:54:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,13 +23,8 @@ #include "postgres.h" #include "access/heapam.h" -#include "catalog/pg_operator.h" #include "executor/executor.h" #include "executor/nodeGroup.h" -#include "parser/parse_oper.h" -#include "utils/builtins.h" -#include "utils/lsyscache.h" -#include "utils/syscache.h" /* @@ -241,116 +236,3 @@ ExecReScanGroup(GroupState *node, ExprContext *exprCtxt) ((PlanState *) node)->lefttree->chgParam == NULL) ExecReScan(((PlanState *) node)->lefttree, exprCtxt); } - -/***************************************************************************** - * Code shared with nodeUnique.c and nodeAgg.c - *****************************************************************************/ - -/* - * execTuplesMatch - * Return true if two tuples match in all the indicated fields. - * This is used to detect group boundaries in nodeGroup and nodeAgg, - * and to decide whether two tuples are distinct or not in nodeUnique. - * - * tuple1, tuple2: the tuples to compare - * tupdesc: tuple descriptor applying to both tuples - * numCols: the number of attributes to be examined - * matchColIdx: array of attribute column numbers - * eqFunctions: array of fmgr lookup info for the equality functions to use - * evalContext: short-term memory context for executing the functions - * - * NB: evalContext is reset each time! - */ -bool -execTuplesMatch(HeapTuple tuple1, - HeapTuple tuple2, - TupleDesc tupdesc, - int numCols, - AttrNumber *matchColIdx, - FmgrInfo *eqfunctions, - MemoryContext evalContext) -{ - MemoryContext oldContext; - bool result; - int i; - - /* Reset and switch into the temp context. */ - MemoryContextReset(evalContext); - oldContext = MemoryContextSwitchTo(evalContext); - - /* - * We cannot report a match without checking all the fields, but we - * can report a non-match as soon as we find unequal fields. So, - * start comparing at the last field (least significant sort key). - * That's the most likely to be different if we are dealing with - * sorted input. - */ - result = true; - - for (i = numCols; --i >= 0;) - { - AttrNumber att = matchColIdx[i]; - Datum attr1, - attr2; - bool isNull1, - isNull2; - - attr1 = heap_getattr(tuple1, - att, - tupdesc, - &isNull1); - - attr2 = heap_getattr(tuple2, - att, - tupdesc, - &isNull2); - - if (isNull1 != isNull2) - { - result = false; /* one null and one not; they aren't equal */ - break; - } - - if (isNull1) - continue; /* both are null, treat as equal */ - - /* Apply the type-specific equality function */ - - if (!DatumGetBool(FunctionCall2(&eqfunctions[i], - attr1, attr2))) - { - result = false; /* they aren't equal */ - break; - } - } - - MemoryContextSwitchTo(oldContext); - - return result; -} - -/* - * execTuplesMatchPrepare - * Look up the equality functions needed for execTuplesMatch. - * The result is a palloc'd array. - */ -FmgrInfo * -execTuplesMatchPrepare(TupleDesc tupdesc, - int numCols, - AttrNumber *matchColIdx) -{ - FmgrInfo *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo)); - int i; - - for (i = 0; i < numCols; i++) - { - AttrNumber att = matchColIdx[i]; - Oid typid = tupdesc->attrs[att - 1]->atttypid; - Oid eq_function; - - eq_function = equality_oper_funcid(typid); - fmgr_info(eq_function, &eqfunctions[i]); - } - - return eqfunctions; -} diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index bea8963099..31152a3d85 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.73 2002/12/30 15:21:18 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.74 2003/01/10 23:54:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -20,10 +20,6 @@ */ #include "postgres.h" -#include -#include - -#include "access/hash.h" #include "executor/execdebug.h" #include "executor/nodeHash.h" #include "executor/nodeHashjoin.h" @@ -642,74 +638,6 @@ ExecScanHashBucket(HashJoinState *hjstate, return NULL; } -/* ---------------------------------------------------------------- - * ComputeHashFunc - * - * the hash function for hash joins (also used for hash aggregation) - * - * XXX this probably ought to be replaced with datatype-specific - * hash functions, such as those already implemented for hash indexes. - * ---------------------------------------------------------------- - */ -uint32 -ComputeHashFunc(Datum key, int typLen, bool byVal) -{ - unsigned char *k; - - if (byVal) - { - /* - * If it's a by-value data type, just hash the whole Datum value. - * This assumes that datatypes narrower than Datum are - * consistently padded (either zero-extended or sign-extended, but - * not random bits) to fill Datum; see the XXXGetDatum macros in - * postgres.h. NOTE: it would not work to do hash_any(&key, len) - * since this would get the wrong bytes on a big-endian machine. - */ - k = (unsigned char *) &key; - typLen = sizeof(Datum); - } - else - { - if (typLen > 0) - { - /* fixed-width pass-by-reference type */ - k = (unsigned char *) DatumGetPointer(key); - } - else if (typLen == -1) - { - /* - * It's a varlena type, so 'key' points to a "struct varlena". - * NOTE: VARSIZE returns the "real" data length plus the - * sizeof the "vl_len" attribute of varlena (the length - * information). 'key' points to the beginning of the varlena - * struct, so we have to use "VARDATA" to find the beginning - * of the "real" data. Also, we have to be careful to detoast - * the datum if it's toasted. (We don't worry about freeing - * the detoasted copy; that happens for free when the - * per-tuple memory context is reset in ExecHashGetBucket.) - */ - struct varlena *vkey = PG_DETOAST_DATUM(key); - - typLen = VARSIZE(vkey) - VARHDRSZ; - k = (unsigned char *) VARDATA(vkey); - } - else if (typLen == -2) - { - /* It's a null-terminated C string */ - typLen = strlen(DatumGetCString(key)) + 1; - k = (unsigned char *) DatumGetPointer(key); - } - else - { - elog(ERROR, "ComputeHashFunc: Invalid typLen %d", typLen); - k = NULL; /* keep compiler quiet */ - } - } - - return DatumGetUInt32(hash_any(k, typLen)); -} - /* ---------------------------------------------------------------- * ExecHashTableReset * diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c index 965a2a6466..3946cd0024 100644 --- a/src/backend/executor/nodeSetOp.c +++ b/src/backend/executor/nodeSetOp.c @@ -21,7 +21,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeSetOp.c,v 1.8 2002/12/15 16:17:46 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeSetOp.c,v 1.9 2003/01/10 23:54:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -36,9 +36,9 @@ #include "access/heapam.h" #include "executor/executor.h" -#include "executor/nodeGroup.h" #include "executor/nodeSetOp.h" + /* ---------------------------------------------------------------- * ExecSetOp * ---------------------------------------------------------------- diff --git a/src/backend/executor/nodeUnique.c b/src/backend/executor/nodeUnique.c index 415594f92c..7a0ccb0b14 100644 --- a/src/backend/executor/nodeUnique.c +++ b/src/backend/executor/nodeUnique.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeUnique.c,v 1.36 2002/12/15 16:17:46 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeUnique.c,v 1.37 2003/01/10 23:54:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -27,9 +27,9 @@ #include "access/heapam.h" #include "executor/executor.h" -#include "executor/nodeGroup.h" #include "executor/nodeUnique.h" + /* ---------------------------------------------------------------- * ExecUnique * diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 31cc210753..fb300fc044 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: executor.h,v 1.85 2002/12/15 21:01:34 tgl Exp $ + * $Id: executor.h,v 1.86 2003/01/10 23:54:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -36,6 +36,31 @@ extern void ExecMarkPos(PlanState *node); extern void ExecRestrPos(PlanState *node); extern bool ExecSupportsMarkRestore(NodeTag plantype); +/* + * prototypes from functions in execGrouping.c + */ +extern bool execTuplesMatch(HeapTuple tuple1, + HeapTuple tuple2, + TupleDesc tupdesc, + int numCols, + AttrNumber *matchColIdx, + FmgrInfo *eqfunctions, + MemoryContext evalContext); +extern FmgrInfo *execTuplesMatchPrepare(TupleDesc tupdesc, + int numCols, + AttrNumber *matchColIdx); +extern uint32 ComputeHashFunc(Datum key, int typLen, bool byVal); +extern TupleHashTable BuildTupleHashTable(int numCols, AttrNumber *keyColIdx, + FmgrInfo *eqfunctions, + int nbuckets, Size entrysize, + MemoryContext tablecxt, + MemoryContext tempcxt); +extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable, + TupleTableSlot *slot, + bool *isnew); +extern TupleHashEntry ScanTupleHashTable(TupleHashTable hashtable, + TupleHashIterator *state); + /* * prototypes from functions in execJunk.c */ diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h index 036d67ccaa..a2817306da 100644 --- a/src/include/executor/nodeAgg.h +++ b/src/include/executor/nodeAgg.h @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * nodeAgg.h - * + * prototypes for nodeAgg.c * * * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: nodeAgg.h,v 1.18 2002/12/05 15:50:36 tgl Exp $ + * $Id: nodeAgg.h,v 1.19 2003/01/10 23:54:24 tgl Exp $ * *------------------------------------------------------------------------- */ diff --git a/src/include/executor/nodeGroup.h b/src/include/executor/nodeGroup.h index 211e55b6ca..2a6b733c9d 100644 --- a/src/include/executor/nodeGroup.h +++ b/src/include/executor/nodeGroup.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: nodeGroup.h,v 1.23 2002/12/05 15:50:37 tgl Exp $ + * $Id: nodeGroup.h,v 1.24 2003/01/10 23:54:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,15 +22,4 @@ extern TupleTableSlot *ExecGroup(GroupState *node); extern void ExecEndGroup(GroupState *node); extern void ExecReScanGroup(GroupState *node, ExprContext *exprCtxt); -extern bool execTuplesMatch(HeapTuple tuple1, - HeapTuple tuple2, - TupleDesc tupdesc, - int numCols, - AttrNumber *matchColIdx, - FmgrInfo *eqfunctions, - MemoryContext evalContext); -extern FmgrInfo *execTuplesMatchPrepare(TupleDesc tupdesc, - int numCols, - AttrNumber *matchColIdx); - #endif /* NODEGROUP_H */ diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h index 02e5635526..da1113b32d 100644 --- a/src/include/executor/nodeHash.h +++ b/src/include/executor/nodeHash.h @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * nodeHash.h - * + * prototypes for nodeHash.c * * * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: nodeHash.h,v 1.28 2002/12/30 15:21:23 tgl Exp $ + * $Id: nodeHash.h,v 1.29 2003/01/10 23:54:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -38,6 +38,5 @@ extern void ExecChooseHashTableSize(double ntuples, int tupwidth, int *virtualbuckets, int *physicalbuckets, int *numbatches); -extern uint32 ComputeHashFunc(Datum key, int typLen, bool byVal); #endif /* NODEHASH_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 1ce0635c63..9c43660c61 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: execnodes.h,v 1.89 2003/01/10 21:08:15 tgl Exp $ + * $Id: execnodes.h,v 1.90 2003/01/10 23:54:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -324,6 +324,46 @@ typedef struct EState } EState; +/* ---------------------------------------------------------------- + * Tuple Hash Tables + * + * All-in-memory tuple hash tables are used for a number of purposes. + * ---------------------------------------------------------------- + */ +typedef struct TupleHashEntryData *TupleHashEntry; +typedef struct TupleHashTableData *TupleHashTable; + +typedef struct TupleHashEntryData +{ + TupleHashEntry next; /* next entry in same hash bucket */ + uint32 hashkey; /* exact hash key of this entry */ + HeapTuple firstTuple; /* copy of first tuple in this group */ + /* there may be additional data beyond the end of this struct */ +} TupleHashEntryData; /* VARIABLE LENGTH STRUCT */ + +typedef struct TupleHashTableData +{ + int numCols; /* number of columns in lookup key */ + AttrNumber *keyColIdx; /* attr numbers of key columns */ + FmgrInfo *eqfunctions; /* lookup data for comparison functions */ + MemoryContext tablecxt; /* memory context containing table */ + MemoryContext tempcxt; /* context for function evaluations */ + Size entrysize; /* actual size to make each hash entry */ + int nbuckets; /* number of buckets in hash table */ + TupleHashEntry buckets[1]; /* VARIABLE LENGTH ARRAY */ +} TupleHashTableData; /* VARIABLE LENGTH STRUCT */ + +typedef struct +{ + TupleHashEntry next_entry; /* next entry in current chain */ + int next_bucket; /* next chain */ +} TupleHashIterator; + +#define ResetTupleHashIterator(iter) \ + ((iter)->next_entry = NULL, \ + (iter)->next_bucket = 0) + + /* ---------------------------------------------------------------- * Expression State Trees * @@ -445,9 +485,6 @@ typedef struct BoolExprState * SubPlanState node * ---------------- */ -/* this struct is private in nodeSubplan.c: */ -typedef struct SubPlanHashTableData *SubPlanHashTable; - typedef struct SubPlanState { ExprState xprstate; @@ -458,8 +495,8 @@ typedef struct SubPlanState bool needShutdown; /* TRUE = need to shutdown subplan */ HeapTuple curTuple; /* copy of most recent tuple from subplan */ /* these are used when hashing the subselect's output: */ - SubPlanHashTable hashtable; /* hash table for no-nulls subselect rows */ - SubPlanHashTable hashnulls; /* hash table for rows with null(s) */ + TupleHashTable hashtable; /* hash table for no-nulls subselect rows */ + TupleHashTable hashnulls; /* hash table for rows with null(s) */ } SubPlanState; /* ---------------- @@ -877,8 +914,6 @@ typedef struct GroupState /* these structs are private in nodeAgg.c: */ typedef struct AggStatePerAggData *AggStatePerAgg; typedef struct AggStatePerGroupData *AggStatePerGroup; -typedef struct AggHashEntryData *AggHashEntry; -typedef struct AggHashTableData *AggHashTable; typedef struct AggState { @@ -894,10 +929,9 @@ typedef struct AggState AggStatePerGroup pergroup; /* per-Aggref-per-group working state */ HeapTuple grp_firstTuple; /* copy of first tuple of current group */ /* these fields are used in AGG_HASHED mode: */ - AggHashTable hashtable; /* hash table with one entry per group */ + TupleHashTable hashtable; /* hash table with one entry per group */ bool table_filled; /* hash table filled yet? */ - AggHashEntry next_hash_entry; /* next entry in current chain */ - int next_hash_bucket; /* next chain */ + TupleHashIterator hashiter; /* for iterating through hash table */ } AggState; /* ----------------