Improve dynahash.c's API so that caller can specify the comparison function
as well as the hash function (formerly the comparison function was hardwired as memcmp()). This makes it possible to eliminate the special-purpose hashtable management code in execGrouping.c in favor of using dynahash to manage tuple hashtables, which is a win because dynahash knows how to expand a hashtable when the original size estimate was too small, whereas the special-purpose code was too stupid to do that. (See recent gripe from Stephan Szabo about poor performance when the hash table size estimate is way off.) Free side benefit: when using string_hash, the default comparison function is now strncmp() instead of memcmp(). This should eliminate some part of the overhead associated with larger NAMEDATALEN values.
This commit is contained in:
parent
23e10843db
commit
80860c32d9
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.7 2003/08/08 21:41:34 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.8 2003/08/19 01:13:40 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -23,6 +23,13 @@
|
||||
#include "utils/syscache.h"
|
||||
|
||||
|
||||
static TupleHashTable CurTupleHashTable = NULL;
|
||||
|
||||
static uint32 TupleHashTableHash(const void *key, Size keysize);
|
||||
static int TupleHashTableMatch(const void *key1, const void *key2,
|
||||
Size keysize);
|
||||
|
||||
|
||||
/*****************************************************************************
|
||||
* Utility routines for grouping tuples together
|
||||
*****************************************************************************/
|
||||
@ -272,7 +279,7 @@ execTuplesHashPrepare(TupleDesc tupdesc,
|
||||
* numCols, keyColIdx: identify the tuple fields to use as lookup key
|
||||
* eqfunctions: equality comparison functions to use
|
||||
* hashfunctions: datatype-specific hashing functions to use
|
||||
* nbuckets: number of buckets to make
|
||||
* nbuckets: initial estimate of hashtable size
|
||||
* entrysize: size of each entry (at least sizeof(TupleHashEntryData))
|
||||
* tablecxt: memory context in which to store table and table entries
|
||||
* tempcxt: short-lived context for evaluating hash and comparison functions
|
||||
@ -290,14 +297,13 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
|
||||
MemoryContext tablecxt, MemoryContext tempcxt)
|
||||
{
|
||||
TupleHashTable hashtable;
|
||||
Size tabsize;
|
||||
HASHCTL hash_ctl;
|
||||
|
||||
Assert(nbuckets > 0);
|
||||
Assert(entrysize >= sizeof(TupleHashEntryData));
|
||||
|
||||
tabsize = sizeof(TupleHashTableData) +
|
||||
(nbuckets - 1) *sizeof(TupleHashEntry);
|
||||
hashtable = (TupleHashTable) MemoryContextAllocZero(tablecxt, tabsize);
|
||||
hashtable = (TupleHashTable) MemoryContextAlloc(tablecxt,
|
||||
sizeof(TupleHashTableData));
|
||||
|
||||
hashtable->numCols = numCols;
|
||||
hashtable->keyColIdx = keyColIdx;
|
||||
@ -306,7 +312,20 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
|
||||
hashtable->tablecxt = tablecxt;
|
||||
hashtable->tempcxt = tempcxt;
|
||||
hashtable->entrysize = entrysize;
|
||||
hashtable->nbuckets = nbuckets;
|
||||
|
||||
MemSet(&hash_ctl, 0, sizeof(hash_ctl));
|
||||
hash_ctl.keysize = sizeof(TupleHashEntryData);
|
||||
hash_ctl.entrysize = entrysize;
|
||||
hash_ctl.hash = TupleHashTableHash;
|
||||
hash_ctl.match = TupleHashTableMatch;
|
||||
hash_ctl.hcxt = tablecxt;
|
||||
hashtable->hashtab = hash_create("TupleHashTable", (long) nbuckets,
|
||||
&hash_ctl,
|
||||
HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
|
||||
if (hashtable->hashtab == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
|
||||
return hashtable;
|
||||
}
|
||||
@ -327,19 +346,93 @@ TupleHashEntry
|
||||
LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
|
||||
bool *isnew)
|
||||
{
|
||||
int numCols = hashtable->numCols;
|
||||
AttrNumber *keyColIdx = hashtable->keyColIdx;
|
||||
HeapTuple tuple = slot->val;
|
||||
TupleDesc tupdesc = slot->ttc_tupleDescriptor;
|
||||
uint32 hashkey = 0;
|
||||
int i;
|
||||
int bucketno;
|
||||
TupleHashEntry entry;
|
||||
MemoryContext oldContext;
|
||||
TupleHashTable saveCurHT;
|
||||
bool found;
|
||||
|
||||
/* Need to run the hash function in short-lived context */
|
||||
/* Need to run the hash functions in short-lived context */
|
||||
oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
|
||||
|
||||
/*
|
||||
* Set up data needed by hash and match functions
|
||||
*
|
||||
* We save and restore CurTupleHashTable just in case someone manages
|
||||
* to invoke this code re-entrantly.
|
||||
*/
|
||||
hashtable->tupdesc = tupdesc;
|
||||
saveCurHT = CurTupleHashTable;
|
||||
CurTupleHashTable = hashtable;
|
||||
|
||||
/* Search the hash table */
|
||||
entry = (TupleHashEntry) hash_search(hashtable->hashtab,
|
||||
&tuple,
|
||||
isnew ? HASH_ENTER : HASH_FIND,
|
||||
&found);
|
||||
|
||||
if (isnew)
|
||||
{
|
||||
if (found)
|
||||
{
|
||||
/* found pre-existing entry */
|
||||
*isnew = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* created new entry ... we hope */
|
||||
if (entry == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
|
||||
/*
|
||||
* Zero any caller-requested space in the entry. (This zaps
|
||||
* the "key data" dynahash.c copied into the new entry, but
|
||||
* we don't care since we're about to overwrite it anyway.)
|
||||
*/
|
||||
MemSet(entry, 0, hashtable->entrysize);
|
||||
|
||||
/* Copy the first tuple into the table context */
|
||||
MemoryContextSwitchTo(hashtable->tablecxt);
|
||||
entry->firstTuple = heap_copytuple(tuple);
|
||||
|
||||
*isnew = true;
|
||||
}
|
||||
}
|
||||
|
||||
CurTupleHashTable = saveCurHT;
|
||||
|
||||
MemoryContextSwitchTo(oldContext);
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute the hash value for a tuple
|
||||
*
|
||||
* The passed-in key is a pointer to a HeapTuple pointer -- this is either
|
||||
* the firstTuple field of a TupleHashEntry struct, or the key value passed
|
||||
* to hash_search. We ignore the keysize.
|
||||
*
|
||||
* CurTupleHashTable must be set before calling this, since dynahash.c
|
||||
* doesn't provide any API that would let us get at the hashtable otherwise.
|
||||
*
|
||||
* Also, the caller must select an appropriate memory context for running
|
||||
* the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
|
||||
*/
|
||||
static uint32
|
||||
TupleHashTableHash(const void *key, Size keysize)
|
||||
{
|
||||
HeapTuple tuple = *(const HeapTuple *) key;
|
||||
TupleHashTable hashtable = CurTupleHashTable;
|
||||
int numCols = hashtable->numCols;
|
||||
AttrNumber *keyColIdx = hashtable->keyColIdx;
|
||||
TupleDesc tupdesc = hashtable->tupdesc;
|
||||
uint32 hashkey = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < numCols; i++)
|
||||
{
|
||||
AttrNumber att = keyColIdx[i];
|
||||
@ -360,72 +453,36 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
|
||||
hashkey ^= hkey;
|
||||
}
|
||||
}
|
||||
bucketno = hashkey % (uint32) hashtable->nbuckets;
|
||||
|
||||
for (entry = hashtable->buckets[bucketno];
|
||||
entry != NULL;
|
||||
entry = entry->next)
|
||||
{
|
||||
/* Quick check using hashkey */
|
||||
if (entry->hashkey != hashkey)
|
||||
continue;
|
||||
if (execTuplesMatch(entry->firstTuple,
|
||||
tuple,
|
||||
tupdesc,
|
||||
numCols, keyColIdx,
|
||||
hashtable->eqfunctions,
|
||||
hashtable->tempcxt))
|
||||
{
|
||||
if (isnew)
|
||||
*isnew = false;
|
||||
MemoryContextSwitchTo(oldContext);
|
||||
return entry;
|
||||
}
|
||||
}
|
||||
|
||||
/* Not there, so build a new one if requested */
|
||||
if (isnew)
|
||||
{
|
||||
MemoryContextSwitchTo(hashtable->tablecxt);
|
||||
|
||||
entry = (TupleHashEntry) palloc0(hashtable->entrysize);
|
||||
|
||||
entry->hashkey = hashkey;
|
||||
entry->firstTuple = heap_copytuple(tuple);
|
||||
|
||||
entry->next = hashtable->buckets[bucketno];
|
||||
hashtable->buckets[bucketno] = entry;
|
||||
|
||||
*isnew = true;
|
||||
}
|
||||
|
||||
MemoryContextSwitchTo(oldContext);
|
||||
|
||||
return entry;
|
||||
return hashkey;
|
||||
}
|
||||
|
||||
/*
|
||||
* Walk through all the entries of a hash table, in no special order.
|
||||
* Returns NULL when no more entries remain.
|
||||
* See whether two tuples (presumably of the same hash value) match
|
||||
*
|
||||
* Iterator state must be initialized with ResetTupleHashIterator() macro.
|
||||
* As above, the passed pointers are pointers to HeapTuple pointers.
|
||||
*
|
||||
* CurTupleHashTable must be set before calling this, since dynahash.c
|
||||
* doesn't provide any API that would let us get at the hashtable otherwise.
|
||||
*
|
||||
* Also, the caller must select an appropriate memory context for running
|
||||
* the compare functions. (dynahash.c doesn't change CurrentMemoryContext.)
|
||||
*/
|
||||
TupleHashEntry
|
||||
ScanTupleHashTable(TupleHashTable hashtable, TupleHashIterator *state)
|
||||
static int
|
||||
TupleHashTableMatch(const void *key1, const void *key2, Size keysize)
|
||||
{
|
||||
TupleHashEntry entry;
|
||||
HeapTuple tuple1 = *(const HeapTuple *) key1;
|
||||
HeapTuple tuple2 = *(const HeapTuple *) key2;
|
||||
TupleHashTable hashtable = CurTupleHashTable;
|
||||
|
||||
entry = state->next_entry;
|
||||
while (entry == NULL)
|
||||
{
|
||||
if (state->next_bucket >= hashtable->nbuckets)
|
||||
{
|
||||
/* No more entries in hashtable, so done */
|
||||
return NULL;
|
||||
}
|
||||
entry = hashtable->buckets[state->next_bucket++];
|
||||
}
|
||||
state->next_entry = entry->next;
|
||||
|
||||
return entry;
|
||||
if (execTuplesMatch(tuple1,
|
||||
tuple2,
|
||||
hashtable->tupdesc,
|
||||
hashtable->numCols,
|
||||
hashtable->keyColIdx,
|
||||
hashtable->eqfunctions,
|
||||
hashtable->tempcxt))
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
@ -45,7 +45,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.115 2003/08/08 21:41:41 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.116 2003/08/19 01:13:40 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -905,7 +905,7 @@ agg_fill_hash_table(AggState *aggstate)
|
||||
|
||||
aggstate->table_filled = true;
|
||||
/* Initialize to walk the hash table */
|
||||
ResetTupleHashIterator(&aggstate->hashiter);
|
||||
ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -920,7 +920,6 @@ agg_retrieve_hash_table(AggState *aggstate)
|
||||
bool *aggnulls;
|
||||
AggStatePerAgg peragg;
|
||||
AggStatePerGroup pergroup;
|
||||
TupleHashTable hashtable;
|
||||
AggHashEntry entry;
|
||||
TupleTableSlot *firstSlot;
|
||||
TupleTableSlot *resultSlot;
|
||||
@ -935,7 +934,6 @@ agg_retrieve_hash_table(AggState *aggstate)
|
||||
aggnulls = econtext->ecxt_aggnulls;
|
||||
projInfo = aggstate->ss.ps.ps_ProjInfo;
|
||||
peragg = aggstate->peragg;
|
||||
hashtable = aggstate->hashtable;
|
||||
firstSlot = aggstate->ss.ss_ScanTupleSlot;
|
||||
|
||||
/*
|
||||
@ -950,8 +948,7 @@ agg_retrieve_hash_table(AggState *aggstate)
|
||||
/*
|
||||
* Find the next entry in the hash table
|
||||
*/
|
||||
entry = (AggHashEntry) ScanTupleHashTable(hashtable,
|
||||
&aggstate->hashiter);
|
||||
entry = (AggHashEntry) ScanTupleHashTable(&aggstate->hashiter);
|
||||
if (entry == NULL)
|
||||
{
|
||||
/* No more entries in hashtable, so done */
|
||||
@ -1440,7 +1437,7 @@ ExecReScanAgg(AggState *node, ExprContext *exprCtxt)
|
||||
*/
|
||||
if (((PlanState *) node)->lefttree->chgParam == NULL)
|
||||
{
|
||||
ResetTupleHashIterator(&node->hashiter);
|
||||
ResetTupleHashIterator(node->hashtable, &node->hashiter);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.54 2003/08/08 21:41:42 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.55 2003/08/19 01:13:40 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -627,8 +627,8 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
|
||||
TupleHashIterator hashiter;
|
||||
TupleHashEntry entry;
|
||||
|
||||
ResetTupleHashIterator(&hashiter);
|
||||
while ((entry = ScanTupleHashTable(hashtable, &hashiter)) != NULL)
|
||||
ResetTupleHashIterator(hashtable, &hashiter);
|
||||
while ((entry = ScanTupleHashTable(&hashiter)) != NULL)
|
||||
{
|
||||
if (!execTuplesUnequal(entry->firstTuple,
|
||||
tuple,
|
||||
|
@ -9,7 +9,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.47 2003/08/04 02:40:06 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.48 2003/08/19 01:13:41 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -44,7 +44,6 @@
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
|
||||
#include "utils/dynahash.h"
|
||||
#include "utils/hsearch.h"
|
||||
#include "utils/memutils.h"
|
||||
@ -63,7 +62,6 @@
|
||||
* Private function prototypes
|
||||
*/
|
||||
static void *DynaHashAlloc(Size size);
|
||||
static uint32 call_hash(HTAB *hashp, void *k);
|
||||
static HASHSEGMENT seg_alloc(HTAB *hashp);
|
||||
static bool element_alloc(HTAB *hashp);
|
||||
static bool dir_realloc(HTAB *hashp);
|
||||
@ -133,6 +131,19 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
|
||||
else
|
||||
hashp->hash = string_hash; /* default hash function */
|
||||
|
||||
/*
|
||||
* If you don't specify a match function, it defaults to strncmp() if
|
||||
* you used string_hash (either explicitly or by default) and to
|
||||
* memcmp() otherwise. (Prior to PostgreSQL 7.4, memcmp() was always
|
||||
* used.)
|
||||
*/
|
||||
if (flags & HASH_COMPARE)
|
||||
hashp->match = info->match;
|
||||
else if (hashp->hash == string_hash)
|
||||
hashp->match = (HashCompareFunc) strncmp;
|
||||
else
|
||||
hashp->match = memcmp;
|
||||
|
||||
if (flags & HASH_SHARED_MEM)
|
||||
{
|
||||
/*
|
||||
@ -155,7 +166,7 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
|
||||
hashp->hctl = NULL;
|
||||
hashp->dir = NULL;
|
||||
hashp->alloc = MEM_ALLOC;
|
||||
hashp->hcxt = DynaHashCxt;
|
||||
hashp->hcxt = CurrentDynaHashCxt;
|
||||
hashp->isshared = false;
|
||||
}
|
||||
|
||||
@ -207,26 +218,13 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
|
||||
hashp->alloc = info->alloc;
|
||||
else
|
||||
{
|
||||
if (flags & HASH_CONTEXT)
|
||||
{
|
||||
/* hash table structures live in child of given context */
|
||||
CurrentDynaHashCxt = AllocSetContextCreate(info->hcxt,
|
||||
"DynaHashTable",
|
||||
ALLOCSET_DEFAULT_MINSIZE,
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
hashp->hcxt = CurrentDynaHashCxt;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* hash table structures live in child of DynaHashCxt */
|
||||
CurrentDynaHashCxt = AllocSetContextCreate(DynaHashCxt,
|
||||
"DynaHashTable",
|
||||
ALLOCSET_DEFAULT_MINSIZE,
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
hashp->hcxt = CurrentDynaHashCxt;
|
||||
}
|
||||
/* remaining hash table structures live in child of given context */
|
||||
hashp->hcxt = AllocSetContextCreate(CurrentDynaHashCxt,
|
||||
"DynaHashTable",
|
||||
ALLOCSET_DEFAULT_MINSIZE,
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
CurrentDynaHashCxt = hashp->hcxt;
|
||||
}
|
||||
|
||||
if (!init_htab(hashp, nelem))
|
||||
@ -351,7 +349,7 @@ init_htab(HTAB *hashp, long nelem)
|
||||
* NB: assumes that all hash structure parameters have default values!
|
||||
*/
|
||||
long
|
||||
hash_estimate_size(long num_entries, long entrysize)
|
||||
hash_estimate_size(long num_entries, Size entrysize)
|
||||
{
|
||||
long size = 0;
|
||||
long nBuckets,
|
||||
@ -447,7 +445,6 @@ void
|
||||
hash_stats(const char *where, HTAB *hashp)
|
||||
{
|
||||
#if HASH_STATISTICS
|
||||
|
||||
fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
|
||||
where, hashp->hctl->accesses, hashp->hctl->collisions);
|
||||
|
||||
@ -459,19 +456,16 @@ hash_stats(const char *where, HTAB *hashp)
|
||||
fprintf(stderr, "hash_stats: total expansions %ld\n",
|
||||
hash_expansions);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
/*******************************SEARCH ROUTINES *****************************/
|
||||
|
||||
static uint32
|
||||
call_hash(HTAB *hashp, void *k)
|
||||
{
|
||||
HASHHDR *hctl = hashp->hctl;
|
||||
uint32 hash_val,
|
||||
bucket;
|
||||
|
||||
hash_val = hashp->hash(k, (int) hctl->keysize);
|
||||
/* Convert a hash value to a bucket number */
|
||||
static inline uint32
|
||||
calc_bucket(HASHHDR *hctl, uint32 hash_val)
|
||||
{
|
||||
uint32 bucket;
|
||||
|
||||
bucket = hash_val & hctl->high_mask;
|
||||
if (bucket > hctl->max_bucket)
|
||||
@ -506,11 +500,12 @@ call_hash(HTAB *hashp, void *k)
|
||||
*/
|
||||
void *
|
||||
hash_search(HTAB *hashp,
|
||||
void *keyPtr,
|
||||
const void *keyPtr,
|
||||
HASHACTION action,
|
||||
bool *foundPtr)
|
||||
{
|
||||
HASHHDR *hctl = hashp->hctl;
|
||||
uint32 hashvalue = 0;
|
||||
uint32 bucket;
|
||||
long segment_num;
|
||||
long segment_ndx;
|
||||
@ -545,7 +540,12 @@ hash_search(HTAB *hashp,
|
||||
}
|
||||
else
|
||||
{
|
||||
bucket = call_hash(hashp, keyPtr);
|
||||
HashCompareFunc match;
|
||||
Size keysize = hctl->keysize;
|
||||
|
||||
hashvalue = hashp->hash(keyPtr, keysize);
|
||||
bucket = calc_bucket(hctl, hashvalue);
|
||||
|
||||
segment_num = bucket >> hctl->sshift;
|
||||
segment_ndx = MOD(bucket, hctl->ssize);
|
||||
|
||||
@ -560,9 +560,11 @@ hash_search(HTAB *hashp,
|
||||
/*
|
||||
* Follow collision chain looking for matching key
|
||||
*/
|
||||
match = hashp->match; /* save one fetch in inner loop */
|
||||
while (currBucket != NULL)
|
||||
{
|
||||
if (memcmp(ELEMENTKEY(currBucket), keyPtr, hctl->keysize) == 0)
|
||||
if (currBucket->hashvalue == hashvalue &&
|
||||
match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
|
||||
break;
|
||||
prevBucketPtr = &(currBucket->link);
|
||||
currBucket = *prevBucketPtr;
|
||||
@ -641,6 +643,7 @@ hash_search(HTAB *hashp,
|
||||
currBucket->link = NULL;
|
||||
|
||||
/* copy key into record */
|
||||
currBucket->hashvalue = hashvalue;
|
||||
memcpy(ELEMENTKEY(currBucket), keyPtr, hctl->keysize);
|
||||
|
||||
/* caller is expected to fill the data field on return */
|
||||
@ -802,7 +805,7 @@ expand_table(HTAB *hashp)
|
||||
|
||||
/*
|
||||
* Relocate records to the new bucket. NOTE: because of the way the
|
||||
* hash masking is done in call_hash, only one old bucket can need to
|
||||
* hash masking is done in calc_bucket, only one old bucket can need to
|
||||
* be split at this point. With a different way of reducing the hash
|
||||
* value, that might not be true!
|
||||
*/
|
||||
@ -820,8 +823,7 @@ expand_table(HTAB *hashp)
|
||||
currElement = nextElement)
|
||||
{
|
||||
nextElement = currElement->link;
|
||||
if ((long) call_hash(hashp, (void *) ELEMENTKEY(currElement))
|
||||
== old_bucket)
|
||||
if ((long) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
|
||||
{
|
||||
*oldlink = currElement;
|
||||
oldlink = &currElement->link;
|
||||
|
@ -9,7 +9,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.18 2003/08/04 02:40:06 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.19 2003/08/19 01:13:41 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -22,24 +22,21 @@
|
||||
/*
|
||||
* string_hash: hash function for keys that are null-terminated strings.
|
||||
*
|
||||
* NOTE: since dynahash.c backs this up with a fixed-length memcmp(),
|
||||
* the key must actually be zero-padded to the specified maximum length
|
||||
* to work correctly. However, if it is known that nothing after the
|
||||
* first zero byte is interesting, this is the right hash function to use.
|
||||
*
|
||||
* NOTE: this is the default hash function if none is specified.
|
||||
*/
|
||||
uint32
|
||||
string_hash(void *key, int keysize)
|
||||
string_hash(const void *key, Size keysize)
|
||||
{
|
||||
return DatumGetUInt32(hash_any((unsigned char *) key, strlen((char *) key)));
|
||||
return DatumGetUInt32(hash_any((const unsigned char *) key,
|
||||
(int) strlen((const char *) key)));
|
||||
}
|
||||
|
||||
/*
|
||||
* tag_hash: hash function for fixed-size tag values
|
||||
*/
|
||||
uint32
|
||||
tag_hash(void *key, int keysize)
|
||||
tag_hash(const void *key, Size keysize)
|
||||
{
|
||||
return DatumGetUInt32(hash_any((unsigned char *) key, keysize));
|
||||
return DatumGetUInt32(hash_any((const unsigned char *) key,
|
||||
(int) keysize));
|
||||
}
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: executor.h,v 1.99 2003/08/08 21:42:44 momjian Exp $
|
||||
* $Id: executor.h,v 1.100 2003/08/19 01:13:41 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -71,8 +71,6 @@ extern TupleHashTable BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
|
||||
extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
|
||||
TupleTableSlot *slot,
|
||||
bool *isnew);
|
||||
extern TupleHashEntry ScanTupleHashTable(TupleHashTable hashtable,
|
||||
TupleHashIterator *state);
|
||||
|
||||
/*
|
||||
* prototypes from functions in execJunk.c
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: execnodes.h,v 1.103 2003/08/08 21:42:47 momjian Exp $
|
||||
* $Id: execnodes.h,v 1.104 2003/08/19 01:13:41 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -21,6 +21,7 @@
|
||||
#include "nodes/bitmapset.h"
|
||||
#include "nodes/params.h"
|
||||
#include "nodes/plannodes.h"
|
||||
#include "utils/hsearch.h"
|
||||
#include "utils/tuplestore.h"
|
||||
|
||||
|
||||
@ -344,14 +345,14 @@ typedef struct TupleHashTableData *TupleHashTable;
|
||||
|
||||
typedef struct TupleHashEntryData
|
||||
{
|
||||
TupleHashEntry next; /* next entry in same hash bucket */
|
||||
uint32 hashkey; /* exact hash key of this entry */
|
||||
/* firstTuple must be the first field in this struct! */
|
||||
HeapTuple firstTuple; /* copy of first tuple in this group */
|
||||
/* there may be additional data beyond the end of this struct */
|
||||
} TupleHashEntryData; /* VARIABLE LENGTH STRUCT */
|
||||
|
||||
typedef struct TupleHashTableData
|
||||
{
|
||||
HTAB *hashtab; /* underlying dynahash table */
|
||||
int numCols; /* number of columns in lookup key */
|
||||
AttrNumber *keyColIdx; /* attr numbers of key columns */
|
||||
FmgrInfo *eqfunctions; /* lookup data for comparison functions */
|
||||
@ -359,19 +360,15 @@ typedef struct TupleHashTableData
|
||||
MemoryContext tablecxt; /* memory context containing table */
|
||||
MemoryContext tempcxt; /* context for function evaluations */
|
||||
Size entrysize; /* actual size to make each hash entry */
|
||||
int nbuckets; /* number of buckets in hash table */
|
||||
TupleHashEntry buckets[1]; /* VARIABLE LENGTH ARRAY */
|
||||
} TupleHashTableData; /* VARIABLE LENGTH STRUCT */
|
||||
TupleDesc tupdesc; /* tuple descriptor */
|
||||
} TupleHashTableData;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
TupleHashEntry next_entry; /* next entry in current chain */
|
||||
int next_bucket; /* next chain */
|
||||
} TupleHashIterator;
|
||||
typedef HASH_SEQ_STATUS TupleHashIterator;
|
||||
|
||||
#define ResetTupleHashIterator(iter) \
|
||||
((iter)->next_entry = NULL, \
|
||||
(iter)->next_bucket = 0)
|
||||
#define ResetTupleHashIterator(htable, iter) \
|
||||
hash_seq_init(iter, (htable)->hashtab)
|
||||
#define ScanTupleHashTable(iter) \
|
||||
((TupleHashEntry) hash_seq_search(iter))
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: hsearch.h,v 1.28 2003/08/04 02:40:15 momjian Exp $
|
||||
* $Id: hsearch.h,v 1.29 2003/08/19 01:13:41 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -15,6 +15,23 @@
|
||||
#define HSEARCH_H
|
||||
|
||||
|
||||
/*
|
||||
* Hash and comparison functions must have these signatures. Comparison
|
||||
* functions return zero for match, nonzero for no match. (The comparison
|
||||
* function definition is designed to allow memcmp() and strncmp() to be
|
||||
* used directly as key comparison functions.)
|
||||
*/
|
||||
typedef uint32 (*HashValueFunc) (const void *key, Size keysize);
|
||||
typedef int (*HashCompareFunc) (const void *key1, const void *key2,
|
||||
Size keysize);
|
||||
|
||||
/*
|
||||
* Space allocation function for a hashtable --- designed to match malloc().
|
||||
* Note: there is no free function API; can't destroy a hashtable unless you
|
||||
* use the default allocator.
|
||||
*/
|
||||
typedef void *(*HashAllocFunc) (Size request);
|
||||
|
||||
/*
|
||||
* Constants
|
||||
*
|
||||
@ -44,6 +61,7 @@
|
||||
typedef struct HASHELEMENT
|
||||
{
|
||||
struct HASHELEMENT *link; /* link to next entry in same bucket */
|
||||
uint32 hashvalue; /* hash function result for this entry */
|
||||
} HASHELEMENT;
|
||||
|
||||
/* A hash bucket is a linked list of HASHELEMENTs */
|
||||
@ -64,8 +82,8 @@ typedef struct HASHHDR
|
||||
long ffactor; /* Fill factor */
|
||||
long nentries; /* Number of entries in hash table */
|
||||
long nsegs; /* Number of allocated segments */
|
||||
long keysize; /* hash key length in bytes */
|
||||
long entrysize; /* total user element size in bytes */
|
||||
Size keysize; /* hash key length in bytes */
|
||||
Size entrysize; /* total user element size in bytes */
|
||||
long max_dsize; /* 'dsize' limit if directory is fixed
|
||||
* size */
|
||||
HASHELEMENT *freeList; /* linked list of free elements */
|
||||
@ -83,8 +101,9 @@ typedef struct HTAB
|
||||
{
|
||||
HASHHDR *hctl; /* shared control information */
|
||||
HASHSEGMENT *dir; /* directory of segment starts */
|
||||
uint32 (*hash) (void *key, int keysize); /* Hash Function */
|
||||
void *(*alloc) (Size); /* memory allocator */
|
||||
HashValueFunc hash; /* hash function */
|
||||
HashCompareFunc match; /* key comparison function */
|
||||
HashAllocFunc alloc; /* memory allocator */
|
||||
MemoryContext hcxt; /* memory context if default allocator
|
||||
* used */
|
||||
char *tabname; /* table name (for error messages) */
|
||||
@ -97,28 +116,30 @@ typedef struct HASHCTL
|
||||
{
|
||||
long ssize; /* Segment Size */
|
||||
long dsize; /* (initial) Directory Size */
|
||||
long ffactor; /* Fill factor */
|
||||
uint32 (*hash) (void *key, int keysize); /* Hash Function */
|
||||
long keysize; /* hash key length in bytes */
|
||||
long entrysize; /* total user element size in bytes */
|
||||
long max_dsize; /* limit to dsize if directory size is
|
||||
* limited */
|
||||
void *(*alloc) (Size); /* memory allocation function */
|
||||
long ffactor; /* Fill factor */
|
||||
Size keysize; /* hash key length in bytes */
|
||||
Size entrysize; /* total user element size in bytes */
|
||||
HashValueFunc hash; /* hash function */
|
||||
HashCompareFunc match; /* key comparison function */
|
||||
HashAllocFunc alloc; /* memory allocator */
|
||||
HASHSEGMENT *dir; /* directory of segment starts */
|
||||
HASHHDR *hctl; /* location of header in shared mem */
|
||||
MemoryContext hcxt; /* memory context to use for allocations */
|
||||
} HASHCTL;
|
||||
|
||||
/* Flags to indicate which parameters are supplied */
|
||||
#define HASH_SEGMENT 0x002 /* Setting segment size */
|
||||
#define HASH_DIRSIZE 0x004 /* Setting directory size */
|
||||
#define HASH_FFACTOR 0x008 /* Setting fill factor */
|
||||
#define HASH_SEGMENT 0x002 /* Set segment size */
|
||||
#define HASH_DIRSIZE 0x004 /* Set directory size */
|
||||
#define HASH_FFACTOR 0x008 /* Set fill factor */
|
||||
#define HASH_FUNCTION 0x010 /* Set user defined hash function */
|
||||
#define HASH_ELEM 0x020 /* Setting key/entry size */
|
||||
#define HASH_SHARED_MEM 0x040 /* Setting shared mem const */
|
||||
#define HASH_ELEM 0x020 /* Set key/entry size */
|
||||
#define HASH_SHARED_MEM 0x040 /* Set shared mem const */
|
||||
#define HASH_ATTACH 0x080 /* Do not initialize hctl */
|
||||
#define HASH_ALLOC 0x100 /* Setting memory allocator */
|
||||
#define HASH_CONTEXT 0x200 /* Setting explicit memory context */
|
||||
#define HASH_ALLOC 0x100 /* Set memory allocator */
|
||||
#define HASH_CONTEXT 0x200 /* Set explicit memory context */
|
||||
#define HASH_COMPARE 0x400 /* Set user defined comparison function */
|
||||
|
||||
|
||||
/* max_dsize value to indicate expansible directory */
|
||||
@ -151,17 +172,17 @@ extern HTAB *hash_create(const char *tabname, long nelem,
|
||||
HASHCTL *info, int flags);
|
||||
extern void hash_destroy(HTAB *hashp);
|
||||
extern void hash_stats(const char *where, HTAB *hashp);
|
||||
extern void *hash_search(HTAB *hashp, void *keyPtr, HASHACTION action,
|
||||
extern void *hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action,
|
||||
bool *foundPtr);
|
||||
extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
|
||||
extern void *hash_seq_search(HASH_SEQ_STATUS *status);
|
||||
extern long hash_estimate_size(long num_entries, long entrysize);
|
||||
extern long hash_estimate_size(long num_entries, Size entrysize);
|
||||
extern long hash_select_dirsize(long num_entries);
|
||||
|
||||
/*
|
||||
* prototypes for functions in hashfn.c
|
||||
*/
|
||||
extern uint32 string_hash(void *key, int keysize);
|
||||
extern uint32 tag_hash(void *key, int keysize);
|
||||
extern uint32 string_hash(const void *key, Size keysize);
|
||||
extern uint32 tag_hash(const void *key, Size keysize);
|
||||
|
||||
#endif /* HSEARCH_H */
|
||||
|
@ -350,183 +350,183 @@ select f3, myaggp01a(*) from t group by f3;
|
||||
f3 | myaggp01a
|
||||
----+-----------
|
||||
b | {}
|
||||
a | {}
|
||||
c | {}
|
||||
a | {}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggp03a(*) from t group by f3;
|
||||
f3 | myaggp03a
|
||||
----+-----------
|
||||
b | {}
|
||||
a | {}
|
||||
c | {}
|
||||
a | {}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggp03b(*) from t group by f3;
|
||||
f3 | myaggp03b
|
||||
----+-----------
|
||||
b | {}
|
||||
a | {}
|
||||
c | {}
|
||||
a | {}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggp05a(f1) from t group by f3;
|
||||
f3 | myaggp05a
|
||||
----+-----------
|
||||
b | {1,2,3}
|
||||
a | {1,2,3}
|
||||
c | {1,2}
|
||||
a | {1,2,3}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggp06a(f1) from t group by f3;
|
||||
f3 | myaggp06a
|
||||
----+-----------
|
||||
b | {}
|
||||
a | {}
|
||||
c | {}
|
||||
a | {}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggp08a(f1) from t group by f3;
|
||||
f3 | myaggp08a
|
||||
----+-----------
|
||||
b | {}
|
||||
a | {}
|
||||
c | {}
|
||||
a | {}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggp09a(f1) from t group by f3;
|
||||
f3 | myaggp09a
|
||||
----+-----------
|
||||
b | {}
|
||||
a | {}
|
||||
c | {}
|
||||
a | {}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggp09b(f1) from t group by f3;
|
||||
f3 | myaggp09b
|
||||
----+-----------
|
||||
b | {}
|
||||
a | {}
|
||||
c | {}
|
||||
a | {}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggp10a(f1) from t group by f3;
|
||||
f3 | myaggp10a
|
||||
----+-----------
|
||||
b | {1,2,3}
|
||||
a | {1,2,3}
|
||||
c | {1,2}
|
||||
a | {1,2,3}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggp10b(f1) from t group by f3;
|
||||
f3 | myaggp10b
|
||||
----+-----------
|
||||
b | {1,2,3}
|
||||
a | {1,2,3}
|
||||
c | {1,2}
|
||||
a | {1,2,3}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggp20a(f1) from t group by f3;
|
||||
f3 | myaggp20a
|
||||
----+-----------
|
||||
b | {1,2,3}
|
||||
a | {1,2,3}
|
||||
c | {1,2}
|
||||
a | {1,2,3}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggp20b(f1) from t group by f3;
|
||||
f3 | myaggp20b
|
||||
----+-----------
|
||||
b | {1,2,3}
|
||||
a | {1,2,3}
|
||||
c | {1,2}
|
||||
a | {1,2,3}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggn01a(*) from t group by f3;
|
||||
f3 | myaggn01a
|
||||
----+-----------
|
||||
b | {}
|
||||
a | {}
|
||||
c | {}
|
||||
a | {}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggn01b(*) from t group by f3;
|
||||
f3 | myaggn01b
|
||||
----+-----------
|
||||
b | {}
|
||||
a | {}
|
||||
c | {}
|
||||
a | {}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggn03a(*) from t group by f3;
|
||||
f3 | myaggn03a
|
||||
----+-----------
|
||||
b | {}
|
||||
a | {}
|
||||
c | {}
|
||||
a | {}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggn05a(f1) from t group by f3;
|
||||
f3 | myaggn05a
|
||||
----+-----------
|
||||
b | {1,2,3}
|
||||
a | {1,2,3}
|
||||
c | {1,2}
|
||||
a | {1,2,3}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggn05b(f1) from t group by f3;
|
||||
f3 | myaggn05b
|
||||
----+-----------
|
||||
b | {1,2,3}
|
||||
a | {1,2,3}
|
||||
c | {1,2}
|
||||
a | {1,2,3}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggn06a(f1) from t group by f3;
|
||||
f3 | myaggn06a
|
||||
----+-----------
|
||||
b | {}
|
||||
a | {}
|
||||
c | {}
|
||||
a | {}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggn06b(f1) from t group by f3;
|
||||
f3 | myaggn06b
|
||||
----+-----------
|
||||
b | {}
|
||||
a | {}
|
||||
c | {}
|
||||
a | {}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggn08a(f1) from t group by f3;
|
||||
f3 | myaggn08a
|
||||
----+-----------
|
||||
b | {}
|
||||
a | {}
|
||||
c | {}
|
||||
a | {}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggn08b(f1) from t group by f3;
|
||||
f3 | myaggn08b
|
||||
----+-----------
|
||||
b | {}
|
||||
a | {}
|
||||
c | {}
|
||||
a | {}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggn09a(f1) from t group by f3;
|
||||
f3 | myaggn09a
|
||||
----+-----------
|
||||
b | {}
|
||||
a | {}
|
||||
c | {}
|
||||
a | {}
|
||||
(3 rows)
|
||||
|
||||
select f3, myaggn10a(f1) from t group by f3;
|
||||
f3 | myaggn10a
|
||||
----+-----------
|
||||
b | {1,2,3}
|
||||
a | {1,2,3}
|
||||
c | {1,2}
|
||||
a | {1,2,3}
|
||||
(3 rows)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user