Speed up Hash Join by making ExprStates support hashing

Here we add ExprState support for obtaining a 32-bit hash value from a
list of expressions.  This allows both faster hashing and also JIT
compilation of these expressions.  This is especially useful when hash
joins have multiple join keys as the previous code called ExecEvalExpr on
each hash join key individually and that was inefficient as tuple
deformation would have only taken into account one key at a time, which
could lead to walking the tuple once for each join key.  With the new
code, we'll determine the maximum attribute required and deform the tuple
to that point only once.

Some performance tests done with this change have shown up to a 20%
performance increase of a query containing a Hash Join without JIT
compilation and up to a 26% performance increase when JIT is enabled and
optimization and inlining were performed by the JIT compiler.  With a
single join column the gain was smaller, at 14% both with and without
JIT.  This test was done using a fairly small hash
table and a large number of hash probes.  The increase will likely be
less with large tables, especially ones larger than L3 cache as memory
pressure is more likely to be the limiting factor there.

This commit only addresses Hash Joins, but lays expression evaluation
and JIT compilation infrastructure for other hashing needs such as Hash
Aggregate.

Author: David Rowley
Reviewed-by: Alexey Dvoichenkov <alexey@hyperplane.net>
Reviewed-by: Tels <nospam-pg-abuse@bloodgate.com>
Discussion: https://postgr.es/m/CAApHDvoexAxgQFNQD_GRkr2O_eJUD1-wUGm%3Dm0L%2BGc%3DT%3DkEa4g%40mail.gmail.com
This commit is contained in:
David Rowley 2024-08-20 13:38:22 +12:00
parent 9380e5f129
commit adf97c1562
10 changed files with 651 additions and 203 deletions

View File

@ -3969,6 +3969,147 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
} }
} }
/*
* Build an ExprState that calls the given hash function(s) on the given
* 'hash_exprs'. When multiple expressions are present, the hash values
* returned by each hash function are combined to produce a single hash value.
* The combination is done by the EEOP_HASHDATUM_NEXT32[_STRICT] steps that
* are emitted for every expression after the first (or for every expression
* when a non-zero 'init_value' is given).
*
* desc: tuple descriptor for the to-be-hashed expressions (not referenced in
* the body of this function)
* ops: TupleTableSlotOps for the TupleDesc (likewise not referenced here)
* hashfunc_oids: Oid for each hash function to call, one for each entry in
* 'hash_exprs'
* collations: list of collation Oids, one for each entry in 'hash_exprs', to
* use when calling the corresponding hash function
* hash_exprs: list of expressions to hash the value of
* opstrict: array parallel to 'hashfunc_oids'; a true entry marks that key
* as strict for the purposes of NULL handling (see keep_nulls)
* parent: PlanState node that the 'hash_exprs' will be evaluated at
* init_value: Normally 0, but can be set to other values to seed the hash
* with some other value. Using non-zero is slightly less efficient but can
* be useful.
* keep_nulls: if false, evaluation of the returned ExprState will abort early
* returning NULL as soon as a key whose 'opstrict' entry is true evaluates
* to NULL. When set to true, no early abort is set up: NULL input Datums
* are skipped and do not make the result NULL. NULL inputs for keys whose
* 'opstrict' entry is false are skipped regardless of this setting.
*
* Returns the ExprState after it has been passed to ExecReadyExpr().
*/
ExprState *
ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops,
const Oid *hashfunc_oids, const List *collations,
const List *hash_exprs, const bool *opstrict,
PlanState *parent, uint32 init_value, bool keep_nulls)
{
ExprState *state = makeNode(ExprState);
ExprEvalStep scratch = {0};
List *adjust_jumps = NIL;
ListCell *lc;
ListCell *lc2;
intptr_t strict_opcode;
intptr_t opcode;
/* each expression to hash needs exactly one collation */
Assert(list_length(hash_exprs) == list_length(collations));
state->parent = parent;
/* Insert setup steps as needed. */
ExecCreateExprSetupSteps(state, (Node *) hash_exprs);
if (init_value == 0)
{
/*
* No initial value, so we can assign the result of the hash function
* for the first hash_expr without having to concern ourselves with
* combining the result with any initial value.
*/
strict_opcode = EEOP_HASHDATUM_FIRST_STRICT;
opcode = EEOP_HASHDATUM_FIRST;
}
else
{
/* Set up operation to set the initial value. */
scratch.opcode = EEOP_HASHDATUM_SET_INITVAL;
scratch.d.hashdatum_initvalue.init_value = UInt32GetDatum(init_value);
scratch.resvalue = &state->resvalue;
scratch.resnull = &state->resnull;
ExprEvalPushStep(state, &scratch);
/*
* When using an initial value use the NEXT32/NEXT32_STRICT ops as the
* FIRST/FIRST_STRICT ops would overwrite the stored initial value.
*/
strict_opcode = EEOP_HASHDATUM_NEXT32_STRICT;
opcode = EEOP_HASHDATUM_NEXT32;
}
/* Emit one argument-evaluation sequence plus one hash step per key. */
forboth(lc, hash_exprs, lc2, collations)
{
Expr *expr = (Expr *) lfirst(lc);
FmgrInfo *finfo;
FunctionCallInfo fcinfo;
int i = foreach_current_index(lc);
Oid funcid;
Oid inputcollid = lfirst_oid(lc2);
funcid = hashfunc_oids[i];
/* Allocate hash function lookup data. */
finfo = palloc0(sizeof(FmgrInfo));
fcinfo = palloc0(SizeForFunctionCallInfo(1));
fmgr_info(funcid, finfo);
/*
* Build the steps to evaluate the hash function's argument and have it
* so the value of that is stored in the 0th argument of the hash func.
*/
ExecInitExprRec(expr,
state,
&fcinfo->args[0].value,
&fcinfo->args[0].isnull);
/* the hash step itself reads and writes the ExprState's result */
scratch.resvalue = &state->resvalue;
scratch.resnull = &state->resnull;
/* Initialize function call parameter structure too */
InitFunctionCallInfoData(*fcinfo, finfo, 1, inputcollid, NULL, NULL);
scratch.d.hashdatum.finfo = finfo;
scratch.d.hashdatum.fcinfo_data = fcinfo;
scratch.d.hashdatum.fn_addr = finfo->fn_addr;
/*
* Only use the early-abort (strict) variant when this key is marked
* strict and the caller did not ask for NULLs to be kept.
*/
scratch.opcode = opstrict[i] && !keep_nulls ? strict_opcode : opcode;
/* placeholder; the real jump target is filled in below */
scratch.d.hashdatum.jumpdone = -1;
ExprEvalPushStep(state, &scratch);
/* remember the index of the step just pushed so it can be patched */
adjust_jumps = lappend_int(adjust_jumps, state->steps_len - 1);
/*
* For subsequent keys we must combine the hash value with the
* previous hashes.
*/
strict_opcode = EEOP_HASHDATUM_NEXT32_STRICT;
opcode = EEOP_HASHDATUM_NEXT32;
}
/*
* adjust jump targets: point every hash step's jumpdone at the index the
* EEOP_DONE step pushed below is about to occupy
*/
foreach(lc, adjust_jumps)
{
ExprEvalStep *as = &state->steps[lfirst_int(lc)];
Assert(as->opcode == EEOP_HASHDATUM_FIRST ||
as->opcode == EEOP_HASHDATUM_FIRST_STRICT ||
as->opcode == EEOP_HASHDATUM_NEXT32 ||
as->opcode == EEOP_HASHDATUM_NEXT32_STRICT);
Assert(as->d.hashdatum.jumpdone == -1);
as->d.hashdatum.jumpdone = state->steps_len;
}
/* terminate the step list; EEOP_DONE has no result location of its own */
scratch.resvalue = NULL;
scratch.resnull = NULL;
scratch.opcode = EEOP_DONE;
ExprEvalPushStep(state, &scratch);
ExecReadyExpr(state);
return state;
}
/* /*
* Build equality expression that can be evaluated using ExecQual(), returning * Build equality expression that can be evaluated using ExecQual(), returning
* true if the expression context's inner/outer tuple are NOT DISTINCT. I.e * true if the expression context's inner/outer tuple are NOT DISTINCT. I.e

View File

@ -477,6 +477,11 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
&&CASE_EEOP_DOMAIN_TESTVAL, &&CASE_EEOP_DOMAIN_TESTVAL,
&&CASE_EEOP_DOMAIN_NOTNULL, &&CASE_EEOP_DOMAIN_NOTNULL,
&&CASE_EEOP_DOMAIN_CHECK, &&CASE_EEOP_DOMAIN_CHECK,
&&CASE_EEOP_HASHDATUM_SET_INITVAL,
&&CASE_EEOP_HASHDATUM_FIRST,
&&CASE_EEOP_HASHDATUM_FIRST_STRICT,
&&CASE_EEOP_HASHDATUM_NEXT32,
&&CASE_EEOP_HASHDATUM_NEXT32_STRICT,
&&CASE_EEOP_CONVERT_ROWTYPE, &&CASE_EEOP_CONVERT_ROWTYPE,
&&CASE_EEOP_SCALARARRAYOP, &&CASE_EEOP_SCALARARRAYOP,
&&CASE_EEOP_HASHED_SCALARARRAYOP, &&CASE_EEOP_HASHED_SCALARARRAYOP,
@ -1543,6 +1548,111 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
EEO_NEXT(); EEO_NEXT();
} }
EEO_CASE(EEOP_HASHDATUM_SET_INITVAL)
{
*op->resvalue = op->d.hashdatum_initvalue.init_value;
*op->resnull = false;
EEO_NEXT();
}
EEO_CASE(EEOP_HASHDATUM_FIRST)
{
FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
/*
* Save the Datum on non-null inputs, otherwise store 0 so that
* subsequent NEXT32 operations combine with an initialized value.
*/
if (!fcinfo->args[0].isnull)
*op->resvalue = op->d.hashdatum.fn_addr(fcinfo);
else
*op->resvalue = (Datum) 0;
*op->resnull = false;
EEO_NEXT();
}
EEO_CASE(EEOP_HASHDATUM_FIRST_STRICT)
{
FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
if (fcinfo->args[0].isnull)
{
/*
* With strict we have the expression return NULL instead of
* ignoring NULL input values. We've nothing more to do after
* finding a NULL.
*/
*op->resnull = true;
*op->resvalue = (Datum) 0;
EEO_JUMP(op->d.hashdatum.jumpdone);
}
/* execute the hash function and save the resulting value */
*op->resvalue = op->d.hashdatum.fn_addr(fcinfo);
*op->resnull = false;
EEO_NEXT();
}
EEO_CASE(EEOP_HASHDATUM_NEXT32)
{
FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
uint32 existing_hash = DatumGetUInt32(*op->resvalue);
/* combine successive hash values by rotating */
existing_hash = pg_rotate_left32(existing_hash, 1);
/* leave the hash value alone on NULL inputs */
if (!fcinfo->args[0].isnull)
{
uint32 hashvalue;
/* execute hash func and combine with previous hash value */
hashvalue = DatumGetUInt32(op->d.hashdatum.fn_addr(fcinfo));
existing_hash = existing_hash ^ hashvalue;
}
*op->resvalue = UInt32GetDatum(existing_hash);
*op->resnull = false;
EEO_NEXT();
}
EEO_CASE(EEOP_HASHDATUM_NEXT32_STRICT)
{
FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
if (fcinfo->args[0].isnull)
{
/*
* With strict we have the expression return NULL instead of
* ignoring NULL input values. We've nothing more to do after
* finding a NULL.
*/
*op->resnull = true;
*op->resvalue = (Datum) 0;
EEO_JUMP(op->d.hashdatum.jumpdone);
}
else
{
uint32 existing_hash = DatumGetUInt32(*op->resvalue);
uint32 hashvalue;
/* combine successive hash values by rotating */
existing_hash = pg_rotate_left32(existing_hash, 1);
/* execute hash func and combine with previous hash value */
hashvalue = DatumGetUInt32(op->d.hashdatum.fn_addr(fcinfo));
*op->resvalue = UInt32GetDatum(existing_hash ^ hashvalue);
*op->resnull = false;
}
EEO_NEXT();
}
EEO_CASE(EEOP_XMLEXPR) EEO_CASE(EEOP_XMLEXPR)
{ {
/* too complex for an inline implementation */ /* too complex for an inline implementation */

View File

@ -47,7 +47,8 @@ static void ExecHashIncreaseNumBatches(HashJoinTable hashtable);
static void ExecHashIncreaseNumBuckets(HashJoinTable hashtable); static void ExecHashIncreaseNumBuckets(HashJoinTable hashtable);
static void ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable); static void ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable);
static void ExecParallelHashIncreaseNumBuckets(HashJoinTable hashtable); static void ExecParallelHashIncreaseNumBuckets(HashJoinTable hashtable);
static void ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, static void ExecHashBuildSkewHash(HashState *hashstate,
HashJoinTable hashtable, Hash *node,
int mcvsToUse); int mcvsToUse);
static void ExecHashSkewTableInsert(HashJoinTable hashtable, static void ExecHashSkewTableInsert(HashJoinTable hashtable,
TupleTableSlot *slot, TupleTableSlot *slot,
@ -138,11 +139,9 @@ static void
MultiExecPrivateHash(HashState *node) MultiExecPrivateHash(HashState *node)
{ {
PlanState *outerNode; PlanState *outerNode;
List *hashkeys;
HashJoinTable hashtable; HashJoinTable hashtable;
TupleTableSlot *slot; TupleTableSlot *slot;
ExprContext *econtext; ExprContext *econtext;
uint32 hashvalue;
/* /*
* get state info from node * get state info from node
@ -153,7 +152,6 @@ MultiExecPrivateHash(HashState *node)
/* /*
* set expression context * set expression context
*/ */
hashkeys = node->hashkeys;
econtext = node->ps.ps_ExprContext; econtext = node->ps.ps_ExprContext;
/* /*
@ -162,15 +160,23 @@ MultiExecPrivateHash(HashState *node)
*/ */
for (;;) for (;;)
{ {
bool isnull;
Datum hashdatum;
slot = ExecProcNode(outerNode); slot = ExecProcNode(outerNode);
if (TupIsNull(slot)) if (TupIsNull(slot))
break; break;
/* We have to compute the hash value */ /* We have to compute the hash value */
econtext->ecxt_outertuple = slot; econtext->ecxt_outertuple = slot;
if (ExecHashGetHashValue(hashtable, econtext, hashkeys,
false, hashtable->keepNulls, ResetExprContext(econtext);
&hashvalue))
hashdatum = ExecEvalExprSwitchContext(node->hash_expr, econtext,
&isnull);
if (!isnull)
{ {
uint32 hashvalue = DatumGetUInt32(hashdatum);
int bucketNumber; int bucketNumber;
bucketNumber = ExecHashGetSkewBucket(hashtable, hashvalue); bucketNumber = ExecHashGetSkewBucket(hashtable, hashvalue);
@ -215,7 +221,6 @@ MultiExecParallelHash(HashState *node)
{ {
ParallelHashJoinState *pstate; ParallelHashJoinState *pstate;
PlanState *outerNode; PlanState *outerNode;
List *hashkeys;
HashJoinTable hashtable; HashJoinTable hashtable;
TupleTableSlot *slot; TupleTableSlot *slot;
ExprContext *econtext; ExprContext *econtext;
@ -232,7 +237,6 @@ MultiExecParallelHash(HashState *node)
/* /*
* set expression context * set expression context
*/ */
hashkeys = node->hashkeys;
econtext = node->ps.ps_ExprContext; econtext = node->ps.ps_ExprContext;
/* /*
@ -279,13 +283,20 @@ MultiExecParallelHash(HashState *node)
ExecParallelHashTableSetCurrentBatch(hashtable, 0); ExecParallelHashTableSetCurrentBatch(hashtable, 0);
for (;;) for (;;)
{ {
bool isnull;
slot = ExecProcNode(outerNode); slot = ExecProcNode(outerNode);
if (TupIsNull(slot)) if (TupIsNull(slot))
break; break;
econtext->ecxt_outertuple = slot; econtext->ecxt_outertuple = slot;
if (ExecHashGetHashValue(hashtable, econtext, hashkeys,
false, hashtable->keepNulls, ResetExprContext(econtext);
&hashvalue))
hashvalue = DatumGetUInt32(ExecEvalExprSwitchContext(node->hash_expr,
econtext,
&isnull));
if (!isnull)
ExecParallelHashTableInsert(hashtable, slot, hashvalue); ExecParallelHashTableInsert(hashtable, slot, hashvalue);
hashtable->partialTuples++; hashtable->partialTuples++;
} }
@ -371,8 +382,8 @@ ExecInitHash(Hash *node, EState *estate, int eflags)
hashstate->ps.plan = (Plan *) node; hashstate->ps.plan = (Plan *) node;
hashstate->ps.state = estate; hashstate->ps.state = estate;
hashstate->ps.ExecProcNode = ExecHash; hashstate->ps.ExecProcNode = ExecHash;
/* delay building hashtable until ExecHashTableCreate() in executor run */
hashstate->hashtable = NULL; hashstate->hashtable = NULL;
hashstate->hashkeys = NIL; /* will be set by parent HashJoin */
/* /*
* Miscellaneous initialization * Miscellaneous initialization
@ -393,12 +404,16 @@ ExecInitHash(Hash *node, EState *estate, int eflags)
ExecInitResultTupleSlotTL(&hashstate->ps, &TTSOpsMinimalTuple); ExecInitResultTupleSlotTL(&hashstate->ps, &TTSOpsMinimalTuple);
hashstate->ps.ps_ProjInfo = NULL; hashstate->ps.ps_ProjInfo = NULL;
/*
* initialize child expressions
*/
Assert(node->plan.qual == NIL); Assert(node->plan.qual == NIL);
hashstate->hashkeys =
ExecInitExprList(node->hashkeys, (PlanState *) hashstate); /*
* Delay initialization of hash_expr until ExecInitHashJoin(). We cannot
* build the ExprState here as we don't yet know the join type we're going
* to be hashing values for and we need to know that before calling
* ExecBuildHash32Expr as the keep_nulls parameter depends on the join
* type.
*/
hashstate->hash_expr = NULL;
return hashstate; return hashstate;
} }
@ -429,7 +444,7 @@ ExecEndHash(HashState *node)
* ---------------------------------------------------------------- * ----------------------------------------------------------------
*/ */
HashJoinTable HashJoinTable
ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, bool keepNulls) ExecHashTableCreate(HashState *state)
{ {
Hash *node; Hash *node;
HashJoinTable hashtable; HashJoinTable hashtable;
@ -440,10 +455,6 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
double rows; double rows;
int num_skew_mcvs; int num_skew_mcvs;
int log2_nbuckets; int log2_nbuckets;
int nkeys;
int i;
ListCell *ho;
ListCell *hc;
MemoryContext oldcxt; MemoryContext oldcxt;
/* /*
@ -487,7 +498,6 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
hashtable->log2_nbuckets = log2_nbuckets; hashtable->log2_nbuckets = log2_nbuckets;
hashtable->log2_nbuckets_optimal = log2_nbuckets; hashtable->log2_nbuckets_optimal = log2_nbuckets;
hashtable->buckets.unshared = NULL; hashtable->buckets.unshared = NULL;
hashtable->keepNulls = keepNulls;
hashtable->skewEnabled = false; hashtable->skewEnabled = false;
hashtable->skewBucket = NULL; hashtable->skewBucket = NULL;
hashtable->skewBucketLen = 0; hashtable->skewBucketLen = 0;
@ -540,32 +550,6 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
oldcxt = MemoryContextSwitchTo(hashtable->hashCxt); oldcxt = MemoryContextSwitchTo(hashtable->hashCxt);
/*
* Get info about the hash functions to be used for each hash key. Also
* remember whether the join operators are strict.
*/
nkeys = list_length(hashOperators);
hashtable->outer_hashfunctions = palloc_array(FmgrInfo, nkeys);
hashtable->inner_hashfunctions = palloc_array(FmgrInfo, nkeys);
hashtable->hashStrict = palloc_array(bool, nkeys);
hashtable->collations = palloc_array(Oid, nkeys);
i = 0;
forboth(ho, hashOperators, hc, hashCollations)
{
Oid hashop = lfirst_oid(ho);
Oid left_hashfn;
Oid right_hashfn;
if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn))
elog(ERROR, "could not find hash function for hash operator %u",
hashop);
fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]);
fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]);
hashtable->hashStrict[i] = op_strict(hashop);
hashtable->collations[i] = lfirst_oid(hc);
i++;
}
if (nbatch > 1 && hashtable->parallel_state == NULL) if (nbatch > 1 && hashtable->parallel_state == NULL)
{ {
MemoryContext oldctx; MemoryContext oldctx;
@ -652,7 +636,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
* it.) * it.)
*/ */
if (nbatch > 1) if (nbatch > 1)
ExecHashBuildSkewHash(hashtable, node, num_skew_mcvs); ExecHashBuildSkewHash(state, hashtable, node, num_skew_mcvs);
MemoryContextSwitchTo(oldcxt); MemoryContextSwitchTo(oldcxt);
} }
@ -1803,103 +1787,6 @@ ExecParallelHashTableInsertCurrentBatch(HashJoinTable hashtable,
heap_free_minimal_tuple(tuple); heap_free_minimal_tuple(tuple);
} }
/*
* ExecHashGetHashValue
* Compute the hash value for a tuple
*
* The tuple to be tested must be in econtext->ecxt_outertuple (thus Vars in
* the hashkeys expressions need to have OUTER_VAR as varno). If outer_tuple
* is false (meaning it's the HashJoin's inner node, Hash), econtext,
* hashkeys, and slot need to be from Hash, with hashkeys/slot referencing and
* being suitable for tuples from the node below the Hash. Conversely, if
* outer_tuple is true, econtext is from HashJoin, and hashkeys/slot need to
* be appropriate for tuples from HashJoin's outer node.
*
* A true result means the tuple's hash value has been successfully computed
* and stored at *hashvalue. A false result means the tuple cannot match
* because it contains a null attribute, and hence it should be discarded
* immediately. (If keep_nulls is true then false is never returned.)
*/
bool
ExecHashGetHashValue(HashJoinTable hashtable,
ExprContext *econtext,
List *hashkeys,
bool outer_tuple,
bool keep_nulls,
uint32 *hashvalue)
{
uint32 hashkey = 0;
FmgrInfo *hashfunctions;
ListCell *hk;
int i = 0;
MemoryContext oldContext;
/*
* We reset the eval context each time to reclaim any memory leaked in the
* hashkey expressions.
*/
ResetExprContext(econtext);
oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
if (outer_tuple)
hashfunctions = hashtable->outer_hashfunctions;
else
hashfunctions = hashtable->inner_hashfunctions;
foreach(hk, hashkeys)
{
ExprState *keyexpr = (ExprState *) lfirst(hk);
Datum keyval;
bool isNull;
/* combine successive hashkeys by rotating */
hashkey = pg_rotate_left32(hashkey, 1);
/*
* Get the join attribute value of the tuple
*/
keyval = ExecEvalExpr(keyexpr, econtext, &isNull);
/*
* If the attribute is NULL, and the join operator is strict, then
* this tuple cannot pass the join qual so we can reject it
* immediately (unless we're scanning the outside of an outer join, in
* which case we must not reject it). Otherwise we act like the
* hashcode of NULL is zero (this will support operators that act like
* IS NOT DISTINCT, though not any more-random behavior). We treat
* the hash support function as strict even if the operator is not.
*
* Note: currently, all hashjoinable operators must be strict since
* the hash index AM assumes that. However, it takes so little extra
* code here to allow non-strict that we may as well do it.
*/
if (isNull)
{
if (hashtable->hashStrict[i] && !keep_nulls)
{
MemoryContextSwitchTo(oldContext);
return false; /* cannot match */
}
/* else, leave hashkey unmodified, equivalent to hashcode 0 */
}
else
{
/* Compute the hash function */
uint32 hkey;
hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], hashtable->collations[i], keyval));
hashkey ^= hkey;
}
i++;
}
MemoryContextSwitchTo(oldContext);
*hashvalue = hashkey;
return true;
}
/* /*
* ExecHashGetBucketAndBatch * ExecHashGetBucketAndBatch
@ -2372,7 +2259,8 @@ ExecReScanHash(HashState *node)
* based on available memory. * based on available memory.
*/ */
static void static void
ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse) ExecHashBuildSkewHash(HashState *hashstate, HashJoinTable hashtable,
Hash *node, int mcvsToUse)
{ {
HeapTupleData *statsTuple; HeapTupleData *statsTuple;
AttStatsSlot sslot; AttStatsSlot sslot;
@ -2400,7 +2288,6 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse)
{ {
double frac; double frac;
int nbuckets; int nbuckets;
FmgrInfo *hashfunctions;
int i; int i;
if (mcvsToUse > sslot.nvalues) if (mcvsToUse > sslot.nvalues)
@ -2468,15 +2355,14 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse)
* ExecHashRemoveNextSkewBucket) and we want the least common MCVs to * ExecHashRemoveNextSkewBucket) and we want the least common MCVs to
* be removed first. * be removed first.
*/ */
hashfunctions = hashtable->outer_hashfunctions;
for (i = 0; i < mcvsToUse; i++) for (i = 0; i < mcvsToUse; i++)
{ {
uint32 hashvalue; uint32 hashvalue;
int bucket; int bucket;
hashvalue = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[0], hashvalue = DatumGetUInt32(FunctionCall1Coll(hashstate->skew_hashfunction,
hashtable->collations[0], hashstate->skew_collation,
sslot.values[i])); sslot.values[i]));
/* /*

View File

@ -169,6 +169,7 @@
#include "executor/nodeHash.h" #include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h" #include "executor/nodeHashjoin.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "utils/lsyscache.h"
#include "utils/sharedtuplestore.h" #include "utils/sharedtuplestore.h"
#include "utils/wait_event.h" #include "utils/wait_event.h"
@ -331,10 +332,7 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
* whoever gets here first will create the hash table and any * whoever gets here first will create the hash table and any
* later arrivals will merely attach to it. * later arrivals will merely attach to it.
*/ */
hashtable = ExecHashTableCreate(hashNode, hashtable = ExecHashTableCreate(hashNode);
node->hj_HashOperators,
node->hj_Collations,
HJ_FILL_INNER(node));
node->hj_HashTable = hashtable; node->hj_HashTable = hashtable;
/* /*
@ -820,9 +818,96 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
*/ */
{ {
HashState *hashstate = (HashState *) innerPlanState(hjstate); HashState *hashstate = (HashState *) innerPlanState(hjstate);
Hash *hash = (Hash *) hashstate->ps.plan;
TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot; TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
Oid *outer_hashfuncid;
Oid *inner_hashfuncid;
bool *hash_strict;
ListCell *lc;
int nkeys;
hjstate->hj_HashTupleSlot = slot; hjstate->hj_HashTupleSlot = slot;
/*
* Build ExprStates to obtain hash values for either side of the join.
* This must be done here as ExecBuildHash32Expr needs to know how to
* handle NULL inputs and the required handling of that depends on the
* jointype. We don't know the join type in ExecInitHash() and we
* must build the ExprStates before ExecHashTableCreate() so we
* properly attribute any SubPlans that exist in the hash expressions
* to the correct PlanState.
*/
nkeys = list_length(node->hashoperators);
outer_hashfuncid = palloc_array(Oid, nkeys);
inner_hashfuncid = palloc_array(Oid, nkeys);
hash_strict = palloc_array(bool, nkeys);
/*
* Determine the hash function for each side of the join for the given
* hash operator.
*/
foreach(lc, node->hashoperators)
{
Oid hashop = lfirst_oid(lc);
int i = foreach_current_index(lc);
if (!get_op_hash_functions(hashop,
&outer_hashfuncid[i],
&inner_hashfuncid[i]))
elog(ERROR,
"could not find hash function for hash operator %u",
hashop);
hash_strict[i] = op_strict(hashop);
}
/*
* Build an ExprState to generate the hash value for the expressions
* on the outer of the join. This ExprState must finish generating
* the hash value when HJ_FILL_OUTER() is true. Otherwise,
* ExecBuildHash32Expr will set up the ExprState to abort early if it
* finds a NULL. In these cases, we don't need to store these tuples
* in the hash table as the jointype does not require it.
*/
hjstate->hj_OuterHash =
ExecBuildHash32Expr(hjstate->js.ps.ps_ResultTupleDesc,
hjstate->js.ps.resultops,
outer_hashfuncid,
node->hashcollations,
node->hashkeys,
hash_strict,
&hjstate->js.ps,
0,
HJ_FILL_OUTER(hjstate));
/* As above, but for the inner side of the join */
hashstate->hash_expr =
ExecBuildHash32Expr(hashstate->ps.ps_ResultTupleDesc,
hashstate->ps.resultops,
inner_hashfuncid,
node->hashcollations,
hash->hashkeys,
hash_strict,
&hashstate->ps,
0,
HJ_FILL_INNER(hjstate));
/*
* Set up the skew table hash function while we have a record of the
* first key's hash function Oid.
*/
if (OidIsValid(hash->skewTable))
{
hashstate->skew_hashfunction = palloc0(sizeof(FmgrInfo));
hashstate->skew_collation = linitial_oid(node->hashcollations);
fmgr_info(outer_hashfuncid[0], hashstate->skew_hashfunction);
}
/* no need to keep these */
pfree(outer_hashfuncid);
pfree(inner_hashfuncid);
pfree(hash_strict);
} }
/* /*
@ -846,11 +931,6 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
hjstate->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO; hjstate->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
hjstate->hj_CurTuple = NULL; hjstate->hj_CurTuple = NULL;
hjstate->hj_OuterHashKeys = ExecInitExprList(node->hashkeys,
(PlanState *) hjstate);
hjstate->hj_HashOperators = node->hashoperators;
hjstate->hj_Collations = node->hashcollations;
hjstate->hj_JoinState = HJ_BUILD_HASHTABLE; hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
hjstate->hj_MatchedOuter = false; hjstate->hj_MatchedOuter = false;
hjstate->hj_OuterNotEmpty = false; hjstate->hj_OuterNotEmpty = false;
@ -918,17 +998,22 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
while (!TupIsNull(slot)) while (!TupIsNull(slot))
{ {
bool isnull;
/* /*
* We have to compute the tuple's hash value. * We have to compute the tuple's hash value.
*/ */
ExprContext *econtext = hjstate->js.ps.ps_ExprContext; ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
econtext->ecxt_outertuple = slot; econtext->ecxt_outertuple = slot;
if (ExecHashGetHashValue(hashtable, econtext,
hjstate->hj_OuterHashKeys, ResetExprContext(econtext);
true, /* outer tuple */
HJ_FILL_OUTER(hjstate), *hashvalue = DatumGetUInt32(ExecEvalExprSwitchContext(hjstate->hj_OuterHash,
hashvalue)) econtext,
&isnull));
if (!isnull)
{ {
/* remember outer relation is not empty for possible rescan */ /* remember outer relation is not empty for possible rescan */
hjstate->hj_OuterNotEmpty = true; hjstate->hj_OuterNotEmpty = true;
@ -989,14 +1074,19 @@ ExecParallelHashJoinOuterGetTuple(PlanState *outerNode,
while (!TupIsNull(slot)) while (!TupIsNull(slot))
{ {
bool isnull;
ExprContext *econtext = hjstate->js.ps.ps_ExprContext; ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
econtext->ecxt_outertuple = slot; econtext->ecxt_outertuple = slot;
if (ExecHashGetHashValue(hashtable, econtext,
hjstate->hj_OuterHashKeys, ResetExprContext(econtext);
true, /* outer tuple */
HJ_FILL_OUTER(hjstate), *hashvalue = DatumGetUInt32(ExecEvalExprSwitchContext(hjstate->hj_OuterHash,
hashvalue)) econtext,
&isnull));
if (!isnull)
return slot; return slot;
/* /*
@ -1518,15 +1608,20 @@ ExecParallelHashJoinPartitionOuter(HashJoinState *hjstate)
/* Execute outer plan, writing all tuples to shared tuplestores. */ /* Execute outer plan, writing all tuples to shared tuplestores. */
for (;;) for (;;)
{ {
bool isnull;
slot = ExecProcNode(outerState); slot = ExecProcNode(outerState);
if (TupIsNull(slot)) if (TupIsNull(slot))
break; break;
econtext->ecxt_outertuple = slot; econtext->ecxt_outertuple = slot;
if (ExecHashGetHashValue(hashtable, econtext,
hjstate->hj_OuterHashKeys, ResetExprContext(econtext);
true, /* outer tuple */
HJ_FILL_OUTER(hjstate), hashvalue = DatumGetUInt32(ExecEvalExprSwitchContext(hjstate->hj_OuterHash,
&hashvalue)) econtext,
&isnull));
if (!isnull)
{ {
int batchno; int batchno;
int bucketno; int bucketno;

View File

@ -1900,6 +1900,210 @@ llvm_compile_expr(ExprState *state)
LLVMBuildBr(b, opblocks[opno + 1]); LLVMBuildBr(b, opblocks[opno + 1]);
break; break;
case EEOP_HASHDATUM_SET_INITVAL:
{
LLVMValueRef v_initvalue;
v_initvalue = l_sizet_const(op->d.hashdatum_initvalue.init_value);
LLVMBuildStore(b, v_initvalue, v_resvaluep);
LLVMBuildStore(b, l_sbool_const(0), v_resnullp);
LLVMBuildBr(b, opblocks[opno + 1]);
break;
}
case EEOP_HASHDATUM_FIRST:
case EEOP_HASHDATUM_FIRST_STRICT:
case EEOP_HASHDATUM_NEXT32:
case EEOP_HASHDATUM_NEXT32_STRICT:
{
FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
LLVMValueRef v_fcinfo;
LLVMValueRef v_fcinfo_isnull;
LLVMValueRef v_retval;
LLVMBasicBlockRef b_checkargnull;
LLVMBasicBlockRef b_ifnotnull;
LLVMBasicBlockRef b_ifnullblock;
LLVMValueRef v_argisnull;
LLVMValueRef v_prevhash = NULL;
/*
* When performing the next hash and not in strict mode we
* perform a rotation of the previously stored hash value
* before doing the NULL check. We want to do this even
* when we receive a NULL Datum to hash. In strict mode,
* we do this after the NULL check so as not to waste the
* effort of rotating the bits when we're going to throw
* away the hash value and return NULL.
*/
if (opcode == EEOP_HASHDATUM_NEXT32)
{
LLVMValueRef v_tmp1;
LLVMValueRef v_tmp2;
/*
* Fetch the hash value accumulated so far from the result
* Datum, where the preceding hashing step (or
* EEOP_HASHDATUM_SET_INITVAL) stored it.
*/
v_prevhash = l_load(b, TypeSizeT, v_resvaluep,
"prevhash");
/*
* Rotate bits left by 1 bit. Be careful not to
* overflow uint32 when working with size_t.
*/
v_tmp1 = LLVMBuildShl(b, v_prevhash, l_sizet_const(1),
"");
v_tmp1 = LLVMBuildAnd(b, v_tmp1,
l_sizet_const(0xffffffff), "");
v_tmp2 = LLVMBuildLShr(b, v_prevhash,
l_sizet_const(31), "");
v_prevhash = LLVMBuildOr(b, v_tmp1, v_tmp2,
"rotatedhash");
}
/*
* Block for the actual function call, if args are
* non-NULL.
*/
b_ifnotnull = l_bb_before_v(opblocks[opno + 1],
"b.%d.ifnotnull",
opno);
/* we expect the hash function to have 1 argument */
if (fcinfo->nargs != 1)
elog(ERROR, "incorrect number of function arguments");
v_fcinfo = l_ptr_const(fcinfo,
l_ptr(StructFunctionCallInfoData));
b_checkargnull = l_bb_before_v(b_ifnotnull,
"b.%d.isnull.0", opno);
LLVMBuildBr(b, b_checkargnull);
/*
* Determine what to do if we find the argument to be
* NULL.
*/
if (opcode == EEOP_HASHDATUM_FIRST_STRICT ||
opcode == EEOP_HASHDATUM_NEXT32_STRICT)
{
b_ifnullblock = l_bb_before_v(b_ifnotnull,
"b.%d.strictnull",
opno);
LLVMPositionBuilderAtEnd(b, b_ifnullblock);
/*
* In strict mode, NULL inputs result in NULL. Save
* the NULL result and goto jumpdone.
*/
LLVMBuildStore(b, l_sbool_const(1), v_resnullp);
LLVMBuildStore(b, l_sizet_const(0), v_resvaluep);
LLVMBuildBr(b, opblocks[op->d.hashdatum.jumpdone]);
}
else
{
b_ifnullblock = l_bb_before_v(b_ifnotnull,
"b.%d.null",
opno);
LLVMPositionBuilderAtEnd(b, b_ifnullblock);
LLVMBuildStore(b, l_sbool_const(0), v_resnullp);
if (opcode == EEOP_HASHDATUM_NEXT32)
{
Assert(v_prevhash != NULL);
/*
* Save the rotated hash value and skip to the
* next op.
*/
LLVMBuildStore(b, v_prevhash, v_resvaluep);
}
else
{
Assert(opcode == EEOP_HASHDATUM_FIRST);
/*
* Store a zero Datum when the Datum to hash is
* NULL
*/
LLVMBuildStore(b, l_sizet_const(0), v_resvaluep);
}
LLVMBuildBr(b, opblocks[opno + 1]);
}
LLVMPositionBuilderAtEnd(b, b_checkargnull);
/* emit code to check if the input parameter is NULL */
v_argisnull = l_funcnull(b, v_fcinfo, 0);
LLVMBuildCondBr(b,
LLVMBuildICmp(b,
LLVMIntEQ,
v_argisnull,
l_sbool_const(1),
""),
b_ifnullblock,
b_ifnotnull);
LLVMPositionBuilderAtEnd(b, b_ifnotnull);
/*
* Rotate the previously stored hash value when performing
* NEXT32 in strict mode. In non-strict mode we already
* did this before checking for NULLs.
*/
if (opcode == EEOP_HASHDATUM_NEXT32_STRICT)
{
LLVMValueRef v_tmp1;
LLVMValueRef v_tmp2;
/*
* Fetch the hash value accumulated so far from the result
* Datum, where the preceding hashing step (or
* EEOP_HASHDATUM_SET_INITVAL) stored it.
*/
v_prevhash = l_load(b, TypeSizeT, v_resvaluep,
"prevhash");
/*
* Rotate bits left by 1 bit. Be careful not to
* overflow uint32 when working with size_t.
*/
v_tmp1 = LLVMBuildShl(b, v_prevhash, l_sizet_const(1),
"");
v_tmp1 = LLVMBuildAnd(b, v_tmp1,
l_sizet_const(0xffffffff), "");
v_tmp2 = LLVMBuildLShr(b, v_prevhash,
l_sizet_const(31), "");
v_prevhash = LLVMBuildOr(b, v_tmp1, v_tmp2,
"rotatedhash");
}
/* call the hash function */
v_retval = BuildV1Call(context, b, mod, fcinfo,
&v_fcinfo_isnull);
/*
* For NEXT32 ops, XOR (^) the returned hash value with
* the existing hash value.
*/
if (opcode == EEOP_HASHDATUM_NEXT32 ||
opcode == EEOP_HASHDATUM_NEXT32_STRICT)
v_retval = LLVMBuildXor(b, v_prevhash, v_retval,
"xorhash");
LLVMBuildStore(b, v_retval, v_resvaluep);
LLVMBuildStore(b, l_sbool_const(0), v_resnullp);
LLVMBuildBr(b, opblocks[opno + 1]);
break;
}
case EEOP_CONVERT_ROWTYPE: case EEOP_CONVERT_ROWTYPE:
build_EvalXFunc(b, mod, "ExecEvalConvertRowtype", build_EvalXFunc(b, mod, "ExecEvalConvertRowtype",
v_state, op, v_econtext); v_state, op, v_econtext);

View File

@ -235,6 +235,13 @@ typedef enum ExprEvalOp
/* evaluate a single domain CHECK constraint */ /* evaluate a single domain CHECK constraint */
EEOP_DOMAIN_CHECK, EEOP_DOMAIN_CHECK,
/* evaluation steps for hashing */
EEOP_HASHDATUM_SET_INITVAL,
EEOP_HASHDATUM_FIRST,
EEOP_HASHDATUM_FIRST_STRICT,
EEOP_HASHDATUM_NEXT32,
EEOP_HASHDATUM_NEXT32_STRICT,
/* evaluate assorted special-purpose expression types */ /* evaluate assorted special-purpose expression types */
EEOP_CONVERT_ROWTYPE, EEOP_CONVERT_ROWTYPE,
EEOP_SCALARARRAYOP, EEOP_SCALARARRAYOP,
@ -558,6 +565,23 @@ typedef struct ExprEvalStep
ErrorSaveContext *escontext; ErrorSaveContext *escontext;
} domaincheck; } domaincheck;
/* for EEOP_HASHDATUM_SET_INITVAL */
struct
{
Datum init_value;
} hashdatum_initvalue;
/* for EEOP_HASHDATUM_(FIRST|NEXT32)[_STRICT] */
struct
{
FmgrInfo *finfo; /* function's lookup data */
FunctionCallInfo fcinfo_data; /* arguments etc */
/* faster to access without additional indirection: */
PGFunction fn_addr; /* actual call address */
int jumpdone; /* jump here on null */
} hashdatum;
/* for EEOP_CONVERT_ROWTYPE */ /* for EEOP_CONVERT_ROWTYPE */
struct struct
{ {

View File

@ -285,6 +285,13 @@ extern ExprState *ExecInitCheck(List *qual, PlanState *parent);
extern List *ExecInitExprList(List *nodes, PlanState *parent); extern List *ExecInitExprList(List *nodes, PlanState *parent);
extern ExprState *ExecBuildAggTrans(AggState *aggstate, struct AggStatePerPhaseData *phase, extern ExprState *ExecBuildAggTrans(AggState *aggstate, struct AggStatePerPhaseData *phase,
bool doSort, bool doHash, bool nullcheck); bool doSort, bool doHash, bool nullcheck);
/*
 * Build an ExprState that yields a 32-bit hash value computed by calling the
 * given hash function(s) on the expressions in 'hash_exprs'.
 */
extern ExprState *ExecBuildHash32Expr(TupleDesc desc,
const TupleTableSlotOps *ops,
const Oid *hashfunc_oids,
const List *collations,
const List *hash_exprs,
const bool *opstrict, PlanState *parent,
uint32 init_value, bool keep_nulls);
extern ExprState *ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, extern ExprState *ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,
const TupleTableSlotOps *lops, const TupleTableSlotOps *rops, const TupleTableSlotOps *lops, const TupleTableSlotOps *rops,
int numCols, int numCols,

View File

@ -313,8 +313,6 @@ typedef struct HashJoinTableData
dsa_pointer_atomic *shared; dsa_pointer_atomic *shared;
} buckets; } buckets;
bool keepNulls; /* true to store unmatchable NULL tuples */
bool skewEnabled; /* are we using skew optimization? */ bool skewEnabled; /* are we using skew optimization? */
HashSkewBucket **skewBucket; /* hashtable of skew buckets */ HashSkewBucket **skewBucket; /* hashtable of skew buckets */
int skewBucketLen; /* size of skewBucket array (a power of 2!) */ int skewBucketLen; /* size of skewBucket array (a power of 2!) */
@ -343,16 +341,6 @@ typedef struct HashJoinTableData
BufFile **innerBatchFile; /* buffered virtual temp file per batch */ BufFile **innerBatchFile; /* buffered virtual temp file per batch */
BufFile **outerBatchFile; /* buffered virtual temp file per batch */ BufFile **outerBatchFile; /* buffered virtual temp file per batch */
/*
* Info about the datatype-specific hash functions for the datatypes being
* hashed. These are arrays of the same length as the number of hash join
* clauses (hash keys).
*/
FmgrInfo *outer_hashfunctions; /* lookup data for hash functions */
FmgrInfo *inner_hashfunctions; /* lookup data for hash functions */
bool *hashStrict; /* is each hash join operator strict? */
Oid *collations;
Size spaceUsed; /* memory space currently used by tuples */ Size spaceUsed; /* memory space currently used by tuples */
Size spaceAllowed; /* upper limit for space used */ Size spaceAllowed; /* upper limit for space used */
Size spacePeak; /* peak space used */ Size spacePeak; /* peak space used */

View File

@ -24,8 +24,7 @@ extern Node *MultiExecHash(HashState *node);
extern void ExecEndHash(HashState *node); extern void ExecEndHash(HashState *node);
extern void ExecReScanHash(HashState *node); extern void ExecReScanHash(HashState *node);
extern HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, extern HashJoinTable ExecHashTableCreate(HashState *state);
bool keepNulls);
extern void ExecParallelHashTableAlloc(HashJoinTable hashtable, extern void ExecParallelHashTableAlloc(HashJoinTable hashtable,
int batchno); int batchno);
extern void ExecHashTableDestroy(HashJoinTable hashtable); extern void ExecHashTableDestroy(HashJoinTable hashtable);
@ -43,12 +42,6 @@ extern void ExecParallelHashTableInsert(HashJoinTable hashtable,
extern void ExecParallelHashTableInsertCurrentBatch(HashJoinTable hashtable, extern void ExecParallelHashTableInsertCurrentBatch(HashJoinTable hashtable,
TupleTableSlot *slot, TupleTableSlot *slot,
uint32 hashvalue); uint32 hashvalue);
extern bool ExecHashGetHashValue(HashJoinTable hashtable,
ExprContext *econtext,
List *hashkeys,
bool outer_tuple,
bool keep_nulls,
uint32 *hashvalue);
extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable, extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable,
uint32 hashvalue, uint32 hashvalue,
int *bucketno, int *bucketno,

View File

@ -2184,8 +2184,7 @@ typedef struct MergeJoinState
* HashJoinState information * HashJoinState information
* *
* hashclauses original form of the hashjoin condition * hashclauses original form of the hashjoin condition
* hj_OuterHashKeys the outer hash keys in the hashjoin condition * hj_OuterHash ExprState for hashing outer keys
* hj_HashOperators the join operators in the hashjoin condition
* hj_HashTable hash table for the hashjoin * hj_HashTable hash table for the hashjoin
* (NULL if table not built yet) * (NULL if table not built yet)
* hj_CurHashValue hash value for current outer tuple * hj_CurHashValue hash value for current outer tuple
@ -2215,9 +2214,7 @@ typedef struct HashJoinState
{ {
JoinState js; /* its first field is NodeTag */ JoinState js; /* its first field is NodeTag */
ExprState *hashclauses; ExprState *hashclauses;
List *hj_OuterHashKeys; /* list of ExprState nodes */ ExprState *hj_OuterHash;
List *hj_HashOperators; /* list of operator OIDs */
List *hj_Collations;
HashJoinTable hj_HashTable; HashJoinTable hj_HashTable;
uint32 hj_CurHashValue; uint32 hj_CurHashValue;
int hj_CurBucketNo; int hj_CurBucketNo;
@ -2770,7 +2767,10 @@ typedef struct HashState
{ {
PlanState ps; /* its first field is NodeTag */ PlanState ps; /* its first field is NodeTag */
HashJoinTable hashtable; /* hash table for the hashjoin */ HashJoinTable hashtable; /* hash table for the hashjoin */
List *hashkeys; /* list of ExprState nodes */ ExprState *hash_expr; /* ExprState to get hash value */
FmgrInfo *skew_hashfunction; /* lookup data for skew hash function */
Oid skew_collation; /* collation to call skew_hashfunction with */
/* /*
* In a parallelized hash join, the leader retains a pointer to the * In a parallelized hash join, the leader retains a pointer to the