Tweak processing of multiple-index-scan plans to reduce overhead when handling many-way scans.
Instead of re-evaluating all prior indexscan quals to see if a tuple has been fetched more than once, use a hash table indexed by tuple CTID. If the hash table grows to exceed SortMem kilobytes, fall back to the old way of re-evaluating the quals.
This commit is contained in:
parent
38e2bf6283
commit
92ee2528d8
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.82 2003/08/04 02:39:59 momjian Exp $
|
* $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.83 2003/08/22 20:26:43 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -28,19 +28,51 @@
|
|||||||
#include "access/heapam.h"
|
#include "access/heapam.h"
|
||||||
#include "executor/execdebug.h"
|
#include "executor/execdebug.h"
|
||||||
#include "executor/nodeIndexscan.h"
|
#include "executor/nodeIndexscan.h"
|
||||||
|
#include "miscadmin.h"
|
||||||
#include "nodes/nodeFuncs.h"
|
#include "nodes/nodeFuncs.h"
|
||||||
#include "optimizer/clauses.h"
|
#include "optimizer/clauses.h"
|
||||||
#include "parser/parsetree.h"
|
#include "parser/parsetree.h"
|
||||||
|
|
||||||
/* ----------------
|
|
||||||
* Misc stuff to move to executor.h soon -cim 6/5/90
|
|
||||||
* ----------------
|
|
||||||
*/
|
|
||||||
#define NO_OP 0
|
#define NO_OP 0
|
||||||
#define LEFT_OP 1
|
#define LEFT_OP 1
|
||||||
#define RIGHT_OP 2
|
#define RIGHT_OP 2
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In a multiple-index plan, we must take care to return any given tuple
|
||||||
|
* only once, even if it matches conditions of several index scans. Our
|
||||||
|
* preferred way to do this is to record already-returned tuples in a hash
|
||||||
|
* table (using the TID as unique identifier). However, in a very large
|
||||||
|
* scan this could conceivably run out of memory. We limit the hash table
|
||||||
|
* to no more than SortMem KB; if it grows past that, we fall back to the
|
||||||
|
* pre-7.4 technique: evaluate the prior-scan index quals again for each
|
||||||
|
* tuple (which is space-efficient, but slow).
|
||||||
|
*
|
||||||
|
* When scanning backwards, we use scannum to determine when to emit the
|
||||||
|
* tuple --- we have to re-emit a tuple in the same scan as it was first
|
||||||
|
* encountered.
|
||||||
|
*
|
||||||
|
* Note: this code would break if the planner were ever to create a multiple
|
||||||
|
* index plan with overall backwards direction, because the hashtable code
|
||||||
|
* will emit a tuple the first time it is encountered (which would be the
|
||||||
|
* highest scan in which it matches the index), but the evaluate-the-quals
|
||||||
|
* code will emit a tuple in the lowest-numbered scan in which it's valid.
|
||||||
|
* This could be fixed at need by making the evaluate-the-quals case more
|
||||||
|
* complex. Currently the planner will never create such a plan (since it
|
||||||
|
* considers multi-index plans unordered anyway), so there's no need for
|
||||||
|
* more complexity.
|
||||||
|
*/
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
/* tid is the hash key and so must be first! */
|
||||||
|
ItemPointerData tid; /* TID of a tuple we've returned */
|
||||||
|
int scannum; /* number of scan we returned it in */
|
||||||
|
} DupHashTabEntry;
|
||||||
|
|
||||||
|
|
||||||
static TupleTableSlot *IndexNext(IndexScanState *node);
|
static TupleTableSlot *IndexNext(IndexScanState *node);
|
||||||
|
static void create_duphash(IndexScanState *node);
|
||||||
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------
|
/* ----------------------------------------------------------------
|
||||||
* IndexNext
|
* IndexNext
|
||||||
@ -163,7 +195,7 @@ IndexNext(IndexScanState *node)
|
|||||||
while ((tuple = index_getnext(scandesc, direction)) != NULL)
|
while ((tuple = index_getnext(scandesc, direction)) != NULL)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* store the scanned tuple in the scan tuple slot of the scan
|
* Store the scanned tuple in the scan tuple slot of the scan
|
||||||
* state. Note: we pass 'false' because tuples returned by
|
* state. Note: we pass 'false' because tuples returned by
|
||||||
* amgetnext are pointers onto disk pages and must not be
|
* amgetnext are pointers onto disk pages and must not be
|
||||||
* pfree()'d.
|
* pfree()'d.
|
||||||
@ -174,36 +206,80 @@ IndexNext(IndexScanState *node)
|
|||||||
false); /* don't pfree */
|
false); /* don't pfree */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We must check to see if the current tuple was already
|
* If it's a multiple-index scan, make sure not to double-report
|
||||||
* matched by an earlier index, so we don't double-report it.
|
* a tuple matched by more than one index. (See notes above.)
|
||||||
* We do this by passing the tuple through ExecQual and
|
|
||||||
* checking for failure with all previous qualifications.
|
|
||||||
*/
|
*/
|
||||||
if (node->iss_IndexPtr > 0)
|
if (numIndices > 1)
|
||||||
{
|
{
|
||||||
bool prev_matches = false;
|
/* First try the hash table */
|
||||||
int prev_index;
|
if (node->iss_DupHash)
|
||||||
List *qual;
|
{
|
||||||
|
DupHashTabEntry *entry;
|
||||||
|
bool found;
|
||||||
|
|
||||||
econtext->ecxt_scantuple = slot;
|
entry = (DupHashTabEntry *)
|
||||||
ResetExprContext(econtext);
|
hash_search(node->iss_DupHash,
|
||||||
qual = node->indxqualorig;
|
&tuple->t_data->t_ctid,
|
||||||
for (prev_index = 0;
|
HASH_ENTER,
|
||||||
prev_index < node->iss_IndexPtr;
|
&found);
|
||||||
prev_index++)
|
if (entry == NULL ||
|
||||||
{
|
node->iss_DupHash->hctl->nentries > node->iss_MaxHash)
|
||||||
if (ExecQual((List *) lfirst(qual), econtext, false))
|
|
||||||
{
|
{
|
||||||
prev_matches = true;
|
/* out of memory (either hard or soft limit) */
|
||||||
break;
|
/* release hash table and fall thru to old code */
|
||||||
|
hash_destroy(node->iss_DupHash);
|
||||||
|
node->iss_DupHash = NULL;
|
||||||
|
}
|
||||||
|
else if (found)
|
||||||
|
{
|
||||||
|
/* pre-existing entry */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* It's duplicate if first emitted in a different
|
||||||
|
* scan. If same scan, we must be backing up, so
|
||||||
|
* okay to emit again.
|
||||||
|
*/
|
||||||
|
if (entry->scannum != node->iss_IndexPtr)
|
||||||
|
{
|
||||||
|
/* Dup, so drop it and loop back for another */
|
||||||
|
ExecClearTuple(slot);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* new entry, finish filling it in */
|
||||||
|
entry->scannum = node->iss_IndexPtr;
|
||||||
}
|
}
|
||||||
qual = lnext(qual);
|
|
||||||
}
|
}
|
||||||
if (prev_matches)
|
/* If hash table has overflowed, do it the hard way */
|
||||||
|
if (node->iss_DupHash == NULL &&
|
||||||
|
node->iss_IndexPtr > 0)
|
||||||
{
|
{
|
||||||
/* Duplicate, so drop it and loop back for another */
|
bool prev_matches = false;
|
||||||
ExecClearTuple(slot);
|
int prev_index;
|
||||||
continue;
|
List *qual;
|
||||||
|
|
||||||
|
econtext->ecxt_scantuple = slot;
|
||||||
|
ResetExprContext(econtext);
|
||||||
|
qual = node->indxqualorig;
|
||||||
|
for (prev_index = 0;
|
||||||
|
prev_index < node->iss_IndexPtr;
|
||||||
|
prev_index++)
|
||||||
|
{
|
||||||
|
if (ExecQual((List *) lfirst(qual), econtext, false))
|
||||||
|
{
|
||||||
|
prev_matches = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
qual = lnext(qual);
|
||||||
|
}
|
||||||
|
if (prev_matches)
|
||||||
|
{
|
||||||
|
/* Dup, so drop it and loop back for another */
|
||||||
|
ExecClearTuple(slot);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -383,6 +459,14 @@ ExecIndexReScan(IndexScanState *node, ExprContext *exprCtxt)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* reset hash table */
|
||||||
|
if (numIndices > 1)
|
||||||
|
{
|
||||||
|
if (node->iss_DupHash)
|
||||||
|
hash_destroy(node->iss_DupHash);
|
||||||
|
create_duphash(node);
|
||||||
|
}
|
||||||
|
|
||||||
/* reset index scans */
|
/* reset index scans */
|
||||||
if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indxorderdir))
|
if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indxorderdir))
|
||||||
node->iss_IndexPtr = numIndices;
|
node->iss_IndexPtr = numIndices;
|
||||||
@ -432,6 +516,10 @@ ExecEndIndexScan(IndexScanState *node)
|
|||||||
ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
|
ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
|
||||||
ExecClearTuple(node->ss.ss_ScanTupleSlot);
|
ExecClearTuple(node->ss.ss_ScanTupleSlot);
|
||||||
|
|
||||||
|
/* drop hash table */
|
||||||
|
if (node->iss_DupHash)
|
||||||
|
hash_destroy(node->iss_DupHash);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* close the index relations
|
* close the index relations
|
||||||
*/
|
*/
|
||||||
@ -507,7 +595,7 @@ ExecIndexRestrPos(IndexScanState *node)
|
|||||||
|
|
||||||
/* ----------------------------------------------------------------
|
/* ----------------------------------------------------------------
|
||||||
* ExecInitIndexScan
|
* ExecInitIndexScan
|
||||||
*
|
*
|
||||||
* Initializes the index scan's state information, creates
|
* Initializes the index scan's state information, creates
|
||||||
* scan keys, and opens the base and index relations.
|
* scan keys, and opens the base and index relations.
|
||||||
*
|
*
|
||||||
@ -919,12 +1007,42 @@ ExecInitIndexScan(IndexScan *node, EState *estate)
|
|||||||
ExecAssignResultTypeFromTL(&indexstate->ss.ps);
|
ExecAssignResultTypeFromTL(&indexstate->ss.ps);
|
||||||
ExecAssignScanProjectionInfo(&indexstate->ss);
|
ExecAssignScanProjectionInfo(&indexstate->ss);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize hash table if needed.
|
||||||
|
*/
|
||||||
|
if (numIndices > 1)
|
||||||
|
create_duphash(indexstate);
|
||||||
|
else
|
||||||
|
indexstate->iss_DupHash = NULL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* all done.
|
* all done.
|
||||||
*/
|
*/
|
||||||
return indexstate;
|
return indexstate;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
create_duphash(IndexScanState *node)
|
||||||
|
{
|
||||||
|
HASHCTL hash_ctl;
|
||||||
|
|
||||||
|
MemSet(&hash_ctl, 0, sizeof(hash_ctl));
|
||||||
|
hash_ctl.keysize = SizeOfIptrData;
|
||||||
|
hash_ctl.entrysize = sizeof(DupHashTabEntry);
|
||||||
|
hash_ctl.hash = tag_hash;
|
||||||
|
hash_ctl.hcxt = CurrentMemoryContext;
|
||||||
|
node->iss_DupHash = hash_create("DupHashTable",
|
||||||
|
(long) ceil(node->ss.ps.plan->plan_rows),
|
||||||
|
&hash_ctl,
|
||||||
|
HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
|
||||||
|
if (node->iss_DupHash == NULL)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||||
|
errmsg("out of memory")));
|
||||||
|
node->iss_MaxHash = (SortMem * 1024L) /
|
||||||
|
(MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(sizeof(DupHashTabEntry)));
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
ExecCountSlotsIndexScan(IndexScan *node)
|
ExecCountSlotsIndexScan(IndexScan *node)
|
||||||
{
|
{
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $Id: execnodes.h,v 1.104 2003/08/19 01:13:41 tgl Exp $
|
* $Id: execnodes.h,v 1.105 2003/08/22 20:26:43 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -768,6 +768,8 @@ typedef ScanState SeqScanState;
|
|||||||
* RuntimeKeysReady true if runtime Skeys have been computed
|
* RuntimeKeysReady true if runtime Skeys have been computed
|
||||||
* RelationDescs ptr to array of relation descriptors
|
* RelationDescs ptr to array of relation descriptors
|
||||||
* ScanDescs ptr to array of scan descriptors
|
* ScanDescs ptr to array of scan descriptors
|
||||||
|
* DupHash hashtable for recognizing dups in multiple scan
|
||||||
|
* MaxHash max # entries we will allow in hashtable
|
||||||
* ----------------
|
* ----------------
|
||||||
*/
|
*/
|
||||||
typedef struct IndexScanState
|
typedef struct IndexScanState
|
||||||
@ -785,6 +787,8 @@ typedef struct IndexScanState
|
|||||||
bool iss_RuntimeKeysReady;
|
bool iss_RuntimeKeysReady;
|
||||||
RelationPtr iss_RelationDescs;
|
RelationPtr iss_RelationDescs;
|
||||||
IndexScanDescPtr iss_ScanDescs;
|
IndexScanDescPtr iss_ScanDescs;
|
||||||
|
HTAB *iss_DupHash;
|
||||||
|
long iss_MaxHash;
|
||||||
} IndexScanState;
|
} IndexScanState;
|
||||||
|
|
||||||
/* ----------------
|
/* ----------------
|
||||||
|
Loading…
x
Reference in New Issue
Block a user