Teach tuplestore.c to throw away data before the "mark" point when the caller
is using mark/restore but not rewind or backward-scan capability.

Insert a materialize plan node between a mergejoin and its inner child if the
inner child is a sort that is expected to spill to disk. The materialize
shields the sort from the need to do mark/restore and thereby allows it to
perform its final merge pass on-the-fly; the materialize itself is normally
cheap, since it won't spill to disk unless the number of tuples with equal
key values exceeds work_mem.

Greg Stark, with some kibitzing from Tom Lane.
parent 3963574d13
commit 2415ad9831
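Visually, the planner half of this change can be sketched as the following plan shape (illustrative only, not actual EXPLAIN output):

    Merge Join
      -> (outer input)
      -> Materialize            <- inserted by this commit
           -> Sort              (inner input, expected to exceed work_mem)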
src/backend/executor/nodeMaterial.c

@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeMaterial.c,v 1.58 2007/01/05 22:19:28 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeMaterial.c,v 1.59 2007/05/21 17:57:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -56,10 +56,10 @@ ExecMaterial(MaterialState *node)
 	/*
 	 * If first time through, and we need a tuplestore, initialize it.
 	 */
-	if (tuplestorestate == NULL && node->randomAccess)
+	if (tuplestorestate == NULL && node->eflags != 0)
 	{
 		tuplestorestate = tuplestore_begin_heap(true, false, work_mem);
-
+		tuplestore_set_eflags(tuplestorestate, node->eflags);
 		node->tuplestorestate = (void *) tuplestorestate;
 	}
 
@@ -162,14 +162,14 @@ ExecInitMaterial(Material *node, EState *estate, int eflags)
 	matstate->ss.ps.state = estate;
 
 	/*
-	 * We must have random access to the subplan output to do backward scan or
-	 * mark/restore.  We also prefer to materialize the subplan output if we
-	 * might be called on to rewind and replay it many times.  However, if none
-	 * of these cases apply, we can skip storing the data.
+	 * We must have a tuplestore buffering the subplan output to do backward
+	 * scan or mark/restore.  We also prefer to materialize the subplan output
+	 * if we might be called on to rewind and replay it many times.  However,
+	 * if none of these cases apply, we can skip storing the data.
 	 */
-	matstate->randomAccess = (eflags & (EXEC_FLAG_REWIND |
-										EXEC_FLAG_BACKWARD |
-										EXEC_FLAG_MARK)) != 0;
+	matstate->eflags = (eflags & (EXEC_FLAG_REWIND |
+								  EXEC_FLAG_BACKWARD |
+								  EXEC_FLAG_MARK));
 
 	matstate->eof_underlying = false;
 	matstate->tuplestorestate = NULL;
@@ -255,7 +255,7 @@ ExecEndMaterial(MaterialState *node)
 void
 ExecMaterialMarkPos(MaterialState *node)
 {
-	Assert(node->randomAccess);
+	Assert(node->eflags & EXEC_FLAG_MARK);
 
 	/*
 	 * if we haven't materialized yet, just return.
@@ -275,7 +275,7 @@ ExecMaterialMarkPos(MaterialState *node)
 void
 ExecMaterialRestrPos(MaterialState *node)
 {
-	Assert(node->randomAccess);
+	Assert(node->eflags & EXEC_FLAG_MARK);
 
 	/*
 	 * if we haven't materialized yet, just return.
@@ -300,7 +300,7 @@ ExecMaterialReScan(MaterialState *node, ExprContext *exprCtxt)
 {
 	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
 
-	if (node->randomAccess)
+	if (node->eflags != 0)
 	{
 		/*
 		 * If we haven't materialized yet, just return. If outerplan' chgParam
@@ -312,15 +312,21 @@ ExecMaterialReScan(MaterialState *node, ExprContext *exprCtxt)
 
 		/*
 		 * If subnode is to be rescanned then we forget previous stored
-		 * results; we have to re-read the subplan and re-store.
+		 * results; we have to re-read the subplan and re-store.  Also,
+		 * if we told tuplestore it needn't support rescan, we lose and
+		 * must re-read.  (This last should not happen in common cases;
+		 * else our caller lied by not passing EXEC_FLAG_REWIND to us.)
 		 *
 		 * Otherwise we can just rewind and rescan the stored output. The
 		 * state of the subnode does not change.
 		 */
-		if (((PlanState *) node)->lefttree->chgParam != NULL)
+		if (((PlanState *) node)->lefttree->chgParam != NULL ||
+			(node->eflags & EXEC_FLAG_REWIND) == 0)
 		{
 			tuplestore_end((Tuplestorestate *) node->tuplestorestate);
 			node->tuplestorestate = NULL;
+			if (((PlanState *) node)->lefttree->chgParam == NULL)
+				ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
			node->eof_underlying = false;
 		}
 		else
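To make the flag plumbing concrete, here is a minimal sketch assembled from the hunks in this commit (simplified, not a verbatim excerpt) of how the capability bits flow from the merge join, through Material, into the tuplestore:

    /* ExecInitMergeJoin: the inner child must support mark/restore */
    innerPlanState(mergestate) = ExecInitNode(innerPlan(node), estate,
                                              eflags | EXEC_FLAG_MARK);

    /* ExecInitMaterial: keep only the capability bits actually requested */
    matstate->eflags = (eflags & (EXEC_FLAG_REWIND |
                                  EXEC_FLAG_BACKWARD |
                                  EXEC_FLAG_MARK));

    /* ExecMaterial: hand those bits straight to the tuplestore, so it can
     * trim before the mark when neither REWIND nor BACKWARD is requested */
    tuplestorestate = tuplestore_begin_heap(true, false, work_mem);
    tuplestore_set_eflags(tuplestorestate, node->eflags);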
src/backend/executor/nodeMergejoin.c

@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.87 2007/02/02 00:07:03 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.88 2007/05/21 17:57:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -706,6 +706,9 @@ ExecMergeJoin(MergeJoinState *node)
 				}
 				else
 				{
+					/* Mark before advancing, if wanted */
+					if (node->mj_ExtraMarks)
+						ExecMarkPos(innerPlan);
 					/* Stay in same state to fetch next inner tuple */
 					if (doFillInner)
 					{
@@ -830,6 +833,9 @@ ExecMergeJoin(MergeJoinState *node)
 				 * now we get the next inner tuple, if any.  If there's none,
 				 * advance to next outer tuple (which may be able to join to
 				 * previously marked tuples).
+				 *
+				 * NB: must NOT do "extraMarks" here, since we may need to
+				 * return to previously marked tuples.
 				 */
 				innerTupleSlot = ExecProcNode(innerPlan);
 				node->mj_InnerTupleSlot = innerTupleSlot;
@@ -1140,6 +1146,9 @@ ExecMergeJoin(MergeJoinState *node)
 				break;
 
 				/*
 				 * SKIPOUTER_ADVANCE: advance over an outer tuple that is
 				 * known not to join to any inner tuple.
+				 *
+				 * Before advancing, we check to see if we must emit an
+				 * outer-join fill tuple for this outer tuple.
 				 */
@@ -1204,6 +1213,9 @@ ExecMergeJoin(MergeJoinState *node)
 				break;
 
 				/*
 				 * SKIPINNER_ADVANCE: advance over an inner tuple that is
 				 * known not to join to any outer tuple.
+				 *
+				 * Before advancing, we check to see if we must emit an
+				 * outer-join fill tuple for this inner tuple.
 				 */
@@ -1225,6 +1237,10 @@ ExecMergeJoin(MergeJoinState *node)
 					return result;
 				}
 
+				/* Mark before advancing, if wanted */
+				if (node->mj_ExtraMarks)
+					ExecMarkPos(innerPlan);
+
 				/*
 				 * now we get the next inner tuple, if any
 				 */
@@ -1295,6 +1311,10 @@ ExecMergeJoin(MergeJoinState *node)
 					return result;
 				}
 
+				/* Mark before advancing, if wanted */
+				if (node->mj_ExtraMarks)
+					ExecMarkPos(innerPlan);
+
 				/*
 				 * now we get the next inner tuple, if any
 				 */
@@ -1425,6 +1445,22 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
 	innerPlanState(mergestate) = ExecInitNode(innerPlan(node), estate,
 											  eflags | EXEC_FLAG_MARK);
 
+	/*
+	 * For certain types of inner child nodes, it is advantageous to issue
+	 * MARK every time we advance past an inner tuple we will never return
+	 * to.  For other types, MARK on a tuple we cannot return to is a waste
+	 * of cycles.  Detect which case applies and set mj_ExtraMarks if we
+	 * want to issue "unnecessary" MARK calls.
+	 *
+	 * Currently, only Material wants the extra MARKs, and it will be helpful
+	 * only if eflags doesn't specify REWIND.
+	 */
+	if (IsA(innerPlan(node), Material) &&
+		(eflags & EXEC_FLAG_REWIND) == 0)
+		mergestate->mj_ExtraMarks = true;
+	else
+		mergestate->mj_ExtraMarks = false;
+
 #define MERGEJOIN_NSLOTS 4
 
 	/*
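The payoff of the extra MARK calls is memory, not cycles: each ExecMarkPos() on a Material child reaches tuplestore_markpos(), which (per the tuplestore.c changes below) discards everything before the mark when neither REWIND nor BACKWARD was requested. As a worked example with hypothetical numbers: if the inner side produces 10 million tuples but no key value is shared by more than a thousand of them, the Material node's tuplestore holds on the order of a thousand tuples at any instant rather than all 10 million, which is why the commit message expects the materialize not to spill unless the tuples with equal key values alone exceed work_mem.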
src/backend/optimizer/path/costsize.c

@@ -54,7 +54,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.182 2007/05/04 01:13:44 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.183 2007/05/21 17:57:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1038,6 +1038,23 @@ cost_sort(Path *path, PlannerInfo *root,
 	path->total_cost = startup_cost + run_cost;
 }
 
+/*
+ * sort_exceeds_work_mem
+ *	  Given a finished Sort plan node, detect whether it is expected to
+ *	  spill to disk (ie, will need more than work_mem workspace)
+ *
+ * This assumes there will be no available LIMIT.
+ */
+bool
+sort_exceeds_work_mem(Sort *sort)
+{
+	double		input_bytes = relation_byte_size(sort->plan.plan_rows,
+												 sort->plan.plan_width);
+	long		work_mem_bytes = work_mem * 1024L;
+
+	return (input_bytes > work_mem_bytes);
+}
+
 /*
  * cost_material
  *	  Determines and returns the cost of materializing a relation, including
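sort_exceeds_work_mem relies on relation_byte_size, a pre-existing private helper in costsize.c that is not part of this diff. To my understanding it estimates raw data volume roughly as follows (a sketch, not a verbatim excerpt):

    /* Approximate bytes: aligned data width plus aligned tuple header,
     * per row (sketch of costsize.c's existing helper, not this diff) */
    static double
    relation_byte_size(double tuples, int width)
    {
        return tuples * (MAXALIGN(width) + MAXALIGN(sizeof(HeapTupleHeaderData)));
    }

Working the test through by hand: 1,000,000 rows of width 100 come to roughly (104 + 24) x 1e6 = ~128 MB on a machine with 8-byte alignment, far above the default 1 MB work_mem, so such a Sort is predicted to spill and gets the Materialize shield.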
src/backend/optimizer/plan/createplan.c

@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.230 2007/05/04 01:13:44 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.231 2007/05/21 17:57:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1600,6 +1600,30 @@ create_mergejoin_plan(PlannerInfo *root,
 	else
 		innerpathkeys = best_path->jpath.innerjoinpath->pathkeys;
 
+	/*
+	 * If inner plan is a sort that is expected to spill to disk, add a
+	 * materialize node to shield it from the need to handle mark/restore.
+	 * This will allow it to perform the last merge pass on-the-fly, while
+	 * in most cases not requiring the materialize to spill to disk.
+	 *
+	 * XXX really, Sort oughta do this for itself, probably, to avoid the
+	 * overhead of a separate plan node.
+	 */
+	if (IsA(inner_plan, Sort) &&
+		sort_exceeds_work_mem((Sort *) inner_plan))
+	{
+		Plan	   *matplan = (Plan *) make_material(inner_plan);
+
+		/*
+		 * We assume the materialize will not spill to disk, and therefore
+		 * charge just cpu_tuple_cost per tuple.
+		 */
+		copy_plan_costsize(matplan, inner_plan);
+		matplan->total_cost += cpu_tuple_cost * matplan->plan_rows;
+
+		inner_plan = matplan;
+	}
+
 	/*
 	 * Compute the opfamily/strategy/nullsfirst arrays needed by the executor.
 	 * The information is in the pathkeys for the two inputs, but we need to
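The added cost is easy to quantify: with the default cpu_tuple_cost of 0.01, shielding a sort expected to return 1,000,000 rows raises the plan's total_cost by 0.01 x 1,000,000 = 10,000, while the startup cost and row estimates are copied unchanged by copy_plan_costsize. The numbers are illustrative; the point is that the planner charges only CPU, consistent with the assumption that the materialize stays in memory.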
src/backend/utils/sort/tuplestore.c

@@ -20,10 +20,12 @@
  * maxKBytes, we dump all the tuples into a temp file and then read from that
  * when needed.
  *
- * When the caller requests random access to the data, we write the temp file
+ * When the caller requests backward-scan capability, we write the temp file
  * in a format that allows either forward or backward scan.  Otherwise, only
- * forward scan is allowed.  But rewind and markpos/restorepos are allowed
- * in any case.
+ * forward scan is allowed.  Rewind and markpos/restorepos are normally allowed
+ * but can be turned off via tuplestore_set_eflags; turning off both backward
+ * scan and rewind enables truncation of the tuplestore at the mark point
+ * (if any) for minimal memory usage.
  *
  * Because we allow reading before writing is complete, there are two
  * interesting positions in the temp file: the current read position and
@@ -36,7 +38,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/sort/tuplestore.c,v 1.30 2007/01/05 22:19:47 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/sort/tuplestore.c,v 1.31 2007/05/21 17:57:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -44,6 +46,7 @@
 #include "postgres.h"
 
 #include "access/heapam.h"
+#include "executor/executor.h"
 #include "storage/buffile.h"
 #include "utils/memutils.h"
 #include "utils/tuplestore.h"
@@ -66,7 +69,7 @@ typedef enum
 struct Tuplestorestate
 {
 	TupStoreStatus status;		/* enumerated value as shown above */
-	bool		randomAccess;	/* did caller request random access? */
+	int			eflags;			/* capability flags */
 	bool		interXact;		/* keep open through transactions? */
 	long		availMem;		/* remaining memory available, in bytes */
 	BufFile    *myfile;			/* underlying file, or NULL if none */
@@ -157,11 +160,11 @@ struct Tuplestorestate
  * may or may not match the in-memory representation of the tuple ---
  * any conversion needed is the job of the writetup and readtup routines.
  *
- * If state->randomAccess is true, then the stored representation of the
- * tuple must be followed by another "unsigned int" that is a copy of the
+ * If state->eflags & EXEC_FLAG_BACKWARD, then the stored representation of
+ * the tuple must be followed by another "unsigned int" that is a copy of the
  * length --- so the total tape space used is actually sizeof(unsigned int)
  * more than the stored length value.  This allows read-backwards.  When
- * randomAccess is not true, the write/read routines may omit the extra
+ * EXEC_FLAG_BACKWARD is not set, the write/read routines may omit the extra
  * length word.
 *
 * writetup is expected to write both length words as well as the tuple
@@ -192,11 +195,12 @@ struct Tuplestorestate
 */
 
 
-static Tuplestorestate *tuplestore_begin_common(bool randomAccess,
+static Tuplestorestate *tuplestore_begin_common(int eflags,
 												bool interXact,
 												int maxKBytes);
 static void tuplestore_puttuple_common(Tuplestorestate *state, void *tuple);
 static void dumptuples(Tuplestorestate *state);
+static void tuplestore_trim(Tuplestorestate *state, int ntuples);
 static unsigned int getlen(Tuplestorestate *state, bool eofOK);
 static void *copytup_heap(Tuplestorestate *state, void *tup);
 static void writetup_heap(Tuplestorestate *state, void *tup);
@@ -209,14 +213,14 @@ static void *readtup_heap(Tuplestorestate *state, unsigned int len);
 *	Initialize for a tuple store operation.
 */
 static Tuplestorestate *
-tuplestore_begin_common(bool randomAccess, bool interXact, int maxKBytes)
+tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
 {
 	Tuplestorestate *state;
 
 	state = (Tuplestorestate *) palloc0(sizeof(Tuplestorestate));
 
 	state->status = TSS_INMEM;
-	state->randomAccess = randomAccess;
+	state->eflags = eflags;
 	state->interXact = interXact;
 	state->availMem = maxKBytes * 1024L;
 	state->myfile = NULL;
@@ -255,9 +259,18 @@ tuplestore_begin_common(bool randomAccess, bool interXact, int maxKBytes)
 Tuplestorestate *
 tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
 {
-	Tuplestorestate *state = tuplestore_begin_common(randomAccess,
-													 interXact,
-													 maxKBytes);
+	Tuplestorestate *state;
+	int			eflags;
+
+	/*
+	 * This interpretation of the meaning of randomAccess is compatible
+	 * with the pre-8.3 behavior of tuplestores.
+	 */
+	eflags = randomAccess ?
+		(EXEC_FLAG_BACKWARD | EXEC_FLAG_REWIND | EXEC_FLAG_MARK) :
+		(EXEC_FLAG_REWIND | EXEC_FLAG_MARK);
+
+	state = tuplestore_begin_common(eflags, interXact, maxKBytes);
 
 	state->copytup = copytup_heap;
 	state->writetup = writetup_heap;
@@ -266,6 +279,30 @@ tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
 	return state;
 }
 
+/*
+ * tuplestore_set_eflags
+ *
+ * Set capability flags at a finer grain than is allowed by
+ * tuplestore_begin_xxx.  This must be called before inserting any data
+ * into the tuplestore.
+ *
+ * eflags is a bitmask following the meanings used for executor node
+ * startup flags (see executor.h).  tuplestore pays attention to these bits:
+ *		EXEC_FLAG_REWIND		need rewind to start
+ *		EXEC_FLAG_BACKWARD		need backward fetch
+ *		EXEC_FLAG_MARK			need mark/restore
+ * If tuplestore_set_eflags is not called, REWIND and MARK are allowed,
+ * and BACKWARD is set per "randomAccess" in the tuplestore_begin_xxx call.
+ */
+void
+tuplestore_set_eflags(Tuplestorestate *state, int eflags)
+{
+	Assert(state->status == TSS_INMEM);
+	Assert(state->memtupcount == 0);
+
+	state->eflags = eflags;
+}
+
 /*
 * tuplestore_end
 *
@@ -420,6 +457,9 @@ tuplestore_puttuple_common(Tuplestorestate *state, void *tuple)
 *	Fetch the next tuple in either forward or back direction.
 *	Returns NULL if no more tuples.  If should_free is set, the
 *	caller must pfree the returned tuple when done with it.
+ *
+ * Backward scan is only allowed if randomAccess was set true or
+ * EXEC_FLAG_BACKWARD was specified to tuplestore_set_eflags().
 */
 static void *
 tuplestore_gettuple(Tuplestorestate *state, bool forward,
@@ -428,7 +468,7 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward,
 	unsigned int tuplen;
 	void	   *tup;
 
-	Assert(forward || state->randomAccess);
+	Assert(forward || (state->eflags & EXEC_FLAG_BACKWARD));
 
 	switch (state->status)
 	{
@@ -643,6 +683,8 @@ dumptuples(Tuplestorestate *state)
 void
 tuplestore_rescan(Tuplestorestate *state)
 {
+	Assert(state->eflags & EXEC_FLAG_REWIND);
+
 	switch (state->status)
 	{
 		case TSS_INMEM:
@@ -671,10 +713,26 @@ tuplestore_rescan(Tuplestorestate *state)
 void
 tuplestore_markpos(Tuplestorestate *state)
 {
+	Assert(state->eflags & EXEC_FLAG_MARK);
+
 	switch (state->status)
 	{
 		case TSS_INMEM:
 			state->markpos_current = state->current;
+			/*
+			 * We can truncate the tuplestore if neither backward scan nor
+			 * rewind capability are required by the caller.  There will
+			 * never be a need to back up past the mark point.
+			 *
+			 * Note: you might think we could remove all the tuples before
+			 * "current", since that one is the next to be returned.  However,
+			 * since tuplestore_gettuple returns a direct pointer to our
+			 * internal copy of the tuple, it's likely that the caller has
+			 * still got the tuple just before "current" referenced in a slot.
+			 * Don't free it yet.
+			 */
+			if (!(state->eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_REWIND)))
+				tuplestore_trim(state, 1);
 			break;
 		case TSS_WRITEFILE:
 			if (state->eof_reached)
@@ -708,6 +766,8 @@ tuplestore_markpos(Tuplestorestate *state)
 void
 tuplestore_restorepos(Tuplestorestate *state)
 {
+	Assert(state->eflags & EXEC_FLAG_MARK);
+
 	switch (state->status)
 	{
 		case TSS_INMEM:
@@ -733,6 +793,55 @@ tuplestore_restorepos(Tuplestorestate *state)
 	}
 }
 
+/*
+ * tuplestore_trim	- remove all but ntuples tuples before current
+ */
+static void
+tuplestore_trim(Tuplestorestate *state, int ntuples)
+{
+	int			nremove;
+	int			i;
+
+	/*
+	 * We don't bother trimming temp files since it usually would mean more
+	 * work than just letting them sit in kernel buffers until they age out.
+	 */
+	if (state->status != TSS_INMEM)
+		return;
+
+	nremove = state->current - ntuples;
+	if (nremove <= 0)
+		return;					/* nothing to do */
+	Assert(nremove <= state->memtupcount);
+
+	/* Release no-longer-needed tuples */
+	for (i = 0; i < nremove; i++)
+	{
+		FREEMEM(state, GetMemoryChunkSpace(state->memtuples[i]));
+		pfree(state->memtuples[i]);
+	}
+
+	/*
+	 * Slide the array down and readjust pointers.  This may look pretty
+	 * stupid, but we expect that there will usually not be very many
+	 * tuple-pointers to move, so this isn't that expensive; and it keeps
+	 * a lot of other logic simple.
+	 *
+	 * In fact, in the current usage for merge joins, it's demonstrable that
+	 * there will always be exactly one non-removed tuple; so optimize that
+	 * case.
+	 */
+	if (nremove + 1 == state->memtupcount)
+		state->memtuples[0] = state->memtuples[nremove];
+	else
+		memmove(state->memtuples, state->memtuples + nremove,
+				(state->memtupcount - nremove) * sizeof(void *));
+
+	state->memtupcount -= nremove;
+	state->current -= nremove;
+	state->markpos_current -= nremove;
+}
+
 
 /*
 * Tape interface routines
@@ -783,7 +892,7 @@ writetup_heap(Tuplestorestate *state, void *tup)
 
 	if (BufFileWrite(state->myfile, (void *) tuple, tuplen) != (size_t) tuplen)
 		elog(ERROR, "write failed");
-	if (state->randomAccess)	/* need trailing length word? */
+	if (state->eflags & EXEC_FLAG_BACKWARD)		/* need trailing length word? */
 		if (BufFileWrite(state->myfile, (void *) &tuplen,
 						 sizeof(tuplen)) != sizeof(tuplen))
 			elog(ERROR, "write failed");
@@ -804,7 +913,7 @@ readtup_heap(Tuplestorestate *state, unsigned int len)
 	if (BufFileRead(state->myfile, (void *) ((char *) tuple + sizeof(int)),
 					len - sizeof(int)) != (size_t) (len - sizeof(int)))
 		elog(ERROR, "unexpected end of data");
-	if (state->randomAccess)	/* need trailing length word? */
+	if (state->eflags & EXEC_FLAG_BACKWARD)		/* need trailing length word? */
 		if (BufFileRead(state->myfile, (void *) &tuplen,
 						sizeof(tuplen)) != sizeof(tuplen))
 			elog(ERROR, "unexpected end of data");
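A worked example of the trim semantics, with hypothetical values:

    /*
     * Suppose memtuples holds [t0 t1 t2 t3 t4], current == 3 (t3 is the
     * next tuple to be returned), and the caller marks.  tuplestore_markpos
     * sets markpos_current = 3 and calls tuplestore_trim(state, 1), which
     * keeps one tuple before "current": nremove = 3 - 1 = 2, so t0 and t1
     * are pfree'd, t2..t4 slide down to slots 0..2, and current and
     * markpos_current both become 1.  t2 is retained because the caller may
     * still reference it in a slot, as the comment in tuplestore_markpos
     * explains.
     */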
src/include/nodes/execnodes.h

@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.174 2007/05/17 19:35:08 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.175 2007/05/21 17:57:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1180,6 +1180,7 @@ typedef struct NestLoopState
 *		NumClauses		   number of mergejoinable join clauses
 *		Clauses			   info for each mergejoinable clause
 *		JoinState		   current "state" of join.  see execdefs.h
+ *		ExtraMarks		   true to issue extra Mark operations on inner scan
 *		FillOuter		   true if should emit unjoined outer tuples anyway
 *		FillInner		   true if should emit unjoined inner tuples anyway
 *		MatchedOuter	   true if found a join match for current outer tuple
@@ -1202,6 +1203,7 @@ typedef struct MergeJoinState
 	int			mj_NumClauses;
 	MergeJoinClause mj_Clauses; /* array of length mj_NumClauses */
 	int			mj_JoinState;
+	bool		mj_ExtraMarks;
 	bool		mj_FillOuter;
 	bool		mj_FillInner;
 	bool		mj_MatchedOuter;
@@ -1281,7 +1283,7 @@ typedef struct HashJoinState
 typedef struct MaterialState
 {
 	ScanState	ss;				/* its first field is NodeTag */
-	bool		randomAccess;	/* need random access to subplan output? */
+	int			eflags;			/* capability flags to pass to tuplestore */
 	bool		eof_underlying; /* reached end of underlying plan? */
 	void	   *tuplestorestate;	/* private state of tuplestore.c */
 } MaterialState;
src/include/optimizer/cost.h

@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.86 2007/05/04 01:13:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.87 2007/05/21 17:57:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -75,6 +75,7 @@ extern void cost_valuesscan(Path *path, PlannerInfo *root,
 extern void cost_sort(Path *path, PlannerInfo *root,
 		  List *pathkeys, Cost input_cost, double tuples, int width,
 		  double limit_tuples);
+extern bool sort_exceeds_work_mem(Sort *sort);
 extern void cost_material(Path *path,
 			  Cost input_cost, double tuples, int width);
 extern void cost_agg(Path *path, PlannerInfo *root,
src/include/utils/tuplestore.h

@@ -22,7 +22,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/tuplestore.h,v 1.20 2007/01/05 22:20:00 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/tuplestore.h,v 1.21 2007/05/21 17:57:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -46,6 +46,8 @@ extern Tuplestorestate *tuplestore_begin_heap(bool randomAccess,
 			  bool interXact,
 			  int maxKBytes);
 
+extern void tuplestore_set_eflags(Tuplestorestate *state, int eflags);
+
 extern void tuplestore_puttupleslot(Tuplestorestate *state,
 					TupleTableSlot *slot);
 extern void tuplestore_puttuple(Tuplestorestate *state, HeapTuple tuple);
@@ -53,7 +55,6 @@ extern void tuplestore_puttuple(Tuplestorestate *state, HeapTuple tuple);
 /* tuplestore_donestoring() used to be required, but is no longer used */
 #define tuplestore_donestoring(state)	((void) 0)
 
-/* backwards scan is only allowed if randomAccess was specified 'true' */
 extern bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
 					TupleTableSlot *slot);
 extern bool tuplestore_advance(Tuplestorestate *state, bool forward);