
When the previously-chosen plan was non-partial, all pa_finished
flags for partial plans are now set, and pa_next_plan has not yet
been set to INVALID_SUBPLAN_INDEX, the previous code could go into
an infinite loop.

Report by Rajkumar Raghuwanshi.  Patch by Amit Khandekar and me.
Review by Kyotaro Horiguchi.

Discussion: http://postgr.es/m/CAJ3gD9cf43z78qY=U=H0HvOEN341qfRO-vLpnKPSviHeWgJQ5w@mail.gmail.com
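
To make the failure mode concrete, here is a rough sketch of the pre-fix
search loop in choose_next_subplan_for_worker (a hypothetical reconstruction
for illustration, not the exact pre-fix code):

    /* Loop until we find a subplan to execute. */
    while (pstate->pa_finished[pstate->pa_next_plan])
    {
        if (pstate->pa_next_plan < node->as_nplans - 1)
            pstate->pa_next_plan++;     /* advance to next plan */
        else if (append->first_partial_plan < node->as_nplans)
            pstate->pa_next_plan = append->first_partial_plan; /* wrap */
        else
            pstate->pa_next_plan = node->as_whichplan; /* arrange to bail */

        if (pstate->pa_next_plan == node->as_whichplan)
            ...                         /* "tried everything", give up */
    }

If node->as_whichplan (the previously-chosen plan) is non-partial, its index
is below first_partial_plan, so once the search wraps it only ever visits
partial-plan indexes; with every partial plan already marked finished, the
equality test can never fire and the loop never terminates.  The fixed code
below instead records the starting point of the search in node->as_whichplan
and wraps back only when that starting point lies beyond first_partial_plan.
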
/*-------------------------------------------------------------------------
 *
 * nodeAppend.c
 *    routines to handle append nodes.
 *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/executor/nodeAppend.c
 *
 *-------------------------------------------------------------------------
 */
/* INTERFACE ROUTINES
 *      ExecInitAppend   - initialize the append node
 *      ExecAppend       - retrieve the next tuple from the node
 *      ExecEndAppend    - shut down the append node
 *      ExecReScanAppend - rescan the append node
 *
 *   NOTES
 *      Each append node contains a list of one or more subplans which
 *      must be iteratively processed (forwards or backwards).
 *      Tuples are retrieved by executing the 'whichplan'th subplan
 *      until the subplan stops returning tuples, at which point that
 *      plan is shut down and the next started up.
 *
 *      Append nodes don't make use of their left and right
 *      subtrees, rather they maintain a list of subplans so
 *      a typical append node looks like this in the plan tree:
 *
 *                 ...
 *                 /
 *              Append -------+------+------+--- nil
 *              /   \         |      |      |
 *            nil   nil      ...    ...    ...
 *                               subplans
 *
 *      Append nodes are currently used for unions, and to support
 *      inheritance queries, where several relations need to be scanned.
 *      For example, in our standard person/student/employee/student-emp
 *      example, where student and employee inherit from person
 *      and student-emp inherits from student and employee, the
 *      query:
 *
 *              select name from person
 *
 *      generates the plan:
 *
 *                |
 *              Append -------+-------+--------+--------+
 *              /   \         |       |        |        |
 *            nil   nil      Scan    Scan     Scan     Scan
 *                            |       |        |        |
 *                          person employee student student-emp
 */

#include "postgres.h"
|
|
|
|
#include "executor/execdebug.h"
|
|
#include "executor/nodeAppend.h"
|
|
#include "miscadmin.h"
|
|
|
|
/* Shared state for parallel-aware Append. */
struct ParallelAppendState
{
    LWLock      pa_lock;        /* mutual exclusion to choose next subplan */
    int         pa_next_plan;   /* next plan to choose by any worker */

    /*
     * pa_finished[i] should be true if no more workers should select subplan
     * i.  For a non-partial plan, this should be set to true as soon as a
     * worker selects the plan; for a partial plan, it remains false until
     * some worker executes the plan to completion.
     */
    bool        pa_finished[FLEXIBLE_ARRAY_MEMBER];
};
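
/*
 * Illustrative example (added commentary, not in the original source):
 * suppose an Append has four subplans, of which 0 and 1 are non-partial
 * and 2 and 3 are partial (first_partial_plan == 2).  As soon as some
 * worker claims subplan 0, pa_finished becomes {true, false, false, false}
 * so that no other worker picks it up.  pa_finished[2] and pa_finished[3]
 * stay false while workers cooperate on those plans, flipping to true only
 * when a worker runs one of them to completion.
 */
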
#define INVALID_SUBPLAN_INDEX       -1

static TupleTableSlot *ExecAppend(PlanState *pstate);
static bool choose_next_subplan_locally(AppendState *node);
static bool choose_next_subplan_for_leader(AppendState *node);
static bool choose_next_subplan_for_worker(AppendState *node);

/* ----------------------------------------------------------------
 *      ExecInitAppend
 *
 *      Begin all of the subscans of the append node.
 *
 *      (This is potentially wasteful, since the entire result of the
 *      append node may not be scanned, but this way all of the
 *      structures get allocated in the executor's top level memory
 *      block instead of that of the call to ExecAppend.)
 * ----------------------------------------------------------------
 */
AppendState *
ExecInitAppend(Append *node, EState *estate, int eflags)
{
    AppendState *appendstate = makeNode(AppendState);
    PlanState **appendplanstates;
    int         nplans;
    int         i;
    ListCell   *lc;

    /* check for unsupported flags */
    Assert(!(eflags & EXEC_FLAG_MARK));

    /*
     * Lock the non-leaf tables in the partition tree controlled by this
     * node. It's a no-op for non-partitioned parent tables.
     */
    ExecLockNonLeafAppendTables(node->partitioned_rels, estate);

    /*
     * Set up empty vector of subplan states
     */
    nplans = list_length(node->appendplans);

    appendplanstates = (PlanState **) palloc0(nplans * sizeof(PlanState *));

    /*
     * create new AppendState for our append node
     */
    appendstate->ps.plan = (Plan *) node;
    appendstate->ps.state = estate;
    appendstate->ps.ExecProcNode = ExecAppend;
    appendstate->appendplans = appendplanstates;
    appendstate->as_nplans = nplans;

    /*
     * Miscellaneous initialization
     *
     * Append plans don't have expression contexts because they never call
     * ExecQual or ExecProject.
     */

    /*
     * append nodes still have Result slots, which hold pointers to tuples,
     * so we have to initialize them.
     */
    ExecInitResultTupleSlot(estate, &appendstate->ps);

    /*
     * call ExecInitNode on each of the plans to be executed and save the
     * results into the array "appendplans".
     */
    i = 0;
    foreach(lc, node->appendplans)
    {
        Plan       *initNode = (Plan *) lfirst(lc);

        appendplanstates[i] = ExecInitNode(initNode, estate, eflags);
        i++;
    }

    /*
     * initialize output tuple type
     */
    ExecAssignResultTypeFromTL(&appendstate->ps);
    appendstate->ps.ps_ProjInfo = NULL;

    /*
     * Parallel-aware append plans must choose the first subplan to execute
     * by looking at shared memory, but non-parallel-aware append plans can
     * always start with the first subplan.
     */
    appendstate->as_whichplan =
        appendstate->ps.plan->parallel_aware ? INVALID_SUBPLAN_INDEX : 0;

    /* If parallel-aware, this will be overridden later. */
    appendstate->choose_next_subplan = choose_next_subplan_locally;

    return appendstate;
}

/* ----------------------------------------------------------------
 *      ExecAppend
 *
 *      Handles iteration over multiple subplans.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecAppend(PlanState *pstate)
{
    AppendState *node = castNode(AppendState, pstate);

    /* If no subplan has been chosen, we must choose one before proceeding. */
    if (node->as_whichplan == INVALID_SUBPLAN_INDEX &&
        !node->choose_next_subplan(node))
        return ExecClearTuple(node->ps.ps_ResultTupleSlot);

    for (;;)
    {
        PlanState  *subnode;
        TupleTableSlot *result;

        CHECK_FOR_INTERRUPTS();

        /*
         * figure out which subplan we are currently processing
         */
        Assert(node->as_whichplan >= 0 && node->as_whichplan < node->as_nplans);
        subnode = node->appendplans[node->as_whichplan];

        /*
         * get a tuple from the subplan
         */
        result = ExecProcNode(subnode);

        if (!TupIsNull(result))
        {
            /*
             * If the subplan gave us something then return it as-is. We do
             * NOT make use of the result slot that was set up in
             * ExecInitAppend; there's no need for it.
             */
            return result;
        }

        /* choose new subplan; if none, we're done */
        if (!node->choose_next_subplan(node))
            return ExecClearTuple(node->ps.ps_ResultTupleSlot);
    }
}

/* ----------------------------------------------------------------
 *      ExecEndAppend
 *
 *      Shuts down the subscans of the append node.
 *
 *      Returns nothing of interest.
 * ----------------------------------------------------------------
 */
void
ExecEndAppend(AppendState *node)
{
    PlanState **appendplans;
    int         nplans;
    int         i;

    /*
     * get information from the node
     */
    appendplans = node->appendplans;
    nplans = node->as_nplans;

    /*
     * shut down each of the subscans
     */
    for (i = 0; i < nplans; i++)
        ExecEndNode(appendplans[i]);
}

void
ExecReScanAppend(AppendState *node)
{
    int         i;

    for (i = 0; i < node->as_nplans; i++)
    {
        PlanState  *subnode = node->appendplans[i];

        /*
         * ExecReScan doesn't know about my subplans, so I have to do
         * changed-parameter signaling myself.
         */
        if (node->ps.chgParam != NULL)
            UpdateChangedParamSet(subnode, node->ps.chgParam);

        /*
         * If chgParam of subnode is not null then plan will be re-scanned
         * by first ExecProcNode.
         */
        if (subnode->chgParam == NULL)
            ExecReScan(subnode);
    }

    node->as_whichplan =
        node->ps.plan->parallel_aware ? INVALID_SUBPLAN_INDEX : 0;
}

/* ----------------------------------------------------------------
 *                      Parallel Append Support
 * ----------------------------------------------------------------
 */

/* ----------------------------------------------------------------
 *      ExecAppendEstimate
 *
 *      Compute the amount of space we'll need in the parallel
 *      query DSM, and inform pcxt->estimator about our needs.
 * ----------------------------------------------------------------
 */
void
ExecAppendEstimate(AppendState *node,
                   ParallelContext *pcxt)
{
    node->pstate_len =
        add_size(offsetof(ParallelAppendState, pa_finished),
                 sizeof(bool) * node->as_nplans);

    shm_toc_estimate_chunk(&pcxt->estimator, node->pstate_len);
    shm_toc_estimate_keys(&pcxt->estimator, 1);
}

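/*
 * Worked size computation (added for illustration): with as_nplans == 4,
 * pstate_len is offsetof(ParallelAppendState, pa_finished) plus
 * 4 * sizeof(bool), i.e. the fixed header (pa_lock and pa_next_plan, plus
 * any padding) followed by one flag per subplan.  add_size() is simply the
 * overflow-checked form of the addition.
 */
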
/* ----------------------------------------------------------------
 *      ExecAppendInitializeDSM
 *
 *      Set up shared state for Parallel Append.
 * ----------------------------------------------------------------
 */
void
ExecAppendInitializeDSM(AppendState *node,
                        ParallelContext *pcxt)
{
    ParallelAppendState *pstate;

    pstate = shm_toc_allocate(pcxt->toc, node->pstate_len);
    memset(pstate, 0, node->pstate_len);
    LWLockInitialize(&pstate->pa_lock, LWTRANCHE_PARALLEL_APPEND);
    shm_toc_insert(pcxt->toc, node->ps.plan->plan_node_id, pstate);

    node->as_pstate = pstate;
    node->choose_next_subplan = choose_next_subplan_for_leader;
}

/* ----------------------------------------------------------------
 *      ExecAppendReInitializeDSM
 *
 *      Reset shared state before beginning a fresh scan.
 * ----------------------------------------------------------------
 */
void
ExecAppendReInitializeDSM(AppendState *node, ParallelContext *pcxt)
{
    ParallelAppendState *pstate = node->as_pstate;

    pstate->pa_next_plan = 0;
    memset(pstate->pa_finished, 0, sizeof(bool) * node->as_nplans);
}

/* ----------------------------------------------------------------
 *      ExecAppendInitializeWorker
 *
 *      Copy relevant information from TOC into planstate, and initialize
 *      whatever is required to choose and execute the optimal subplan.
 * ----------------------------------------------------------------
 */
void
ExecAppendInitializeWorker(AppendState *node, ParallelWorkerContext *pwcxt)
{
    node->as_pstate = shm_toc_lookup(pwcxt->toc, node->ps.plan->plan_node_id, false);
    node->choose_next_subplan = choose_next_subplan_for_worker;
}

/* ----------------------------------------------------------------
 *      choose_next_subplan_locally
 *
 *      Choose next subplan for a non-parallel-aware Append,
 *      returning false if there are no more.
 * ----------------------------------------------------------------
 */
static bool
choose_next_subplan_locally(AppendState *node)
{
    int         whichplan = node->as_whichplan;

    /* We should never see INVALID_SUBPLAN_INDEX in this case. */
    Assert(whichplan >= 0 && whichplan <= node->as_nplans);

    if (ScanDirectionIsForward(node->ps.state->es_direction))
    {
        if (whichplan >= node->as_nplans - 1)
            return false;

        node->as_whichplan++;
    }
    else
    {
        if (whichplan <= 0)
            return false;

        node->as_whichplan--;
    }

    return true;
}

/* ----------------------------------------------------------------
 *      choose_next_subplan_for_leader
 *
 *      Try to pick a plan which doesn't commit us to doing much
 *      work locally, so that as much work as possible is done in
 *      the workers.  Cheapest subplans are at the end.
 * ----------------------------------------------------------------
 */
static bool
choose_next_subplan_for_leader(AppendState *node)
{
    ParallelAppendState *pstate = node->as_pstate;
    Append     *append = (Append *) node->ps.plan;

    /* Backward scan is not supported by parallel-aware plans */
    Assert(ScanDirectionIsForward(node->ps.state->es_direction));

    LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);

    if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
    {
        /* Mark just-completed subplan as finished. */
        node->as_pstate->pa_finished[node->as_whichplan] = true;
    }
    else
    {
        /* Start with last subplan. */
        node->as_whichplan = node->as_nplans - 1;
    }

    /* Loop until we find a subplan to execute. */
    while (pstate->pa_finished[node->as_whichplan])
    {
        if (node->as_whichplan == 0)
        {
            pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
            node->as_whichplan = INVALID_SUBPLAN_INDEX;
            LWLockRelease(&pstate->pa_lock);
            return false;
        }
        node->as_whichplan--;
    }

    /* If non-partial, immediately mark as finished. */
    if (node->as_whichplan < append->first_partial_plan)
        node->as_pstate->pa_finished[node->as_whichplan] = true;

    LWLockRelease(&pstate->pa_lock);

    return true;
}

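/*
 * Illustrative note (added commentary): the subplans of a Parallel Append
 * are ordered so that the cheapest ones come last, so by starting at the
 * end and walking backward the leader claims the cheapest remaining
 * subplan first, e.g. with per-subplan costs {100, 50, 10} it claims the
 * cost-10 plan, leaving the expensive scans to the workers.
 */
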
/* ----------------------------------------------------------------
 *      choose_next_subplan_for_worker
 *
 *      Choose next subplan for a parallel-aware Append, returning
 *      false if there are no more.
 *
 *      We start from the first plan and advance through the list;
 *      when we get back to the end, we loop back to the first
 *      partial plan.  This assigns the non-partial plans first in
 *      order of descending cost and then spreads out the workers
 *      as evenly as possible across the remaining partial plans.
 * ----------------------------------------------------------------
 */
static bool
choose_next_subplan_for_worker(AppendState *node)
{
    ParallelAppendState *pstate = node->as_pstate;
    Append     *append = (Append *) node->ps.plan;

    /* Backward scan is not supported by parallel-aware plans */
    Assert(ScanDirectionIsForward(node->ps.state->es_direction));

    LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);

    /* Mark just-completed subplan as finished. */
    if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
        node->as_pstate->pa_finished[node->as_whichplan] = true;

    /* If all the plans are already done, we have nothing to do */
    if (pstate->pa_next_plan == INVALID_SUBPLAN_INDEX)
    {
        LWLockRelease(&pstate->pa_lock);
        return false;
    }

    /* Save the plan from which we are starting the search. */
    node->as_whichplan = pstate->pa_next_plan;

    /* Loop until we find a subplan to execute. */
    while (pstate->pa_finished[pstate->pa_next_plan])
    {
        if (pstate->pa_next_plan < node->as_nplans - 1)
        {
            /* Advance to next plan. */
            pstate->pa_next_plan++;
        }
        else if (node->as_whichplan > append->first_partial_plan)
        {
            /* Loop back to first partial plan. */
            pstate->pa_next_plan = append->first_partial_plan;
        }
        else
        {
            /*
             * At last plan, and either there are no partial plans or we've
             * tried them all.  Arrange to bail out.
             */
            pstate->pa_next_plan = node->as_whichplan;
        }

        if (pstate->pa_next_plan == node->as_whichplan)
        {
            /* We've tried everything! */
            pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
            LWLockRelease(&pstate->pa_lock);
            return false;
        }
    }

    /* Pick the plan we found, and advance pa_next_plan one more time. */
    node->as_whichplan = pstate->pa_next_plan++;
    if (pstate->pa_next_plan >= node->as_nplans)
    {
        if (append->first_partial_plan < node->as_nplans)
            pstate->pa_next_plan = append->first_partial_plan;
        else
        {
            /*
             * We have only non-partial plans, and we already chose the last
             * one; so arrange for the other workers to immediately bail out.
             */
            pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
        }
    }

    /* If non-partial, immediately mark as finished. */
    if (node->as_whichplan < append->first_partial_plan)
        node->as_pstate->pa_finished[node->as_whichplan] = true;

    LWLockRelease(&pstate->pa_lock);

    return true;
}
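
/*
 * Worked example (added for illustration, not in the original source):
 * consider four subplans of which 0 and 1 are non-partial and 2 and 3 are
 * partial, so first_partial_plan == 2 and pa_next_plan starts at 0.
 * Worker A claims plan 0, which is immediately marked finished, and
 * pa_next_plan advances to 1; worker B likewise claims plan 1.  Worker C
 * claims partial plan 2 (left unfinished so others can help), advancing
 * pa_next_plan to 3.  When worker A exhausts plan 0 and returns, it claims
 * plan 3, and pa_next_plan wraps back to first_partial_plan; from then on
 * workers round-robin between plans 2 and 3 until each has been executed
 * to completion and marked finished, at which point the search cycles back
 * to its starting point and pa_next_plan is set to INVALID_SUBPLAN_INDEX.
 */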