Improve commentary about run-time partition pruning data structures.
No code changes except for a couple of new Asserts. David Rowley and Tom Lane Discussion: https://postgr.es/m/CAKJS1f-6GODRNgEtdPxCnAPme2h2hTztB6LmtfdmcYAAOE0kQg@mail.gmail.com
This commit is contained in:
parent
e5d11b91e4
commit
4e23236403
@ -1451,7 +1451,8 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* If we need to build partitioned_rels, accumulate the partitioned
|
* If we need to build partitioned_rels, accumulate the partitioned
|
||||||
* rels for this child.
|
* rels for this child. We must ensure that parents are always listed
|
||||||
|
* before their child partitioned tables.
|
||||||
*/
|
*/
|
||||||
if (build_partitioned_rels)
|
if (build_partitioned_rels)
|
||||||
{
|
{
|
||||||
|
@ -202,12 +202,17 @@ make_partition_pruneinfo(PlannerInfo *root, List *partition_rels,
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocate two arrays to store the 1-based indexes of the 'subpaths' and
|
* Construct two temporary arrays to map from planner relids to subplan
|
||||||
* 'partitioned_rels' by relid.
|
* and sub-partition indexes. For convenience, we use 1-based indexes
|
||||||
|
* here, so that zero can represent an un-filled array entry.
|
||||||
*/
|
*/
|
||||||
relid_subplan_map = palloc0(sizeof(int) * root->simple_rel_array_size);
|
relid_subplan_map = palloc0(sizeof(int) * root->simple_rel_array_size);
|
||||||
relid_subpart_map = palloc0(sizeof(int) * root->simple_rel_array_size);
|
relid_subpart_map = palloc0(sizeof(int) * root->simple_rel_array_size);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* relid_subplan_map maps relid of a leaf partition to the index in
|
||||||
|
* 'subpaths' of the scan plan for that partition.
|
||||||
|
*/
|
||||||
i = 1;
|
i = 1;
|
||||||
foreach(lc, subpaths)
|
foreach(lc, subpaths)
|
||||||
{
|
{
|
||||||
@ -216,17 +221,27 @@ make_partition_pruneinfo(PlannerInfo *root, List *partition_rels,
|
|||||||
|
|
||||||
Assert(IS_SIMPLE_REL(pathrel));
|
Assert(IS_SIMPLE_REL(pathrel));
|
||||||
Assert(pathrel->relid < root->simple_rel_array_size);
|
Assert(pathrel->relid < root->simple_rel_array_size);
|
||||||
|
/* No duplicates please */
|
||||||
|
Assert(relid_subplan_map[pathrel->relid] == 0);
|
||||||
|
|
||||||
relid_subplan_map[pathrel->relid] = i++;
|
relid_subplan_map[pathrel->relid] = i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Likewise for the partition_rels */
|
/*
|
||||||
|
* relid_subpart_map maps relid of a non-leaf partition to the index in
|
||||||
|
* 'partition_rels' of that rel (which will also be the index in the
|
||||||
|
* returned PartitionPruneInfo list of the info for that partition).
|
||||||
|
*/
|
||||||
i = 1;
|
i = 1;
|
||||||
foreach(lc, partition_rels)
|
foreach(lc, partition_rels)
|
||||||
{
|
{
|
||||||
Index rti = lfirst_int(lc);
|
Index rti = lfirst_int(lc);
|
||||||
|
|
||||||
Assert(rti < root->simple_rel_array_size);
|
Assert(rti < root->simple_rel_array_size);
|
||||||
|
/* No duplicates please */
|
||||||
|
Assert(relid_subpart_map[rti] == 0);
|
||||||
|
/* Same rel cannot be both leaf and non-leaf */
|
||||||
|
Assert(relid_subplan_map[rti] == 0);
|
||||||
|
|
||||||
relid_subpart_map[rti] = i++;
|
relid_subpart_map[rti] = i++;
|
||||||
}
|
}
|
||||||
@ -287,16 +302,16 @@ make_partition_pruneinfo(PlannerInfo *root, List *partition_rels,
|
|||||||
return NIL;
|
return NIL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Construct the subplan and subpart maps for this partitioning level.
|
||||||
|
* Here we convert to zero-based indexes, with -1 for empty entries.
|
||||||
|
* Also construct a Bitmapset of all partitions that are present (that
|
||||||
|
* is, not pruned already).
|
||||||
|
*/
|
||||||
subplan_map = (int *) palloc(nparts * sizeof(int));
|
subplan_map = (int *) palloc(nparts * sizeof(int));
|
||||||
subpart_map = (int *) palloc(nparts * sizeof(int));
|
subpart_map = (int *) palloc(nparts * sizeof(int));
|
||||||
present_parts = NULL;
|
present_parts = NULL;
|
||||||
|
|
||||||
/*
|
|
||||||
* Loop over each partition of the partitioned rel and record the
|
|
||||||
* subpath index for each. Any partitions which are not present in
|
|
||||||
* the subpaths List will be set to -1, and any sub-partitioned table
|
|
||||||
* which is not present will also be set to -1.
|
|
||||||
*/
|
|
||||||
for (i = 0; i < nparts; i++)
|
for (i = 0; i < nparts; i++)
|
||||||
{
|
{
|
||||||
RelOptInfo *partrel = subpart->part_rels[i];
|
RelOptInfo *partrel = subpart->part_rels[i];
|
||||||
@ -305,12 +320,6 @@ make_partition_pruneinfo(PlannerInfo *root, List *partition_rels,
|
|||||||
|
|
||||||
subplan_map[i] = subplanidx;
|
subplan_map[i] = subplanidx;
|
||||||
subpart_map[i] = subpartidx;
|
subpart_map[i] = subpartidx;
|
||||||
|
|
||||||
/*
|
|
||||||
* Record the indexes of all the partition indexes that we have
|
|
||||||
* subplans or subparts for. This allows an optimization to skip
|
|
||||||
* attempting any run-time pruning when it's irrelevant.
|
|
||||||
*/
|
|
||||||
if (subplanidx >= 0 || subpartidx >= 0)
|
if (subplanidx >= 0 || subpartidx >= 0)
|
||||||
present_parts = bms_add_member(present_parts, i);
|
present_parts = bms_add_member(present_parts, i);
|
||||||
}
|
}
|
||||||
|
@ -113,30 +113,27 @@ typedef struct PartitionTupleRouting
|
|||||||
} PartitionTupleRouting;
|
} PartitionTupleRouting;
|
||||||
|
|
||||||
/*-----------------------
|
/*-----------------------
|
||||||
* PartitionPruningData - Encapsulates all information required to support
|
* PartitionPruningData - Per-partitioned-table data for run-time pruning
|
||||||
* elimination of partitions in plan types which support arbitrary Lists of
|
* of partitions. For a multilevel partitioned table, we have one of these
|
||||||
* subplans. Information stored here allows the partition pruning functions
|
* for the topmost partition plus one for each non-leaf child partition,
|
||||||
* to be called and the return value of partition indexes translated into the
|
* ordered such that parents appear before their children.
|
||||||
* subpath indexes of plan types such as Append, thus allowing us to bypass a
|
|
||||||
* subplan when we can prove that no tuple matching the 'pruning_steps' will
|
|
||||||
* be found within.
|
|
||||||
*
|
*
|
||||||
* subplan_map An array containing the subplan index which
|
* subplan_map[] and subpart_map[] have the same definitions as in
|
||||||
* matches this partition index, or -1 if the
|
* PartitionPruneInfo (see plannodes.h); though note that here,
|
||||||
* subplan has been pruned already.
|
* subpart_map contains indexes into PartitionPruneState.partprunedata[].
|
||||||
* subpart_map An array containing the index into the
|
*
|
||||||
* partprunedata array in PartitionPruneState, or
|
* subplan_map Subplan index by partition index, or -1.
|
||||||
* -1 if there is no such element in that array.
|
* subpart_map Subpart index by partition index, or -1.
|
||||||
* present_parts A Bitmapset of the partition indexes that we
|
* present_parts A Bitmapset of the partition indexes that we
|
||||||
* have subplans mapped for.
|
* have subplans or subparts for.
|
||||||
* context Contains the context details required to call
|
* context Contains the context details required to call
|
||||||
* the partition pruning code.
|
* the partition pruning code.
|
||||||
* pruning_steps List of PartitionPruneSteps used to
|
* pruning_steps List of PartitionPruneSteps used to
|
||||||
* perform the actual pruning.
|
* perform the actual pruning.
|
||||||
* do_initial_prune true if pruning should be performed during
|
* do_initial_prune true if pruning should be performed during
|
||||||
* executor startup.
|
* executor startup (for this partitioning level).
|
||||||
* do_exec_prune true if pruning should be performed during
|
* do_exec_prune true if pruning should be performed during
|
||||||
* executor run.
|
* executor run (for this partitioning level).
|
||||||
*-----------------------
|
*-----------------------
|
||||||
*/
|
*/
|
||||||
typedef struct PartitionPruningData
|
typedef struct PartitionPruningData
|
||||||
@ -152,16 +149,18 @@ typedef struct PartitionPruningData
|
|||||||
|
|
||||||
/*-----------------------
|
/*-----------------------
|
||||||
* PartitionPruneState - State object required for plan nodes to perform
|
* PartitionPruneState - State object required for plan nodes to perform
|
||||||
* partition pruning elimination of their subplans. This encapsulates a
|
* run-time partition pruning.
|
||||||
* flattened hierarchy of PartitionPruningData structs.
|
*
|
||||||
* This struct can be attached to plan types which support arbitrary Lists of
|
* This struct can be attached to plan types which support arbitrary Lists of
|
||||||
* subplans containing partitions to allow subplans to be eliminated due to
|
* subplans containing partitions to allow subplans to be eliminated due to
|
||||||
* the clauses being unable to match to any tuple that the subplan could
|
* the clauses being unable to match to any tuple that the subplan could
|
||||||
* possibly produce.
|
* possibly produce. Note that we currently support only one partitioned
|
||||||
|
* table per parent plan node, hence partprunedata[] need describe only one
|
||||||
|
* partitioning hierarchy.
|
||||||
*
|
*
|
||||||
* partprunedata Array of PartitionPruningData for the plan's target
|
* partprunedata Array of PartitionPruningData for the plan's
|
||||||
* partitioned relation. First element contains the
|
* partitioned relation, ordered such that parent tables
|
||||||
* details for the target partitioned table.
|
* appear before children (hence, topmost table is first).
|
||||||
* num_partprunedata Number of items in 'partprunedata' array.
|
* num_partprunedata Number of items in 'partprunedata' array.
|
||||||
* do_initial_prune true if pruning should be performed during executor
|
* do_initial_prune true if pruning should be performed during executor
|
||||||
* startup (at any hierarchy level).
|
* startup (at any hierarchy level).
|
||||||
|
@ -1059,18 +1059,35 @@ typedef struct PlanRowMark
|
|||||||
* plan types which support arbitrary numbers of subplans, such as Append.
|
* plan types which support arbitrary numbers of subplans, such as Append.
|
||||||
* We also store various details to tell the executor when it should be
|
* We also store various details to tell the executor when it should be
|
||||||
* performing partition pruning.
|
* performing partition pruning.
|
||||||
|
*
|
||||||
|
* Each PartitionPruneInfo describes the partitioning rules for a single
|
||||||
|
* partitioned table (a/k/a level of partitioning). For a multilevel
|
||||||
|
* partitioned table, we have a List of PartitionPruneInfos, where the
|
||||||
|
* first entry represents the topmost partitioned table and additional
|
||||||
|
* entries represent non-leaf child partitions, ordered such that parents
|
||||||
|
* appear before their children.
|
||||||
|
*
|
||||||
|
* subplan_map[] and subpart_map[] are indexed by partition index (where
|
||||||
|
* zero is the topmost partition, and non-leaf partitions must come before
|
||||||
|
* their children). For a leaf partition p, subplan_map[p] contains the
|
||||||
|
* zero-based index of the partition's subplan in the parent plan's subplan
|
||||||
|
* list; it is -1 if the partition is non-leaf or has been pruned. For a
|
||||||
|
* non-leaf partition p, subpart_map[p] contains the zero-based index of
|
||||||
|
* that sub-partition's PartitionPruneInfo in the plan's PartitionPruneInfo
|
||||||
|
* list; it is -1 if the partition is a leaf or has been pruned. All these
|
||||||
|
* indexes are global across the whole partitioned table and Append plan node.
|
||||||
*/
|
*/
|
||||||
typedef struct PartitionPruneInfo
|
typedef struct PartitionPruneInfo
|
||||||
{
|
{
|
||||||
NodeTag type;
|
NodeTag type;
|
||||||
Oid reloid; /* Oid of partition rel */
|
Oid reloid; /* OID of partition rel for this level */
|
||||||
List *pruning_steps; /* List of PartitionPruneStep, see below */
|
List *pruning_steps; /* List of PartitionPruneStep, see below */
|
||||||
Bitmapset *present_parts; /* Indexes of all partitions which subplans
|
Bitmapset *present_parts; /* Indexes of all partitions which subplans or
|
||||||
* are present for. */
|
* subparts are present for. */
|
||||||
int nparts; /* Length of subplan_map[] and subpart_map[] */
|
int nparts; /* Length of subplan_map[] and subpart_map[] */
|
||||||
int nexprs; /* Length of hasexecparam[] */
|
int nexprs; /* Length of hasexecparam[] */
|
||||||
int *subplan_map; /* subplan index by partition id, or -1 */
|
int *subplan_map; /* subplan index by partition index, or -1 */
|
||||||
int *subpart_map; /* subpart index by partition id, or -1 */
|
int *subpart_map; /* subpart index by partition index, or -1 */
|
||||||
bool *hasexecparam; /* true if corresponding pruning_step contains
|
bool *hasexecparam; /* true if corresponding pruning_step contains
|
||||||
* any PARAM_EXEC Params. */
|
* any PARAM_EXEC Params. */
|
||||||
bool do_initial_prune; /* true if pruning should be performed
|
bool do_initial_prune; /* true if pruning should be performed
|
||||||
|
@ -552,8 +552,8 @@ typedef struct PartitionSchemeData *PartitionScheme;
|
|||||||
* part_rels - RelOptInfos for each partition
|
* part_rels - RelOptInfos for each partition
|
||||||
* partexprs, nullable_partexprs - Partition key expressions
|
* partexprs, nullable_partexprs - Partition key expressions
|
||||||
* partitioned_child_rels - RT indexes of unpruned partitions of
|
* partitioned_child_rels - RT indexes of unpruned partitions of
|
||||||
* relation that are partitioned tables
|
* this relation that are partitioned tables
|
||||||
* themselves
|
* themselves, in hierarchical order
|
||||||
*
|
*
|
||||||
* Note: A base relation always has only one set of partition keys, but a join
|
* Note: A base relation always has only one set of partition keys, but a join
|
||||||
* relation may have as many sets of partition keys as the number of relations
|
* relation may have as many sets of partition keys as the number of relations
|
||||||
|
Loading…
x
Reference in New Issue
Block a user