diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 4f2f38168d..be5439dd9d 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -471,7 +471,6 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params, params->freeze_table_age, params->multixact_freeze_min_age, params->multixact_freeze_table_age, - true, /* we must be a top-level command */ &OldestXmin, &FreezeLimit, &xidFullScanLimit, &MultiXactCutoff, &mxactFullScanLimit); diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 0d647e912c..04d12a7ece 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -67,13 +67,10 @@ typedef struct } RelToCluster; -static void rebuild_relation(Relation OldHeap, Oid indexOid, - bool isTopLevel, bool verbose); +static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose); static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, - bool isTopLevel, bool verbose, - bool *pSwapToastByContent, - TransactionId *pFreezeXid, - MultiXactId *pCutoffMulti); + bool verbose, bool *pSwapToastByContent, + TransactionId *pFreezeXid, MultiXactId *pCutoffMulti); static List *get_tables_to_cluster(MemoryContext cluster_context); @@ -173,7 +170,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel) table_close(rel, NoLock); /* Do the job. */ - cluster_rel(tableOid, indexOid, stmt->options, isTopLevel); + cluster_rel(tableOid, indexOid, stmt->options); } else { @@ -222,8 +219,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel) PushActiveSnapshot(GetTransactionSnapshot()); /* Do the job. */ cluster_rel(rvtc->tableOid, rvtc->indexOid, - stmt->options | CLUOPT_RECHECK, - isTopLevel); + stmt->options | CLUOPT_RECHECK); PopActiveSnapshot(); CommitTransactionCommand(); } @@ -254,7 +250,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel) * and error messages should refer to the operation as VACUUM not CLUSTER. 
*/ void -cluster_rel(Oid tableOid, Oid indexOid, int options, bool isTopLevel) +cluster_rel(Oid tableOid, Oid indexOid, int options) { Relation OldHeap; bool verbose = ((options & CLUOPT_VERBOSE) != 0); @@ -404,7 +400,7 @@ cluster_rel(Oid tableOid, Oid indexOid, int options, bool isTopLevel) TransferPredicateLocksToHeapRelation(OldHeap); /* rebuild_relation does all the dirty work */ - rebuild_relation(OldHeap, indexOid, isTopLevel, verbose); + rebuild_relation(OldHeap, indexOid, verbose); /* NB: rebuild_relation does table_close() on OldHeap */ @@ -549,12 +545,11 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal) * * OldHeap: table to rebuild --- must be opened and exclusive-locked! * indexOid: index to cluster by, or InvalidOid to rewrite in physical order. - * isTopLevel: should be passed down from ProcessUtility. * * NB: this routine closes OldHeap at the right time; caller should not. */ static void -rebuild_relation(Relation OldHeap, Oid indexOid, bool isTopLevel, bool verbose) +rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose) { Oid tableOid = RelationGetRelid(OldHeap); Oid tableSpace = OldHeap->rd_rel->reltablespace; @@ -582,7 +577,7 @@ rebuild_relation(Relation OldHeap, Oid indexOid, bool isTopLevel, bool verbose) AccessExclusiveLock); /* Copy the heap data into the new table in the desired order */ - copy_table_data(OIDNewHeap, tableOid, indexOid, isTopLevel, verbose, + copy_table_data(OIDNewHeap, tableOid, indexOid, verbose, &swap_toast_by_content, &frozenXid, &cutoffMulti); /* @@ -733,8 +728,7 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence, * *pCutoffMulti receives the MultiXactId used as a cutoff point. 
*/ static void -copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, - bool isTopLevel, bool verbose, +copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, bool *pSwapToastByContent, TransactionId *pFreezeXid, MultiXactId *pCutoffMulti) { @@ -832,7 +826,7 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, * Since we're going to rewrite the whole table anyway, there's no reason * not to be aggressive about this. */ - vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, isTopLevel, + vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, &OldestXmin, &FreezeXid, NULL, &MultiXactCutoff, NULL); diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index ddeec870d8..1b6717f727 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -907,8 +907,7 @@ get_all_vacuum_rels(int options) /* * vacuum_set_xid_limits() -- compute oldestXmin and freeze cutoff points * - * Input parameters are the target relation, applicable freeze age settings, - * and isTopLevel which should be passed down from ProcessUtility. + * Input parameters are the target relation, applicable freeze age settings. 
* * The output parameters are: * - oldestXmin is the cutoff value used to distinguish whether tuples are @@ -934,7 +933,6 @@ vacuum_set_xid_limits(Relation rel, int freeze_table_age, int multixact_freeze_min_age, int multixact_freeze_table_age, - bool isTopLevel, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, @@ -950,53 +948,33 @@ vacuum_set_xid_limits(Relation rel, MultiXactId mxactLimit; MultiXactId safeMxactLimit; - if (RELATION_IS_LOCAL(rel) && !IsInTransactionBlock(isTopLevel)) - { - /* - * If we are processing a temp relation (which by prior checks must be - * one belonging to our session), and we are not inside any - * transaction block, then there can be no tuples in the rel that are - * still in-doubt, nor can there be any that are dead but possibly - * still interesting to some snapshot our session holds. We don't - * need to care whether other sessions could see such tuples, either. - * So we can aggressively set the cutoff xmin to be the nextXid. - */ - *oldestXmin = ReadNewTransactionId(); - } - else - { - /* - * Otherwise, calculate the cutoff xmin normally. - * - * We can always ignore processes running lazy vacuum. This is - * because we use these values only for deciding which tuples we must - * keep in the tables. Since lazy vacuum doesn't write its XID - * anywhere (usually no XID assigned), it's safe to ignore it. In - * theory it could be problematic to ignore lazy vacuums in a full - * vacuum, but keep in mind that only one vacuum process can be - * working on a particular table at any time, and that each vacuum is - * always an independent transaction. - */ - *oldestXmin = GetOldestNonRemovableTransactionId(rel); + /* + * We can always ignore processes running lazy vacuum. This is because we + * use these values only for deciding which tuples we must keep in the + * tables. Since lazy vacuum doesn't write its XID anywhere (usually no + * XID assigned), it's safe to ignore it. 
In theory it could be + * problematic to ignore lazy vacuums in a full vacuum, but keep in mind + * that only one vacuum process can be working on a particular table at + * any time, and that each vacuum is always an independent transaction. + */ + *oldestXmin = GetOldestNonRemovableTransactionId(rel); - if (OldSnapshotThresholdActive()) + if (OldSnapshotThresholdActive()) + { + TransactionId limit_xmin; + TimestampTz limit_ts; + + if (TransactionIdLimitedForOldSnapshots(*oldestXmin, rel, + &limit_xmin, &limit_ts)) { - TransactionId limit_xmin; - TimestampTz limit_ts; - - if (TransactionIdLimitedForOldSnapshots(*oldestXmin, rel, - &limit_xmin, &limit_ts)) - { - /* - * TODO: We should only set the threshold if we are pruning on - * the basis of the increased limits. Not as crucial here as - * it is for opportunistic pruning (which often happens at a - * much higher frequency), but would still be a significant - * improvement. - */ - SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin); - *oldestXmin = limit_xmin; - } + /* + * TODO: We should only set the threshold if we are pruning on the + * basis of the increased limits. Not as crucial here as it is + * for opportunistic pruning (which often happens at a much higher + * frequency), but would still be a significant improvement. 
+ */ + SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin); + *oldestXmin = limit_xmin; } } @@ -1930,7 +1908,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params) cluster_options |= CLUOPT_VERBOSE; /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */ - cluster_rel(relid, InvalidOid, cluster_options, true); + cluster_rel(relid, InvalidOid, cluster_options); } else table_relation_vacuum(onerel, params, vac_strategy); diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 07c5eeb749..ee4caa5115 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -131,7 +131,7 @@ typedef struct ProcArrayStruct * different types of relations. As e.g. a normal user defined table in one * database is inaccessible to backends connected to another database, a test * specific to a relation can be more aggressive than a test for a shared - * relation. Currently we track three different states: + * relation. Currently we track four different states: * * 1) GlobalVisSharedRels, which only considers an XID's * effects visible-to-everyone if neither snapshots in any database, nor a @@ -153,6 +153,9 @@ typedef struct ProcArrayStruct * I.e. the difference to GlobalVisCatalogRels is that * replication slot's catalog_xmin is not taken into account. * + * 4) GlobalVisTempRels, which only considers the current session, as temp + * tables are not visible to other sessions. + * * GlobalVisTestFor(relation) returns the appropriate state * for the relation. * @@ -234,6 +237,13 @@ typedef struct ComputeXidHorizonsResult * defined tables. */ TransactionId data_oldest_nonremovable; + + /* + * Oldest xid for which deleted tuples need to be retained in this + * session's temporary tables. + */ + TransactionId temp_oldest_nonremovable; + } ComputeXidHorizonsResult; @@ -257,12 +267,13 @@ static TransactionId standbySnapshotPendingXmin; /* * State for visibility checks on different types of relations. 
See struct - * GlobalVisState for details. As shared, catalog, and user defined + * GlobalVisState for details. As shared, catalog, normal and temporary * relations can have different horizons, one such state exists for each. */ static GlobalVisState GlobalVisSharedRels; static GlobalVisState GlobalVisCatalogRels; static GlobalVisState GlobalVisDataRels; +static GlobalVisState GlobalVisTempRels; /* * This backend's RecentXmin at the last time the accurate xmin horizon was @@ -1668,6 +1679,23 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) h->oldest_considered_running = initial; h->shared_oldest_nonremovable = initial; h->data_oldest_nonremovable = initial; + + /* + * Only modifications made by this backend affect the horizon for + * temporary relations. Instead of a check in each iteration of the + * loop over all PGPROCs it is cheaper to just initialize to the + * current top-level xid anyway. + * + * Without an assigned xid we could use a horizon as aggressive as + * ReadNewTransactionId(), but we can get away with the much cheaper + * latestCompletedXid + 1: If this backend has no xid there, by + * definition, can't be any newer changes in the temp table than + * latestCompletedXid. 
+ */ + if (TransactionIdIsValid(MyProc->xid)) + h->temp_oldest_nonremovable = MyProc->xid; + else + h->temp_oldest_nonremovable = initial; } /* @@ -1760,6 +1788,7 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) TransactionIdOlder(h->shared_oldest_nonremovable, kaxmin); h->data_oldest_nonremovable = TransactionIdOlder(h->data_oldest_nonremovable, kaxmin); + /* temp relations cannot be accessed in recovery */ } else { @@ -1785,6 +1814,7 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) h->data_oldest_nonremovable = TransactionIdRetreatedBy(h->data_oldest_nonremovable, vacuum_defer_cleanup_age); + /* defer doesn't apply to temp relations */ } /* @@ -1844,6 +1874,8 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) h->catalog_oldest_nonremovable)); Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running, h->data_oldest_nonremovable)); + Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running, + h->temp_oldest_nonremovable)); Assert(!TransactionIdIsValid(h->slot_xmin) || TransactionIdPrecedesOrEquals(h->oldest_considered_running, h->slot_xmin)); @@ -1878,6 +1910,8 @@ GetOldestNonRemovableTransactionId(Relation rel) return horizons.shared_oldest_nonremovable; else if (RelationIsAccessibleInLogicalDecoding(rel)) return horizons.catalog_oldest_nonremovable; + else if (RELATION_IS_LOCAL(rel)) + return horizons.temp_oldest_nonremovable; else return horizons.data_oldest_nonremovable; } @@ -2054,8 +2088,8 @@ GetSnapshotDataReuse(Snapshot snapshot) * RecentXmin: the xmin computed for the most recent snapshot. XIDs * older than this are known not running any more. * - * And try to advance the bounds of GlobalVisSharedRels, GlobalVisCatalogRels, - * GlobalVisDataRels for the benefit of theGlobalVisTest* family of functions. + * And try to advance the bounds of GlobalVis{Shared,Catalog,Data,Temp}Rels + * for the benefit of the GlobalVisTest* family of functions. 
* * Note: this function should probably not be called with an argument that's * not statically allocated (see xip allocation below). @@ -2357,6 +2391,15 @@ GetSnapshotData(Snapshot snapshot) GlobalVisDataRels.definitely_needed = FullTransactionIdNewer(def_vis_fxid_data, GlobalVisDataRels.definitely_needed); + /* See temp_oldest_nonremovable computation in ComputeXidHorizons() */ + if (TransactionIdIsNormal(myxid)) + GlobalVisTempRels.definitely_needed = + FullXidRelativeTo(latest_completed, myxid); + else + { + GlobalVisTempRels.definitely_needed = latest_completed; + FullTransactionIdAdvance(&GlobalVisTempRels.definitely_needed); + } /* * Check if we know that we can initialize or increase the lower @@ -2375,6 +2418,8 @@ GetSnapshotData(Snapshot snapshot) GlobalVisDataRels.maybe_needed = FullTransactionIdNewer(GlobalVisDataRels.maybe_needed, oldestfxid); + /* accurate value known */ + GlobalVisTempRels.maybe_needed = GlobalVisTempRels.definitely_needed; } RecentXmin = xmin; @@ -3892,6 +3937,8 @@ GlobalVisTestFor(Relation rel) state = &GlobalVisSharedRels; else if (need_catalog) state = &GlobalVisCatalogRels; + else if (RELATION_IS_LOCAL(rel)) + state = &GlobalVisTempRels; else state = &GlobalVisDataRels; @@ -3942,6 +3989,9 @@ GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons) GlobalVisDataRels.maybe_needed = FullXidRelativeTo(horizons->latest_completed, horizons->data_oldest_nonremovable); + GlobalVisTempRels.maybe_needed = + FullXidRelativeTo(horizons->latest_completed, + horizons->temp_oldest_nonremovable); /* * In longer running transactions it's possible that transactions we @@ -3957,6 +4007,7 @@ GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons) GlobalVisDataRels.definitely_needed = FullTransactionIdNewer(GlobalVisDataRels.maybe_needed, GlobalVisDataRels.definitely_needed); + GlobalVisTempRels.definitely_needed = GlobalVisTempRels.maybe_needed; ComputeXidHorizonsResultLastXmin = RecentXmin; } diff --git a/src/include/commands/cluster.h 
b/src/include/commands/cluster.h index 1eb144204b..e05884781b 100644 --- a/src/include/commands/cluster.h +++ b/src/include/commands/cluster.h @@ -19,8 +19,7 @@ extern void cluster(ClusterStmt *stmt, bool isTopLevel); -extern void cluster_rel(Oid tableOid, Oid indexOid, int options, - bool isTopLevel); +extern void cluster_rel(Oid tableOid, Oid indexOid, int options); extern void check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMODE lockmode); extern void mark_index_clustered(Relation rel, Oid indexOid, bool is_internal); diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index d9475c9989..a4cd721400 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -267,7 +267,6 @@ extern void vacuum_set_xid_limits(Relation rel, int freeze_min_age, int freeze_table_age, int multixact_freeze_min_age, int multixact_freeze_table_age, - bool isTopLevel, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, diff --git a/src/test/isolation/expected/horizons.out b/src/test/isolation/expected/horizons.out new file mode 100644 index 0000000000..07bbc9832c --- /dev/null +++ b/src/test/isolation/expected/horizons.out @@ -0,0 +1,281 @@ +Parsed test spec with 2 sessions + +starting permutation: pruner_create_perm ll_start pruner_query_plan pruner_query pruner_query pruner_delete pruner_query pruner_query ll_commit pruner_drop +step pruner_create_perm: + CREATE TABLE horizons_tst (data int unique) WITH (autovacuum_enabled = off); + INSERT INTO horizons_tst(data) VALUES(1),(2); + +step ll_start: + BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; + SELECT 1; + +?column? 
+ +1 +step pruner_query_plan: + EXPLAIN (COSTS OFF) SELECT * FROM horizons_tst ORDER BY data; + +QUERY PLAN + +Index Only Scan using horizons_tst_data_key on horizons_tst +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? + +2 +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? + +2 +step pruner_delete: + DELETE FROM horizons_tst; + +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? + +2 +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? + +2 +step ll_commit: COMMIT; +step pruner_drop: + DROP TABLE horizons_tst; + + +starting permutation: pruner_create_temp ll_start pruner_query_plan pruner_query pruner_query pruner_delete pruner_query pruner_query ll_commit pruner_drop +step pruner_create_temp: + CREATE TEMPORARY TABLE horizons_tst (data int unique) WITH (autovacuum_enabled = off); + INSERT INTO horizons_tst(data) VALUES(1),(2); + +step ll_start: + BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; + SELECT 1; + +?column? + +1 +step pruner_query_plan: + EXPLAIN (COSTS OFF) SELECT * FROM horizons_tst ORDER BY data; + +QUERY PLAN + +Index Only Scan using horizons_tst_data_key on horizons_tst +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? + +2 +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? 
+ +2 +step pruner_delete: + DELETE FROM horizons_tst; + +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? + +2 +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? + +0 +step ll_commit: COMMIT; +step pruner_drop: + DROP TABLE horizons_tst; + + +starting permutation: pruner_create_temp ll_start pruner_query pruner_query pruner_begin pruner_delete pruner_query pruner_query ll_commit pruner_commit pruner_drop +step pruner_create_temp: + CREATE TEMPORARY TABLE horizons_tst (data int unique) WITH (autovacuum_enabled = off); + INSERT INTO horizons_tst(data) VALUES(1),(2); + +step ll_start: + BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; + SELECT 1; + +?column? + +1 +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? + +2 +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? + +2 +step pruner_begin: BEGIN; +step pruner_delete: + DELETE FROM horizons_tst; + +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? + +2 +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? 
+ +2 +step ll_commit: COMMIT; +step pruner_commit: COMMIT; +step pruner_drop: + DROP TABLE horizons_tst; + + +starting permutation: pruner_create_perm ll_start pruner_query pruner_query pruner_delete pruner_vacuum pruner_query pruner_query ll_commit pruner_drop +step pruner_create_perm: + CREATE TABLE horizons_tst (data int unique) WITH (autovacuum_enabled = off); + INSERT INTO horizons_tst(data) VALUES(1),(2); + +step ll_start: + BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; + SELECT 1; + +?column? + +1 +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? + +2 +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? + +2 +step pruner_delete: + DELETE FROM horizons_tst; + +step pruner_vacuum: + VACUUM horizons_tst; + +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? + +2 +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? + +2 +step ll_commit: COMMIT; +step pruner_drop: + DROP TABLE horizons_tst; + + +starting permutation: pruner_create_temp ll_start pruner_query pruner_query pruner_delete pruner_vacuum pruner_query pruner_query ll_commit pruner_drop +step pruner_create_temp: + CREATE TEMPORARY TABLE horizons_tst (data int unique) WITH (autovacuum_enabled = off); + INSERT INTO horizons_tst(data) VALUES(1),(2); + +step ll_start: + BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; + SELECT 1; + +?column? + +1 +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? 
+ +2 +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? + +2 +step pruner_delete: + DELETE FROM horizons_tst; + +step pruner_vacuum: + VACUUM horizons_tst; + +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? + +0 +step pruner_query: + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; + +?column? + +0 +step ll_commit: COMMIT; +step pruner_drop: + DROP TABLE horizons_tst; + diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index aa386ab1a2..f2e752c445 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -78,6 +78,7 @@ test: timeouts test: vacuum-concurrent-drop test: vacuum-conflict test: vacuum-skip-locked +test: horizons test: predicate-hash test: predicate-gist test: predicate-gin diff --git a/src/test/isolation/specs/horizons.spec b/src/test/isolation/specs/horizons.spec new file mode 100644 index 0000000000..f74035c42f --- /dev/null +++ b/src/test/isolation/specs/horizons.spec @@ -0,0 +1,169 @@ +# Test that pruning and vacuuming pay attention to concurrent sessions +# in the right way. For normal relations that means that rows cannot +# be pruned away if there's an older snapshot, in contrast to that +# temporary tables should nearly always be prunable. +# +# NB: Think hard before adding a test showing that rows in permanent +# tables get pruned - it's quite likely that it'd be racy, e.g. due to +# an autovacuum worker holding a snapshot. 
+ +setup { + CREATE OR REPLACE FUNCTION explain_json(p_query text) + RETURNS json + LANGUAGE plpgsql AS $$ + DECLARE + v_ret json; + BEGIN + EXECUTE p_query INTO STRICT v_ret; + RETURN v_ret; + END;$$; +} + +teardown { + DROP FUNCTION explain_json(text); +} + +session "lifeline" + +# Start a transaction, force a snapshot to be held +step "ll_start" +{ + BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; + SELECT 1; +} + +step "ll_commit" { COMMIT; } + + +session "pruner" + +setup +{ + SET enable_seqscan = false; + SET enable_indexscan = false; + SET enable_bitmapscan = false; +} + +step "pruner_create_temp" +{ + CREATE TEMPORARY TABLE horizons_tst (data int unique) WITH (autovacuum_enabled = off); + INSERT INTO horizons_tst(data) VALUES(1),(2); +} + +step "pruner_create_perm" +{ + CREATE TABLE horizons_tst (data int unique) WITH (autovacuum_enabled = off); + INSERT INTO horizons_tst(data) VALUES(1),(2); +} + +# Temp tables cannot be dropped in the teardown, so just always do so +# as part of the permutation +step "pruner_drop" +{ + DROP TABLE horizons_tst; +} + +step "pruner_delete" +{ + DELETE FROM horizons_tst; +} + +step "pruner_begin" { BEGIN; } +step "pruner_commit" { COMMIT; } + +step "pruner_vacuum" +{ + VACUUM horizons_tst; +} + +# Show the heap fetches of an ordered index-only-scan (other plans +# have been forbidden above) - that tells us how many non-killed leaf +# entries there are. +step "pruner_query" +{ + SELECT explain_json($$ + EXPLAIN (FORMAT json, BUFFERS, ANALYZE) + SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches'; +} + +# Verify that the query plan still is an IOS +step "pruner_query_plan" +{ + EXPLAIN (COSTS OFF) SELECT * FROM horizons_tst ORDER BY data; +} + + +# Show that with a permanent relation deleted rows cannot be pruned +# away if there's a concurrent session still seeing the rows. 
+permutation + "pruner_create_perm" + "ll_start" + "pruner_query_plan" + # Run query that could do pruning twice, first has chance to prune, + # second would not perform heap fetches if first query did. + "pruner_query" + "pruner_query" + "pruner_delete" + "pruner_query" + "pruner_query" + "ll_commit" + "pruner_drop" + +# Show that with a temporary relation deleted rows can be pruned away, +# even if there's a concurrent session with a snapshot from before the +# deletion. That's safe because the session with the older snapshot +# cannot access the temporary table. +permutation + "pruner_create_temp" + "ll_start" + "pruner_query_plan" + "pruner_query" + "pruner_query" + "pruner_delete" + "pruner_query" + "pruner_query" + "ll_commit" + "pruner_drop" + +# Verify that pruning in temporary relations doesn't remove rows still +# visible in the current session +permutation + "pruner_create_temp" + "ll_start" + "pruner_query" + "pruner_query" + "pruner_begin" + "pruner_delete" + "pruner_query" + "pruner_query" + "ll_commit" + "pruner_commit" + "pruner_drop" + +# Show that vacuum cannot remove deleted rows still visible to another +# session's snapshot, when accessing a permanent table. +permutation + "pruner_create_perm" + "ll_start" + "pruner_query" + "pruner_query" + "pruner_delete" + "pruner_vacuum" + "pruner_query" + "pruner_query" + "ll_commit" + "pruner_drop" + +# Show that vacuum can remove deleted rows still visible to another +# session's snapshot, when accessing a temporary table. +permutation + "pruner_create_temp" + "ll_start" + "pruner_query" + "pruner_query" + "pruner_delete" + "pruner_vacuum" + "pruner_query" + "pruner_query" + "ll_commit" + "pruner_drop"