Work around deadlock problems with VACUUM FULL/CLUSTER on system catalogs,
as per my recent proposal. First, teach IndexBuildHeapScan to not wait for INSERT_IN_PROGRESS or DELETE_IN_PROGRESS tuples to commit unless the index build is checking uniqueness/exclusion constraints. If it isn't, there's no harm in just indexing the in-doubt tuple. Second, modify VACUUM FULL/CLUSTER to suppress reverifying uniqueness/exclusion constraint properties while rebuilding indexes of the target relation. This is reasonable because these commands aren't meant to deal with corrupted-data situations. Constraint properties will still be rechecked when an index is rebuilt by a REINDEX command. This gets us out of the problem that new-style VACUUM FULL would often wait for other transactions while holding exclusive lock on a system catalog, leading to probable deadlock because those other transactions need to look at the catalogs too. Although the real ultimate cause of the problem is a debatable choice to release locks early after modifying system catalogs, changing that choice would require pretty serious analysis and is not something to be undertaken lightly or on a tight schedule. The present patch fixes the problem in a fairly reasonable way and should also improve the speed of VACUUM FULL/CLUSTER a little bit.
This commit is contained in:
parent
1c05b0b4ea
commit
1ddc2703a9
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.333 2010/02/07 20:48:09 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.334 2010/02/07 22:40:33 tgl Exp $
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* INTERFACE ROUTINES
|
* INTERFACE ROUTINES
|
||||||
@ -1541,6 +1541,8 @@ IndexBuildHeapScan(Relation heapRelation,
|
|||||||
IndexBuildCallback callback,
|
IndexBuildCallback callback,
|
||||||
void *callback_state)
|
void *callback_state)
|
||||||
{
|
{
|
||||||
|
bool is_system_catalog;
|
||||||
|
bool checking_uniqueness;
|
||||||
HeapScanDesc scan;
|
HeapScanDesc scan;
|
||||||
HeapTuple heapTuple;
|
HeapTuple heapTuple;
|
||||||
Datum values[INDEX_MAX_KEYS];
|
Datum values[INDEX_MAX_KEYS];
|
||||||
@ -1560,6 +1562,13 @@ IndexBuildHeapScan(Relation heapRelation,
|
|||||||
*/
|
*/
|
||||||
Assert(OidIsValid(indexRelation->rd_rel->relam));
|
Assert(OidIsValid(indexRelation->rd_rel->relam));
|
||||||
|
|
||||||
|
/* Remember if it's a system catalog */
|
||||||
|
is_system_catalog = IsSystemRelation(heapRelation);
|
||||||
|
|
||||||
|
/* See whether we're verifying uniqueness/exclusion properties */
|
||||||
|
checking_uniqueness = (indexInfo->ii_Unique ||
|
||||||
|
indexInfo->ii_ExclusionOps != NULL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Need an EState for evaluation of index expressions and partial-index
|
* Need an EState for evaluation of index expressions and partial-index
|
||||||
* predicates. Also a slot to hold the current tuple.
|
* predicates. Also a slot to hold the current tuple.
|
||||||
@ -1652,6 +1661,7 @@ IndexBuildHeapScan(Relation heapRelation,
|
|||||||
{
|
{
|
||||||
/* do our own time qual check */
|
/* do our own time qual check */
|
||||||
bool indexIt;
|
bool indexIt;
|
||||||
|
TransactionId xwait;
|
||||||
|
|
||||||
recheck:
|
recheck:
|
||||||
|
|
||||||
@ -1710,29 +1720,31 @@ IndexBuildHeapScan(Relation heapRelation,
|
|||||||
case HEAPTUPLE_INSERT_IN_PROGRESS:
|
case HEAPTUPLE_INSERT_IN_PROGRESS:
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Since caller should hold ShareLock or better, we should
|
* Since caller should hold ShareLock or better, normally
|
||||||
* not see any tuples inserted by open transactions ---
|
* the only way to see this is if it was inserted earlier
|
||||||
* unless it's our own transaction. (Consider INSERT
|
* in our own transaction. However, it can happen in
|
||||||
* followed by CREATE INDEX within a transaction.) An
|
* system catalogs, since we tend to release write lock
|
||||||
* exception occurs when reindexing a system catalog,
|
* before commit there. Give a warning if neither case
|
||||||
* because we often release lock on system catalogs before
|
* applies.
|
||||||
* committing. In that case we wait for the inserting
|
|
||||||
* transaction to finish and check again. (We could do
|
|
||||||
* that on user tables too, but since the case is not
|
|
||||||
* expected it seems better to throw an error.)
|
|
||||||
*/
|
*/
|
||||||
if (!TransactionIdIsCurrentTransactionId(
|
xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
|
||||||
HeapTupleHeaderGetXmin(heapTuple->t_data)))
|
if (!TransactionIdIsCurrentTransactionId(xwait))
|
||||||
{
|
{
|
||||||
if (!IsSystemRelation(heapRelation))
|
if (!is_system_catalog)
|
||||||
elog(ERROR, "concurrent insert in progress");
|
elog(WARNING, "concurrent insert in progress within table \"%s\"",
|
||||||
else
|
RelationGetRelationName(heapRelation));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we are performing uniqueness checks, indexing
|
||||||
|
* such a tuple could lead to a bogus uniqueness
|
||||||
|
* failure. In that case we wait for the inserting
|
||||||
|
* transaction to finish and check again.
|
||||||
|
*/
|
||||||
|
if (checking_uniqueness)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Must drop the lock on the buffer before we wait
|
* Must drop the lock on the buffer before we wait
|
||||||
*/
|
*/
|
||||||
TransactionId xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
|
|
||||||
|
|
||||||
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
|
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
|
||||||
XactLockTableWait(xwait);
|
XactLockTableWait(xwait);
|
||||||
goto recheck;
|
goto recheck;
|
||||||
@ -1749,30 +1761,27 @@ IndexBuildHeapScan(Relation heapRelation,
|
|||||||
case HEAPTUPLE_DELETE_IN_PROGRESS:
|
case HEAPTUPLE_DELETE_IN_PROGRESS:
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Since caller should hold ShareLock or better, we should
|
* Similar situation to INSERT_IN_PROGRESS case.
|
||||||
* not see any tuples deleted by open transactions ---
|
|
||||||
* unless it's our own transaction. (Consider DELETE
|
|
||||||
* followed by CREATE INDEX within a transaction.) An
|
|
||||||
* exception occurs when reindexing a system catalog,
|
|
||||||
* because we often release lock on system catalogs before
|
|
||||||
* committing. In that case we wait for the deleting
|
|
||||||
* transaction to finish and check again. (We could do
|
|
||||||
* that on user tables too, but since the case is not
|
|
||||||
* expected it seems better to throw an error.)
|
|
||||||
*/
|
*/
|
||||||
Assert(!(heapTuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
|
Assert(!(heapTuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
|
||||||
if (!TransactionIdIsCurrentTransactionId(
|
xwait = HeapTupleHeaderGetXmax(heapTuple->t_data);
|
||||||
HeapTupleHeaderGetXmax(heapTuple->t_data)))
|
if (!TransactionIdIsCurrentTransactionId(xwait))
|
||||||
{
|
{
|
||||||
if (!IsSystemRelation(heapRelation))
|
if (!is_system_catalog)
|
||||||
elog(ERROR, "concurrent delete in progress");
|
elog(WARNING, "concurrent delete in progress within table \"%s\"",
|
||||||
else
|
RelationGetRelationName(heapRelation));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we are performing uniqueness checks, assuming
|
||||||
|
* the tuple is dead could lead to missing a uniqueness
|
||||||
|
* violation. In that case we wait for the deleting
|
||||||
|
* transaction to finish and check again.
|
||||||
|
*/
|
||||||
|
if (checking_uniqueness)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Must drop the lock on the buffer before we wait
|
* Must drop the lock on the buffer before we wait
|
||||||
*/
|
*/
|
||||||
TransactionId xwait = HeapTupleHeaderGetXmax(heapTuple->t_data);
|
|
||||||
|
|
||||||
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
|
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
|
||||||
XactLockTableWait(xwait);
|
XactLockTableWait(xwait);
|
||||||
goto recheck;
|
goto recheck;
|
||||||
@ -2402,7 +2411,7 @@ IndexGetRelation(Oid indexId)
|
|||||||
* reindex_index - This routine is used to recreate a single index
|
* reindex_index - This routine is used to recreate a single index
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
reindex_index(Oid indexId)
|
reindex_index(Oid indexId, bool skip_constraint_checks)
|
||||||
{
|
{
|
||||||
Relation iRel,
|
Relation iRel,
|
||||||
heapRelation,
|
heapRelation,
|
||||||
@ -2411,6 +2420,7 @@ reindex_index(Oid indexId)
|
|||||||
IndexInfo *indexInfo;
|
IndexInfo *indexInfo;
|
||||||
HeapTuple indexTuple;
|
HeapTuple indexTuple;
|
||||||
Form_pg_index indexForm;
|
Form_pg_index indexForm;
|
||||||
|
volatile bool skipped_constraint = false;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Open and lock the parent heap relation. ShareLock is sufficient since
|
* Open and lock the parent heap relation. ShareLock is sufficient since
|
||||||
@ -2448,6 +2458,17 @@ reindex_index(Oid indexId)
|
|||||||
/* Fetch info needed for index_build */
|
/* Fetch info needed for index_build */
|
||||||
indexInfo = BuildIndexInfo(iRel);
|
indexInfo = BuildIndexInfo(iRel);
|
||||||
|
|
||||||
|
/* If requested, skip checking uniqueness/exclusion constraints */
|
||||||
|
if (skip_constraint_checks)
|
||||||
|
{
|
||||||
|
if (indexInfo->ii_Unique || indexInfo->ii_ExclusionOps != NULL)
|
||||||
|
skipped_constraint = true;
|
||||||
|
indexInfo->ii_Unique = false;
|
||||||
|
indexInfo->ii_ExclusionOps = NULL;
|
||||||
|
indexInfo->ii_ExclusionProcs = NULL;
|
||||||
|
indexInfo->ii_ExclusionStrats = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
/* We'll build a new physical relation for the index */
|
/* We'll build a new physical relation for the index */
|
||||||
RelationSetNewRelfilenode(iRel, InvalidTransactionId);
|
RelationSetNewRelfilenode(iRel, InvalidTransactionId);
|
||||||
|
|
||||||
@ -2466,13 +2487,16 @@ reindex_index(Oid indexId)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* If the index is marked invalid or not ready (ie, it's from a failed
|
* If the index is marked invalid or not ready (ie, it's from a failed
|
||||||
* CREATE INDEX CONCURRENTLY), we can now mark it valid. This allows
|
* CREATE INDEX CONCURRENTLY), and we didn't skip a uniqueness check,
|
||||||
* REINDEX to be used to clean up in such cases.
|
* we can now mark it valid. This allows REINDEX to be used to clean up
|
||||||
|
* in such cases.
|
||||||
*
|
*
|
||||||
* We can also reset indcheckxmin, because we have now done a
|
* We can also reset indcheckxmin, because we have now done a
|
||||||
* non-concurrent index build, *except* in the case where index_build
|
* non-concurrent index build, *except* in the case where index_build
|
||||||
* found some still-broken HOT chains.
|
* found some still-broken HOT chains.
|
||||||
*/
|
*/
|
||||||
|
if (!skipped_constraint)
|
||||||
|
{
|
||||||
pg_index = heap_open(IndexRelationId, RowExclusiveLock);
|
pg_index = heap_open(IndexRelationId, RowExclusiveLock);
|
||||||
|
|
||||||
indexTuple = SearchSysCacheCopy(INDEXRELID,
|
indexTuple = SearchSysCacheCopy(INDEXRELID,
|
||||||
@ -2492,7 +2516,9 @@ reindex_index(Oid indexId)
|
|||||||
simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
|
simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
|
||||||
CatalogUpdateIndexes(pg_index, indexTuple);
|
CatalogUpdateIndexes(pg_index, indexTuple);
|
||||||
}
|
}
|
||||||
|
|
||||||
heap_close(pg_index, RowExclusiveLock);
|
heap_close(pg_index, RowExclusiveLock);
|
||||||
|
}
|
||||||
|
|
||||||
/* Close rels, but keep locks */
|
/* Close rels, but keep locks */
|
||||||
index_close(iRel, NoLock);
|
index_close(iRel, NoLock);
|
||||||
@ -2513,6 +2539,11 @@ reindex_index(Oid indexId)
|
|||||||
* do CCI after having collected the index list. (This way we can still use
|
* do CCI after having collected the index list. (This way we can still use
|
||||||
* catalog indexes while collecting the list.)
|
* catalog indexes while collecting the list.)
|
||||||
*
|
*
|
||||||
|
* We also skip rechecking uniqueness/exclusion constraint properties if
|
||||||
|
* heap_rebuilt is true. This avoids likely deadlock conditions when doing
|
||||||
|
* VACUUM FULL or CLUSTER on system catalogs. REINDEX should be used to
|
||||||
|
* rebuild an index if constraint inconsistency is suspected.
|
||||||
|
*
|
||||||
* Returns true if any indexes were rebuilt. Note that a
|
* Returns true if any indexes were rebuilt. Note that a
|
||||||
* CommandCounterIncrement will occur after each index rebuild.
|
* CommandCounterIncrement will occur after each index rebuild.
|
||||||
*/
|
*/
|
||||||
@ -2594,7 +2625,7 @@ reindex_relation(Oid relid, bool toast_too, bool heap_rebuilt)
|
|||||||
if (is_pg_class)
|
if (is_pg_class)
|
||||||
RelationSetIndexList(rel, doneIndexes, InvalidOid);
|
RelationSetIndexList(rel, doneIndexes, InvalidOid);
|
||||||
|
|
||||||
reindex_index(indexOid);
|
reindex_index(indexOid, heap_rebuilt);
|
||||||
|
|
||||||
CommandCounterIncrement();
|
CommandCounterIncrement();
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.198 2010/02/07 20:48:10 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.199 2010/02/07 22:40:33 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -948,7 +948,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
|
|||||||
*/
|
*/
|
||||||
if (!is_system_catalog &&
|
if (!is_system_catalog &&
|
||||||
!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
|
!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
|
||||||
|
|
||||||
elog(WARNING, "concurrent insert in progress within table \"%s\"",
|
elog(WARNING, "concurrent insert in progress within table \"%s\"",
|
||||||
RelationGetRelationName(OldHeap));
|
RelationGetRelationName(OldHeap));
|
||||||
/* treat as live */
|
/* treat as live */
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/commands/indexcmds.c,v 1.191 2010/02/07 20:48:10 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/commands/indexcmds.c,v 1.192 2010/02/07 22:40:33 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -1588,7 +1588,7 @@ ReindexIndex(RangeVar *indexRelation)
|
|||||||
|
|
||||||
ReleaseSysCache(tuple);
|
ReleaseSysCache(tuple);
|
||||||
|
|
||||||
reindex_index(indOid);
|
reindex_index(indOid, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/catalog/index.h,v 1.82 2010/02/07 20:48:11 tgl Exp $
|
* $PostgreSQL: pgsql/src/include/catalog/index.h,v 1.83 2010/02/07 22:40:33 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -70,7 +70,7 @@ extern double IndexBuildHeapScan(Relation heapRelation,
|
|||||||
|
|
||||||
extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot);
|
extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot);
|
||||||
|
|
||||||
extern void reindex_index(Oid indexId);
|
extern void reindex_index(Oid indexId, bool skip_constraint_checks);
|
||||||
extern bool reindex_relation(Oid relid, bool toast_too, bool heap_rebuilt);
|
extern bool reindex_relation(Oid relid, bool toast_too, bool heap_rebuilt);
|
||||||
|
|
||||||
extern bool ReindexIsProcessingHeap(Oid heapOid);
|
extern bool ReindexIsProcessingHeap(Oid heapOid);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
# ----------
|
# ----------
|
||||||
# $PostgreSQL: pgsql/src/test/regress/parallel_schedule,v 1.59 2010/02/07 20:48:13 tgl Exp $
|
# $PostgreSQL: pgsql/src/test/regress/parallel_schedule,v 1.60 2010/02/07 22:40:33 tgl Exp $
|
||||||
#
|
#
|
||||||
# By convention, we put no more than twenty tests in any one parallel group;
|
# By convention, we put no more than twenty tests in any one parallel group;
|
||||||
# this limits the number of connections needed to run the tests.
|
# this limits the number of connections needed to run the tests.
|
||||||
@ -52,10 +52,7 @@ test: copy copyselect
|
|||||||
# ----------
|
# ----------
|
||||||
# Another group of parallel tests
|
# Another group of parallel tests
|
||||||
# ----------
|
# ----------
|
||||||
test: constraints triggers create_misc create_aggregate create_operator inherit typed_table drop_if_exists create_cast
|
test: constraints triggers create_misc create_aggregate create_operator inherit typed_table vacuum drop_if_exists create_cast
|
||||||
|
|
||||||
# XXX temporarily run this by itself
|
|
||||||
test: vacuum
|
|
||||||
|
|
||||||
# Depends on the above
|
# Depends on the above
|
||||||
test: create_index create_view
|
test: create_index create_view
|
||||||
|
Loading…
x
Reference in New Issue
Block a user