Detect early deadlock in Hot Standby when Startup is already waiting. First

stage of required deadlock detection to allow re-enabling max_standby_delay
setting of -1, which is now essential in the absence of improved relation-
specific conflict resoluton. Requested by Greg Stark et al.
This commit is contained in:
Simon Riggs 2010-01-31 19:01:11 +00:00
parent 034fffbf31
commit c85c941470
3 changed files with 46 additions and 4 deletions

View File

@ -11,7 +11,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/ipc/standby.c,v 1.8 2010/01/29 17:10:05 sriggs Exp $
* $PostgreSQL: pgsql/src/backend/storage/ipc/standby.c,v 1.9 2010/01/31 19:01:11 sriggs Exp $
*
*-------------------------------------------------------------------------
*/
@ -22,6 +22,7 @@
#include "access/xlog.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
@ -384,7 +385,7 @@ ResolveRecoveryConflictWithBufferPin(void)
TimestampDifference(GetLatestXLogTime(), now,
&standby_delay_secs, &standby_delay_usecs);
if (standby_delay_secs >= (long) MaxStandbyDelay)
if (standby_delay_secs >= MaxStandbyDelay)
SendRecoveryConflictWithBufferPin();
else
{
@ -445,6 +446,39 @@ SendRecoveryConflictWithBufferPin(void)
CancelDBBackends(InvalidOid, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, false);
}
/*
* In Hot Standby perform early deadlock detection. We abort the lock
* wait if are about to sleep while holding the buffer pin that Startup
* process is waiting for. The deadlock occurs because we can only be
* waiting behind an AccessExclusiveLock, which can only clear when a
* transaction completion record is replayed, which can only occur when
* Startup process is not waiting. So if Startup process is waiting we
* never will clear that lock, so if we wait we cause deadlock. If we
* are the Startup process then no need to check for deadlocks.
*/
void
CheckRecoveryConflictDeadlock(LWLockId partitionLock)
{
Assert(!InRecovery);
if (!HoldingBufferPinThatDelaysRecovery())
return;
LWLockRelease(partitionLock);
/*
* Error message should match ProcessInterrupts() but we avoid calling
* that because we aren't handling an interrupt at this point. Note
* that we only cancel the current transaction here, so if we are in a
* subtransaction and the pin is held by a parent, then the Startup
* process will continue to wait even though we have avoided deadlock.
*/
ereport(ERROR,
(errcode(ERRCODE_QUERY_CANCELED),
errmsg("canceling statement due to conflict with recovery"),
errdetail("User transaction caused buffer deadlock with recovery.")));
}
/*
* -----------------------------------------------------
* Locking in Recovery Mode

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.193 2010/01/29 19:45:12 sriggs Exp $
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.194 2010/01/31 19:01:11 sriggs Exp $
*
* NOTES
* A lock table is a shared memory hash table. When
@ -814,6 +814,13 @@ LockAcquireExtended(const LOCKTAG *locktag,
return LOCKACQUIRE_NOT_AVAIL;
}
/*
* In Hot Standby perform early deadlock detection in normal backends.
* If deadlock found we release partition lock but do not return.
*/
if (RecoveryInProgress() && !InRecovery)
CheckRecoveryConflictDeadlock(partitionLock);
/*
* Set bitmask of locks this process already holds on this object.
*/

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/standby.h,v 1.6 2010/01/29 17:10:05 sriggs Exp $
* $PostgreSQL: pgsql/src/include/storage/standby.h,v 1.7 2010/01/31 19:01:11 sriggs Exp $
*
*-------------------------------------------------------------------------
*/
@ -31,6 +31,7 @@ extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
extern void ResolveRecoveryConflictWithBufferPin(void);
extern void SendRecoveryConflictWithBufferPin(void);
extern void CheckRecoveryConflictDeadlock(LWLockId partitionLock);
/*
* Standby Rmgr (RM_STANDBY_ID)