Split ProcSleep function into JoinWaitQueue and ProcSleep

Split ProcSleep into two functions: JoinWaitQueue and ProcSleep.
JoinWaitQueue is called while holding the partition lock, and inserts
the current process to the wait queue, while ProcSleep() does the
actual sleeping. ProcSleep() is now called without holding the
partition lock, and it no longer re-acquires the partition lock before
returning. That makes the wakeup a little cheaper. Once upon a time,
re-acquiring the partition lock was needed to prevent a signal handler
from longjmping out at a bad time, but these days our signal handlers
just set flags, and longjmping can only happen at points where we
explicitly run CHECK_FOR_INTERRUPTS().

If JoinWaitQueue detects an "early deadlock" before even joining the
wait queue, it returns without changing the shared lock entry, leaving
the cleanup of the shared lock entry to the caller. This makes the
handling of an early deadlock the same as the dontWait=true case.

One small user-visible side-effect of this refactoring is that we now
only set the 'ps' title to say "waiting" when we actually enter the
sleep, not when the lock is skipped because dontWait=true, or when a
deadlock is detected early before entering the sleep.

This eliminates the 'lockAwaited' global variable in proc.c, which was
largely redundant with 'awaitedLock' in lock.c

Note: Updating the local lock table is now the caller's responsibility.
JoinWaitQueue and ProcSleep are now only responsible for modifying the
shared state. Seems a little nicer that way.

Based on Thomas Munro's earlier patch and observation that ProcSleep
doesn't really need to re-acquire the partition lock.

Reviewed-by: Maxim Orlov
Discussion: https://www.postgresql.org/message-id/7c2090cd-a72a-4e34-afaa-6dd2ef31440e@iki.fi
This commit is contained in:
Heikki Linnakangas 2024-11-04 17:59:24 +02:00
parent 0b1765d959
commit 3c0fd64fec
5 changed files with 186 additions and 187 deletions

View File

@ -409,8 +409,7 @@ static PROCLOCK *SetupLockInTable(LockMethod lockMethodTable, PGPROC *proc,
static void GrantLockLocal(LOCALLOCK *locallock, ResourceOwner owner);
static void BeginStrongLockAcquire(LOCALLOCK *locallock, uint32 fasthashcode);
static void FinishStrongLockAcquire(void);
static void WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner,
bool dontWait);
static ProcWaitStatus WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner);
static void ReleaseLockIfHeld(LOCALLOCK *locallock, bool sessionLock);
static void LockReassignOwner(LOCALLOCK *locallock, ResourceOwner parent);
static bool UnGrantLock(LOCK *lock, LOCKMODE lockmode,
@ -843,6 +842,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
uint32 hashcode;
LWLock *partitionLock;
bool found_conflict;
ProcWaitStatus waitResult;
bool log_lock = false;
if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
@ -1091,39 +1091,30 @@ LockAcquireExtended(const LOCKTAG *locktag,
{
/* No conflict with held or previously requested locks */
GrantLock(lock, proclock, lockmode);
GrantLockLocal(locallock, owner);
waitResult = PROC_WAIT_STATUS_OK;
}
else
{
/*
* Sleep till someone wakes me up. We do this even in the dontWait
* case, because while trying to go to sleep, we may discover that we
* can acquire the lock immediately after all.
* Join the lock's wait queue. We call this even in the dontWait
* case, because JoinWaitQueue() may discover that we can acquire the
* lock immediately after all.
*/
WaitOnLock(locallock, owner, dontWait);
waitResult = JoinWaitQueue(locallock, lockMethodTable, dontWait);
}
/*
* NOTE: do not do any material change of state between here and
* return. All required changes in locktable state must have been
* done when the lock was granted to us --- see notes in WaitOnLock.
*/
/*
* Check the proclock entry status. If dontWait = true, this is an
* expected case; otherwise, it will only happen if something in the
* ipc communication doesn't work correctly.
*/
if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
if (waitResult == PROC_WAIT_STATUS_ERROR)
{
/*
* We're not getting the lock because a deadlock was detected already
* while trying to join the wait queue, or because we would have to
* wait but the caller requested no blocking.
*
* Undo the changes to shared entries before releasing the partition
* lock.
*/
AbortStrongLockAcquire();
if (dontWait)
{
/*
* We can't acquire the lock immediately. If caller specified
* no blocking, remove useless table entries and return
* LOCKACQUIRE_NOT_AVAIL without waiting.
*/
if (proclock->holdMask == 0)
{
uint32 proclock_hashcode;
@ -1140,10 +1131,10 @@ LockAcquireExtended(const LOCKTAG *locktag,
elog(PANIC, "proclock table corrupted");
}
else
PROCLOCK_PRINT("LockAcquire: NOWAIT", proclock);
PROCLOCK_PRINT("LockAcquire: did not join wait queue", proclock);
lock->nRequested--;
lock->requested[lockmode]--;
LOCK_PRINT("LockAcquire: conditional lock failed",
LOCK_PRINT("LockAcquire: did not join wait queue",
lock, lockmode);
Assert((lock->nRequested > 0) &&
(lock->requested[lockmode] >= 0));
@ -1151,25 +1142,57 @@ LockAcquireExtended(const LOCKTAG *locktag,
LWLockRelease(partitionLock);
if (locallock->nLocks == 0)
RemoveLocalLock(locallock);
if (dontWait)
{
if (locallockp)
*locallockp = NULL;
return LOCKACQUIRE_NOT_AVAIL;
}
else
{
DeadLockReport();
/* DeadLockReport() will not return */
}
}
/*
* We should have gotten the lock, but somehow that didn't
* happen. If we get here, it's a bug.
* We are now in the lock queue, or the lock was already granted. If
* queued, go to sleep.
*/
PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock);
LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
if (waitResult == PROC_WAIT_STATUS_WAITING)
{
Assert(!dontWait);
PROCLOCK_PRINT("LockAcquire: sleeping on lock", proclock);
LOCK_PRINT("LockAcquire: sleeping on lock", lock, lockmode);
LWLockRelease(partitionLock);
elog(ERROR, "LockAcquire failed");
waitResult = WaitOnLock(locallock, owner);
/*
* NOTE: do not do any material change of state between here and
* return. All required changes in locktable state must have been
* done when the lock was granted to us --- see notes in WaitOnLock.
*/
if (waitResult == PROC_WAIT_STATUS_ERROR)
{
/*
* We failed as a result of a deadlock, see CheckDeadLock(). Quit
* now.
*/
Assert(!dontWait);
DeadLockReport();
/* DeadLockReport() will not return */
}
}
PROCLOCK_PRINT("LockAcquire: granted", proclock);
LOCK_PRINT("LockAcquire: granted", lock, lockmode);
}
else
LWLockRelease(partitionLock);
Assert(waitResult == PROC_WAIT_STATUS_OK);
/* The lock was granted to us. Update the local lock entry accordingly */
Assert((proclock->holdMask & LOCKBIT_ON(lockmode)) != 0);
GrantLockLocal(locallock, owner);
/*
* Lock state is fully up-to-date now; if we error out after this, no
@ -1177,8 +1200,6 @@ LockAcquireExtended(const LOCKTAG *locktag,
*/
FinishStrongLockAcquire();
LWLockRelease(partitionLock);
/*
* Emit a WAL record if acquisition of this lock needs to be replayed in a
* standby server.
@ -1819,6 +1840,15 @@ GrantAwaitedLock(void)
GrantLockLocal(awaitedLock, awaitedOwner);
}
/*
* GetAwaitedLock -- Return the lock we're currently doing WaitOnLock on.
*/
LOCALLOCK *
GetAwaitedLock(void)
{
return awaitedLock;
}
/*
* MarkLockClear -- mark an acquired lock as "clear"
*
@ -1836,14 +1866,12 @@ MarkLockClear(LOCALLOCK *locallock)
/*
* WaitOnLock -- wait to acquire a lock
*
* The appropriate partition lock must be held at entry, and will still be
* held at exit.
* This is a wrapper around ProcSleep, with extra tracing and bookkeeping.
*/
static void
WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner, bool dontWait)
static ProcWaitStatus
WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
{
LOCKMETHODID lockmethodid = LOCALLOCK_LOCKMETHOD(*locallock);
LockMethod lockMethodTable = LockMethods[lockmethodid];
ProcWaitStatus result;
TRACE_POSTGRESQL_LOCK_WAIT_START(locallock->tag.lock.locktag_field1,
locallock->tag.lock.locktag_field2,
@ -1852,12 +1880,13 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner, bool dontWait)
locallock->tag.lock.locktag_type,
locallock->tag.mode);
LOCK_PRINT("WaitOnLock: sleeping on lock",
locallock->lock, locallock->tag.mode);
/* adjust the process title to indicate that it's waiting */
set_ps_display_suffix("waiting");
/*
* Record the fact that we are waiting for a lock, so that
* LockErrorCleanup will clean up if cancel/die happens.
*/
awaitedLock = locallock;
awaitedOwner = owner;
@ -1880,30 +1909,7 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner, bool dontWait)
*/
PG_TRY();
{
/*
* If dontWait = true, we handle success and failure in the same way
* here. The caller will be able to sort out what has happened.
*/
if (ProcSleep(locallock, lockMethodTable, dontWait) != PROC_WAIT_STATUS_OK
&& !dontWait)
{
/*
* We failed as a result of a deadlock, see CheckDeadLock(). Quit
* now.
*/
awaitedLock = NULL;
LOCK_PRINT("WaitOnLock: aborting on lock",
locallock->lock, locallock->tag.mode);
LWLockRelease(LockHashPartitionLock(locallock->hashcode));
/*
* Now that we aren't holding the partition lock, we can give an
* error report including details about the detected deadlock.
*/
DeadLockReport();
/* not reached */
}
result = ProcSleep(locallock);
}
PG_CATCH();
{
@ -1917,20 +1923,22 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner, bool dontWait)
}
PG_END_TRY();
/*
* We no longer want LockErrorCleanup to do anything.
*/
awaitedLock = NULL;
/* reset ps display to remove the suffix */
set_ps_display_remove_suffix();
LOCK_PRINT("WaitOnLock: wakeup on lock",
locallock->lock, locallock->tag.mode);
TRACE_POSTGRESQL_LOCK_WAIT_DONE(locallock->tag.lock.locktag_field1,
locallock->tag.lock.locktag_field2,
locallock->tag.lock.locktag_field3,
locallock->tag.lock.locktag_field4,
locallock->tag.lock.locktag_type,
locallock->tag.mode);
return result;
}
/*

View File

@ -14,7 +14,7 @@
*/
/*
* Interface (a):
* ProcSleep(), ProcWakeup(),
* JoinWaitQueue(), ProcSleep(), ProcWakeup()
*
* Waiting for a lock causes the backend to be put to sleep. Whoever releases
* the lock wakes the process up again (and gives it an error code so it knows
@ -80,9 +80,6 @@ PROC_HDR *ProcGlobal = NULL;
NON_EXEC_STATIC PGPROC *AuxiliaryProcs = NULL;
PGPROC *PreparedXactProcs = NULL;
/* If we are waiting for a lock, this points to the associated LOCALLOCK */
static LOCALLOCK *lockAwaited = NULL;
static DeadLockState deadlock_state = DS_NOT_YET_CHECKED;
/* Is a deadlock check pending? */
@ -753,18 +750,6 @@ HaveNFreeProcs(int n, int *nfree)
return (*nfree == n);
}
/*
* Check if the current process is awaiting a lock.
*/
bool
IsWaitingForLock(void)
{
if (lockAwaited == NULL)
return false;
return true;
}
/*
* Cancel any pending wait for lock, when aborting a transaction, and revert
* any strong lock count acquisition for a lock being acquired.
@ -776,6 +761,7 @@ IsWaitingForLock(void)
void
LockErrorCleanup(void)
{
LOCALLOCK *lockAwaited;
LWLock *partitionLock;
DisableTimeoutParams timeouts[2];
@ -784,6 +770,7 @@ LockErrorCleanup(void)
AbortStrongLockAcquire();
/* Nothing to do if we weren't waiting for a lock */
lockAwaited = GetAwaitedLock();
if (lockAwaited == NULL)
{
RESUME_INTERRUPTS();
@ -825,8 +812,6 @@ LockErrorCleanup(void)
GrantAwaitedLock();
}
lockAwaited = NULL;
LWLockRelease(partitionLock);
RESUME_INTERRUPTS();
@ -1078,7 +1063,7 @@ AuxiliaryPidGetProc(int pid)
/*
* ProcSleep -- put a process to sleep on the specified lock
* JoinWaitQueue -- join the wait queue on the specified lock
*
* It's not actually guaranteed that we need to wait when this function is
* called, because it could be that when we try to find a position at which
@ -1087,37 +1072,44 @@ AuxiliaryPidGetProc(int pid)
* we get the lock immediately. Because of this, it's sensible for this function
* to have a dontWait argument, despite the name.
*
* The lock table's partition lock must be held at entry, and will be held
* at exit.
* On entry, the caller has already set up LOCK and PROCLOCK entries to
* reflect that we have "requested" the lock. The caller is responsible for
* cleaning that up, if we end up not joining the queue after all.
*
* Result: PROC_WAIT_STATUS_OK if we acquired the lock, PROC_WAIT_STATUS_ERROR
* if not (if dontWait = true, we would have had to wait; if dontWait = false,
* this is a deadlock).
* The lock table's partition lock must be held at entry, and is still held
* at exit. The caller must release it before calling ProcSleep().
*
* ASSUME: that no one will fiddle with the queue until after
* we release the partition lock.
* Result is one of the following:
*
* PROC_WAIT_STATUS_OK - lock was immediately granted
* PROC_WAIT_STATUS_WAITING - joined the wait queue; call ProcSleep()
* PROC_WAIT_STATUS_ERROR - immediate deadlock was detected, or would
* need to wait and dontWait == true
*
* NOTES: The process queue is now a priority queue for locking.
*/
ProcWaitStatus
ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable, bool dontWait)
JoinWaitQueue(LOCALLOCK *locallock, LockMethod lockMethodTable, bool dontWait)
{
LOCKMODE lockmode = locallock->tag.mode;
LOCK *lock = locallock->lock;
PROCLOCK *proclock = locallock->proclock;
uint32 hashcode = locallock->hashcode;
LWLock *partitionLock = LockHashPartitionLock(hashcode);
LWLock *partitionLock PG_USED_FOR_ASSERTS_ONLY = LockHashPartitionLock(hashcode);
dclist_head *waitQueue = &lock->waitProcs;
PGPROC *insert_before = NULL;
LOCKMASK myProcHeldLocks;
LOCKMASK myHeldLocks;
TimestampTz standbyWaitStart = 0;
bool early_deadlock = false;
bool allow_autovacuum_cancel = true;
bool logged_recovery_conflict = false;
ProcWaitStatus myWaitStatus;
PGPROC *leader = MyProc->lockGroupLeader;
Assert(LWLockHeldByMeInMode(partitionLock, LW_EXCLUSIVE));
/*
* Set bitmask of locks this process already holds on this object.
*/
myHeldLocks = MyProc->heldLocks = proclock->holdMask;
/*
* Determine which locks we're already holding.
*
@ -1205,7 +1197,6 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable, bool dontWait)
{
/* Skip the wait and just grant myself the lock. */
GrantLock(lock, proclock, lockmode);
GrantAwaitedLock();
return PROC_WAIT_STATUS_OK;
}
@ -1218,6 +1209,13 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable, bool dontWait)
}
}
/*
* If we detected deadlock, give up without waiting. This must agree with
* CheckDeadLock's recovery code.
*/
if (early_deadlock)
return PROC_WAIT_STATUS_ERROR;
/*
* At this point we know that we'd really need to sleep. If we've been
* commanded not to do that, bail out.
@ -1243,34 +1241,43 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable, bool dontWait)
MyProc->waitStatus = PROC_WAIT_STATUS_WAITING;
/*
* If we detected deadlock, give up without waiting. This must agree with
* CheckDeadLock's recovery code.
*/
if (early_deadlock)
{
RemoveFromWaitQueue(MyProc, hashcode);
return PROC_WAIT_STATUS_ERROR;
return PROC_WAIT_STATUS_WAITING;
}
/* mark that we are waiting for a lock */
lockAwaited = locallock;
/*
* Release the lock table's partition lock.
* ProcSleep -- put process to sleep waiting on lock
*
* NOTE: this may also cause us to exit critical-section state, possibly
* allowing a cancel/die interrupt to be accepted. This is OK because we
* have recorded the fact that we are waiting for a lock, and so
* LockErrorCleanup will clean up if cancel/die happens.
* This must be called when JoinWaitQueue() returns PROC_WAIT_STATUS_WAITING.
* Returns after the lock has been granted, or if a deadlock is detected. Can
* also bail out with ereport(ERROR), if some other error condition, or a
* timeout or cancellation is triggered.
*
* Result is one of the following:
*
* PROC_WAIT_STATUS_OK - lock was granted
* PROC_WAIT_STATUS_ERROR - a deadlock was detected
*/
LWLockRelease(partitionLock);
ProcWaitStatus
ProcSleep(LOCALLOCK *locallock)
{
LOCKMODE lockmode = locallock->tag.mode;
LOCK *lock = locallock->lock;
uint32 hashcode = locallock->hashcode;
LWLock *partitionLock = LockHashPartitionLock(hashcode);
TimestampTz standbyWaitStart = 0;
bool allow_autovacuum_cancel = true;
bool logged_recovery_conflict = false;
ProcWaitStatus myWaitStatus;
/* The caller must've armed the on-error cleanup mechanism */
Assert(GetAwaitedLock() == locallock);
Assert(!LWLockHeldByMe(partitionLock));
/*
* Also, now that we will successfully clean up after an ereport, it's
* safe to check to see if there's a buffer pin deadlock against the
* Startup process. Of course, that's only necessary if we're doing Hot
* Standby and are not the Startup process ourselves.
* Now that we will successfully clean up after an ereport, it's safe to
* check to see if there's a buffer pin deadlock against the Startup
* process. Of course, that's only necessary if we're doing Hot Standby
* and are not the Startup process ourselves.
*/
if (RecoveryInProgress() && !InRecovery)
CheckRecoveryConflictDeadlock();
@ -1679,29 +1686,12 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable, bool dontWait)
standbyWaitStart, GetCurrentTimestamp(),
NULL, false);
/*
* Re-acquire the lock table's partition lock. We have to do this to hold
* off cancel/die interrupts before we can mess with lockAwaited (else we
* might have a missed or duplicated locallock update).
*/
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
/*
* We no longer want LockErrorCleanup to do anything.
*/
lockAwaited = NULL;
/*
* If we got the lock, be sure to remember it in the locallock table.
*/
if (MyProc->waitStatus == PROC_WAIT_STATUS_OK)
GrantAwaitedLock();
/*
* We don't have to do anything else, because the awaker did all the
* necessary update of the lock table and MyProc.
* necessary updates of the lock table and MyProc. (The caller is
* responsible for updating the local lock table.)
*/
return MyProc->waitStatus;
return myWaitStatus;
}

View File

@ -3085,7 +3085,7 @@ ProcessRecoveryConflictInterrupt(ProcSignalReason reason)
/*
* If we aren't waiting for a lock we can never deadlock.
*/
if (!IsWaitingForLock())
if (GetAwaitedLock() == NULL)
return;
/* Intentional fall through to check wait for pin */

View File

@ -585,6 +585,8 @@ extern bool LockCheckConflicts(LockMethod lockMethodTable,
LOCK *lock, PROCLOCK *proclock);
extern void GrantLock(LOCK *lock, PROCLOCK *proclock, LOCKMODE lockmode);
extern void GrantAwaitedLock(void);
extern LOCALLOCK *GetAwaitedLock(void);
extern void RemoveFromWaitQueue(PGPROC *proc, uint32 hashcode);
extern LockData *GetLockStatusData(void);
extern BlockedProcsData *GetBlockerStatusData(int blocked_pid);

View File

@ -484,13 +484,12 @@ extern int GetStartupBufferPinWaitBufId(void);
extern bool HaveNFreeProcs(int n, int *nfree);
extern void ProcReleaseLocks(bool isCommit);
extern ProcWaitStatus ProcSleep(LOCALLOCK *locallock,
LockMethod lockMethodTable,
bool dontWait);
extern ProcWaitStatus JoinWaitQueue(LOCALLOCK *locallock,
LockMethod lockMethodTable, bool dontWait);
extern ProcWaitStatus ProcSleep(LOCALLOCK *locallock);
extern void ProcWakeup(PGPROC *proc, ProcWaitStatus waitStatus);
extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
extern void CheckDeadLockAlert(void);
extern bool IsWaitingForLock(void);
extern void LockErrorCleanup(void);
extern void ProcWaitForSignal(uint32 wait_event_info);