From 2346df6fc373df9c5ab944eebecf7d3036d727de Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Thu, 14 Mar 2024 08:55:25 -0400 Subject: [PATCH] Allow a no-wait lock acquisition to succeed in more cases. We don't determine the position at which a process waiting for a lock should insert itself into the wait queue until we reach ProcSleep(), and we may at that point discover that we must insert ourselves ahead of everyone who wants a conflicting lock, in which case we obtain the lock immediately. Up until now, a no-wait lock acquisition would fail in such cases, erroneously claiming that the lock couldn't be obtained immediately. Fix that by trying ProcSleep even in the no-wait case. No back-patch for now, because I'm treating this as an improvement to the existing no-wait feature. It could instead be argued that it's a bug fix, on the theory that there should never be any case whatsoever where no-wait fails to obtain a lock that would have been obtained immediately without no-wait, but I'm reluctant to interpret the semantics of no-wait that strictly. 
Robert Haas and Jingxian Li Discussion: http://postgr.es/m/CA+TgmobCH-kMXGVpb0BB-iNMdtcNkTvcZ4JBxDJows3kYM+GDg@mail.gmail.com --- src/backend/storage/lmgr/lock.c | 119 ++++++++++++-------- src/backend/storage/lmgr/proc.c | 20 +++- src/include/storage/proc.h | 4 +- src/test/isolation/expected/lock-nowait.out | 9 ++ src/test/isolation/isolation_schedule | 1 + src/test/isolation/specs/lock-nowait.spec | 28 +++++ 6 files changed, 128 insertions(+), 53 deletions(-) create mode 100644 src/test/isolation/expected/lock-nowait.out create mode 100644 src/test/isolation/specs/lock-nowait.spec diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 0d93932d8d..5022a50dd7 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -360,7 +360,8 @@ static PROCLOCK *SetupLockInTable(LockMethod lockMethodTable, PGPROC *proc, static void GrantLockLocal(LOCALLOCK *locallock, ResourceOwner owner); static void BeginStrongLockAcquire(LOCALLOCK *locallock, uint32 fasthashcode); static void FinishStrongLockAcquire(void); -static void WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner); +static void WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner, + bool dontWait); static void ReleaseLockIfHeld(LOCALLOCK *locallock, bool sessionLock); static void LockReassignOwner(LOCALLOCK *locallock, ResourceOwner parent); static bool UnGrantLock(LOCK *lock, LOCKMODE lockmode, @@ -1024,50 +1025,15 @@ LockAcquireExtended(const LOCKTAG *locktag, } else { - /* - * We can't acquire the lock immediately. If caller specified no - * blocking, remove useless table entries and return - * LOCKACQUIRE_NOT_AVAIL without waiting. 
- */ - if (dontWait) - { - AbortStrongLockAcquire(); - if (proclock->holdMask == 0) - { - uint32 proclock_hashcode; - - proclock_hashcode = ProcLockHashCode(&proclock->tag, hashcode); - dlist_delete(&proclock->lockLink); - dlist_delete(&proclock->procLink); - if (!hash_search_with_hash_value(LockMethodProcLockHash, - &(proclock->tag), - proclock_hashcode, - HASH_REMOVE, - NULL)) - elog(PANIC, "proclock table corrupted"); - } - else - PROCLOCK_PRINT("LockAcquire: NOWAIT", proclock); - lock->nRequested--; - lock->requested[lockmode]--; - LOCK_PRINT("LockAcquire: conditional lock failed", lock, lockmode); - Assert((lock->nRequested > 0) && (lock->requested[lockmode] >= 0)); - Assert(lock->nGranted <= lock->nRequested); - LWLockRelease(partitionLock); - if (locallock->nLocks == 0) - RemoveLocalLock(locallock); - if (locallockp) - *locallockp = NULL; - return LOCKACQUIRE_NOT_AVAIL; - } - /* * Set bitmask of locks this process already holds on this object. */ MyProc->heldLocks = proclock->holdMask; /* - * Sleep till someone wakes me up. + * Sleep till someone wakes me up. We do this even in the dontWait + * case, because while trying to go to sleep, we may discover that we + * can acquire the lock immediately after all. */ TRACE_POSTGRESQL_LOCK_WAIT_START(locktag->locktag_field1, @@ -1077,7 +1043,7 @@ LockAcquireExtended(const LOCKTAG *locktag, locktag->locktag_type, lockmode); - WaitOnLock(locallock, owner); + WaitOnLock(locallock, owner, dontWait); TRACE_POSTGRESQL_LOCK_WAIT_DONE(locktag->locktag_field1, locktag->locktag_field2, @@ -1093,17 +1059,63 @@ */ /* - * Check the proclock entry status, in case something in the ipc - * communication doesn't work correctly. + * Check the proclock entry status. If dontWait = true, this is an + * expected case; otherwise, it will only happen if something in the + * ipc communication doesn't work correctly. 
*/ if (!(proclock->holdMask & LOCKBIT_ON(lockmode))) { AbortStrongLockAcquire(); - PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock); - LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode); - /* Should we retry ? */ - LWLockRelease(partitionLock); - elog(ERROR, "LockAcquire failed"); + + if (dontWait) + { + /* + * We can't acquire the lock immediately. If caller specified + * no blocking, remove useless table entries and return + * LOCKACQUIRE_NOT_AVAIL without waiting. + */ + if (proclock->holdMask == 0) + { + uint32 proclock_hashcode; + + proclock_hashcode = ProcLockHashCode(&proclock->tag, + hashcode); + dlist_delete(&proclock->lockLink); + dlist_delete(&proclock->procLink); + if (!hash_search_with_hash_value(LockMethodProcLockHash, + &(proclock->tag), + proclock_hashcode, + HASH_REMOVE, + NULL)) + elog(PANIC, "proclock table corrupted"); + } + else + PROCLOCK_PRINT("LockAcquire: NOWAIT", proclock); + lock->nRequested--; + lock->requested[lockmode]--; + LOCK_PRINT("LockAcquire: conditional lock failed", + lock, lockmode); + Assert((lock->nRequested > 0) && + (lock->requested[lockmode] >= 0)); + Assert(lock->nGranted <= lock->nRequested); + LWLockRelease(partitionLock); + if (locallock->nLocks == 0) + RemoveLocalLock(locallock); + if (locallockp) + *locallockp = NULL; + return LOCKACQUIRE_NOT_AVAIL; + } + else + { + /* + * We should have gotten the lock, but somehow that didn't + * happen. If we get here, it's a bug. + */ + PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock); + LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode); + LWLockRelease(partitionLock); + elog(ERROR, "LockAcquire failed"); + } } PROCLOCK_PRINT("LockAcquire: granted", proclock); LOCK_PRINT("LockAcquire: granted", lock, lockmode); @@ -1777,10 +1789,11 @@ MarkLockClear(LOCALLOCK *locallock) * Caller must have set MyProc->heldLocks to reflect locks already held * on the lockable object by this process. * - * The appropriate partition lock must be held at entry. 
+ * The appropriate partition lock must be held at entry, and will still be + * held at exit. */ static void -WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner) +WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner, bool dontWait) { LOCKMETHODID lockmethodid = LOCALLOCK_LOCKMETHOD(*locallock); LockMethod lockMethodTable = LockMethods[lockmethodid]; @@ -1813,8 +1826,14 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner) */ PG_TRY(); { - if (ProcSleep(locallock, lockMethodTable) != PROC_WAIT_STATUS_OK) + /* + * If dontWait = true, we handle success and failure in the same way + * here. The caller will be able to sort out what has happened. + */ + if (ProcSleep(locallock, lockMethodTable, dontWait) != PROC_WAIT_STATUS_OK + && !dontWait) { + /* * We failed as a result of a deadlock, see CheckDeadLock(). Quit * now. diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index f3e20038f4..162b1f919d 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -1043,10 +1043,19 @@ AuxiliaryPidGetProc(int pid) * Caller must have set MyProc->heldLocks to reflect locks already held * on the lockable object by this process (under all XIDs). * + * It's not actually guaranteed that we need to wait when this function is + * called, because it could be that when we try to find a position at which + * to insert ourselves into the wait queue, we discover that we must be inserted + * ahead of everyone who wants a lock that conflicts with ours. In that case, + * we get the lock immediately. Because of this, it's sensible for this function + * to have a dontWait argument, despite the name. + * * The lock table's partition lock must be held at entry, and will be held * at exit. * - * Result: PROC_WAIT_STATUS_OK if we acquired the lock, PROC_WAIT_STATUS_ERROR if not (deadlock). 
+ * Result: PROC_WAIT_STATUS_OK if we acquired the lock, PROC_WAIT_STATUS_ERROR + * if not (if dontWait = false, this is a deadlock; if dontWait = true, we + * would have had to wait). * * ASSUME: that no one will fiddle with the queue until after * we release the partition lock. @@ -1054,7 +1063,7 @@ AuxiliaryPidGetProc(int pid) * NOTES: The process queue is now a priority queue for locking. */ ProcWaitStatus -ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) +ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable, bool dontWait) { LOCKMODE lockmode = locallock->tag.mode; LOCK *lock = locallock->lock; @@ -1167,6 +1176,13 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) } } + /* + * At this point we know that we'd really need to sleep. If we've been + * commanded not to do that, bail out. + */ + if (dontWait) + return PROC_WAIT_STATUS_ERROR; + /* * Insert self into queue, at the position determined above. */ diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 1095aefddf..18891a86fb 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -471,7 +471,9 @@ extern int GetStartupBufferPinWaitBufId(void); extern bool HaveNFreeProcs(int n, int *nfree); extern void ProcReleaseLocks(bool isCommit); -extern ProcWaitStatus ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable); +extern ProcWaitStatus ProcSleep(LOCALLOCK *locallock, + LockMethod lockMethodTable, + bool dontWait); extern void ProcWakeup(PGPROC *proc, ProcWaitStatus waitStatus); extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock); extern void CheckDeadLockAlert(void); diff --git a/src/test/isolation/expected/lock-nowait.out b/src/test/isolation/expected/lock-nowait.out new file mode 100644 index 0000000000..2dc5aad6f0 --- /dev/null +++ b/src/test/isolation/expected/lock-nowait.out @@ -0,0 +1,9 @@ +Parsed test spec with 2 sessions + +starting permutation: s1a s2a s1b s1c s2c +step s1a: LOCK TABLE a1 IN ACCESS EXCLUSIVE 
MODE; +step s2a: LOCK TABLE a1 IN EXCLUSIVE MODE; +step s1b: LOCK TABLE a1 IN SHARE ROW EXCLUSIVE MODE NOWAIT; +step s1c: COMMIT; +step s2a: <... completed> +step s2c: COMMIT; diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index b2be88ead1..188fc04f85 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -111,3 +111,4 @@ test: serializable-parallel test: serializable-parallel-2 test: serializable-parallel-3 test: matview-write-skew +test: lock-nowait diff --git a/src/test/isolation/specs/lock-nowait.spec b/src/test/isolation/specs/lock-nowait.spec new file mode 100644 index 0000000000..bb46d12a79 --- /dev/null +++ b/src/test/isolation/specs/lock-nowait.spec @@ -0,0 +1,28 @@ +# While requesting nowait lock, if the lock requested should +# be inserted in front of some waiter, check to see if the lock +# conflicts with already-held locks or the requests before +# the waiter. If not, then just grant myself the requested +# lock immediately. Test this scenario. + +setup +{ + CREATE TABLE a1 (); +} + +teardown +{ + DROP TABLE a1; +} + +session s1 +setup { BEGIN; } +step s1a { LOCK TABLE a1 IN ACCESS EXCLUSIVE MODE; } +step s1b { LOCK TABLE a1 IN SHARE ROW EXCLUSIVE MODE NOWAIT; } +step s1c { COMMIT; } + +session s2 +setup { BEGIN; } +step s2a { LOCK TABLE a1 IN EXCLUSIVE MODE; } +step s2c { COMMIT; } + +permutation s1a s2a s1b s1c s2c