user_mutex: Refactor locking and unblocking mechanism.

Suppose the following scenario:

1. Thread A holds a mutex.

2. Thread B goes to acquire the mutex, winds up in kernel waiting.

3. Thread A unlocks; first unsets the LOCKED flag.
   As WAITING is set, it calls the kernel; but instead of processing
   this immediately, the thread is suspended for any reason (locks,
   reschedule, etc.)

4. Thread B hits a timeout, or a signal. It then unblocks in the kernel,
   which causes the WAITING flag to be unset.

5. Thread C goes to acquire the lock. It sets the LOCKED flag.
   It sees the WAITING flag is not set, so it returns at once,
   having successfully acquired the lock.

6. Thread A, suspended back in step 3, resumes.

Now we encounter the problem. Under the previous code, the following
would occur.

7. Thread A sees that no threads are waiting. It thus unsets the LOCKED
   flag, and returns from the kernel. Now we have a mutex theoretically
   held by thread C but which (illegally) has no LOCKED flag set!

8. Some other thread tries to acquire the lock, and succeeds, for LOCKED
   is not set. We now have one lock owned by two separate threads.
   That's very bad!

The solution, in this commit, is to (1) switch from using "atomic_or"
to lock mutexes, to using "atomic_test_and_set", and (2) mandate that
_kern_unblock_mutex must be invoked with the mutex already unlocked.

Trying to solve the problem with (2) but without (1) produces other
complications and would overall be more complicated. For instance,
all existing userland code expected that it would set LOCKED, but then
check LOCKED|WAITING. If _kern_mutex_unlock does not unset LOCKED,
then whichever thread sets LOCKED when it was previously unset is
now the mutex's undisputed owner, and if it fails to notice this,
would deadlock.

That could have been solved with extra checks at all lock points, but
then that would mean locks would not be acquired "fairly": it would
be possible for any thread to race with an unlocking thread, and
acquire the lock before the kernel had a chance to wake anyone up.

Given how fast atomics can be, and how slow invoking the kernel is
comparatively, that would probably make our mutexes extremely "unfair."
This would not violate the POSIX specification, but it does seem like
a dangerous choice to make in implementing these APIs.

Linux's "futex" API, which our API bears some similarities to, requires
at least one atomic test-and-set for an uncontended acquisition,
and multiple atomics more for even the simplest case of contended
acquisition. If it works for them, it should work for us, too.

Fixes #18436.

Change-Id: Ib8c28acf04ce03234fe738e41aa0969ca1917540
Reviewed-on: https://review.haiku-os.org/c/haiku/+/6537
Tested-by: Commit checker robot <no-reply+buildbot@haiku-os.org>
Reviewed-by: Adrien Destugues <pulkomandy@pulkomandy.tk>
Reviewed-by: waddlesplash <waddlesplash@gmail.com>
This commit is contained in:
Augustin Cavalier 2023-06-06 23:30:15 -04:00 committed by waddlesplash
parent 65a76a0fb9
commit 6f3f29c7dd
11 changed files with 36 additions and 45 deletions

View File

@ -17,7 +17,7 @@ void user_mutex_init();
status_t _user_mutex_lock(int32* mutex, const char* name, uint32 flags,
bigtime_t timeout);
status_t _user_mutex_unlock(int32* mutex, uint32 flags);
status_t _user_mutex_unblock(int32* mutex, uint32 flags);
status_t _user_mutex_switch_lock(int32* fromMutex, int32* toMutex,
const char* name, uint32 flags, bigtime_t timeout);
status_t _user_mutex_sem_acquire(int32* sem, const char* name, uint32 flags,

View File

@ -77,7 +77,7 @@ extern ssize_t _kern_wait_for_objects(object_wait_info* infos, int numInfos,
/* user mutex functions */
extern status_t _kern_mutex_lock(int32* mutex, const char* name,
uint32 flags, bigtime_t timeout);
extern status_t _kern_mutex_unlock(int32* mutex, uint32 flags);
extern status_t _kern_mutex_unblock(int32* mutex, uint32 flags);
extern status_t _kern_mutex_switch_lock(int32* fromMutex, int32* toMutex,
const char* name, uint32 flags, bigtime_t timeout);
extern status_t _kern_mutex_sem_acquire(int32* sem, const char* name,

View File

@ -6,7 +6,7 @@
#define _SYSTEM_USER_MUTEX_DEFS_H
// user mutex specific flags passed to _kern_user_mutex_unlock()
// user mutex specific flags passed to _kern_mutex_unblock()
#define B_USER_MUTEX_UNBLOCK_ALL 0x80000000
// All threads currently waiting on the mutex will be unblocked. The mutex
// state will be locked.

View File

@ -180,13 +180,12 @@ static status_t
user_mutex_lock_locked(int32* mutex, phys_addr_t physicalAddress,
const char* name, uint32 flags, bigtime_t timeout, MutexLocker& locker)
{
// mark the mutex locked + waiting
int32 oldValue = user_atomic_or(mutex,
B_USER_MUTEX_LOCKED | B_USER_MUTEX_WAITING);
if ((oldValue & (B_USER_MUTEX_LOCKED | B_USER_MUTEX_WAITING)) == 0
if ((oldValue & B_USER_MUTEX_LOCKED) == 0
|| (oldValue & B_USER_MUTEX_DISABLED) != 0) {
// clear the waiting flag and be done
if ((oldValue & B_USER_MUTEX_WAITING) == 0)
user_atomic_and(mutex, ~(int32)B_USER_MUTEX_WAITING);
return B_OK;
}
@ -195,28 +194,27 @@ user_mutex_lock_locked(int32* mutex, phys_addr_t physicalAddress,
status_t error = user_mutex_wait_locked(mutex, physicalAddress, name,
flags, timeout, locker, lastWaiter);
if (lastWaiter) {
if (lastWaiter)
user_atomic_and(mutex, ~(int32)B_USER_MUTEX_WAITING);
}
return error;
}
static void
user_mutex_unlock_locked(int32* mutex, phys_addr_t physicalAddress, uint32 flags)
user_mutex_unblock_locked(int32* mutex, phys_addr_t physicalAddress, uint32 flags)
{
UserMutexEntry* entry = sUserMutexTable.Lookup(physicalAddress);
if (entry == NULL) {
// no one is waiting -- clear locked flag
user_atomic_and(mutex, ~(int32)B_USER_MUTEX_LOCKED);
// no one is waiting
user_atomic_and(mutex, ~(int32)B_USER_MUTEX_WAITING);
return;
}
// Someone is waiting -- set the locked flag. It might still be set,
// but when using userland atomic operations, the caller will usually
// have cleared it already.
// Someone is waiting: try to hand off the lock to them, if possible.
int32 oldValue = user_atomic_or(mutex, B_USER_MUTEX_LOCKED);
if ((oldValue & B_USER_MUTEX_LOCKED) != 0)
return;
// unblock the first thread
entry->locked = true;
@ -340,7 +338,8 @@ user_mutex_switch_lock(int32* fromMutex, int32* toMutex, const char* name,
// unlock the first mutex and lock the second one
{
MutexLocker locker(sUserMutexTableLock);
user_mutex_unlock_locked(fromMutex, fromWiringInfo.physicalAddress,
user_atomic_and(fromMutex, ~(int32)B_USER_MUTEX_LOCKED);
user_mutex_unblock_locked(fromMutex, fromWiringInfo.physicalAddress,
flags);
error = user_mutex_lock_locked(toMutex, toWiringInfo.physicalAddress,
@ -386,7 +385,7 @@ _user_mutex_lock(int32* mutex, const char* name, uint32 flags,
status_t
_user_mutex_unlock(int32* mutex, uint32 flags)
_user_mutex_unblock(int32* mutex, uint32 flags)
{
if (mutex == NULL || !IS_USER_ADDRESS(mutex) || (addr_t)mutex % 4 != 0)
return B_BAD_ADDRESS;
@ -400,7 +399,7 @@ _user_mutex_unlock(int32* mutex, uint32 flags)
{
MutexLocker locker(sUserMutexTableLock);
user_mutex_unlock_locked(mutex, wiringInfo.physicalAddress, flags);
user_mutex_unblock_locked(mutex, wiringInfo.physicalAddress, flags);
}
vm_unwire_page(&wiringInfo);

View File

@ -65,15 +65,9 @@ __mutex_lock(mutex *lock)
int32 oldValue;
do {
// set the locked flag
oldValue = atomic_or(&lock->lock, B_USER_MUTEX_LOCKED);
if ((oldValue & (B_USER_MUTEX_LOCKED | B_USER_MUTEX_WAITING)) == 0
|| (oldValue & B_USER_MUTEX_DISABLED) != 0) {
// No one has the lock or is waiting for it, or the mutex has been
// disabled.
oldValue = atomic_test_and_set(&lock->lock, B_USER_MUTEX_LOCKED, 0);
if (oldValue == 0 || (oldValue & B_USER_MUTEX_DISABLED) != 0)
return B_OK;
}
} while (count++ < kMaxCount && (oldValue & B_USER_MUTEX_WAITING) != 0);
// we have to call the kernel
@ -93,7 +87,7 @@ __mutex_unlock(mutex *lock)
int32 oldValue = atomic_and(&lock->lock, ~(int32)B_USER_MUTEX_LOCKED);
if ((oldValue & B_USER_MUTEX_WAITING) != 0
&& (oldValue & B_USER_MUTEX_DISABLED) == 0) {
_kern_mutex_unlock(&lock->lock, 0);
_kern_mutex_unblock(&lock->lock, 0);
}
if ((oldValue & B_USER_MUTEX_LOCKED) == 0)

View File

@ -48,8 +48,8 @@ pthread_barrier_init(pthread_barrier_t* barrier,
static status_t
barrier_lock(__haiku_std_int32* mutex)
{
int32 oldValue = atomic_or((int32*)mutex, B_USER_MUTEX_LOCKED);
if ((oldValue & (B_USER_MUTEX_LOCKED | B_USER_MUTEX_WAITING)) != 0) {
const int32 oldValue = atomic_test_and_set((int32*)mutex, B_USER_MUTEX_LOCKED, 0);
if (oldValue != 0) {
status_t error;
do {
error = _kern_mutex_lock((int32*)mutex, NULL, 0, 0);
@ -68,7 +68,7 @@ barrier_unlock(__haiku_std_int32* mutex)
int32 oldValue = atomic_and((int32*)mutex,
~(int32)B_USER_MUTEX_LOCKED);
if ((oldValue & B_USER_MUTEX_WAITING) != 0)
_kern_mutex_unlock((int32*)mutex, 0);
_kern_mutex_unblock((int32*)mutex, 0);
}
@ -98,7 +98,7 @@ pthread_barrier_wait(pthread_barrier_t* barrier)
// Wake everyone else up.
barrier->waiter_count = (-barrier->waiter_max) + 1;
atomic_and((int32*)&barrier->lock, ~(int32)B_USER_MUTEX_LOCKED);
_kern_mutex_unlock((int32*)&barrier->lock, B_USER_MUTEX_UNBLOCK_ALL);
_kern_mutex_unblock((int32*)&barrier->lock, B_USER_MUTEX_UNBLOCK_ALL);
// Return with the barrier mutex still locked, as waiter_count < 0.
// The last thread out will take care of unlocking it and resetting state.

View File

@ -92,6 +92,7 @@ cond_wait(pthread_cond_t* cond, pthread_mutex_t* mutex, uint32 flags,
pthread_mutex_lock(mutex);
cond->waiter_count--;
// If there are no more waiters, we can change mutexes.
if (cond->waiter_count == 0)
cond->mutex = NULL;
@ -107,7 +108,8 @@ cond_signal(pthread_cond_t* cond, bool broadcast)
return;
// release the condition lock
_kern_mutex_unlock((int32*)&cond->lock,
atomic_and((int32*)&cond->lock, ~(int32)B_USER_MUTEX_LOCKED);
_kern_mutex_unblock((int32*)&cond->lock,
broadcast ? B_USER_MUTEX_UNBLOCK_ALL : 0);
}

View File

@ -74,9 +74,8 @@ __pthread_mutex_lock(pthread_mutex_t* mutex, uint32 flags, bigtime_t timeout)
}
// set the locked flag
int32 oldValue = atomic_or((int32*)&mutex->lock, B_USER_MUTEX_LOCKED);
if ((oldValue & (B_USER_MUTEX_LOCKED | B_USER_MUTEX_WAITING)) != 0) {
const int32 oldValue = atomic_test_and_set((int32*)&mutex->lock, B_USER_MUTEX_LOCKED, 0);
if (oldValue != 0) {
// someone else has the lock or is at least waiting for it
if (timeout < 0)
return EBUSY;
@ -176,7 +175,7 @@ pthread_mutex_unlock(pthread_mutex_t* mutex)
int32 oldValue = atomic_and((int32*)&mutex->lock,
~(int32)B_USER_MUTEX_LOCKED);
if ((oldValue & B_USER_MUTEX_WAITING) != 0)
_kern_mutex_unlock((int32*)&mutex->lock, 0);
_kern_mutex_unblock((int32*)&mutex->lock, 0);
if (MUTEX_TYPE(mutex) == PTHREAD_MUTEX_ERRORCHECK
|| MUTEX_TYPE(mutex) == PTHREAD_MUTEX_DEFAULT) {

View File

@ -123,11 +123,9 @@ struct LocalRWLock {
bool StructureLock()
{
// Enter critical region: lock the mutex
int32 status = atomic_or((int32*)&mutex, B_USER_MUTEX_LOCKED);
// If already locked, call the kernel
if ((status & (B_USER_MUTEX_LOCKED | B_USER_MUTEX_WAITING)) != 0) {
const int32 oldValue = atomic_test_and_set((int32*)&mutex, B_USER_MUTEX_LOCKED, 0);
if (oldValue != 0) {
status_t status;
do {
status = _kern_mutex_lock((int32*)&mutex, NULL, 0, 0);
} while (status == B_INTERRUPTED);
@ -143,9 +141,8 @@ struct LocalRWLock {
// Exit critical region: unlock the mutex
int32 status = atomic_and((int32*)&mutex,
~(int32)B_USER_MUTEX_LOCKED);
if ((status & B_USER_MUTEX_WAITING) != 0)
_kern_mutex_unlock((int32*)&mutex, 0);
_kern_mutex_unblock((int32*)&mutex, 0);
}
status_t ReadLock(uint32 flags, bigtime_t timeout)

View File

@ -1207,7 +1207,7 @@ void _kern_mutex_lock() {}
void _kern_mutex_sem_acquire() {}
void _kern_mutex_sem_release() {}
void _kern_mutex_switch_lock() {}
void _kern_mutex_unlock() {}
void _kern_mutex_unblock() {}
void _kern_next_device() {}
void _kern_normalize_path() {}
void _kern_open() {}

View File

@ -1127,7 +1127,7 @@ void _kern_mutex_lock() {}
void _kern_mutex_sem_acquire() {}
void _kern_mutex_sem_release() {}
void _kern_mutex_switch_lock() {}
void _kern_mutex_unlock() {}
void _kern_mutex_unblock() {}
void _kern_next_device() {}
void _kern_normalize_path() {}
void _kern_open() {}