qemu-thread: add a fast path to the Win32 QemuEvent
QemuEvents are used heavily by call_rcu. We do not want them to be slow, but the current implementation does a kernel call on every invocation of qemu_event_* and won't cut it. So, wrap a Win32 manual-reset event with a fast userspace path. The states and transitions are the same as for the futex and mutex/condvar implementations, but the slow path is different of course. The idea is to reset the Win32 event lazily, as part of a test-reset-test-wait sequence. Such a sequence is, indeed, how QemuEvents are used by RCU and other subsystems! The patch includes a formal model of the algorithm. Tested-by: Stefan Weil <sw@weilnetz.de> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
a246a01631
commit
7c9b2bf677
98
docs/win32-qemu-event.promela
Normal file
98
docs/win32-qemu-event.promela
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
/*
|
||||||
|
* This model describes the implementation of QemuEvent in
|
||||||
|
* util/qemu-thread-win32.c.
|
||||||
|
*
|
||||||
|
* Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||||
|
*
|
||||||
|
* This file is in the public domain. If you really want a license,
|
||||||
|
* the WTFPL will do.
|
||||||
|
*
|
||||||
|
* To verify it:
|
||||||
|
* spin -a docs/event.promela
|
||||||
|
* gcc -O2 pan.c -DSAFETY
|
||||||
|
* ./a.out
|
||||||
|
*/
|
||||||
|
|
||||||
|
bool event;
|
||||||
|
int value;
|
||||||
|
|
||||||
|
/* Primitives for a Win32 event */
|
||||||
|
#define RAW_RESET event = false
|
||||||
|
#define RAW_SET event = true
|
||||||
|
#define RAW_WAIT do :: event -> break; od
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
/* Basic sanity checking: test the Win32 event primitives */
|
||||||
|
#define RESET RAW_RESET
|
||||||
|
#define SET RAW_SET
|
||||||
|
#define WAIT RAW_WAIT
|
||||||
|
#else
|
||||||
|
/* Full model: layer a userspace-only fast path on top of the RAW_*
|
||||||
|
* primitives. SET/RESET/WAIT have exactly the same semantics as
|
||||||
|
* RAW_SET/RAW_RESET/RAW_WAIT, but try to avoid invoking them.
|
||||||
|
*/
|
||||||
|
#define EV_SET 0
|
||||||
|
#define EV_FREE 1
|
||||||
|
#define EV_BUSY -1
|
||||||
|
|
||||||
|
int state = EV_FREE;
|
||||||
|
|
||||||
|
int xchg_result;
|
||||||
|
#define SET if :: state != EV_SET -> \
|
||||||
|
atomic { /* xchg_result=xchg(state, EV_SET) */ \
|
||||||
|
xchg_result = state; \
|
||||||
|
state = EV_SET; \
|
||||||
|
} \
|
||||||
|
if :: xchg_result == EV_BUSY -> RAW_SET; \
|
||||||
|
:: else -> skip; \
|
||||||
|
fi; \
|
||||||
|
:: else -> skip; \
|
||||||
|
fi
|
||||||
|
|
||||||
|
#define RESET if :: state == EV_SET -> atomic { state = state | EV_FREE; } \
|
||||||
|
:: else -> skip; \
|
||||||
|
fi
|
||||||
|
|
||||||
|
int tmp1, tmp2;
|
||||||
|
#define WAIT tmp1 = state; \
|
||||||
|
if :: tmp1 != EV_SET -> \
|
||||||
|
if :: tmp1 == EV_FREE -> \
|
||||||
|
RAW_RESET; \
|
||||||
|
atomic { /* tmp2=cas(state, EV_FREE, EV_BUSY) */ \
|
||||||
|
tmp2 = state; \
|
||||||
|
if :: tmp2 == EV_FREE -> state = EV_BUSY; \
|
||||||
|
:: else -> skip; \
|
||||||
|
fi; \
|
||||||
|
} \
|
||||||
|
if :: tmp2 == EV_SET -> tmp1 = EV_SET; \
|
||||||
|
:: else -> tmp1 = EV_BUSY; \
|
||||||
|
fi; \
|
||||||
|
:: else -> skip; \
|
||||||
|
fi; \
|
||||||
|
assert(tmp1 != EV_FREE); \
|
||||||
|
if :: tmp1 == EV_BUSY -> RAW_WAIT; \
|
||||||
|
:: else -> skip; \
|
||||||
|
fi; \
|
||||||
|
:: else -> skip; \
|
||||||
|
fi
|
||||||
|
#endif
|
||||||
|
|
||||||
|
active proctype waiter()
|
||||||
|
{
|
||||||
|
if
|
||||||
|
:: !value ->
|
||||||
|
RESET;
|
||||||
|
if
|
||||||
|
:: !value -> WAIT;
|
||||||
|
:: else -> skip;
|
||||||
|
fi;
|
||||||
|
:: else -> skip;
|
||||||
|
fi;
|
||||||
|
assert(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
active proctype notifier()
|
||||||
|
{
|
||||||
|
value = true;
|
||||||
|
SET;
|
||||||
|
}
|
@ -18,6 +18,7 @@ struct QemuSemaphore {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct QemuEvent {
|
struct QemuEvent {
|
||||||
|
int value;
|
||||||
HANDLE event;
|
HANDLE event;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -238,10 +238,34 @@ void qemu_sem_wait(QemuSemaphore *sem)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Wrap a Win32 manual-reset event with a fast userspace path. The idea
|
||||||
|
* is to reset the Win32 event lazily, as part of a test-reset-test-wait
|
||||||
|
* sequence. Such a sequence is, indeed, how QemuEvents are used by
|
||||||
|
* RCU and other subsystems!
|
||||||
|
*
|
||||||
|
* Valid transitions:
|
||||||
|
* - free->set, when setting the event
|
||||||
|
* - busy->set, when setting the event, followed by futex_wake
|
||||||
|
* - set->free, when resetting the event
|
||||||
|
* - free->busy, when waiting
|
||||||
|
*
|
||||||
|
* set->busy does not happen (it can be observed from the outside but
|
||||||
|
* it really is set->free->busy).
|
||||||
|
*
|
||||||
|
* busy->free provably cannot happen; to enforce it, the set->free transition
|
||||||
|
* is done with an OR, which becomes a no-op if the event has concurrently
|
||||||
|
* transitioned to free or busy (and is faster than cmpxchg).
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define EV_SET 0
|
||||||
|
#define EV_FREE 1
|
||||||
|
#define EV_BUSY -1
|
||||||
|
|
||||||
void qemu_event_init(QemuEvent *ev, bool init)
|
void qemu_event_init(QemuEvent *ev, bool init)
|
||||||
{
|
{
|
||||||
/* Manual reset. */
|
/* Manual reset. */
|
||||||
ev->event = CreateEvent(NULL, TRUE, init, NULL);
|
ev->event = CreateEvent(NULL, TRUE, TRUE, NULL);
|
||||||
|
ev->value = (init ? EV_SET : EV_FREE);
|
||||||
}
|
}
|
||||||
|
|
||||||
void qemu_event_destroy(QemuEvent *ev)
|
void qemu_event_destroy(QemuEvent *ev)
|
||||||
@ -251,17 +275,51 @@ void qemu_event_destroy(QemuEvent *ev)
|
|||||||
|
|
||||||
void qemu_event_set(QemuEvent *ev)
|
void qemu_event_set(QemuEvent *ev)
|
||||||
{
|
{
|
||||||
SetEvent(ev->event);
|
if (atomic_mb_read(&ev->value) != EV_SET) {
|
||||||
|
if (atomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
|
||||||
|
/* There were waiters, wake them up. */
|
||||||
|
SetEvent(ev->event);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void qemu_event_reset(QemuEvent *ev)
|
void qemu_event_reset(QemuEvent *ev)
|
||||||
{
|
{
|
||||||
ResetEvent(ev->event);
|
if (atomic_mb_read(&ev->value) == EV_SET) {
|
||||||
|
/* If there was a concurrent reset (or even reset+wait),
|
||||||
|
* do nothing. Otherwise change EV_SET->EV_FREE.
|
||||||
|
*/
|
||||||
|
atomic_or(&ev->value, EV_FREE);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void qemu_event_wait(QemuEvent *ev)
|
void qemu_event_wait(QemuEvent *ev)
|
||||||
{
|
{
|
||||||
WaitForSingleObject(ev->event, INFINITE);
|
unsigned value;
|
||||||
|
|
||||||
|
value = atomic_mb_read(&ev->value);
|
||||||
|
if (value != EV_SET) {
|
||||||
|
if (value == EV_FREE) {
|
||||||
|
/* qemu_event_set is not yet going to call SetEvent, but we are
|
||||||
|
* going to do another check for EV_SET below when setting EV_BUSY.
|
||||||
|
* At that point it is safe to call WaitForSingleObject.
|
||||||
|
*/
|
||||||
|
ResetEvent(ev->event);
|
||||||
|
|
||||||
|
/* Tell qemu_event_set that there are waiters. No need to retry
|
||||||
|
* because there cannot be a concurent busy->free transition.
|
||||||
|
* After the CAS, the event will be either set or busy.
|
||||||
|
*/
|
||||||
|
if (atomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
|
||||||
|
value = EV_SET;
|
||||||
|
} else {
|
||||||
|
value = EV_BUSY;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (value == EV_BUSY) {
|
||||||
|
WaitForSingleObject(ev->event, INFINITE);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct QemuThreadData {
|
struct QemuThreadData {
|
||||||
|
Loading…
Reference in New Issue
Block a user