qemu/docs/aio_notify_bug.promela
Paolo Bonzini 21a03d17f2 AioContext: fix broken placement of event_notifier_test_and_clear
event_notifier_test_and_clear must be called before processing events.
Otherwise, an aio_poll could "eat" the notification before the main
I/O thread invokes ppoll().  The main I/O thread then never wakes up.
This is an example of what could happen:

   i/o thread       vcpu thread                     worker thread
   ---------------------------------------------------------------------
   lock_iothread
   notify_me = 1
   ...
   unlock_iothread
                                                     bh->scheduled = 1
                                                     event_notifier_set
                    lock_iothread
                    notify_me = 3
                    ppoll
                    notify_me = 1
                    aio_dispatch
                     aio_bh_poll
                      thread_pool_completion_bh
                                                     bh->scheduled = 1
                                                     event_notifier_set
                     node->io_read(node->opaque)
                      event_notifier_test_and_clear
   ppoll
   *** hang ***

"Tracing" with qemu_clock_get_ns shows pretty much the same behavior as
in the previous bug, so there are no new tricks here---just stare more
at the code until it is apparent.

One could also use a formal model, of course.  The included one shows
this with three processes: notifier corresponds to a QEMU thread pool
worker, temporary_waiter to a VCPU thread that invokes aio_poll(),
waiter to the main I/O thread.  I would be happy to say that the
formal model found the bug for me, but actually I wrote it after the
fact.

This patch is a bit of a big hammer.  The next one optimizes it,
with help (this time for real rather than a posteriori :)) from
another, similar formal model.

Reported-by: Richard W. M. Jones <rjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Tested-by: Richard W.M. Jones <rjones@redhat.com>
Message-id: 1437487673-23740-6-git-send-email-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-07-22 12:41:40 +01:00

141 lines
4.1 KiB
Promela

/*
* This model describes a bug in aio_notify. If ctx->notifier is
* cleared too late, a wakeup could be lost.
*
* Author: Paolo Bonzini <pbonzini@redhat.com>
*
* This file is in the public domain. If you really want a license,
* the WTFPL will do.
*
* To verify the buggy version:
* spin -a -DBUG docs/aio_notify_bug.promela
* gcc -O2 pan.c
* ./a.out -a -f
*
* To verify the fixed version:
* spin -a docs/aio_notify_bug.promela
* gcc -O2 pan.c
* ./a.out -a -f
*
* Add -DCHECK_REQ to test an alternative invariant and the
* "notify_me" optimization.
*/
int notify_me;
bool event;
bool req;
bool notifier_done;
#ifdef CHECK_REQ
#define USE_NOTIFY_ME 1
#else
#define USE_NOTIFY_ME 0
#endif
active proctype notifier()
{
do
:: true -> {
req = 1;
if
:: !USE_NOTIFY_ME || notify_me -> event = 1;
:: else -> skip;
fi
}
:: true -> break;
od;
notifier_done = 1;
}
#ifdef BUG
#define AIO_POLL \
notify_me++; \
if \
:: !req -> { \
if \
:: event -> skip; \
fi; \
} \
:: else -> skip; \
fi; \
notify_me--; \
\
req = 0; \
event = 0;
#else
#define AIO_POLL \
notify_me++; \
if \
:: !req -> { \
if \
:: event -> skip; \
fi; \
} \
:: else -> skip; \
fi; \
notify_me--; \
\
event = 0; \
req = 0;
#endif
active proctype waiter()
{
do
:: true -> AIO_POLL;
od;
}
/* Same as waiter(), but disappears after a while. */
active proctype temporary_waiter()
{
do
:: true -> AIO_POLL;
:: true -> break;
od;
}
#ifdef CHECK_REQ
never {
do
:: req -> goto accept_if_req_not_eventually_false;
:: true -> skip;
od;
accept_if_req_not_eventually_false:
if
:: req -> goto accept_if_req_not_eventually_false;
fi;
assert(0);
}
#else
/* There must be infinitely many transitions of event as long
* as the notifier does not exit.
*
* If event stayed always true, the waiters would be busy looping.
* If event stayed always false, the waiters would be sleeping
* forever.
*/
never {
do
:: !event -> goto accept_if_event_not_eventually_true;
:: event -> goto accept_if_event_not_eventually_false;
:: true -> skip;
od;
accept_if_event_not_eventually_true:
if
:: !event && notifier_done -> do :: true -> skip; od;
:: !event && !notifier_done -> goto accept_if_event_not_eventually_true;
fi;
assert(0);
accept_if_event_not_eventually_false:
if
:: event -> goto accept_if_event_not_eventually_false;
fi;
assert(0);
}
#endif