coroutine: avoid co_queue_wakeup recursion
qemu_aio_coroutine_enter() is (indirectly) called recursively when
processing co_queue_wakeup.  This can lead to stack exhaustion.

This patch rewrites co_queue_wakeup in an iterative fashion (instead of
recursive) with bounded memory usage to prevent stack exhaustion.

qemu_co_queue_run_restart() is inlined into qemu_aio_coroutine_enter()
and the qemu_coroutine_enter() call is turned into a loop to avoid
recursion.

There is one change that is worth mentioning: previously, when coroutine
A queued coroutine B, qemu_co_queue_run_restart() entered coroutine B
from coroutine A.  If A was terminating then it would still stay alive
until B yielded.  After this patch B is entered by A's parent so that A
can be deleted immediately if it is terminating.

It is safe to make this change since B could never interact with A if A
was terminating anyway.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20180322152834.12656-3-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
parent 67a74148d8
commit c40a254570
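For illustration, here is a minimal standalone sketch (not QEMU code: the
struct task type, its wakeup field, and the enter_* functions are
hypothetical stand-ins for Coroutine, co_queue_wakeup, and the coroutine
entry path) contrasting the recursive wakeup processing that could
exhaust the stack with the iterative loop this patch introduces:

/* Standalone sketch, not QEMU code: "task" stands in for Coroutine and
 * "wakeup" for co_queue_wakeup.  Each task may queue at most one other
 * task here, which is enough to show the stack-usage difference.
 */
#include <stdio.h>

struct task {
    int id;
    struct task *wakeup;    /* task queued for wakeup by this one, if any */
};

/* Old style: entering a task that queued another re-enters it recursively,
 * so stack usage grows with the length of the wakeup chain. */
static void enter_recursive(struct task *t)
{
    printf("enter task %d\n", t->id);
    if (t->wakeup) {
        enter_recursive(t->wakeup);   /* one stack frame per queued task */
    }
}

/* New style, as in this patch: keep a pending list and loop over it,
 * so stack usage is constant no matter how many tasks are queued. */
static void enter_iterative(struct task *first)
{
    struct task *pending = first;     /* plays the role of the QSIMPLEQ */

    while (pending) {
        struct task *t = pending;
        pending = t->wakeup;          /* splice queued wakeup onto pending */
        printf("enter task %d\n", t->id);
    }
}

int main(void)
{
    struct task c = { 3, NULL };
    struct task b = { 2, &c };
    struct task a = { 1, &b };

    enter_recursive(&a);    /* three nested stack frames */
    enter_iterative(&a);    /* one frame, bounded memory */
    return 0;
}

In the real patch the loop drains a QSIMPLEQ of pending coroutines, and
QSIMPLEQ_PREPEND() splices each finished coroutine's co_queue_wakeup onto
the front of the pending list, so wakeups still run depth-first.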
--- a/block/io.c
+++ b/block/io.c
@@ -249,8 +249,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
     BdrvCoDrainData data;
 
     /* Calling bdrv_drain() from a BH ensures the current coroutine yields and
-     * other coroutines run if they were queued from
-     * qemu_co_queue_run_restart(). */
+     * other coroutines run if they were queued by aio_co_enter(). */
 
     assert(qemu_in_coroutine());
     data = (BdrvCoDrainData) {
--- a/include/qemu/coroutine_int.h
+++ b/include/qemu/coroutine_int.h
@@ -68,6 +68,5 @@ Coroutine *qemu_coroutine_new(void);
 void qemu_coroutine_delete(Coroutine *co);
 CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to,
                                       CoroutineAction action);
-void coroutine_fn qemu_co_queue_run_restart(Coroutine *co);
 
 #endif
--- a/util/qemu-coroutine-lock.c
+++ b/util/qemu-coroutine-lock.c
@@ -68,40 +68,6 @@ void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock)
     }
 }
 
-/**
- * qemu_co_queue_run_restart:
- *
- * Enter each coroutine that was previously marked for restart by
- * qemu_co_queue_next() or qemu_co_queue_restart_all().  This function is
- * invoked by the core coroutine code when the current coroutine yields or
- * terminates.
- */
-void qemu_co_queue_run_restart(Coroutine *co)
-{
-    Coroutine *next;
-    QSIMPLEQ_HEAD(, Coroutine) tmp_queue_wakeup =
-        QSIMPLEQ_HEAD_INITIALIZER(tmp_queue_wakeup);
-
-    trace_qemu_co_queue_run_restart(co);
-
-    /* Because "co" has yielded, any coroutine that we wakeup can resume it.
-     * If this happens and "co" terminates, co->co_queue_wakeup becomes
-     * invalid memory.  Therefore, use a temporary queue and do not touch
-     * the "co" coroutine as soon as you enter another one.
-     *
-     * In its turn resumed "co" can populate "co_queue_wakeup" queue with
-     * new coroutines to be woken up.  The caller, who has resumed "co",
-     * will be responsible for traversing the same queue, which may cause
-     * a different wakeup order but not any missing wakeups.
-     */
-    QSIMPLEQ_CONCAT(&tmp_queue_wakeup, &co->co_queue_wakeup);
-
-    while ((next = QSIMPLEQ_FIRST(&tmp_queue_wakeup))) {
-        QSIMPLEQ_REMOVE_HEAD(&tmp_queue_wakeup, co_queue_next);
-        qemu_coroutine_enter(next);
-    }
-}
-
 static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
 {
     Coroutine *next;
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -104,57 +104,65 @@ static void coroutine_delete(Coroutine *co)
 
 void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
 {
-    Coroutine *self = qemu_coroutine_self();
-    CoroutineAction ret;
+    QSIMPLEQ_HEAD(, Coroutine) pending = QSIMPLEQ_HEAD_INITIALIZER(pending);
+    Coroutine *from = qemu_coroutine_self();
 
-    /* Cannot rely on the read barrier for co in aio_co_wake(), as there are
-     * callers outside of aio_co_wake() */
-    const char *scheduled = atomic_mb_read(&co->scheduled);
+    QSIMPLEQ_INSERT_TAIL(&pending, co, co_queue_next);
 
-    trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg);
+    /* Run co and any queued coroutines */
+    while (!QSIMPLEQ_EMPTY(&pending)) {
+        Coroutine *to = QSIMPLEQ_FIRST(&pending);
+        CoroutineAction ret;
+
+        /* Cannot rely on the read barrier for to in aio_co_wake(), as there are
+         * callers outside of aio_co_wake() */
+        const char *scheduled = atomic_mb_read(&to->scheduled);
 
-    /* if the Coroutine has already been scheduled, entering it again will
-     * cause us to enter it twice, potentially even after the coroutine has
-     * been deleted */
-    if (scheduled) {
-        fprintf(stderr,
-                "%s: Co-routine was already scheduled in '%s'\n",
-                __func__, scheduled);
-        abort();
-    }
+        QSIMPLEQ_REMOVE_HEAD(&pending, co_queue_next);
 
-    if (co->caller) {
-        fprintf(stderr, "Co-routine re-entered recursively\n");
-        abort();
-    }
+        trace_qemu_aio_coroutine_enter(ctx, from, to, to->entry_arg);
 
-    co->caller = self;
-    co->ctx = ctx;
+        /* if the Coroutine has already been scheduled, entering it again will
+         * cause us to enter it twice, potentially even after the coroutine has
+         * been deleted */
+        if (scheduled) {
+            fprintf(stderr,
+                    "%s: Co-routine was already scheduled in '%s'\n",
+                    __func__, scheduled);
+            abort();
+        }
 
-    /* Store co->ctx before anything that stores co.  Matches
-     * barrier in aio_co_wake and qemu_co_mutex_wake.
-     */
-    smp_wmb();
+        if (to->caller) {
+            fprintf(stderr, "Co-routine re-entered recursively\n");
+            abort();
+        }
 
-    ret = qemu_coroutine_switch(self, co, COROUTINE_ENTER);
+        to->caller = from;
+        to->ctx = ctx;
 
-    qemu_co_queue_run_restart(co);
+        /* Store to->ctx before anything that stores to.  Matches
+         * barrier in aio_co_wake and qemu_co_mutex_wake.
+         */
+        smp_wmb();
 
-    /* Beware, if ret == COROUTINE_YIELD and qemu_co_queue_run_restart()
-     * has started any other coroutine, "co" might have been reentered
-     * and even freed by now!  So be careful and do not touch it.
-     */
+        ret = qemu_coroutine_switch(from, to, COROUTINE_ENTER);
 
-    switch (ret) {
-    case COROUTINE_YIELD:
-        return;
-    case COROUTINE_TERMINATE:
-        assert(!co->locks_held);
-        trace_qemu_coroutine_terminate(co);
-        coroutine_delete(co);
-        return;
-    default:
-        abort();
+        /* Queued coroutines are run depth-first; previously pending coroutines
+         * run after those queued more recently.
+         */
+        QSIMPLEQ_PREPEND(&pending, &to->co_queue_wakeup);
+
+        switch (ret) {
+        case COROUTINE_YIELD:
+            break;
+        case COROUTINE_TERMINATE:
+            assert(!to->locks_held);
+            trace_qemu_coroutine_terminate(to);
+            coroutine_delete(to);
+            break;
+        default:
+            abort();
+        }
     }
 }