-----BEGIN PGP SIGNATURE-----

iQIcBAABAgAGBQJaFFvtAAoJEL2+eyfA3jBXHgoP/2Hw6ksjPOUKwuRvOWJYvS27
 Vfr5S5yjayS3fB+6mSZJ4cX77ais0d7kyQgPyRfOI7ecMsgQXphYSJlf1ROM5VGY
 4Jn3d/jrJ1ZiN3bdQyFH4x0T8+/UuHILc5Hu0C0Jekk1sxGI/8j+k8J9pDVZ7j/8
 BvYLMqpnV+W1tGAXUlu8SwB3JqAjy8orO05bd9zN6Q0zPrrJuW+4evijHrB/LJwG
 HPSttN7khlCt+bPHSx1qDNKYr2lVE2RmaS6Nk3KS/lJgnmEerGCPTbLpno7IdQGu
 MJ+Lq2FzFNRJ2Il/7bqGio2KN3CmWlm2Kw1n6DuvjUjYXv1m4RlQ8x6EIZCcicN1
 sDARn3qm6in3v85WV0uav1ARyqn3XM2YDhDSx3VBQcvgm9pgQYuJ6ztOx3tfVJPz
 9T2R272CvOnlOFHyi5C9vZ1XEo9eJMgUNLVXPud+oMAjPodpOv04tK2IsoNOYfk0
 OX2aVuV/AvsLMD3xol3bcIOFNhVrv/ePV7J5n8TLZxOpJfbPLYkgC/gTTMkOLce3
 bO+W8YCp7GMPCmba6wx7x1frZOY5yg4gA4MVisN7vnnggr1LUU0AvkNw2CDZWj/m
 K6ZDJwTuBO1OjAUKURHxrRlcnqILaNeR3T7dS+7fS5tpkxxDkZlGZgBmJhw4/21K
 LvGpqDNcljvowdlKPKgB
 =s4RK
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging

# gpg: Signature made Tue 21 Nov 2017 17:01:33 GMT
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  qemu-iotest: add test for blockjob coroutine race condition
  qemu-iotests: add option in common.qemu for mismatch only
  coroutine: abort if we try to schedule or enter a pending coroutine
  blockjob: do not allow coroutine double entry or entry-after-completion

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2017-11-21 17:05:49 +00:00
commit 64807cd779
10 changed files with 177 additions and 7 deletions

View File

@ -797,11 +797,14 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
return;
}
job->busy = false;
/* We need to leave job->busy set here, because when we have
* put a coroutine to 'sleep', we have scheduled it to run in
* the future. We cannot enter that same coroutine again before
* it wakes and runs, otherwise we risk double-entry or entry after
* completion. */
if (!block_job_should_pause(job)) {
co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns);
}
job->busy = true;
block_job_pause_point(job);
}

View File

@ -143,7 +143,8 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
* @ns: How many nanoseconds to stop for.
*
* Put the job to sleep (assuming that it wasn't canceled) for @ns
* nanoseconds. Canceling the job will interrupt the wait immediately.
* nanoseconds. Canceling the job will not interrupt the wait, so the
* cancel will not process until the coroutine wakes up.
*/
void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns);

View File

@ -46,14 +46,21 @@ struct Coroutine {
size_t locks_held;
/* Only used when the coroutine has yielded. */
AioContext *ctx;
/* Used to catch and abort on illegal co-routine entry.
* Will contain the name of the function that had first
* scheduled the coroutine. */
const char *scheduled;
QSIMPLEQ_ENTRY(Coroutine) co_queue_next;
/* Coroutines that should be woken up when we yield or terminate.
* Only used when the coroutine is running.
*/
QSIMPLEQ_HEAD(, Coroutine) co_queue_wakeup;
/* Only used when the coroutine has yielded. */
AioContext *ctx;
QSIMPLEQ_ENTRY(Coroutine) co_queue_next;
QSLIST_ENTRY(Coroutine) co_scheduled_next;
};

99
tests/qemu-iotests/200 Executable file
View File

@ -0,0 +1,99 @@
#!/bin/bash
#
# Block job co-routine race condition test.
#
# See: https://bugzilla.redhat.com/show_bug.cgi?id=1508708
#
# Copyright (C) 2017 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# creator
owner=jcody@redhat.com

# Test number, taken from the script's own file name; used in the banner and
# for the per-test ".full" log name.
seq=$(basename "$0")
echo "QA output created by $seq"

here=$(pwd)
status=1 # failure is the default!

# Number of block-stream / block-job-cancel rounds sent to QEMU.  The final
# report line prints this value, so the loop below must run exactly this
# many times.
iterations=500

# Stop the QEMU instance started by this test and delete both scratch images.
# Runs from the trap below on every exit path.
_cleanup()
{
    _cleanup_qemu
    rm -f "${TEST_IMG}" "${BACKING_IMG}"
}
# \$status is escaped so that it is expanded when the trap fires, not now.
trap "_cleanup; exit \$status" 0 1 2 3 15

# get standard environment, filters and checks
. ./common.rc
. ./common.filter
. ./common.qemu

_supported_fmt qcow2 qed
_supported_proto file
_supported_os Linux

BACKING_IMG="${TEST_DIR}/backing.img"
TEST_IMG="${TEST_DIR}/test.img"

# Create a 512M backing image and an overlay on top of it, then write 300M of
# pattern data into the backing image.
${QEMU_IMG} create -f $IMGFMT "${BACKING_IMG}" 512M | _filter_img_create
${QEMU_IMG} create -f $IMGFMT -F $IMGFMT "${TEST_IMG}" -b "${BACKING_IMG}" 512M | _filter_img_create

${QEMU_IO} -c "write -P 0xa5 512 300M" "${BACKING_IMG}" | _filter_qemu_io

echo
echo === Starting QEMU VM ===
echo
qemu_comm_method="qmp"
# The overlay is attached as a scsi-hd on a virtio-scsi controller that is
# bound to a dedicated iothread.
_launch_qemu -device pci-bridge,id=bridge1,chassis_nr=1,bus=pci.0 \
    -object iothread,id=iothread0 \
    -device virtio-scsi-pci,bus=bridge1,addr=0x1f,id=scsi0,iothread=iothread0 \
    -drive file="${TEST_IMG}",media=disk,if=none,cache=none,id=drive_sysdisk,aio=native,format=$IMGFMT \
    -device scsi-hd,drive=drive_sysdisk,bus=scsi0.0,id=sysdisk,bootindex=0

h1=$QEMU_HANDLE

_send_qemu_cmd "$h1" "{ 'execute': 'qmp_capabilities' }" 'return'

echo
echo === Sending stream/cancel, checking for SIGSEGV only ===
echo
# Each round sends a block-stream command and the matching block-job-cancel
# in a single write.  mismatch_only='y' asks common.qemu to echo only the
# responses that do not match the pattern, so a clean run prints nothing
# here.  common.qemu tests qemu_error_no_exit only for emptiness, so the
# value 'n' still counts as "set".
for (( i = 1; i <= iterations; i++ ))
do
    mismatch_only='y' qemu_error_no_exit='n' _send_qemu_cmd "$h1" \
"{
'execute': 'block-stream',
'arguments': {
'device': 'drive_sysdisk',
'speed': 10000000,
'on-error': 'report',
'job-id': 'job-$i'
}
}
{
'execute': 'block-job-cancel',
'arguments': {
'device': 'job-$i'
}
}" \
    "{.*{.*}.*}" # should match all well-formed QMP responses
done

silent='y' _send_qemu_cmd "$h1" "{ 'execute': 'quit' }" 'return'

# Report the configured round count rather than the loop variable, which is
# one past the last round once the loop has finished.
echo "${iterations} iterations performed"
echo "*** done"
rm -f "$seq.full"
status=0

View File

@ -0,0 +1,14 @@
QA output created by 200
Formatting 'TEST_DIR/backing.img', fmt=IMGFMT size=536870912
Formatting 'TEST_DIR/test.img', fmt=IMGFMT size=536870912 backing_file=TEST_DIR/backing.img backing_fmt=IMGFMT
wrote 314572800/314572800 bytes at offset 512
300 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
=== Starting QEMU VM ===
{"return": {}}
=== Sending stream/cancel, checking for SIGSEGV only ===
500 iterations performed
*** done

View File

@ -50,6 +50,8 @@ _in_fd=4
#
# If $silent is set to anything but an empty string, then
# response is not echoed out.
# If $mismatch_only is set, only non-matching responses will
# be echoed.
function _timed_wait_for()
{
local h=${1}
@ -58,14 +60,18 @@ function _timed_wait_for()
QEMU_STATUS[$h]=0
while IFS= read -t ${QEMU_COMM_TIMEOUT} resp <&${QEMU_OUT[$h]}
do
if [ -z "${silent}" ]; then
if [ -z "${silent}" ] && [ -z "${mismatch_only}" ]; then
echo "${resp}" | _filter_testdir | _filter_qemu \
| _filter_qemu_io | _filter_qmp | _filter_hmp
fi
grep -q "${*}" < <(echo "${resp}")
if [ $? -eq 0 ]; then
return
elif [ -z "${silent}" ] && [ -n "${mismatch_only}" ]; then
echo "${resp}" | _filter_testdir | _filter_qemu \
| _filter_qemu_io | _filter_qmp | _filter_hmp
fi
done
QEMU_STATUS[$h]=-1
if [ -z "${qemu_error_no_exit}" ]; then

View File

@ -196,3 +196,4 @@
196 rw auto quick
197 rw auto quick
198 rw auto
200 rw auto

View File

@ -388,6 +388,9 @@ static void co_schedule_bh_cb(void *opaque)
QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
trace_aio_co_schedule_bh_cb(ctx, co);
aio_context_acquire(ctx);
/* Protected by write barrier in qemu_aio_coroutine_enter */
atomic_set(&co->scheduled, NULL);
qemu_coroutine_enter(co);
aio_context_release(ctx);
}
@ -438,6 +441,16 @@ fail:
void aio_co_schedule(AioContext *ctx, Coroutine *co)
{
trace_aio_co_schedule(ctx, co);
const char *scheduled = atomic_cmpxchg(&co->scheduled, NULL,
__func__);
if (scheduled) {
fprintf(stderr,
"%s: Co-routine was already scheduled in '%s'\n",
__func__, scheduled);
abort();
}
QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
co, co_scheduled_next);
qemu_bh_schedule(ctx->co_schedule_bh);

View File

@ -13,6 +13,7 @@
#include "qemu/osdep.h"
#include "qemu/coroutine.h"
#include "qemu/coroutine_int.h"
#include "qemu/timer.h"
#include "block/aio.h"
@ -25,6 +26,8 @@ static void co_sleep_cb(void *opaque)
{
CoSleepCB *sleep_cb = opaque;
/* Write of schedule protected by barrier write in aio_co_schedule */
atomic_set(&sleep_cb->co->scheduled, NULL);
aio_co_wake(sleep_cb->co);
}
@ -34,6 +37,15 @@ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
CoSleepCB sleep_cb = {
.co = qemu_coroutine_self(),
};
const char *scheduled = atomic_cmpxchg(&sleep_cb.co->scheduled, NULL,
__func__);
if (scheduled) {
fprintf(stderr,
"%s: Co-routine was already scheduled in '%s'\n",
__func__, scheduled);
abort();
}
sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb);
timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns);
qemu_coroutine_yield();

View File

@ -107,8 +107,22 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
Coroutine *self = qemu_coroutine_self();
CoroutineAction ret;
/* Cannot rely on the read barrier for co in aio_co_wake(), as there are
* callers outside of aio_co_wake() */
const char *scheduled = atomic_mb_read(&co->scheduled);
trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg);
/* if the Coroutine has already been scheduled, entering it again will
* cause us to enter it twice, potentially even after the coroutine has
* been deleted */
if (scheduled) {
fprintf(stderr,
"%s: Co-routine was already scheduled in '%s'\n",
__func__, scheduled);
abort();
}
if (co->caller) {
fprintf(stderr, "Co-routine re-entered recursively\n");
abort();