8ec90598e9
monitor_qmp_dispatcher_co() runs in the iohandler AioContext that is not polled during nested event loops. The coroutine currently reschedules itself in the main loop's qemu_aio_context AioContext, which is polled during nested event loops. One known problem is that QMP device-add calls drain_call_rcu(), which temporarily drops the BQL, leading to all sorts of havoc like other vCPU threads re-entering device emulation code while another vCPU thread is waiting in device emulation code with aio_poll(). Paolo Bonzini suggested running non-coroutine QMP handlers in the iohandler AioContext. This avoids trouble with nested event loops. His original idea was to move coroutine rescheduling to monitor_qmp_dispatch(), but I resorted to moving it to qmp_dispatch() because we don't know if the QMP handler needs to run in coroutine context in monitor_qmp_dispatch(). monitor_qmp_dispatch() would have been nicer since it's associated with the monitor implementation and not as general as qmp_dispatch(), which is also used by qemu-ga. A number of qemu-iotests need updated .out files because the order of QMP events vs QMP responses has changed. Solves Issue #1933. Cc: qemu-stable@nongnu.org Fixes:7bed89958b
("device_core: use drain_call_rcu in in qmp_device_add") Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2215192 Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2214985 Buglink: https://issues.redhat.com/browse/RHEL-17369 Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Message-ID: <20240118144823.1497953-4-stefanha@redhat.com> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Tested-by: Fiona Ebner <f.ebner@proxmox.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> (cherry picked from commit effd60c878) Signed-off-by: Michael Tokarev <mjt@tls.msk.ru> (Mjt: omit changes to tests missing in 7.2)
411 lines
12 KiB
Bash
Executable File
411 lines
12 KiB
Bash
Executable File
#!/usr/bin/env bash
# group: rw
#
# Test exiting qemu while jobs are still running
#
# Copyright (C) 2017 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

# creator
owner=kwolf@redhat.com

# Name under which this test was invoked; printed in the output banner and
# used at the end of the script to remove "$seq.full".
seq=$(basename "$0")
echo "QA output created by $seq"

status=1 # failure is the default!
# Remove everything this test may have left behind.
#
# Deletes the ".mid" and ".copy" images derived from $TEST_IMG, then calls
# _cleanup_test_img and _cleanup_qemu.  If a qemu-storage-daemon pidfile
# exists in $TEST_DIR, the process it names is killed with SIGKILL and the
# pidfile is removed.  The QSD socket in $SOCK_DIR is removed unconditionally.
#
# Globals read: TEST_IMG, TEST_DIR, SOCK_DIR
# Arguments:    none
_cleanup()
{
    _rm_test_img "${TEST_IMG}.mid"
    _rm_test_img "${TEST_IMG}.copy"
    _cleanup_test_img
    _cleanup_qemu

    # The pidfile only exists while a QSD instance started by this test may
    # still be around.
    if [ -f "$TEST_DIR/qsd.pid" ]; then
        kill -SIGKILL "$(cat "$TEST_DIR/qsd.pid")"
        rm -f "$TEST_DIR/qsd.pid"
    fi
    rm -f "$SOCK_DIR/qsd.sock"
}
# Run _cleanup on exit and on HUP/INT/QUIT/TERM, then exit with whatever
# $status holds at that point (\$status is expanded when the trap fires).
trap "_cleanup; exit \$status" 0 1 2 3 15

# get standard environment, filters and checks
. ./common.rc
. ./common.filter
. ./common.qemu

_supported_fmt qcow2
_supported_proto file
_supported_os Linux

# 64 MiB test images; the base image is created by overriding TEST_IMG for
# this single _make_test_img call.
size=$((64 * 1048576))
TEST_IMG="${TEST_IMG}.base" _make_test_img "$size"
echo
echo '=== Starting VM ==='
echo

qemu_comm_method="qmp"

# Boot qemu on the base image and complete QMP capability negotiation.
_launch_qemu \
    -drive "file=${TEST_IMG}.base,cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk"
h=$QEMU_HANDLE
_send_qemu_cmd "$h" "{ 'execute': 'qmp_capabilities' }" 'return'
echo
echo '=== Creating backing chain ==='
echo

# Build the chain base <- mid <- top: snapshot to ".mid", write 4M of data
# into it, then snapshot again to $TEST_IMG.
#
# NOTE(review): the continuation lines of the multi-line JSON literals below
# carry no leading whitespace, exactly as found; the command text is
# presumably echoed into the test output, so verify against the reference
# output before re-indenting.
_send_qemu_cmd "$h" \
    "{ 'execute': 'blockdev-snapshot-sync',
'arguments': { 'device': 'disk',
'snapshot-file': '$TEST_IMG.mid',
'format': '$IMGFMT',
'mode': 'absolute-paths' } }" \
    "return"

_send_qemu_cmd "$h" \
    "{ 'execute': 'human-monitor-command',
'arguments': { 'command-line':
'qemu-io disk \"write 0 4M\"' } }" \
    "return"

_send_qemu_cmd "$h" \
    "{ 'execute': 'blockdev-snapshot-sync',
'arguments': { 'device': 'disk',
'snapshot-file': '$TEST_IMG',
'format': '$IMGFMT',
'mode': 'absolute-paths' } }" \
    "return"
echo
echo '=== Start commit job and exit qemu ==='
echo

# Note that the reference output intentionally includes the 'offset' field in
# BLOCK_JOB_* events for all of the following block jobs. They are predictable
# and any change in the offsets would hint at a bug in the job throttling code.
#
# In order to achieve these predictable offsets, all of the following tests
# use speed=65536. Each job will perform exactly one iteration before it has
# to sleep at least for a second, which is plenty of time for the 'quit' QMP
# command to be received (after receiving the command, the rest runs
# synchronously, so jobs can arbitrarily continue or complete).
#
# The buffer size for commit and streaming is 512k (waiting for 8 seconds after
# the first request), for active commit and mirror it's large enough to cover
# the full 4M, and for backup it's the qcow2 cluster size, which we know is
# 64k. As all of these are at least as large as the speed, we are sure that the
# offset advances exactly once before qemu exits.

# NOTE(review): the continuation lines of the multi-line JSON literal below
# carry no leading whitespace, exactly as found; verify against the reference
# output before re-indenting.
_send_qemu_cmd "$h" \
    "{ 'execute': 'block-commit',
'arguments': { 'device': 'disk',
'base':'$TEST_IMG.base',
'top': '$TEST_IMG.mid',
'speed': 65536 } }" \
    "return"

# If we don't sleep here 'quit' command races with disk I/O
sleep 0.5

# Ignore the JOB_STATUS_CHANGE events while shutting down the VM. Depending on
# the timing, jobs may or may not transition through a paused state.
_send_qemu_cmd "$h" "{ 'execute': 'quit' }" "return"
wait=1 _cleanup_qemu | grep -v 'JOB_STATUS_CHANGE'
echo
echo '=== Start active commit job and exit qemu ==='
echo

# Fresh qemu instance on the top image; commit it down into the base image
# (no 'top' given) and quit while the job is still throttled.
_launch_qemu \
    -drive "file=${TEST_IMG},cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk"
h=$QEMU_HANDLE
_send_qemu_cmd "$h" "{ 'execute': 'qmp_capabilities' }" 'return'

# NOTE(review): the continuation lines of the multi-line JSON literal below
# carry no leading whitespace, exactly as found; verify against the reference
# output before re-indenting.
_send_qemu_cmd "$h" \
    "{ 'execute': 'block-commit',
'arguments': { 'device': 'disk',
'base':'$TEST_IMG.base',
'speed': 65536 } }" \
    "return"

# If we don't sleep here 'quit' command races with disk I/O
sleep 0.5

# JOB_STATUS_CHANGE events are filtered out of the shutdown output
_send_qemu_cmd "$h" "{ 'execute': 'quit' }" "return"
wait=1 _cleanup_qemu | grep -v 'JOB_STATUS_CHANGE'
echo
echo '=== Start mirror job and exit qemu ==='
echo

# Fresh qemu instance; start a throttled full mirror to ".copy" and quit
# while it is running.
_launch_qemu \
    -drive "file=${TEST_IMG},cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk"
h=$QEMU_HANDLE
_send_qemu_cmd "$h" "{ 'execute': 'qmp_capabilities' }" 'return'

# NOTE(review): the continuation lines of the multi-line JSON literal below
# carry no leading whitespace, exactly as found; verify against the reference
# output before re-indenting.
_send_qemu_cmd "$h" \
    "{ 'execute': 'drive-mirror',
'arguments': { 'device': 'disk',
'target': '$TEST_IMG.copy',
'format': '$IMGFMT',
'sync': 'full',
'speed': 65536 } }" \
    "return"

# If we don't sleep here 'quit' command may be handled before
# the first mirror iteration is done
sleep 0.5

# JOB_STATUS_CHANGE events are filtered out of the shutdown output
_send_qemu_cmd "$h" "{ 'execute': 'quit' }" "return"
wait=1 _cleanup_qemu | grep -v 'JOB_STATUS_CHANGE'
echo
echo '=== Start backup job and exit qemu ==='
echo

# Fresh qemu instance; start a throttled full backup to ".copy" with a
# 64k max-chunk and quit while it is running.
_launch_qemu \
    -drive "file=${TEST_IMG},cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk"
h=$QEMU_HANDLE
_send_qemu_cmd "$h" "{ 'execute': 'qmp_capabilities' }" 'return'

# NOTE(review): the continuation lines of the multi-line JSON literal below
# carry no leading whitespace, exactly as found; verify against the reference
# output before re-indenting.
_send_qemu_cmd "$h" \
    "{ 'execute': 'drive-backup',
'arguments': { 'device': 'disk',
'target': '$TEST_IMG.copy',
'format': '$IMGFMT',
'sync': 'full',
'speed': 65536,
'x-perf': {'max-chunk': 65536} } }" \
    "return"

# If we don't sleep here 'quit' command races with disk I/O
sleep 0.5

# JOB_STATUS_CHANGE events are filtered out of the shutdown output
_send_qemu_cmd "$h" "{ 'execute': 'quit' }" "return"
wait=1 _cleanup_qemu | grep -v 'JOB_STATUS_CHANGE'
echo
echo '=== Start streaming job and exit qemu ==='
echo

# Fresh qemu instance; start a throttled block-stream job and quit while it
# is running.
_launch_qemu \
    -drive "file=${TEST_IMG},cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk"
h=$QEMU_HANDLE
_send_qemu_cmd "$h" "{ 'execute': 'qmp_capabilities' }" 'return'

# NOTE(review): the continuation lines of the multi-line JSON literal below
# carry no leading whitespace, exactly as found; verify against the reference
# output before re-indenting.
_send_qemu_cmd "$h" \
    "{ 'execute': 'block-stream',
'arguments': { 'device': 'disk',
'speed': 65536 } }" \
    "return"

# If we don't sleep here 'quit' command races with disk I/O
sleep 0.5

# JOB_STATUS_CHANGE events are filtered out of the shutdown output
_send_qemu_cmd "$h" "{ 'execute': 'quit' }" "return"
wait=1 _cleanup_qemu | grep -v 'JOB_STATUS_CHANGE'

# Check the test image once all of the interrupted jobs above are done
_check_test_img
echo
echo '=== Start mirror to throttled QSD and exit qemu ==='
echo

# Mirror to a throttled QSD instance (so that qemu cannot drain the
# throttling), wait for READY, then write some data to the device,
# and then quit qemu.
# (qemu should force-cancel the job and not wait for the data to be
# written to the target.)

_make_test_img "$size"
# Will be used by this and the next case
#
# Start a daemonized qemu-storage-daemon that exports a null-co node of
# $size bytes behind a throttle filter (throttle group limited to
# bps-total=1048576) as the writable NBD export "target" on the unix socket
# $SOCK_DIR/qsd.sock.  The daemon's pidfile is $TEST_DIR/qsd.pid.
#
# Globals read: QSD, size, SOCK_DIR, TEST_DIR
# Arguments:    none
set_up_throttled_qsd() {
    # $QSD is deliberately left unquoted, as in the original invocation.
    # NOTE(review): presumably it may expand to a wrapper plus arguments;
    # confirm before quoting it.
    $QSD \
        --object throttle-group,id=thrgr,limits.bps-total=1048576 \
        --blockdev "null-co,node-name=null,size=$size" \
        --blockdev throttle,node-name=throttled,throttle-group=thrgr,file=null \
        --nbd-server addr.type=unix,addr.path="$SOCK_DIR/qsd.sock" \
        --export nbd,id=exp,node-name=throttled,name=target,writable=true \
        --pidfile "$TEST_DIR/qsd.pid" \
        --daemonize
}
set_up_throttled_qsd

# Need a virtio-blk device so that qemu-io writes will not block the monitor
#
# NOTE(review): the continuation lines of the multi-line JSON literals below
# carry no leading whitespace, exactly as found; verify against the reference
# output before re-indenting.
_launch_qemu \
    --blockdev file,node-name=source-proto,filename="$TEST_IMG" \
    --blockdev qcow2,node-name=source-fmt,file=source-proto \
    --device virtio-blk,id=vblk,drive=source-fmt \
    --blockdev "{\"driver\": \"nbd\",
\"node-name\": \"target\",
\"server\": {
\"type\": \"unix\",
\"path\": \"$SOCK_DIR/qsd.sock\"
},
\"export\": \"target\"}"

h=$QEMU_HANDLE
_send_qemu_cmd "$h" '{"execute": "qmp_capabilities"}' 'return'

# Use sync=top, so the first pass will not copy the whole image
_send_qemu_cmd "$h" \
    '{"execute": "blockdev-mirror",
"arguments": {
"job-id": "mirror",
"device": "source-fmt",
"target": "target",
"sync": "top"
}}' \
    'return' \
    | grep -v JOB_STATUS_CHANGE # Ignore these events during creation
# This too will be used by this and the next case
#
# Wait for the running block job to emit BLOCK_JOB_READY, dirty every other
# cluster of the virtio-blk device, poll query-block-jobs until the job
# length reflects those writes (or a 10 second timeout expires), then quit
# qemu and print the shutdown/cancel events.  Finally sends SIGTERM to the
# QSD instance named in $TEST_DIR/qsd.pid.
#
# $1: QEMU handle
# $2: Image size
#
# Globals read:    TEST_DIR
# Globals written: h, size, capture_events, QEMU_EVENTS, cluster_size, step,
#                  ofs, qemu_io_cmd, final_len, timeout, len (nothing here is
#                  declared local, so the caller's variables of the same name
#                  are overwritten)
wait_for_job_and_quit() {
    h=$1
    size=$2

    # List of expected events
    capture_events='BLOCK_JOB_READY JOB_STATUS_CHANGE'
    _wait_event "$h" 'BLOCK_JOB_READY'
    QEMU_EVENTS= # Ignore all JOB_STATUS_CHANGE events that came before READY

    # Write something to the device for post-READY mirroring. Write it in
    # blocks matching the cluster size, each spaced one block apart, so
    # that the mirror job will have to spawn one request per cluster.
    # Because the number of concurrent requests is limited (to 16), this
    # limits the number of bytes concurrently in flight, which speeds up
    # cancelling the job (in-flight requests still are waited for).
    # To limit the number of bytes in flight, we could alternatively pass
    # something for blockdev-mirror's @buf-size parameter, but
    # block-commit does not have such a parameter, so we need to figure
    # something out that works for both.

    cluster_size=65536
    step=$((cluster_size * 2))

    echo '--- Writing data to the virtio-blk device ---'

    for ofs in $(seq 0 "$step" "$((size - step))"); do
        # The qemu-io argument is wrapped in \" so that it stays a quoted
        # string once embedded in the JSON command line below.
        qemu_io_cmd="qemu-io -d vblk/virtio-backend "
        qemu_io_cmd+="\\\"aio_write $ofs $cluster_size\\\""

        # Do not include these requests in the reference output
        # (it's just too much)
        silent=yes _send_qemu_cmd "$h" \
            "{\"execute\": \"human-monitor-command\",
\"arguments\": {
\"command-line\": \"$qemu_io_cmd\"
}}" \
            'return'
    done

    # Wait until the job's length is updated to reflect the write requests

    # We have written to half of the device, so this is the expected job length
    final_len=$((size / 2))
    timeout=100 # unit: 0.1 seconds
    while true; do
        # Extract the numeric "len" field from the query-block-jobs reply
        len=$(
            _send_qemu_cmd "$h" \
                '{"execute": "query-block-jobs"}' \
                'return.*"len": [0-9]\+' \
                | grep 'return.*"len": [0-9]\+' \
                | sed -e 's/.*"len": \([0-9]\+\).*/\1/'
        )
        if [ "$len" -eq "$final_len" ]; then
            break
        fi
        timeout=$((timeout - 1))
        if [ "$timeout" -eq 0 ]; then
            # Report the timeout but carry on with the quit sequence
            echo "ERROR: Timeout waiting for job to reach len=$final_len"
            break
        fi
        sleep 0.1
    done

    sleep 1

    # List of expected events
    capture_events='BLOCK_JOB_CANCELLED JOB_STATUS_CHANGE SHUTDOWN'

    _send_qemu_cmd "$h" \
        '{"execute": "quit"}' \
        'return'

    _wait_event "$h" 'SHUTDOWN'
    _wait_event "$h" 'JOB_STATUS_CHANGE' # standby
    _wait_event "$h" 'JOB_STATUS_CHANGE' # ready
    _wait_event "$h" 'JOB_STATUS_CHANGE' # aborting
    # Filter the offset (depends on when exactly `quit` was issued)
    _wait_event "$h" 'BLOCK_JOB_CANCELLED' \
        | sed -e 's/"offset": [0-9]\+/"offset": (filtered)/'
    _wait_event "$h" 'JOB_STATUS_CHANGE' # concluded
    _wait_event "$h" 'JOB_STATUS_CHANGE' # null

    wait=yes _cleanup_qemu

    kill -SIGTERM "$(cat "$TEST_DIR/qsd.pid")"
}
wait_for_job_and_quit "$h" "$size"

echo
echo '=== Start active commit to throttled QSD and exit qemu ==='
echo

# Same as the above, but instead of mirroring, do an active commit

_make_test_img "$size"

set_up_throttled_qsd

# The NBD export of the throttled QSD serves as the backing node of the
# qcow2 source, so the commit job writes into the throttled target.
#
# NOTE(review): the continuation lines of the multi-line JSON literals below
# carry no leading whitespace, exactly as found; verify against the reference
# output before re-indenting.
_launch_qemu \
    --blockdev "{\"driver\": \"nbd\",
\"node-name\": \"target\",
\"server\": {
\"type\": \"unix\",
\"path\": \"$SOCK_DIR/qsd.sock\"
},
\"export\": \"target\"}" \
    --blockdev file,node-name=source-proto,filename="$TEST_IMG" \
    --blockdev qcow2,node-name=source-fmt,file=source-proto,backing=target \
    --device virtio-blk,id=vblk,drive=source-fmt

h=$QEMU_HANDLE
_send_qemu_cmd "$h" '{"execute": "qmp_capabilities"}' 'return'

_send_qemu_cmd "$h" \
    '{"execute": "block-commit",
"arguments": {
"job-id": "commit",
"device": "source-fmt"
}}' \
    'return' \
    | grep -v JOB_STATUS_CHANGE # Ignore these events during creation

wait_for_job_and_quit "$h" "$size"
# success, all done
echo "*** done"
rm -f "$seq.full"
# Picked up by the exit trap, which exits with $status
status=0