Block layer fixes

- mirror: Fix job-complete race condition causing unexpected errors
 - fdc: Fix 'fallback' property on sysbus floppy disk controllers
 - rbd: Fix memory leaks
 - iotest improvements
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmBwfRcRHGt3b2xmQHJl
 ZGhhdC5jb20ACgkQfwmycsiPL9afvA/9Ek5sr95gYMWz+4XuuaeVYjCwyrcEv3WX
 +zZNCwT/lcbmKAmkKwuHcU9nDpEfeRZ2nmJB1rhCKIdya/qWhLyJKRY7s8Ip8W8h
 uhWz+LIoo8q/ZGcxIDlkyazr8s5qQMSZtvBkb/QSi2h8yhwY8wf1dIk2J3IgB2wf
 JjZaZIyGpUYnfDmYncnTduGUOKrgHPNaSagGbis9OFqd8jqdcCt9vb+jDNYa29st
 e5223MUpjHilhdvrM7lCNX8wTcximTeZfXnBvZd87MQXIoitl3jb9Da0qTLZyo/b
 uGORfRs2DXldFgdHAf699KDWjVGieGnMKUQkP3vgFtrUDd4xfj1lRWBmnKolBwng
 4ku9cP8tqRIA7y6LFJX/ExxeR48AwbbMbsQIJNj3mjez49HRlGPMGVRYonCWK9B6
 /XQF8FD+Xk9Ivua6rMRXK7IHMqdJGKIiTvDf1frwg1qbYPrPOAOu9F3h/ybGQQhb
 GxQHQccAinmcDco0PSoJnqe/3ukyuSOrV4Bf/JZpo9pIJau3By4XphLmu35JdBhh
 B+xVUBr7k3SQ/s/DLRnuunRPZGHMGY+Cf9v0dqvTKDEmStdrctfyTshjP01LDsex
 zBmDLFIyzmFwwz14atNP6kusce8H4XIbC6x51DV2jNG01OL4PcTVQaCDmDTsYqlY
 Yi3HfzSG9Ng=
 =948n
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer fixes

- mirror: Fix job-complete race condition causing unexpected errors
- fdc: Fix 'fallback' property on sysbus floppy disk controllers
- rbd: Fix memory leaks
- iotest improvements

# gpg: Signature made Fri 09 Apr 2021 17:13:11 BST
# gpg:                using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg:                issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  test-blockjob: Test job_wait_unpaused()
  job: Allow complete for jobs on standby
  mirror: Do not enter a paused job on completion
  mirror: Move open_backing_file to exit_common
  hw/block/fdc: Fix 'fallback' property on sysbus floppy disk controllers
  iotests: Test mirror-top filter permissions
  iotests: add test for removing persistent bitmap from backing file
  iotests/qsd-jobs: Filter events in the first test
  block/rbd: fix memory leak in qemu_rbd_co_create_opts()
  block/rbd: fix memory leak in qemu_rbd_connect()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2021-04-09 19:26:42 +01:00
commit 836b36af93
11 changed files with 349 additions and 34 deletions

View File

@ -689,6 +689,14 @@ static int mirror_exit_common(Job *job)
ret = -EPERM;
}
}
} else if (!abort && s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
assert(!bdrv_backing_chain_next(target_bs));
ret = bdrv_open_backing_file(bdrv_skip_filters(target_bs), NULL,
"backing", &local_err);
if (ret < 0) {
error_report_err(local_err);
local_err = NULL;
}
}
if (s->to_replace) {
@ -1107,9 +1115,6 @@ immediate_exit:
static void mirror_complete(Job *job, Error **errp)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
BlockDriverState *target;
target = blk_bs(s->target);
if (!s->synced) {
error_setg(errp, "The active block job '%s' cannot be completed",
@ -1117,17 +1122,6 @@ static void mirror_complete(Job *job, Error **errp)
return;
}
if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
int ret;
assert(!bdrv_backing_chain_next(target));
ret = bdrv_open_backing_file(bdrv_skip_filters(target), NULL,
"backing", errp);
if (ret < 0) {
return;
}
}
/* block all operations on to_replace bs */
if (s->replaces) {
AioContext *replace_aio_context;
@ -1154,7 +1148,11 @@ static void mirror_complete(Job *job, Error **errp)
}
s->should_complete = true;
job_enter(job);
/* If the job is paused, it will be re-entered when it is resumed */
if (!job->paused) {
job_enter(job);
}
}
static void coroutine_fn mirror_pause(Job *job)

View File

@ -444,6 +444,7 @@ static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv,
loc->user = g_strdup(qdict_get_try_str(options, "user"));
loc->has_user = !!loc->user;
loc->q_namespace = g_strdup(qdict_get_try_str(options, "namespace"));
loc->has_q_namespace = !!loc->q_namespace;
loc->image = g_strdup(qdict_get_try_str(options, "image"));
keypairs = qdict_get_try_str(options, "=keyvalue-pairs");
@ -563,13 +564,13 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
if (local_err) {
error_propagate(errp, local_err);
r = -EINVAL;
goto failed_opts;
goto out;
}
r = rados_create(cluster, opts->user);
if (r < 0) {
error_setg_errno(errp, -r, "error initializing");
goto failed_opts;
goto out;
}
/* try default location when conf=NULL, but ignore failure */
@ -626,11 +627,12 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
*/
rados_ioctx_set_namespace(*io_ctx, opts->q_namespace);
return 0;
r = 0;
goto out;
failed_shutdown:
rados_shutdown(*cluster);
failed_opts:
out:
g_free(mon_host);
return r;
}

View File

@ -2893,7 +2893,7 @@ static Property sysbus_fdc_properties[] = {
DEFINE_PROP_SIGNED("fdtypeB", FDCtrlSysBus, state.qdev_for_drives[1].type,
FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type,
FloppyDriveType),
DEFINE_PROP_SIGNED("fallback", FDCtrlISABus, state.fallback,
DEFINE_PROP_SIGNED("fallback", FDCtrlSysBus, state.fallback,
FLOPPY_DRIVE_TYPE_144, qdev_prop_fdc_drive_type,
FloppyDriveType),
DEFINE_PROP_END_OF_LIST(),
@ -2918,7 +2918,7 @@ static Property sun4m_fdc_properties[] = {
DEFINE_PROP_SIGNED("fdtype", FDCtrlSysBus, state.qdev_for_drives[0].type,
FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type,
FloppyDriveType),
DEFINE_PROP_SIGNED("fallback", FDCtrlISABus, state.fallback,
DEFINE_PROP_SIGNED("fallback", FDCtrlSysBus, state.fallback,
FLOPPY_DRIVE_TYPE_144, qdev_prop_fdc_drive_type,
FloppyDriveType),
DEFINE_PROP_END_OF_LIST(),

4
job.c
View File

@ -56,7 +56,7 @@ bool JobVerbTable[JOB_VERB__MAX][JOB_STATUS__MAX] = {
[JOB_VERB_PAUSE] = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
[JOB_VERB_RESUME] = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
[JOB_VERB_SET_SPEED] = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
[JOB_VERB_COMPLETE] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
[JOB_VERB_COMPLETE] = {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0},
[JOB_VERB_FINALIZE] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0},
[JOB_VERB_DISMISS] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0},
};
@ -991,7 +991,7 @@ void job_complete(Job *job, Error **errp)
if (job_apply_verb(job, JOB_VERB_COMPLETE, errp)) {
return;
}
if (job->pause_count || job_is_cancelled(job) || !job->driver->complete) {
if (job_is_cancelled(job) || !job->driver->complete) {
error_setg(errp, "The active block job '%s' cannot be completed",
job->id);
return;

View File

@ -0,0 +1,121 @@
#!/usr/bin/env python3
# group: rw
#
# Test permissions taken by the mirror-top filter
#
# Copyright (C) 2021 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
import iotests
from iotests import qemu_img
# Import qemu after iotests.py has amended sys.path
# pylint: disable=wrong-import-order
import qemu
image_size = 1 * 1024 * 1024
source = os.path.join(iotests.test_dir, 'source.img')
class TestMirrorTopPerms(iotests.QMPTestCase):
def setUp(self):
assert qemu_img('create', '-f', iotests.imgfmt, source,
str(image_size)) == 0
self.vm = iotests.VM()
self.vm.add_drive(source)
self.vm.add_blockdev(f'null-co,node-name=null,size={image_size}')
self.vm.launch()
# Will be created by the test function itself
self.vm_b = None
def tearDown(self):
try:
self.vm.shutdown()
except qemu.machine.AbnormalShutdown:
pass
if self.vm_b is not None:
self.vm_b.shutdown()
os.remove(source)
def test_cancel(self):
"""
Before commit 53431b9086b28, mirror-top used to not take any
permissions but WRITE and share all permissions. Because it
is inserted between the source's original parents and the
source, there generally was no parent that would have taken or
unshared any permissions on the source, which means that an
external process could access the image unhindered by locks.
(Unless there was a parent above the protocol node that would
take its own locks, e.g. a format driver.)
This is bad enough, but if the mirror job is then cancelled,
the mirroring VM tries to take back the image, restores the
original permissions taken and unshared, and assumes this must
just work. But it will not, and so the VM aborts.
Commit 53431b9086b28 made mirror keep the original permissions
and so no other process can "steal" the image.
(Note that you cannot really do the same with the target image
and then completing the job, because the mirror job always
took/unshared the correct permissions on the target. For
example, it does not share READ_CONSISTENT, which makes it
difficult to let some other qemu process open the image.)
"""
result = self.vm.qmp('blockdev-mirror',
job_id='mirror',
device='drive0',
target='null',
sync='full')
self.assert_qmp(result, 'return', {})
self.vm.event_wait('BLOCK_JOB_READY')
# We want this to fail because the image cannot be locked.
# If it does not fail, continue still and see what happens.
self.vm_b = iotests.VM(path_suffix='b')
# Must use -blockdev -device so we can use share-rw.
# (And we need share-rw=on because mirror-top was always
# forced to take the WRITE permission so it can write to the
# source image.)
self.vm_b.add_blockdev(f'file,node-name=drive0,filename={source}')
self.vm_b.add_device('virtio-blk,drive=drive0,share-rw=on')
try:
self.vm_b.launch()
print('ERROR: VM B launched successfully, this should not have '
'happened')
except qemu.qmp.QMPConnectError:
assert 'Is another process using the image' in self.vm_b.get_log()
result = self.vm.qmp('block-job-cancel',
device='mirror')
self.assert_qmp(result, 'return', {})
self.vm.event_wait('BLOCK_JOB_COMPLETED')
if __name__ == '__main__':
# No metadata format driver supported, because they would for
# example always unshare the WRITE permission. The raw driver
# just passes through the permissions from the guest device, and
# those are the permissions that we want to test.
iotests.main(supported_fmts=['raw'],
supported_protocols=['file'])

View File

@ -0,0 +1,5 @@
.
----------------------------------------------------------------------
Ran 1 tests
OK

View File

@ -52,9 +52,12 @@ echo "=== Job still present at shutdown ==="
echo
# Just make sure that this doesn't crash
# (Filter job status and READY events, because their order may differ
# between runs, particularly around when 'quit' is issued)
$QSD --chardev stdio,id=stdio --monitor chardev=stdio \
--blockdev node-name=file0,driver=file,filename="$TEST_IMG" \
--blockdev node-name=fmt0,driver=qcow2,file=file0 <<EOF | _filter_qmp
--blockdev node-name=fmt0,driver=qcow2,file=file0 <<EOF \
| _filter_qmp | grep -v JOB_STATUS_CHANGE | grep -v BLOCK_JOB_READY
{"execute":"qmp_capabilities"}
{"execute": "block-commit", "arguments": {"device": "fmt0", "job-id": "job0"}}
{"execute": "quit"}

View File

@ -6,19 +6,9 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/
QMP_VERSION
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "job0"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job0"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
=== Streaming can't get permission on base node ===

View File

@ -0,0 +1,69 @@
#!/usr/bin/env python3
#
# Test removing persistent bitmap from backing
#
# Copyright (c) 2021 Virtuozzo International GmbH.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import iotests
from iotests import log, qemu_img_create, qemu_img, qemu_img_pipe
iotests.script_initialize(supported_fmts=['qcow2'])
top, base = iotests.file_path('top', 'base')
size = '1M'
assert qemu_img_create('-f', iotests.imgfmt, base, size) == 0
assert qemu_img_create('-f', iotests.imgfmt, '-b', base,
'-F', iotests.imgfmt, top, size) == 0
assert qemu_img('bitmap', '--add', base, 'bitmap0') == 0
# Just assert that our method of checking bitmaps in the image works.
assert 'bitmaps' in qemu_img_pipe('info', base)
vm = iotests.VM().add_drive(top, 'backing.node-name=base')
vm.launch()
log('Trying to remove persistent bitmap from r-o base node, should fail:')
vm.qmp_log('block-dirty-bitmap-remove', node='base', name='bitmap0')
new_base_opts = {
'node-name': 'base',
'driver': 'qcow2',
'file': {
'driver': 'file',
'filename': base
},
'read-only': False
}
# Don't want to bother with filtering qmp_log for reopen command
result = vm.qmp('x-blockdev-reopen', **new_base_opts)
if result != {'return': {}}:
log('Failed to reopen: ' + str(result))
log('Remove persistent bitmap from base node reopened to RW:')
vm.qmp_log('block-dirty-bitmap-remove', node='base', name='bitmap0')
new_base_opts['read-only'] = True
result = vm.qmp('x-blockdev-reopen', **new_base_opts)
if result != {'return': {}}:
log('Failed to reopen: ' + str(result))
vm.shutdown()
if 'bitmaps' in qemu_img_pipe('info', base):
log('ERROR: Bitmap is still in the base image')

View File

@ -0,0 +1,6 @@
Trying to remove persistent bitmap from r-o base node, should fail:
{"execute": "block-dirty-bitmap-remove", "arguments": {"name": "bitmap0", "node": "base"}}
{"error": {"class": "GenericError", "desc": "Bitmap 'bitmap0' is readonly and cannot be modified"}}
Remove persistent bitmap from base node reopened to RW:
{"execute": "block-dirty-bitmap-remove", "arguments": {"name": "bitmap0", "node": "base"}}
{"return": {}}

View File

@ -16,6 +16,7 @@
#include "block/blockjob_int.h"
#include "sysemu/block-backend.h"
#include "qapi/qmp/qdict.h"
#include "iothread.h"
static const BlockJobDriver test_block_job_driver = {
.job_driver = {
@ -375,6 +376,125 @@ static void test_cancel_concluded(void)
cancel_common(s);
}
/* (See test_yielding_driver for the job description) */
typedef struct YieldingJob {
BlockJob common;
bool should_complete;
} YieldingJob;
static void yielding_job_complete(Job *job, Error **errp)
{
YieldingJob *s = container_of(job, YieldingJob, common.job);
s->should_complete = true;
job_enter(job);
}
static int coroutine_fn yielding_job_run(Job *job, Error **errp)
{
YieldingJob *s = container_of(job, YieldingJob, common.job);
job_transition_to_ready(job);
while (!s->should_complete) {
job_yield(job);
}
return 0;
}
/*
* This job transitions immediately to the READY state, and then
* yields until it is to complete.
*/
static const BlockJobDriver test_yielding_driver = {
.job_driver = {
.instance_size = sizeof(YieldingJob),
.free = block_job_free,
.user_resume = block_job_user_resume,
.run = yielding_job_run,
.complete = yielding_job_complete,
},
};
/*
* Test that job_complete() works even on jobs that are in a paused
* state (i.e., STANDBY).
*
* To do this, run YieldingJob in an IO thread, get it into the READY
* state, then have a drained section. Before ending the section,
* acquire the context so the job will not be entered and will thus
* remain on STANDBY.
*
* job_complete() should still work without error.
*
* Note that on the QMP interface, it is impossible to lock an IO
* thread before a drained section ends. In practice, the
* bdrv_drain_all_end() and the aio_context_acquire() will be
* reversed. However, that makes for worse reproducibility here:
* Sometimes, the job would no longer be in STANDBY then but already
* be started. We cannot prevent that, because the IO thread runs
* concurrently. We can only prevent it by taking the lock before
* ending the drained section, so we do that.
*
* (You can reverse the order of operations and most of the time the
* test will pass, but sometimes the assert(status == STANDBY) will
* fail.)
*/
static void test_complete_in_standby(void)
{
BlockBackend *blk;
IOThread *iothread;
AioContext *ctx;
Job *job;
BlockJob *bjob;
/* Create a test drive, move it to an IO thread */
blk = create_blk(NULL);
iothread = iothread_new();
ctx = iothread_get_aio_context(iothread);
blk_set_aio_context(blk, ctx, &error_abort);
/* Create our test job */
bjob = mk_job(blk, "job", &test_yielding_driver, true,
JOB_MANUAL_FINALIZE | JOB_MANUAL_DISMISS);
job = &bjob->job;
assert(job->status == JOB_STATUS_CREATED);
/* Wait for the job to become READY */
job_start(job);
aio_context_acquire(ctx);
AIO_WAIT_WHILE(ctx, job->status != JOB_STATUS_READY);
aio_context_release(ctx);
/* Begin the drained section, pausing the job */
bdrv_drain_all_begin();
assert(job->status == JOB_STATUS_STANDBY);
/* Lock the IO thread to prevent the job from being run */
aio_context_acquire(ctx);
/* This will schedule the job to resume it */
bdrv_drain_all_end();
/* But the job cannot run, so it will remain on standby */
assert(job->status == JOB_STATUS_STANDBY);
/* Even though the job is on standby, this should work */
job_complete(job, &error_abort);
/* The test is done now, clean up. */
job_finish_sync(job, NULL, &error_abort);
assert(job->status == JOB_STATUS_PENDING);
job_finalize(job, &error_abort);
assert(job->status == JOB_STATUS_CONCLUDED);
job_dismiss(&job, &error_abort);
destroy_blk(blk);
aio_context_release(ctx);
iothread_join(iothread);
}
int main(int argc, char **argv)
{
qemu_init_main_loop(&error_abort);
@ -389,5 +509,6 @@ int main(int argc, char **argv)
g_test_add_func("/blockjob/cancel/standby", test_cancel_standby);
g_test_add_func("/blockjob/cancel/pending", test_cancel_pending);
g_test_add_func("/blockjob/cancel/concluded", test_cancel_concluded);
g_test_add_func("/blockjob/complete_in_standby", test_complete_in_standby);
return g_test_run();
}