a937f8e857
virtio_blk_dma_restart_cb() is tricky because the BH must deal with
virtio_blk_data_plane_start()/virtio_blk_data_plane_stop() being called.
There are two issues with the code:
1. virtio_blk_realize() should use qdev_add_vm_change_state_handler()
instead of qemu_add_vm_change_state_handler(). This ensures the
ordering with virtio_init()'s vm change state handler that calls
virtio_blk_data_plane_start()/virtio_blk_data_plane_stop() is
well-defined. Then blk's AioContext is guaranteed to be up-to-date in
virtio_blk_dma_restart_cb() and it's no longer necessary to have a
special case for virtio_blk_data_plane_start().
2. Only blk_drain() waits for virtio_blk_dma_restart_cb()'s
blk_inc_in_flight() to be decremented. The bdrv_drain() family of
functions do not wait for BlockBackend's in_flight counter to reach
zero. virtio_blk_data_plane_stop() relies on blk_set_aio_context()'s
implicit drain, but that's a bdrv_drain() and not a blk_drain().
Note that virtio_blk_reset() already correctly relies on blk_drain().
If virtio_blk_data_plane_stop() switches to blk_drain() then we can
properly wait for pending virtio_blk_dma_restart_bh() calls.
Once these issues are taken care of the code becomes simpler. This
change is in preparation for multiple IOThreads in virtio-blk where we
need to clean up the multi-threading behavior.
I ran the reproducer from commit 49b44549ac
("virtio-blk: On restart,
process queued requests in the proper context") to check that there is
no regression.
Cc: Sergio Lopez <slp@redhat.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Message-id: 20221102182337.252202-1-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
360 lines
10 KiB
C
360 lines
10 KiB
C
/*
|
|
* Dedicated thread for virtio-blk I/O processing
|
|
*
|
|
* Copyright 2012 IBM, Corp.
|
|
* Copyright 2012 Red Hat, Inc. and/or its affiliates
|
|
*
|
|
* Authors:
|
|
* Stefan Hajnoczi <stefanha@redhat.com>
|
|
*
|
|
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
|
* See the COPYING file in the top-level directory.
|
|
*
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "qapi/error.h"
|
|
#include "trace.h"
|
|
#include "qemu/iov.h"
|
|
#include "qemu/main-loop.h"
|
|
#include "qemu/thread.h"
|
|
#include "qemu/error-report.h"
|
|
#include "hw/virtio/virtio-access.h"
|
|
#include "hw/virtio/virtio-blk.h"
|
|
#include "virtio-blk.h"
|
|
#include "block/aio.h"
|
|
#include "hw/virtio/virtio-bus.h"
|
|
#include "qom/object_interfaces.h"
|
|
|
|
struct VirtIOBlockDataPlane {
|
|
bool starting;
|
|
bool stopping;
|
|
|
|
VirtIOBlkConf *conf;
|
|
VirtIODevice *vdev;
|
|
QEMUBH *bh; /* bh for guest notification */
|
|
unsigned long *batch_notify_vqs;
|
|
bool batch_notifications;
|
|
|
|
/* Note that these EventNotifiers are assigned by value. This is
|
|
* fine as long as you do not call event_notifier_cleanup on them
|
|
* (because you don't own the file descriptor or handle; you just
|
|
* use it).
|
|
*/
|
|
IOThread *iothread;
|
|
AioContext *ctx;
|
|
};
|
|
|
|
/* Raise an interrupt to signal guest, if necessary */
|
|
void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq)
|
|
{
|
|
if (s->batch_notifications) {
|
|
set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
|
|
qemu_bh_schedule(s->bh);
|
|
} else {
|
|
virtio_notify_irqfd(s->vdev, vq);
|
|
}
|
|
}
|
|
|
|
static void notify_guest_bh(void *opaque)
|
|
{
|
|
VirtIOBlockDataPlane *s = opaque;
|
|
unsigned nvqs = s->conf->num_queues;
|
|
unsigned long bitmap[BITS_TO_LONGS(nvqs)];
|
|
unsigned j;
|
|
|
|
memcpy(bitmap, s->batch_notify_vqs, sizeof(bitmap));
|
|
memset(s->batch_notify_vqs, 0, sizeof(bitmap));
|
|
|
|
for (j = 0; j < nvqs; j += BITS_PER_LONG) {
|
|
unsigned long bits = bitmap[j / BITS_PER_LONG];
|
|
|
|
while (bits != 0) {
|
|
unsigned i = j + ctzl(bits);
|
|
VirtQueue *vq = virtio_get_queue(s->vdev, i);
|
|
|
|
virtio_notify_irqfd(s->vdev, vq);
|
|
|
|
bits &= bits - 1; /* clear right-most bit */
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Context: QEMU global mutex held */
|
|
bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
|
|
VirtIOBlockDataPlane **dataplane,
|
|
Error **errp)
|
|
{
|
|
VirtIOBlockDataPlane *s;
|
|
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
|
|
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
|
|
|
|
*dataplane = NULL;
|
|
|
|
if (conf->iothread) {
|
|
if (!k->set_guest_notifiers || !k->ioeventfd_assign) {
|
|
error_setg(errp,
|
|
"device is incompatible with iothread "
|
|
"(transport does not support notifiers)");
|
|
return false;
|
|
}
|
|
if (!virtio_device_ioeventfd_enabled(vdev)) {
|
|
error_setg(errp, "ioeventfd is required for iothread");
|
|
return false;
|
|
}
|
|
|
|
/* If dataplane is (re-)enabled while the guest is running there could
|
|
* be block jobs that can conflict.
|
|
*/
|
|
if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) {
|
|
error_prepend(errp, "cannot start virtio-blk dataplane: ");
|
|
return false;
|
|
}
|
|
}
|
|
/* Don't try if transport does not support notifiers. */
|
|
if (!virtio_device_ioeventfd_enabled(vdev)) {
|
|
return false;
|
|
}
|
|
|
|
s = g_new0(VirtIOBlockDataPlane, 1);
|
|
s->vdev = vdev;
|
|
s->conf = conf;
|
|
|
|
if (conf->iothread) {
|
|
s->iothread = conf->iothread;
|
|
object_ref(OBJECT(s->iothread));
|
|
s->ctx = iothread_get_aio_context(s->iothread);
|
|
} else {
|
|
s->ctx = qemu_get_aio_context();
|
|
}
|
|
s->bh = aio_bh_new(s->ctx, notify_guest_bh, s);
|
|
s->batch_notify_vqs = bitmap_new(conf->num_queues);
|
|
|
|
*dataplane = s;
|
|
|
|
return true;
|
|
}
|
|
|
|
/* Context: QEMU global mutex held */
|
|
void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
|
|
{
|
|
VirtIOBlock *vblk;
|
|
|
|
if (!s) {
|
|
return;
|
|
}
|
|
|
|
vblk = VIRTIO_BLK(s->vdev);
|
|
assert(!vblk->dataplane_started);
|
|
g_free(s->batch_notify_vqs);
|
|
qemu_bh_delete(s->bh);
|
|
if (s->iothread) {
|
|
object_unref(OBJECT(s->iothread));
|
|
}
|
|
g_free(s);
|
|
}
|
|
|
|
/* Context: QEMU global mutex held */
|
|
int virtio_blk_data_plane_start(VirtIODevice *vdev)
|
|
{
|
|
VirtIOBlock *vblk = VIRTIO_BLK(vdev);
|
|
VirtIOBlockDataPlane *s = vblk->dataplane;
|
|
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vblk)));
|
|
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
|
|
AioContext *old_context;
|
|
unsigned i;
|
|
unsigned nvqs = s->conf->num_queues;
|
|
Error *local_err = NULL;
|
|
int r;
|
|
|
|
if (vblk->dataplane_started || s->starting) {
|
|
return 0;
|
|
}
|
|
|
|
s->starting = true;
|
|
|
|
if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
|
|
s->batch_notifications = true;
|
|
} else {
|
|
s->batch_notifications = false;
|
|
}
|
|
|
|
/* Set up guest notifier (irq) */
|
|
r = k->set_guest_notifiers(qbus->parent, nvqs, true);
|
|
if (r != 0) {
|
|
error_report("virtio-blk failed to set guest notifier (%d), "
|
|
"ensure -accel kvm is set.", r);
|
|
goto fail_guest_notifiers;
|
|
}
|
|
|
|
/*
|
|
* Batch all the host notifiers in a single transaction to avoid
|
|
* quadratic time complexity in address_space_update_ioeventfds().
|
|
*/
|
|
memory_region_transaction_begin();
|
|
|
|
/* Set up virtqueue notify */
|
|
for (i = 0; i < nvqs; i++) {
|
|
r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true);
|
|
if (r != 0) {
|
|
int j = i;
|
|
|
|
fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r);
|
|
while (i--) {
|
|
virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
|
|
}
|
|
|
|
/*
|
|
* The transaction expects the ioeventfds to be open when it
|
|
* commits. Do it now, before the cleanup loop.
|
|
*/
|
|
memory_region_transaction_commit();
|
|
|
|
while (j--) {
|
|
virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j);
|
|
}
|
|
goto fail_host_notifiers;
|
|
}
|
|
}
|
|
|
|
memory_region_transaction_commit();
|
|
|
|
/*
|
|
* These fields are visible to the IOThread so we rely on implicit barriers
|
|
* in aio_context_acquire() on the write side and aio_notify_accept() on
|
|
* the read side.
|
|
*/
|
|
s->starting = false;
|
|
vblk->dataplane_started = true;
|
|
trace_virtio_blk_data_plane_start(s);
|
|
|
|
old_context = blk_get_aio_context(s->conf->conf.blk);
|
|
aio_context_acquire(old_context);
|
|
r = blk_set_aio_context(s->conf->conf.blk, s->ctx, &local_err);
|
|
aio_context_release(old_context);
|
|
if (r < 0) {
|
|
error_report_err(local_err);
|
|
goto fail_aio_context;
|
|
}
|
|
|
|
/* Kick right away to begin processing requests already in vring */
|
|
for (i = 0; i < nvqs; i++) {
|
|
VirtQueue *vq = virtio_get_queue(s->vdev, i);
|
|
|
|
event_notifier_set(virtio_queue_get_host_notifier(vq));
|
|
}
|
|
|
|
/* Get this show started by hooking up our callbacks */
|
|
aio_context_acquire(s->ctx);
|
|
for (i = 0; i < nvqs; i++) {
|
|
VirtQueue *vq = virtio_get_queue(s->vdev, i);
|
|
|
|
virtio_queue_aio_attach_host_notifier(vq, s->ctx);
|
|
}
|
|
aio_context_release(s->ctx);
|
|
return 0;
|
|
|
|
fail_aio_context:
|
|
memory_region_transaction_begin();
|
|
|
|
for (i = 0; i < nvqs; i++) {
|
|
virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
|
|
}
|
|
|
|
memory_region_transaction_commit();
|
|
|
|
for (i = 0; i < nvqs; i++) {
|
|
virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
|
|
}
|
|
fail_host_notifiers:
|
|
k->set_guest_notifiers(qbus->parent, nvqs, false);
|
|
fail_guest_notifiers:
|
|
vblk->dataplane_disabled = true;
|
|
s->starting = false;
|
|
vblk->dataplane_started = true;
|
|
return -ENOSYS;
|
|
}
|
|
|
|
/* Stop notifications for new requests from guest.
|
|
*
|
|
* Context: BH in IOThread
|
|
*/
|
|
static void virtio_blk_data_plane_stop_bh(void *opaque)
|
|
{
|
|
VirtIOBlockDataPlane *s = opaque;
|
|
unsigned i;
|
|
|
|
for (i = 0; i < s->conf->num_queues; i++) {
|
|
VirtQueue *vq = virtio_get_queue(s->vdev, i);
|
|
|
|
virtio_queue_aio_detach_host_notifier(vq, s->ctx);
|
|
}
|
|
}
|
|
|
|
/* Context: QEMU global mutex held */
|
|
void virtio_blk_data_plane_stop(VirtIODevice *vdev)
|
|
{
|
|
VirtIOBlock *vblk = VIRTIO_BLK(vdev);
|
|
VirtIOBlockDataPlane *s = vblk->dataplane;
|
|
BusState *qbus = qdev_get_parent_bus(DEVICE(vblk));
|
|
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
|
|
unsigned i;
|
|
unsigned nvqs = s->conf->num_queues;
|
|
|
|
if (!vblk->dataplane_started || s->stopping) {
|
|
return;
|
|
}
|
|
|
|
/* Better luck next time. */
|
|
if (vblk->dataplane_disabled) {
|
|
vblk->dataplane_disabled = false;
|
|
vblk->dataplane_started = false;
|
|
return;
|
|
}
|
|
s->stopping = true;
|
|
trace_virtio_blk_data_plane_stop(s);
|
|
|
|
aio_context_acquire(s->ctx);
|
|
aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
|
|
|
|
/* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */
|
|
blk_drain(s->conf->conf.blk);
|
|
|
|
/*
|
|
* Try to switch bs back to the QEMU main loop. If other users keep the
|
|
* BlockBackend in the iothread, that's ok
|
|
*/
|
|
blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context(), NULL);
|
|
|
|
aio_context_release(s->ctx);
|
|
|
|
/*
|
|
* Batch all the host notifiers in a single transaction to avoid
|
|
* quadratic time complexity in address_space_update_ioeventfds().
|
|
*/
|
|
memory_region_transaction_begin();
|
|
|
|
for (i = 0; i < nvqs; i++) {
|
|
virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
|
|
}
|
|
|
|
/*
|
|
* The transaction expects the ioeventfds to be open when it
|
|
* commits. Do it now, before the cleanup loop.
|
|
*/
|
|
memory_region_transaction_commit();
|
|
|
|
for (i = 0; i < nvqs; i++) {
|
|
virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
|
|
}
|
|
|
|
qemu_bh_cancel(s->bh);
|
|
notify_guest_bh(s); /* final chance to notify guest */
|
|
|
|
/* Clean up guest notifier (irq) */
|
|
k->set_guest_notifiers(qbus->parent, nvqs, false);
|
|
|
|
vblk->dataplane_started = false;
|
|
s->stopping = false;
|
|
}
|