9a4b6a63ae
As soon as virtio_scsi_data_plane_start() attaches host notifiers the
IOThread may start virtqueue processing. There is a race between IOThread
virtqueue processing and virtio_scsi_data_plane_start() because it only
assigns s->dataplane_started after attaching host notifiers.

When a virtqueue handler function in the IOThread calls
virtio_scsi_defer_to_dataplane() it may see !s->dataplane_started and
attempt to start dataplane even though we're already in the IOThread:

  #0  0x00007f67b360857c __pthread_kill_implementation (libc.so.6 + 0xa257c)
  #1  0x00007f67b35bbd56 raise (libc.so.6 + 0x55d56)
  #2  0x00007f67b358e833 abort (libc.so.6 + 0x28833)
  #3  0x00007f67b358e75b __assert_fail_base.cold (libc.so.6 + 0x2875b)
  #4  0x00007f67b35b4cd6 __assert_fail (libc.so.6 + 0x4ecd6)
  #5  0x000055ca87fd411b memory_region_transaction_commit (qemu-kvm + 0x67511b)
  #6  0x000055ca87e17811 virtio_pci_ioeventfd_assign (qemu-kvm + 0x4b8811)
  #7  0x000055ca87e14836 virtio_bus_set_host_notifier (qemu-kvm + 0x4b5836)
  #8  0x000055ca87f8e14e virtio_scsi_set_host_notifier (qemu-kvm + 0x62f14e)
  #9  0x000055ca87f8dd62 virtio_scsi_dataplane_start (qemu-kvm + 0x62ed62)
  #10 0x000055ca87e14610 virtio_bus_start_ioeventfd (qemu-kvm + 0x4b5610)
  #11 0x000055ca87f8c29a virtio_scsi_handle_ctrl (qemu-kvm + 0x62d29a)
  #12 0x000055ca87fa5902 virtio_queue_host_notifier_read (qemu-kvm + 0x646902)
  #13 0x000055ca882c099e aio_dispatch_handler (qemu-kvm + 0x96199e)
  #14 0x000055ca882c1761 aio_poll (qemu-kvm + 0x962761)
  #15 0x000055ca880e1052 iothread_run (qemu-kvm + 0x782052)
  #16 0x000055ca882c562a qemu_thread_start (qemu-kvm + 0x96662a)

This patch assigns s->dataplane_started before attaching host notifiers so
that virtqueue handler functions that run in the IOThread before
virtio_scsi_data_plane_start() returns correctly identify that dataplane
does not need to be started. This fix is taken from the virtio-blk
dataplane code and it's worth adding a comment in virtio-blk as well to
explain why it works.

Note that s->dataplane_started does not need the AioContext lock because it
is set before attaching host notifiers and cleared after detaching host
notifiers. In other words, the IOThread always sees the value true and the
main loop thread does not modify it while the IOThread is active.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2099541
Reported-by: Qing Wang <qinwang@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20220808162134.240405-1-stefanha@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
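The fix is purely an ordering change: the flag that tells virtqueue handlers
"dataplane is already running" must become visible before the notifier that
lets the IOThread run those handlers is armed. The following standalone
sketch illustrates that ordering with plain C11 threads and atomics; it is
not QEMU code, and all names in it (dataplane_started, kick, worker,
ordering.c) are invented for the example.

/*
 * Illustration only -- not QEMU code.  A worker thread stands in for the
 * IOThread and an atomic flag stands in for the host notifier ("kick").
 * Build with: cc -std=c11 -pthread ordering.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool dataplane_started;  /* published by the main thread */
static atomic_bool kick;               /* stands in for the host notifier */

static void *worker(void *opaque)
{
    (void)opaque;

    /* Wait until the "host notifier" fires, then act as a virtqueue handler. */
    while (!atomic_load(&kick)) {
        /* busy-wait is fine for a tiny demo */
    }

    if (!atomic_load(&dataplane_started)) {
        /* The buggy ordering would land here: the handler believes it must
         * start dataplane even though it is already running in the worker. */
        fprintf(stderr, "BUG: handler saw dataplane_started == false\n");
    } else {
        printf("handler: dataplane already started, just process the request\n");
    }
    return NULL;
}

int main(void)
{
    pthread_t tid;

    pthread_create(&tid, NULL, worker, NULL);

    /* Fixed ordering: publish the flag first ... */
    atomic_store(&dataplane_started, true);
    /* ... and only then arm the notifier that lets the worker run. */
    atomic_store(&kick, true);

    pthread_join(tid, NULL);
    return 0;
}

The virtio-blk dataplane code below follows the same pattern: it sets
vblk->dataplane_started before attaching the host notifiers to the
IOThread's AioContext.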
/*
 * Dedicated thread for virtio-blk I/O processing
 *
 * Copyright 2012 IBM, Corp.
 * Copyright 2012 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *   Stefan Hajnoczi <stefanha@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "trace.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/thread.h"
#include "qemu/error-report.h"
#include "hw/virtio/virtio-access.h"
#include "hw/virtio/virtio-blk.h"
#include "virtio-blk.h"
#include "block/aio.h"
#include "hw/virtio/virtio-bus.h"
#include "qom/object_interfaces.h"

struct VirtIOBlockDataPlane {
    bool starting;
    bool stopping;

    VirtIOBlkConf *conf;
    VirtIODevice *vdev;
    QEMUBH *bh;                     /* bh for guest notification */
    unsigned long *batch_notify_vqs;
    bool batch_notifications;

    /* Note that these EventNotifiers are assigned by value.  This is
     * fine as long as you do not call event_notifier_cleanup on them
     * (because you don't own the file descriptor or handle; you just
     * use it).
     */
    IOThread *iothread;
    AioContext *ctx;
};

/* Raise an interrupt to signal guest, if necessary */
void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq)
{
    if (s->batch_notifications) {
        set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
        qemu_bh_schedule(s->bh);
    } else {
        virtio_notify_irqfd(s->vdev, vq);
    }
}

static void notify_guest_bh(void *opaque)
{
    VirtIOBlockDataPlane *s = opaque;
    unsigned nvqs = s->conf->num_queues;
    unsigned long bitmap[BITS_TO_LONGS(nvqs)];
    unsigned j;

    memcpy(bitmap, s->batch_notify_vqs, sizeof(bitmap));
    memset(s->batch_notify_vqs, 0, sizeof(bitmap));
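
    /*
     * Scan the snapshot one long at a time; ctzl() yields the index of each
     * set bit, i.e. each virtqueue with a pending guest notification.
     */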
    for (j = 0; j < nvqs; j += BITS_PER_LONG) {
        unsigned long bits = bitmap[j / BITS_PER_LONG];

        while (bits != 0) {
            unsigned i = j + ctzl(bits);
            VirtQueue *vq = virtio_get_queue(s->vdev, i);

            virtio_notify_irqfd(s->vdev, vq);

            bits &= bits - 1; /* clear right-most bit */
        }
    }
}

/* Context: QEMU global mutex held */
bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
                                  VirtIOBlockDataPlane **dataplane,
                                  Error **errp)
{
    VirtIOBlockDataPlane *s;
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    *dataplane = NULL;

    if (conf->iothread) {
        if (!k->set_guest_notifiers || !k->ioeventfd_assign) {
            error_setg(errp,
                       "device is incompatible with iothread "
                       "(transport does not support notifiers)");
            return false;
        }
        if (!virtio_device_ioeventfd_enabled(vdev)) {
            error_setg(errp, "ioeventfd is required for iothread");
            return false;
        }

        /* If dataplane is (re-)enabled while the guest is running there could
         * be block jobs that can conflict.
         */
        if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) {
            error_prepend(errp, "cannot start virtio-blk dataplane: ");
            return false;
        }
    }
    /* Don't try if transport does not support notifiers. */
    if (!virtio_device_ioeventfd_enabled(vdev)) {
        return false;
    }

    s = g_new0(VirtIOBlockDataPlane, 1);
    s->vdev = vdev;
    s->conf = conf;

    if (conf->iothread) {
        s->iothread = conf->iothread;
        object_ref(OBJECT(s->iothread));
        s->ctx = iothread_get_aio_context(s->iothread);
    } else {
        s->ctx = qemu_get_aio_context();
    }
    s->bh = aio_bh_new(s->ctx, notify_guest_bh, s);
    s->batch_notify_vqs = bitmap_new(conf->num_queues);

    *dataplane = s;

    return true;
}

/* Context: QEMU global mutex held */
void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
{
    VirtIOBlock *vblk;

    if (!s) {
        return;
    }

    vblk = VIRTIO_BLK(s->vdev);
    assert(!vblk->dataplane_started);
    g_free(s->batch_notify_vqs);
    qemu_bh_delete(s->bh);
    if (s->iothread) {
        object_unref(OBJECT(s->iothread));
    }
    g_free(s);
}

/* Context: QEMU global mutex held */
int virtio_blk_data_plane_start(VirtIODevice *vdev)
{
    VirtIOBlock *vblk = VIRTIO_BLK(vdev);
    VirtIOBlockDataPlane *s = vblk->dataplane;
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vblk)));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    AioContext *old_context;
    unsigned i;
    unsigned nvqs = s->conf->num_queues;
    Error *local_err = NULL;
    int r;

    if (vblk->dataplane_started || s->starting) {
        return 0;
    }

    s->starting = true;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        s->batch_notifications = true;
    } else {
        s->batch_notifications = false;
    }

    /* Set up guest notifier (irq) */
    r = k->set_guest_notifiers(qbus->parent, nvqs, true);
    if (r != 0) {
        error_report("virtio-blk failed to set guest notifier (%d), "
                     "ensure -accel kvm is set.", r);
        goto fail_guest_notifiers;
    }

    /*
     * Batch all the host notifiers in a single transaction to avoid
     * quadratic time complexity in address_space_update_ioeventfds().
     */
    memory_region_transaction_begin();

    /* Set up virtqueue notify */
    for (i = 0; i < nvqs; i++) {
        r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true);
        if (r != 0) {
            int j = i;

            fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r);
            while (i--) {
                virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
            }

            /*
             * The transaction expects the ioeventfds to be open when it
             * commits. Do it now, before the cleanup loop.
             */
            memory_region_transaction_commit();

            while (j--) {
                virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j);
            }
            goto fail_host_notifiers;
        }
    }

    memory_region_transaction_commit();

    /*
     * These fields are visible to the IOThread so we rely on implicit barriers
     * in aio_context_acquire() on the write side and aio_notify_accept() on
     * the read side.
     */
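    /*
     * dataplane_started is set before the host notifiers are attached to the
     * IOThread's AioContext below.  Any virtqueue handler that then runs in
     * the IOThread sees dataplane_started == true and does not try to start
     * dataplane again from within the IOThread; this is the ordering the
     * virtio-scsi fix above copies.
     */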
    s->starting = false;
    vblk->dataplane_started = true;
    trace_virtio_blk_data_plane_start(s);

    old_context = blk_get_aio_context(s->conf->conf.blk);
    aio_context_acquire(old_context);
    r = blk_set_aio_context(s->conf->conf.blk, s->ctx, &local_err);
    aio_context_release(old_context);
    if (r < 0) {
        error_report_err(local_err);
        goto fail_aio_context;
    }

    /* Process queued requests before the ones in vring */
    virtio_blk_process_queued_requests(vblk, false);

    /* Kick right away to begin processing requests already in vring */
    for (i = 0; i < nvqs; i++) {
        VirtQueue *vq = virtio_get_queue(s->vdev, i);

        event_notifier_set(virtio_queue_get_host_notifier(vq));
    }

    /* Get this show started by hooking up our callbacks */
    aio_context_acquire(s->ctx);
    for (i = 0; i < nvqs; i++) {
        VirtQueue *vq = virtio_get_queue(s->vdev, i);

        virtio_queue_aio_attach_host_notifier(vq, s->ctx);
    }
    aio_context_release(s->ctx);
    return 0;

  fail_aio_context:
    memory_region_transaction_begin();

    for (i = 0; i < nvqs; i++) {
        virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
    }

    memory_region_transaction_commit();

    for (i = 0; i < nvqs; i++) {
        virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
    }
  fail_host_notifiers:
    k->set_guest_notifiers(qbus->parent, nvqs, false);
  fail_guest_notifiers:
    /*
     * If we failed to set up the guest notifiers queued requests will be
     * processed on the main context.
     */
    virtio_blk_process_queued_requests(vblk, false);
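    /*
     * Mark dataplane as "started but disabled": requests are handled in the
     * main loop instead, and the next virtio_blk_data_plane_stop() call
     * simply resets both flags (the "Better luck next time" path below).
     */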
    vblk->dataplane_disabled = true;
    s->starting = false;
    vblk->dataplane_started = true;
    return -ENOSYS;
}

/* Stop notifications for new requests from guest.
 *
 * Context: BH in IOThread
 */
static void virtio_blk_data_plane_stop_bh(void *opaque)
{
    VirtIOBlockDataPlane *s = opaque;
    unsigned i;

    for (i = 0; i < s->conf->num_queues; i++) {
        VirtQueue *vq = virtio_get_queue(s->vdev, i);

        virtio_queue_aio_detach_host_notifier(vq, s->ctx);
    }
}

/* Context: QEMU global mutex held */
void virtio_blk_data_plane_stop(VirtIODevice *vdev)
{
    VirtIOBlock *vblk = VIRTIO_BLK(vdev);
    VirtIOBlockDataPlane *s = vblk->dataplane;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vblk));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    unsigned i;
    unsigned nvqs = s->conf->num_queues;

    if (!vblk->dataplane_started || s->stopping) {
        return;
    }

    /* Better luck next time. */
    if (vblk->dataplane_disabled) {
        vblk->dataplane_disabled = false;
        vblk->dataplane_started = false;
        return;
    }
    s->stopping = true;
    trace_virtio_blk_data_plane_stop(s);

    aio_context_acquire(s->ctx);
    aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);

    /* Drain and try to switch bs back to the QEMU main loop. If other users
     * keep the BlockBackend in the iothread, that's ok */
    blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context(), NULL);

    aio_context_release(s->ctx);

    /*
     * Batch all the host notifiers in a single transaction to avoid
     * quadratic time complexity in address_space_update_ioeventfds().
     */
    memory_region_transaction_begin();

    for (i = 0; i < nvqs; i++) {
        virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
    }

    /*
     * The transaction expects the ioeventfds to be open when it
     * commits. Do it now, before the cleanup loop.
     */
    memory_region_transaction_commit();

    for (i = 0; i < nvqs; i++) {
        virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
    }

    qemu_bh_cancel(s->bh);
    notify_guest_bh(s); /* final chance to notify guest */

    /* Clean up guest notifier (irq) */
    k->set_guest_notifiers(qbus->parent, nvqs, false);
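
    /*
     * The host notifiers were detached above (virtio_blk_data_plane_stop_bh()
     * plus virtio_bus_set_host_notifier(..., false)), so the IOThread can no
     * longer invoke virtqueue handlers.  Only now is it safe for the main
     * loop thread to clear dataplane_started without the AioContext lock.
     */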
    vblk->dataplane_started = false;
    s->stopping = false;
}