2008-12-04 22:38:57 +03:00
|
|
|
/*
|
|
|
|
* Virtio Support
|
|
|
|
*
|
|
|
|
* Copyright IBM, Corp. 2007
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Anthony Liguori <aliguori@us.ibm.com>
|
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2. See
|
|
|
|
* the COPYING file in the top-level directory.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2016-06-29 14:47:03 +03:00
|
|
|
#ifndef QEMU_VIRTIO_H
|
|
|
|
#define QEMU_VIRTIO_H
|
2008-12-04 22:38:57 +03:00
|
|
|
|
2019-08-12 08:23:46 +03:00
|
|
|
#include "exec/memory.h"
|
2019-08-12 08:23:51 +03:00
|
|
|
#include "hw/qdev-core.h"
|
2012-10-24 10:43:34 +04:00
|
|
|
#include "net/net.h"
|
2019-08-12 08:23:45 +03:00
|
|
|
#include "migration/vmstate.h"
|
2012-12-17 21:20:00 +04:00
|
|
|
#include "qemu/event_notifier.h"
|
2015-02-17 00:35:46 +03:00
|
|
|
#include "standard-headers/linux/virtio_config.h"
|
|
|
|
#include "standard-headers/linux/virtio_ring.h"
|
2020-09-03 23:43:22 +03:00
|
|
|
#include "qom/object.h"
|
2024-04-04 21:56:11 +03:00
|
|
|
#include "block/aio.h"
|
2008-12-04 22:38:57 +03:00
|
|
|
|
2022-08-02 12:49:53 +03:00
|
|
|
/*
|
|
|
|
* A guest should never accept this. It implies negotiation is broken
|
|
|
|
* between the driver frontend and the device. This bit is re-used for
|
|
|
|
* vhost-user to advertise VHOST_USER_F_PROTOCOL_FEATURES between QEMU
|
|
|
|
* and a vhost-user backend.
|
|
|
|
*/
|
2009-04-05 21:40:08 +04:00
|
|
|
#define VIRTIO_F_BAD_FEATURE 30
|
2008-12-04 22:38:57 +03:00
|
|
|
|
2015-07-22 13:09:25 +03:00
|
|
|
#define VIRTIO_LEGACY_FEATURES ((0x1ULL << VIRTIO_F_BAD_FEATURE) | \
|
|
|
|
(0x1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
|
|
|
|
(0x1ULL << VIRTIO_F_ANY_LAYOUT))
|
|
|
|
|
2008-12-04 22:38:57 +03:00
|
|
|
struct VirtQueue;
|
|
|
|
|
2012-10-23 14:30:10 +04:00
|
|
|
/* Return @addr aligned upwards to @align, as computed by QEMU_ALIGN_UP(). */
static inline hwaddr vring_align(hwaddr addr,
|
2008-12-04 22:58:45 +03:00
|
|
|
unsigned long align)
|
|
|
|
{
|
2017-03-25 02:19:43 +03:00
|
|
|
return QEMU_ALIGN_UP(addr, align);
|
2008-12-04 22:58:45 +03:00
|
|
|
}
|
|
|
|
|
2019-02-21 13:33:08 +03:00
|
|
|
/**
 * struct VirtIOFeature - one entry of a feature -> config size table
 * @flags: feature bit mask, e.g. 1ULL << FEATURE_BIT
 * @end: end offset of the config field tied to @flags, e.g.
 *       endof(struct virtio_dev_config, field)
 *
 * Tables of these entries are passed via VirtIOConfigSizeParams.feature_sizes
 * and end with an empty {} entry.
 * NOTE(review): how entries are matched against host features is decided by
 * virtio_get_config_size(), whose body is not visible here - confirm there.
 */
typedef struct VirtIOFeature {
|
|
|
|
uint64_t flags;
|
|
|
|
size_t end;
|
|
|
|
} VirtIOFeature;
|
|
|
|
|
virtio: introduce VirtIOConfigSizeParams & virtio_get_config_size
This is the first step towards moving all device config size calculation
logic into the virtio core code. In particular, this adds a struct that
contains all the necessary information for common virtio code to be able
to calculate the final config size for a device. This is expected to be
used with the new virtio_get_config_size helper, which calculates the
final length based on the provided host features.
This builds on top of already existing code like VirtIOFeature and
virtio_feature_get_config_size(), but adds additional fields, as well as
sanity checking so that device-specific code doesn't have to duplicate it.
An example usage would be:
static const VirtIOFeature dev_features[] = {
{.flags = 1ULL << FEATURE_1_BIT,
.end = endof(struct virtio_dev_config, feature_1)},
{.flags = 1ULL << FEATURE_2_BIT,
.end = endof(struct virtio_dev_config, feature_2)},
{}
};
static const VirtIOConfigSizeParams dev_cfg_size_params = {
.min_size = DEV_BASE_CONFIG_SIZE,
.max_size = sizeof(struct virtio_dev_config),
.feature_sizes = dev_features
};
// code inside my_dev_device_realize()
size_t config_size = virtio_get_config_size(&dev_cfg_size_params,
host_features);
virtio_init(vdev, VIRTIO_ID_MYDEV, config_size);
Currently every device is expected to write its own boilerplate from the
example above in device_realize(), however, the next step of this
transition is moving VirtIOConfigSizeParams into VirtioDeviceClass,
so that it can be done automatically by the virtio initialization code.
All of the users of virtio_feature_get_config_size have been converted
to use virtio_get_config_size so it's no longer needed and is removed
with this commit.
Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
Message-Id: <20220906073111.353245-2-d-tatianin@yandex-team.ru>
Reviewed-by: Raphael Norwitz <raphael.norwitz@nutanix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2022-09-06 10:31:07 +03:00
|
|
|
/**
 * struct VirtIOConfigSizeParams - inputs for virtio_get_config_size()
 * @min_size: base config size of the device (e.g. DEV_BASE_CONFIG_SIZE)
 * @max_size: upper bound, e.g. sizeof() the full device config struct
 * @feature_sizes: table of VirtIOFeature entries, terminated by an empty {}
 *
 * Holds what common virtio code needs to calculate the final config size of
 * a device from its host features.
 * NOTE(review): the exact checks applied to @min_size/@max_size are done in
 * virtio_get_config_size(), which is not visible here - confirm there.
 */
typedef struct VirtIOConfigSizeParams {
|
|
|
|
size_t min_size;
|
|
|
|
size_t max_size;
|
|
|
|
const VirtIOFeature *feature_sizes;
|
|
|
|
} VirtIOConfigSizeParams;
|
|
|
|
|
|
|
|
size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
|
|
|
|
uint64_t host_features);
|
2019-02-21 13:33:08 +03:00
|
|
|
|
2008-12-04 22:38:57 +03:00
|
|
|
typedef struct VirtQueue VirtQueue;
|
|
|
|
|
|
|
|
#define VIRTQUEUE_MAX_SIZE 1024
|
|
|
|
|
|
|
|
typedef struct VirtQueueElement
|
|
|
|
{
|
|
|
|
unsigned int index;
|
2019-10-25 11:35:24 +03:00
|
|
|
unsigned int len;
|
|
|
|
unsigned int ndescs;
|
2008-12-04 22:38:57 +03:00
|
|
|
unsigned int out_num;
|
|
|
|
unsigned int in_num;
|
2016-01-31 13:29:00 +03:00
|
|
|
hwaddr *in_addr;
|
|
|
|
hwaddr *out_addr;
|
|
|
|
struct iovec *in_sg;
|
|
|
|
struct iovec *out_sg;
|
2008-12-04 22:38:57 +03:00
|
|
|
} VirtQueueElement;
|
|
|
|
|
2015-05-29 09:15:32 +03:00
|
|
|
#define VIRTIO_QUEUE_MAX 1024
|
2008-12-04 22:38:57 +03:00
|
|
|
|
2009-06-21 20:50:13 +04:00
|
|
|
#define VIRTIO_NO_VECTOR 0xffff
|
|
|
|
|
2022-12-22 10:04:42 +03:00
|
|
|
/* special index value used internally for config irqs */
|
|
|
|
/* Parenthesized so the negative constant always expands as one operand. */
#define VIRTIO_CONFIG_IRQ_IDX (-1)
|
|
|
|
|
2013-01-15 03:08:02 +04:00
|
|
|
#define TYPE_VIRTIO_DEVICE "virtio-device"
|
2020-09-16 21:25:18 +03:00
|
|
|
OBJECT_DECLARE_TYPE(VirtIODevice, VirtioDeviceClass, VIRTIO_DEVICE)
|
2013-01-15 03:08:02 +04:00
|
|
|
|
qmp: decode feature & status bits in virtio-status
Display feature names instead of bitmaps for host, guest, and
backend for VirtIODevices.
Display status names instead of bitmaps for VirtIODevices.
Display feature names instead of bitmaps for backend, protocol,
acked, and features (hdev->features) for vhost devices.
Decode features according to device ID. Decode statuses
according to configuration status bitmap (config_status_map).
Decode vhost user protocol features according to vhost user
protocol bitmap (vhost_user_protocol_map).
Transport features are on the first line. Undecoded bits (if
any) are stored in a separate field.
[Jonah: Several changes made to this patch from prev. version (v14):
- Moved all device features mappings to hw/virtio/virtio.c
- Renamed device features mappings (less generic)
- Generalized @FEATURE_ENTRY macro for all device mappings
- Virtio device feature map definitions include descriptions of
feature bits
- Moved @VHOST_USER_F_PROTOCOL_FEATURES feature bit from transport
feature map to vhost-user-supported device feature mappings
(blk, fs, i2c, rng, net, gpu, input, scsi, vsock)
- New feature bit added for virtio-vsock: @VIRTIO_VSOCK_F_SEQPACKET
- New feature bit added for virtio-iommu: @VIRTIO_IOMMU_F_BYPASS_CONFIG
- New feature bit added for virtio-mem: @VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE
- New virtio transport feature bit added: @VIRTIO_F_IN_ORDER
- Added device feature map definition for virtio-rng
]
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Jonah Palmer <jonah.palmer@oracle.com>
Message-Id: <1660220684-24909-4-git-send-email-jonah.palmer@oracle.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2022-08-11 15:24:41 +03:00
|
|
|
typedef struct {
|
|
|
|
int virtio_bit;
|
|
|
|
const char *feature_desc;
|
|
|
|
} qmp_virtio_feature_map_t;
|
|
|
|
|
2014-06-24 21:38:54 +04:00
|
|
|
enum virtio_device_endian {
|
|
|
|
VIRTIO_DEVICE_ENDIAN_UNKNOWN,
|
|
|
|
VIRTIO_DEVICE_ENDIAN_LITTLE,
|
|
|
|
VIRTIO_DEVICE_ENDIAN_BIG,
|
|
|
|
};
|
|
|
|
|
2022-11-23 18:21:33 +03:00
|
|
|
/**
|
|
|
|
* struct VirtIODevice - common VirtIO structure
|
|
|
|
* @name: name of the device
|
|
|
|
* @status: VirtIO Device Status field
|
|
|
|
*
|
|
|
|
*/
|
2008-12-04 22:38:57 +03:00
|
|
|
struct VirtIODevice
|
|
|
|
{
|
2013-01-15 03:08:02 +04:00
|
|
|
DeviceState parent_obj;
|
2008-12-04 22:38:57 +03:00
|
|
|
const char *name;
|
|
|
|
uint8_t status;
|
|
|
|
uint8_t isr;
|
|
|
|
uint16_t queue_sel;
|
2022-11-23 18:21:33 +03:00
|
|
|
/**
|
|
|
|
* These fields represent a set of VirtIO features at various
|
|
|
|
* levels of the stack. @host_features indicates the complete
|
|
|
|
* feature set the VirtIO device can offer to the driver.
|
|
|
|
* @guest_features indicates which features the VirtIO driver has
|
|
|
|
* selected by writing to the feature register. Finally
|
|
|
|
* @backend_features represents everything supported by the
|
|
|
|
* backend (e.g. vhost) and could potentially be a subset of the
|
|
|
|
* total feature set offered by QEMU.
|
|
|
|
*/
|
2015-06-01 11:45:40 +03:00
|
|
|
uint64_t host_features;
|
2022-11-23 18:21:33 +03:00
|
|
|
uint64_t guest_features;
|
2017-05-23 15:31:19 +03:00
|
|
|
uint64_t backend_features;
|
2022-11-23 18:21:33 +03:00
|
|
|
|
2008-12-04 22:38:57 +03:00
|
|
|
size_t config_len;
|
|
|
|
void *config;
|
2009-06-21 20:50:13 +04:00
|
|
|
uint16_t config_vector;
|
2015-06-04 13:34:23 +03:00
|
|
|
uint32_t generation;
|
2009-06-21 20:50:13 +04:00
|
|
|
int nvectors;
|
2008-12-04 22:38:57 +03:00
|
|
|
VirtQueue *vq;
|
2017-01-27 18:40:17 +03:00
|
|
|
MemoryListener listener;
|
2009-05-18 17:51:59 +04:00
|
|
|
uint16_t device_id;
|
2022-08-02 12:49:58 +03:00
|
|
|
/* @vm_running: current VM running state via virtio_vmstate_change() */
|
2011-01-10 15:28:40 +03:00
|
|
|
bool vm_running;
|
2016-09-21 18:52:19 +03:00
|
|
|
bool broken; /* device in invalid state, needs reset */
|
virtio-pci: disable vring processing when bus-mastering is disabled
Currently the SLOF firmware for pseries guests will disable/re-enable
a PCI device multiple times via IO/MEM/MASTER bits of PCI_COMMAND
register after the initial probe/feature negotiation, as it tends to
work with a single device at a time at various stages like probing
and running block/network bootloaders without doing a full reset
in-between.
In QEMU, when PCI_COMMAND_MASTER is disabled we disable the
corresponding IOMMU memory region, so DMA accesses (including to vring
fields like idx/flags) will no longer undergo the necessary
translation. Normally we wouldn't expect this to happen since it would
be misbehavior on the driver side to continue driving DMA requests.
However, in the case of pseries, with iommu_platform=on, we trigger the
following sequence when tearing down the virtio-blk dataplane ioeventfd
in response to the guest unsetting PCI_COMMAND_MASTER:
#2 0x0000555555922651 in virtqueue_map_desc (vdev=vdev@entry=0x555556dbcfb0, p_num_sg=p_num_sg@entry=0x7fffe657e1a8, addr=addr@entry=0x7fffe657e240, iov=iov@entry=0x7fffe6580240, max_num_sg=max_num_sg@entry=1024, is_write=is_write@entry=false, pa=0, sz=0)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:757
#3 0x0000555555922a89 in virtqueue_pop (vq=vq@entry=0x555556dc8660, sz=sz@entry=184)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:950
#4 0x00005555558d3eca in virtio_blk_get_request (vq=0x555556dc8660, s=0x555556dbcfb0)
at /home/mdroth/w/qemu.git/hw/block/virtio-blk.c:255
#5 0x00005555558d3eca in virtio_blk_handle_vq (s=0x555556dbcfb0, vq=0x555556dc8660)
at /home/mdroth/w/qemu.git/hw/block/virtio-blk.c:776
#6 0x000055555591dd66 in virtio_queue_notify_aio_vq (vq=vq@entry=0x555556dc8660)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:1550
#7 0x000055555591ecef in virtio_queue_notify_aio_vq (vq=0x555556dc8660)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:1546
#8 0x000055555591ecef in virtio_queue_host_notifier_aio_poll (opaque=0x555556dc86c8)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:2527
#9 0x0000555555d02164 in run_poll_handlers_once (ctx=ctx@entry=0x55555688bfc0, timeout=timeout@entry=0x7fffe65844a8)
at /home/mdroth/w/qemu.git/util/aio-posix.c:520
#10 0x0000555555d02d1b in try_poll_mode (timeout=0x7fffe65844a8, ctx=0x55555688bfc0)
at /home/mdroth/w/qemu.git/util/aio-posix.c:607
#11 0x0000555555d02d1b in aio_poll (ctx=ctx@entry=0x55555688bfc0, blocking=blocking@entry=true)
at /home/mdroth/w/qemu.git/util/aio-posix.c:639
#12 0x0000555555d0004d in aio_wait_bh_oneshot (ctx=0x55555688bfc0, cb=cb@entry=0x5555558d5130 <virtio_blk_data_plane_stop_bh>, opaque=opaque@entry=0x555556de86f0)
at /home/mdroth/w/qemu.git/util/aio-wait.c:71
#13 0x00005555558d59bf in virtio_blk_data_plane_stop (vdev=<optimized out>)
at /home/mdroth/w/qemu.git/hw/block/dataplane/virtio-blk.c:288
#14 0x0000555555b906a1 in virtio_bus_stop_ioeventfd (bus=bus@entry=0x555556dbcf38)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-bus.c:245
#15 0x0000555555b90dbb in virtio_bus_stop_ioeventfd (bus=bus@entry=0x555556dbcf38)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-bus.c:237
#16 0x0000555555b92a8e in virtio_pci_stop_ioeventfd (proxy=0x555556db4e40)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-pci.c:292
#17 0x0000555555b92a8e in virtio_write_config (pci_dev=0x555556db4e40, address=<optimized out>, val=1048832, len=<optimized out>)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-pci.c:613
I.e. the calling code is only scheduling a one-shot BH for
virtio_blk_data_plane_stop_bh, but somehow we end up trying to process
an additional virtqueue entry before we get there. This is likely due
to the following check in virtio_queue_host_notifier_aio_poll:
static bool virtio_queue_host_notifier_aio_poll(void *opaque)
{
EventNotifier *n = opaque;
VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
bool progress;
if (!vq->vring.desc || virtio_queue_empty(vq)) {
return false;
}
progress = virtio_queue_notify_aio_vq(vq);
namely the call to virtio_queue_empty(). In this case, since no new
requests have actually been issued, shadow_avail_idx == last_avail_idx,
so we actually try to access the vring via vring_avail_idx() to get
the latest non-shadowed idx:
int virtio_queue_empty(VirtQueue *vq)
{
bool empty;
...
if (vq->shadow_avail_idx != vq->last_avail_idx) {
return 0;
}
rcu_read_lock();
empty = vring_avail_idx(vq) == vq->last_avail_idx;
rcu_read_unlock();
return empty;
but since the IOMMU region has been disabled we get a bogus value (0
usually), which causes virtio_queue_empty() to falsely report that
there are entries to be processed, which causes errors such as:
"virtio: zero sized buffers are not allowed"
or
"virtio-blk missing headers"
and puts the device in an error state.
This patch works around the issue by introducing virtio_set_disabled(),
which sets a 'disabled' flag to bypass checks like virtio_queue_empty()
when bus-mastering is disabled. Since we'd check this flag at all the
same sites as vdev->broken, we replace those checks with an inline
function which checks for either vdev->broken or vdev->disabled.
The 'disabled' flag is only migrated when set, which should be fairly
rare, but to maintain migration compatibility we disable its use for
older machine types. Users requiring the use of the flag in conjunction
with older machine types can set it explicitly as a virtio-device
option.
NOTES:
- This leaves some other oddities in play, like the fact that
DRIVER_OK also gets unset in response to bus-mastering being
disabled, but not restored (however the device seems to continue
working)
- Similarly, we disable the host notifier via
virtio_bus_stop_ioeventfd(), which seems to move the handling out
of virtio-blk dataplane and back into the main IO thread, and it
ends up staying there till a reset (but otherwise continues working
normally)
Cc: David Gibson <david@gibson.dropbear.id.au>,
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Message-Id: <20191120005003.27035-1-mdroth@linux.vnet.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-11-20 03:50:03 +03:00
|
|
|
bool use_disabled_flag; /* allow use of 'disabled' flag when needed */
|
|
|
|
bool disabled; /* device in temporarily disabled state */
|
2022-11-30 14:24:39 +03:00
|
|
|
/**
|
|
|
|
* @use_started: true if the @started flag should be used to check the
|
|
|
|
* current state of the VirtIO device. Otherwise status bits
|
|
|
|
* should be checked for a current status of the device.
|
|
|
|
* @use_started is only set via QMP and defaults to true for all
|
|
|
|
* modern machines (since 4.1).
|
|
|
|
*/
|
2019-06-26 05:31:26 +03:00
|
|
|
bool use_started;
|
2019-03-20 14:26:40 +03:00
|
|
|
bool started;
|
2019-06-26 05:31:27 +03:00
|
|
|
bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */
|
2020-09-21 15:25:03 +03:00
|
|
|
bool disable_legacy_check;
|
2022-04-01 16:23:19 +03:00
|
|
|
bool vhost_started;
|
2011-01-10 15:28:40 +03:00
|
|
|
VMChangeStateEntry *vmstate;
|
2013-04-30 18:08:48 +04:00
|
|
|
char *bus_name;
|
2014-06-24 21:38:54 +04:00
|
|
|
uint8_t device_endian;
|
2023-07-10 18:35:08 +03:00
|
|
|
/**
|
|
|
|
* @use_guest_notifier_mask: gate usage of ->guest_notifier_mask() callback.
|
|
|
|
* This is used to suppress the masking of guest updates for
|
|
|
|
* vhost-user devices which are asynchronous by design.
|
|
|
|
*/
|
2016-02-18 17:12:23 +03:00
|
|
|
bool use_guest_notifier_mask;
|
2016-12-30 13:09:10 +03:00
|
|
|
AddressSpace *dma_as;
|
2015-04-27 22:01:20 +03:00
|
|
|
QLIST_HEAD(, VirtQueue) *vector_queues;
|
2022-08-11 15:24:39 +03:00
|
|
|
QTAILQ_ENTRY(VirtIODevice) next;
|
2023-07-10 18:35:08 +03:00
|
|
|
/**
|
|
|
|
* @config_notifier: the event notifier that handles config events
|
|
|
|
*/
|
2022-12-22 10:04:47 +03:00
|
|
|
EventNotifier config_notifier;
|
2023-05-12 16:51:20 +03:00
|
|
|
bool device_iotlb_enabled;
|
2008-12-04 22:38:57 +03:00
|
|
|
};
|
|
|
|
|
2020-09-03 23:43:22 +03:00
|
|
|
struct VirtioDeviceClass {
|
2013-07-30 06:05:02 +04:00
|
|
|
/*< private >*/
|
2013-01-15 03:08:02 +04:00
|
|
|
DeviceClass parent;
|
2013-07-30 06:05:02 +04:00
|
|
|
/*< public >*/
|
2013-07-30 02:50:27 +04:00
|
|
|
|
|
|
|
/* This is what a VirtioDevice must implement */
|
|
|
|
DeviceRealize realize;
|
2013-07-30 05:50:44 +04:00
|
|
|
DeviceUnrealize unrealize;
|
2015-07-27 12:49:19 +03:00
|
|
|
uint64_t (*get_features)(VirtIODevice *vdev,
|
|
|
|
uint64_t requested_features,
|
|
|
|
Error **errp);
|
2015-06-01 11:45:40 +03:00
|
|
|
uint64_t (*bad_features)(VirtIODevice *vdev);
|
2015-06-03 15:47:19 +03:00
|
|
|
void (*set_features)(VirtIODevice *vdev, uint64_t val);
|
2015-06-04 13:34:15 +03:00
|
|
|
int (*validate_features)(VirtIODevice *vdev);
|
2013-01-15 03:08:02 +04:00
|
|
|
void (*get_config)(VirtIODevice *vdev, uint8_t *config);
|
|
|
|
void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
|
|
|
|
void (*reset)(VirtIODevice *vdev);
|
|
|
|
void (*set_status)(VirtIODevice *vdev, uint8_t val);
|
2022-11-10 12:57:39 +03:00
|
|
|
/* Device must validate queue_index. */
|
2022-10-17 12:25:45 +03:00
|
|
|
void (*queue_reset)(VirtIODevice *vdev, uint32_t queue_index);
|
2022-11-10 12:57:39 +03:00
|
|
|
/* Device must validate queue_index. */
|
2022-10-17 12:25:46 +03:00
|
|
|
void (*queue_enable)(VirtIODevice *vdev, uint32_t queue_index);
|
2016-11-04 13:04:23 +03:00
|
|
|
/* For transitional devices, this is a bitmap of features
|
|
|
|
* that are only exposed on the legacy interface but not
|
|
|
|
* the modern one.
|
|
|
|
*/
|
|
|
|
uint64_t legacy_features;
|
2013-04-11 18:29:56 +04:00
|
|
|
/* Test and clear event pending status.
|
|
|
|
* Should be called after unmask to avoid losing events.
|
|
|
|
* If backend does not support masking,
|
|
|
|
* must check in frontend instead.
|
|
|
|
*/
|
|
|
|
bool (*guest_notifier_pending)(VirtIODevice *vdev, int n);
|
|
|
|
/* Mask/unmask events from this vq. Any events reported
|
|
|
|
* while masked will become pending.
|
|
|
|
* If backend does not support masking,
|
|
|
|
* must mask in frontend instead.
|
|
|
|
*/
|
|
|
|
void (*guest_notifier_mask)(VirtIODevice *vdev, int n, bool mask);
|
2016-10-21 23:48:07 +03:00
|
|
|
int (*start_ioeventfd)(VirtIODevice *vdev);
|
|
|
|
void (*stop_ioeventfd)(VirtIODevice *vdev);
|
2016-10-27 20:36:36 +03:00
|
|
|
/* Saving and loading of a device; trying to deprecate save/load;
|
|
|
|
* use vmsd for new devices.
|
|
|
|
*/
|
2014-06-24 21:15:31 +04:00
|
|
|
void (*save)(VirtIODevice *vdev, QEMUFile *f);
|
|
|
|
int (*load)(VirtIODevice *vdev, QEMUFile *f, int version_id);
|
2019-10-11 16:58:03 +03:00
|
|
|
/* Post load hook in vmsd is called early while device is processed, and
|
|
|
|
* when VirtIODevice isn't fully initialized. Devices should use this instead,
|
|
|
|
* unless they specifically want to verify the migration stream as it's
|
|
|
|
* processed, e.g. for bounds checking.
|
|
|
|
*/
|
|
|
|
int (*post_load)(VirtIODevice *vdev);
|
2016-10-27 20:36:36 +03:00
|
|
|
const VMStateDescription *vmsd;
|
2019-10-29 14:49:04 +03:00
|
|
|
bool (*primary_unplug_pending)(void *opaque);
|
2022-04-01 16:23:19 +03:00
|
|
|
struct vhost_dev *(*get_vhost)(VirtIODevice *vdev);
|
2023-05-12 16:51:20 +03:00
|
|
|
void (*toggle_device_iotlb)(VirtIODevice *vdev);
|
2020-09-03 23:43:22 +03:00
|
|
|
};
|
2013-01-15 03:08:02 +04:00
|
|
|
|
2014-09-30 10:10:38 +04:00
|
|
|
void virtio_instance_init_common(Object *proxy_obj, void *data,
|
|
|
|
size_t vdev_size, const char *vdev_name);
|
|
|
|
|
2023-07-10 18:35:07 +03:00
|
|
|
/**
|
|
|
|
* virtio_init() - initialise the common VirtIODevice structure
|
|
|
|
* @vdev: pointer to VirtIODevice
|
|
|
|
* @device_id: the VirtIO device ID (see virtio_ids.h)
|
|
|
|
* @config_size: size of the config space
|
|
|
|
*/
|
2022-04-01 16:23:18 +03:00
|
|
|
void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size);
|
|
|
|
|
2013-04-24 12:21:22 +04:00
|
|
|
void virtio_cleanup(VirtIODevice *vdev);
|
2013-01-15 03:08:02 +04:00
|
|
|
|
2022-02-20 19:39:25 +03:00
|
|
|
void virtio_error(VirtIODevice *vdev, const char *fmt, ...) G_GNUC_PRINTF(2, 3);
|
2016-09-21 18:52:19 +03:00
|
|
|
|
2013-04-30 18:08:48 +04:00
|
|
|
/* Set the child bus name. */
|
|
|
|
void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name);
|
|
|
|
|
2016-07-13 08:09:43 +03:00
|
|
|
typedef void (*VirtIOHandleOutput)(VirtIODevice *, VirtQueue *);
|
|
|
|
|
2008-12-04 22:38:57 +03:00
|
|
|
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
|
2016-07-13 08:09:43 +03:00
|
|
|
VirtIOHandleOutput handle_output);
|
2008-12-04 22:38:57 +03:00
|
|
|
|
2013-01-30 15:12:36 +04:00
|
|
|
void virtio_del_queue(VirtIODevice *vdev, int n);
|
|
|
|
|
2019-12-09 19:46:13 +03:00
|
|
|
void virtio_delete_queue(VirtQueue *vq);
|
|
|
|
|
2008-12-04 22:38:57 +03:00
|
|
|
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
|
|
|
|
unsigned int len);
|
|
|
|
void virtqueue_flush(VirtQueue *vq, unsigned int count);
|
2016-09-19 16:28:03 +03:00
|
|
|
void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
|
|
|
|
unsigned int len);
|
2016-11-03 11:55:49 +03:00
|
|
|
void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
|
|
|
|
unsigned int len);
|
2016-09-07 18:20:48 +03:00
|
|
|
bool virtqueue_rewind(VirtQueue *vq, unsigned int num);
|
2008-12-04 22:38:57 +03:00
|
|
|
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
|
|
|
|
unsigned int len, unsigned int idx);
|
|
|
|
|
2016-12-30 13:09:10 +03:00
|
|
|
void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem);
|
2016-02-04 17:26:51 +03:00
|
|
|
void *virtqueue_pop(VirtQueue *vq, size_t sz);
|
2016-12-13 11:12:07 +03:00
|
|
|
unsigned int virtqueue_drop_all(VirtQueue *vq);
|
2016-12-30 13:09:10 +03:00
|
|
|
void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz);
|
2019-10-25 11:35:24 +03:00
|
|
|
void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
|
|
|
|
VirtQueueElement *elem);
|
2012-09-24 22:35:15 +04:00
|
|
|
int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
|
|
|
|
unsigned int out_bytes);
|
|
|
|
void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
|
2012-11-30 02:02:56 +04:00
|
|
|
unsigned int *out_bytes,
|
|
|
|
unsigned max_in_bytes, unsigned max_out_bytes);
|
2008-12-04 22:38:57 +03:00
|
|
|
|
virtio: set ISR on dataplane notifications
Dataplane has been omitting forever the step of setting ISR when
an interrupt is raised. This caused little breakage, because the
specification actually says that ISR may not be updated in MSI mode.
Some versions of the Windows drivers however didn't clear MSI mode
correctly, and proceeded using polling mode (using ISR, not the used
ring index!) for crashdump and hibernation. If it were just crashdump
and hibernation it would not be a big deal, but recent releases of
Windows do not really shut down, but rather log out and hibernate to
make the next startup faster. Hence, this manifested as a more serious
hang during shutdown with e.g. Windows 8.1 and virtio-win 1.8.0 RPMs.
Newer versions fixed this, while older versions do not use MSI at all.
The failure has always been there for virtio dataplane, but it became
visible after commits 9ffe337 ("virtio-blk: always use dataplane path
if ioeventfd is active", 2016-10-30) and ad07cd6 ("virtio-scsi: always
use dataplane path if ioeventfd is active", 2016-10-30) made virtio-blk
and virtio-scsi always use the dataplane code under KVM. The good news
therefore is that it was not a bug in the patches---they were doing
exactly what they were meant for, i.e. shake out remaining dataplane bugs.
The fix is not hard, so it's worth arranging for the broken drivers.
The virtio_should_notify+event_notifier_set pair that is common to
virtio-blk and virtio-scsi dataplane is replaced with a new public
function virtio_notify_irqfd that also sets ISR. The irqfd emulation
code now need not set ISR anymore, so virtio_irq is removed.
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-11-18 18:07:02 +03:00
|
|
|
void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq);
|
2008-12-04 22:38:57 +03:00
|
|
|
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
|
|
|
|
|
2017-09-25 14:29:17 +03:00
|
|
|
int virtio_save(VirtIODevice *vdev, QEMUFile *f);
|
2016-07-14 20:22:45 +03:00
|
|
|
|
2016-10-06 15:55:39 +03:00
|
|
|
extern const VMStateInfo virtio_vmstate_info;
|
|
|
|
|
|
|
|
#define VMSTATE_VIRTIO_DEVICE \
|
|
|
|
{ \
|
|
|
|
.name = "virtio", \
|
|
|
|
.info = &virtio_vmstate_info, \
|
|
|
|
.flags = VMS_SINGLE, \
|
|
|
|
}
|
|
|
|
|
2014-06-24 21:15:31 +04:00
|
|
|
int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id);
|
2008-12-04 22:38:57 +03:00
|
|
|
|
2023-07-10 18:35:06 +03:00
|
|
|
/**
|
|
|
|
* virtio_notify_config() - signal a change to device config
|
|
|
|
* @vdev: the virtio device
|
|
|
|
*
|
|
|
|
* Assuming the virtio device is up (VIRTIO_CONFIG_S_DRIVER_OK) this
|
|
|
|
* will trigger a guest interrupt and update the config version.
|
|
|
|
*/
|
2008-12-04 22:38:57 +03:00
|
|
|
void virtio_notify_config(VirtIODevice *vdev);
|
|
|
|
|
2019-12-10 00:09:57 +03:00
|
|
|
bool virtio_queue_get_notification(VirtQueue *vq);
|
2008-12-04 22:38:57 +03:00
|
|
|
void virtio_queue_set_notification(VirtQueue *vq, int enable);
|
|
|
|
|
|
|
|
int virtio_queue_ready(VirtQueue *vq);
|
|
|
|
|
|
|
|
int virtio_queue_empty(VirtQueue *vq);
|
|
|
|
|
2009-05-18 17:51:59 +04:00
|
|
|
/* Host binding interface. */
|
|
|
|
|
|
|
|
uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr);
|
|
|
|
uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr);
|
|
|
|
uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr);
|
|
|
|
void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data);
|
|
|
|
void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data);
|
|
|
|
void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data);
|
2015-06-04 13:34:24 +03:00
|
|
|
uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr);
|
|
|
|
uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr);
|
|
|
|
uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr);
|
|
|
|
void virtio_config_modern_writeb(VirtIODevice *vdev,
|
|
|
|
uint32_t addr, uint32_t data);
|
|
|
|
void virtio_config_modern_writew(VirtIODevice *vdev,
|
|
|
|
uint32_t addr, uint32_t data);
|
|
|
|
void virtio_config_modern_writel(VirtIODevice *vdev,
|
|
|
|
uint32_t addr, uint32_t data);
|
2012-10-23 14:30:10 +04:00
|
|
|
void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr);
|
|
|
|
hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n);
|
2013-07-16 16:25:07 +04:00
|
|
|
void virtio_queue_set_num(VirtIODevice *vdev, int n, int num);
|
2009-05-18 17:51:59 +04:00
|
|
|
int virtio_queue_get_num(VirtIODevice *vdev, int n);
|
2017-01-13 00:26:22 +03:00
|
|
|
int virtio_queue_get_max_num(VirtIODevice *vdev, int n);
|
2015-05-29 09:15:26 +03:00
|
|
|
int virtio_get_num_queues(VirtIODevice *vdev);
|
2015-06-04 13:34:12 +03:00
|
|
|
void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
|
|
|
|
hwaddr avail, hwaddr used);
|
|
|
|
void virtio_queue_update_rings(VirtIODevice *vdev, int n);
|
2023-03-17 03:27:51 +03:00
|
|
|
void virtio_init_region_cache(VirtIODevice *vdev, int n);
|
2013-07-16 16:25:08 +04:00
|
|
|
void virtio_queue_set_align(VirtIODevice *vdev, int n, int align);
|
2009-05-18 17:51:59 +04:00
|
|
|
void virtio_queue_notify(VirtIODevice *vdev, int n);
|
2009-06-21 20:50:13 +04:00
|
|
|
uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
|
|
|
|
void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector);
|
2018-04-12 18:12:30 +03:00
|
|
|
int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
|
|
|
|
MemoryRegion *mr, bool assign);
|
2015-06-04 13:34:15 +03:00
|
|
|
int virtio_set_status(VirtIODevice *vdev, uint8_t val);
|
2009-05-18 17:51:59 +04:00
|
|
|
void virtio_reset(void *opaque);
|
2022-10-17 12:25:45 +03:00
|
|
|
void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index);
|
2022-10-17 12:25:46 +03:00
|
|
|
void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index);
|
2009-05-18 17:51:59 +04:00
|
|
|
void virtio_update_irq(VirtIODevice *vdev);
|
2015-06-03 15:47:19 +03:00
|
|
|
int virtio_set_features(VirtIODevice *vdev, uint64_t val);
|
2009-05-18 17:51:59 +04:00
|
|
|
|
|
|
|
/* Base devices. */
|
2012-05-16 14:54:05 +04:00
|
|
|
typedef struct VirtIOBlkConf VirtIOBlkConf;
|
2010-09-02 19:00:50 +04:00
|
|
|
struct virtio_net_conf;
|
2011-02-03 08:52:32 +03:00
|
|
|
typedef struct virtio_serial_conf virtio_serial_conf;
|
2014-03-14 17:39:20 +04:00
|
|
|
typedef struct virtio_input_conf virtio_input_conf;
|
2011-02-11 11:40:59 +03:00
|
|
|
typedef struct VirtIOSCSIConf VirtIOSCSIConf;
|
2012-06-20 10:59:32 +04:00
|
|
|
typedef struct VirtIORNGConf VirtIORNGConf;
|
2009-10-21 17:25:35 +04:00
|
|
|
|
2010-01-10 14:52:53 +03:00
|
|
|
#define DEFINE_VIRTIO_COMMON_FEATURES(_state, _field) \
|
2015-06-01 11:45:40 +03:00
|
|
|
DEFINE_PROP_BIT64("indirect_desc", _state, _field, \
|
|
|
|
VIRTIO_RING_F_INDIRECT_DESC, true), \
|
|
|
|
DEFINE_PROP_BIT64("event_idx", _state, _field, \
|
|
|
|
VIRTIO_RING_F_EVENT_IDX, true), \
|
|
|
|
DEFINE_PROP_BIT64("notify_on_empty", _state, _field, \
|
2015-07-22 12:32:25 +03:00
|
|
|
VIRTIO_F_NOTIFY_ON_EMPTY, true), \
|
|
|
|
DEFINE_PROP_BIT64("any_layout", _state, _field, \
|
2016-12-30 13:09:10 +03:00
|
|
|
VIRTIO_F_ANY_LAYOUT, true), \
|
|
|
|
DEFINE_PROP_BIT64("iommu_platform", _state, _field, \
|
2019-10-25 11:35:27 +03:00
|
|
|
VIRTIO_F_IOMMU_PLATFORM, false), \
|
|
|
|
DEFINE_PROP_BIT64("packed", _state, _field, \
|
2022-10-17 12:25:47 +03:00
|
|
|
VIRTIO_F_RING_PACKED, false), \
|
|
|
|
DEFINE_PROP_BIT64("queue_reset", _state, _field, \
|
|
|
|
VIRTIO_F_RING_RESET, true)
|
2010-01-10 14:52:53 +03:00
|
|
|
|
2012-10-23 14:30:10 +04:00
|
|
|
hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n);
|
2020-07-27 18:33:19 +03:00
|
|
|
bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n);
|
2019-03-25 06:40:36 +03:00
|
|
|
bool virtio_queue_enabled(VirtIODevice *vdev, int n);
|
2012-10-23 14:30:10 +04:00
|
|
|
hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n);
|
|
|
|
hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n);
|
|
|
|
hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n);
|
|
|
|
hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n);
|
|
|
|
hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n);
|
2019-10-25 11:35:24 +03:00
|
|
|
unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n);
|
|
|
|
void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
|
|
|
|
unsigned int idx);
|
2017-11-16 21:48:34 +03:00
|
|
|
void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n);
|
2013-08-12 13:08:09 +04:00
|
|
|
void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n);
|
2016-12-13 11:12:05 +03:00
|
|
|
void virtio_queue_update_used_idx(VirtIODevice *vdev, int n);
|
2010-03-17 14:08:02 +03:00
|
|
|
VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n);
|
2013-01-30 15:12:37 +04:00
|
|
|
uint16_t virtio_get_queue_index(VirtQueue *vq);
|
2010-03-17 14:08:02 +03:00
|
|
|
EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq);
|
2012-07-05 19:16:30 +04:00
|
|
|
void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
|
|
|
|
bool with_irqfd);
|
2016-10-21 23:48:07 +03:00
|
|
|
int virtio_device_start_ioeventfd(VirtIODevice *vdev);
|
2016-11-18 18:07:00 +03:00
|
|
|
int virtio_device_grab_ioeventfd(VirtIODevice *vdev);
|
|
|
|
void virtio_device_release_ioeventfd(VirtIODevice *vdev);
|
2016-10-21 23:48:08 +03:00
|
|
|
bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev);
|
2010-03-17 14:08:02 +03:00
|
|
|
EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
|
2019-11-05 17:09:46 +03:00
|
|
|
void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled);
|
2016-10-21 23:48:15 +03:00
|
|
|
void virtio_queue_host_notifier_read(EventNotifier *n);
|
2021-12-07 16:23:36 +03:00
|
|
|
void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx);
|
virtio-scsi: don't waste CPU polling the event virtqueue
The virtio-scsi event virtqueue is not emptied by its handler function.
This is typical for rx virtqueues where the device uses buffers when
some event occurs (e.g. a packet is received, an error condition
happens, etc).
Polling non-empty virtqueues wastes CPU cycles. We are not waiting for
new buffers to become available, we are waiting for an event to occur,
so it's a misuse of CPU resources to poll for buffers.
Introduce the new virtio_queue_aio_attach_host_notifier_no_poll() API,
which is identical to virtio_queue_aio_attach_host_notifier() except
that it does not poll the virtqueue.
Before this patch the following command-line consumed 100% CPU in the
IOThread polling and calling virtio_scsi_handle_event():
$ qemu-system-x86_64 -M accel=kvm -m 1G -cpu host \
--object iothread,id=iothread0 \
--device virtio-scsi-pci,iothread=iothread0 \
--blockdev file,filename=test.img,aio=native,cache.direct=on,node-name=drive0 \
--device scsi-hd,drive=drive0
After this patch CPU is no longer wasted.
Reported-by: Nir Soffer <nsoffer@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Nir Soffer <nsoffer@redhat.com>
Message-id: 20220427143541.119567-3-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2022-04-27 17:35:37 +03:00
|
|
|
void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx);
|
2021-12-07 16:23:36 +03:00
|
|
|
void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx);
|
2015-04-23 09:21:46 +03:00
|
|
|
VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector);
|
|
|
|
VirtQueue *virtio_vector_next_queue(VirtQueue *vq);
|
2022-12-22 10:04:47 +03:00
|
|
|
EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev);
|
|
|
|
void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
|
|
|
|
bool assign, bool with_irqfd);
|
2014-06-24 21:26:29 +04:00
|
|
|
|
2015-06-01 11:45:40 +03:00
|
|
|
/**
 * virtio_add_feature() - set one feature bit in a feature bitmap
 * @features: bitmap to update in place
 * @fbit: bit number to set; asserted to be below 64
 */
static inline void virtio_add_feature(uint64_t *features, unsigned int fbit)
{
    assert(fbit < 64);
    *features |= (1ULL << fbit);
}
|
|
|
|
|
2015-06-01 11:45:40 +03:00
|
|
|
/**
 * virtio_clear_feature() - clear one feature bit in a feature bitmap
 * @features: bitmap to update in place
 * @fbit: bit number to clear; asserted to be below 64
 */
static inline void virtio_clear_feature(uint64_t *features, unsigned int fbit)
{
    assert(fbit < 64);
    *features &= ~(1ULL << fbit);
}
|
|
|
|
|
2015-08-17 12:48:29 +03:00
|
|
|
/**
 * virtio_has_feature() - test one feature bit in a feature bitmap
 * @features: bitmap to inspect
 * @fbit: bit number to test; asserted to be below 64
 *
 * Returns: true if bit @fbit is set in @features, false otherwise.
 */
static inline bool virtio_has_feature(uint64_t features, unsigned int fbit)
{
    assert(fbit < 64);
    return !!(features & (1ULL << fbit));
}
|
|
|
|
|
2023-06-02 14:52:13 +03:00
|
|
|
static inline bool virtio_vdev_has_feature(const VirtIODevice *vdev,
|
2015-08-17 12:48:29 +03:00
|
|
|
unsigned int fbit)
|
2014-12-11 16:25:06 +03:00
|
|
|
{
|
2015-08-17 12:48:29 +03:00
|
|
|
return virtio_has_feature(vdev->guest_features, fbit);
|
2014-12-11 16:25:06 +03:00
|
|
|
}
|
|
|
|
|
2015-08-05 12:50:07 +03:00
|
|
|
static inline bool virtio_host_has_feature(VirtIODevice *vdev,
|
|
|
|
unsigned int fbit)
|
|
|
|
{
|
2015-08-17 12:48:29 +03:00
|
|
|
return virtio_has_feature(vdev->host_features, fbit);
|
2015-08-05 12:50:07 +03:00
|
|
|
}
|
|
|
|
|
2014-06-24 21:38:54 +04:00
|
|
|
/**
 * virtio_is_big_endian() - report whether the device uses big-endian layout
 * @vdev: the VirtIO device
 *
 * When VIRTIO_F_VERSION_1 has not been negotiated the answer comes from
 * @vdev->device_endian, which is asserted to be known.  Otherwise the
 * device is little-endian.
 *
 * Returns: true for big-endian, false for little-endian.
 */
static inline bool virtio_is_big_endian(VirtIODevice *vdev)
{
    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
        return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_BIG;
    }
    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
    return false;
}
|
2019-06-26 05:31:26 +03:00
|
|
|
|
2022-11-30 14:24:39 +03:00
|
|
|
/**
|
|
|
|
* virtio_device_started() - check if device started
|
|
|
|
* @vdev - the VirtIO device
|
|
|
|
* @status - the devices status bits
|
|
|
|
*
|
|
|
|
* Check if the device is started. For most modern machines this is
|
|
|
|
* tracked via the @vdev->started field (to support migration),
|
|
|
|
* otherwise we check for the final negotiated status bit that
|
|
|
|
* indicates everything is ready.
|
|
|
|
*/
|
2019-06-26 05:31:26 +03:00
|
|
|
static inline bool virtio_device_started(VirtIODevice *vdev, uint8_t status)
|
|
|
|
{
|
|
|
|
if (vdev->use_started) {
|
|
|
|
return vdev->started;
|
|
|
|
}
|
|
|
|
|
2022-11-07 15:14:07 +03:00
|
|
|
return status & VIRTIO_CONFIG_S_DRIVER_OK;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virtio_device_should_start() - check if device startable
|
|
|
|
* @vdev - the VirtIO device
|
|
|
|
* @status - the devices status bits
|
|
|
|
*
|
|
|
|
* This is similar to virtio_device_started() but also encapsulates a
|
|
|
|
* check on the VM status which would prevent a device starting
|
|
|
|
* anyway.
|
|
|
|
*/
|
|
|
|
static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status)
|
|
|
|
{
|
2022-08-02 12:49:58 +03:00
|
|
|
if (!vdev->vm_running) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2022-11-30 14:24:39 +03:00
|
|
|
return virtio_device_started(vdev, status);
|
2019-06-26 05:31:26 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * virtio_set_started() - record the started state of a device
 * @vdev: the VirtIO device
 * @started: new started state
 *
 * Starting a device clears @vdev->start_on_kick.  The @vdev->started field
 * itself is only updated when @vdev->use_started is set.
 */
static inline void virtio_set_started(VirtIODevice *vdev, bool started)
{
    if (started) {
        vdev->start_on_kick = false;
    }

    if (vdev->use_started) {
        vdev->started = started;
    }
}
|
virtio-pci: disable vring processing when bus-mastering is disabled
Currently the SLOF firmware for pseries guests will disable/re-enable
a PCI device multiple times via IO/MEM/MASTER bits of PCI_COMMAND
register after the initial probe/feature negotiation, as it tends to
work with a single device at a time at various stages like probing
and running block/network bootloaders without doing a full reset
in-between.
In QEMU, when PCI_COMMAND_MASTER is disabled we disable the
corresponding IOMMU memory region, so DMA accesses (including to vring
fields like idx/flags) will no longer undergo the necessary
translation. Normally we wouldn't expect this to happen since it would
be misbehavior on the driver side to continue driving DMA requests.
However, in the case of pseries, with iommu_platform=on, we trigger the
following sequence when tearing down the virtio-blk dataplane ioeventfd
in response to the guest unsetting PCI_COMMAND_MASTER:
#2 0x0000555555922651 in virtqueue_map_desc (vdev=vdev@entry=0x555556dbcfb0, p_num_sg=p_num_sg@entry=0x7fffe657e1a8, addr=addr@entry=0x7fffe657e240, iov=iov@entry=0x7fffe6580240, max_num_sg=max_num_sg@entry=1024, is_write=is_write@entry=false, pa=0, sz=0)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:757
#3 0x0000555555922a89 in virtqueue_pop (vq=vq@entry=0x555556dc8660, sz=sz@entry=184)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:950
#4 0x00005555558d3eca in virtio_blk_get_request (vq=0x555556dc8660, s=0x555556dbcfb0)
at /home/mdroth/w/qemu.git/hw/block/virtio-blk.c:255
#5 0x00005555558d3eca in virtio_blk_handle_vq (s=0x555556dbcfb0, vq=0x555556dc8660)
at /home/mdroth/w/qemu.git/hw/block/virtio-blk.c:776
#6 0x000055555591dd66 in virtio_queue_notify_aio_vq (vq=vq@entry=0x555556dc8660)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:1550
#7 0x000055555591ecef in virtio_queue_notify_aio_vq (vq=0x555556dc8660)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:1546
#8 0x000055555591ecef in virtio_queue_host_notifier_aio_poll (opaque=0x555556dc86c8)
at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:2527
#9 0x0000555555d02164 in run_poll_handlers_once (ctx=ctx@entry=0x55555688bfc0, timeout=timeout@entry=0x7fffe65844a8)
at /home/mdroth/w/qemu.git/util/aio-posix.c:520
#10 0x0000555555d02d1b in try_poll_mode (timeout=0x7fffe65844a8, ctx=0x55555688bfc0)
at /home/mdroth/w/qemu.git/util/aio-posix.c:607
#11 0x0000555555d02d1b in aio_poll (ctx=ctx@entry=0x55555688bfc0, blocking=blocking@entry=true)
at /home/mdroth/w/qemu.git/util/aio-posix.c:639
#12 0x0000555555d0004d in aio_wait_bh_oneshot (ctx=0x55555688bfc0, cb=cb@entry=0x5555558d5130 <virtio_blk_data_plane_stop_bh>, opaque=opaque@entry=0x555556de86f0)
at /home/mdroth/w/qemu.git/util/aio-wait.c:71
#13 0x00005555558d59bf in virtio_blk_data_plane_stop (vdev=<optimized out>)
at /home/mdroth/w/qemu.git/hw/block/dataplane/virtio-blk.c:288
#14 0x0000555555b906a1 in virtio_bus_stop_ioeventfd (bus=bus@entry=0x555556dbcf38)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-bus.c:245
#15 0x0000555555b90dbb in virtio_bus_stop_ioeventfd (bus=bus@entry=0x555556dbcf38)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-bus.c:237
#16 0x0000555555b92a8e in virtio_pci_stop_ioeventfd (proxy=0x555556db4e40)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-pci.c:292
#17 0x0000555555b92a8e in virtio_write_config (pci_dev=0x555556db4e40, address=<optimized out>, val=1048832, len=<optimized out>)
at /home/mdroth/w/qemu.git/hw/virtio/virtio-pci.c:613
I.e. the calling code is only scheduling a one-shot BH for
virtio_blk_data_plane_stop_bh, but somehow we end up trying to process
an additional virtqueue entry before we get there. This is likely due
to the following check in virtio_queue_host_notifier_aio_poll:
static bool virtio_queue_host_notifier_aio_poll(void *opaque)
{
EventNotifier *n = opaque;
VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
bool progress;
if (!vq->vring.desc || virtio_queue_empty(vq)) {
return false;
}
progress = virtio_queue_notify_aio_vq(vq);
namely the call to virtio_queue_empty(). In this case, since no new
requests have actually been issued, shadow_avail_idx == last_avail_idx,
so we actually try to access the vring via vring_avail_idx() to get
the latest non-shadowed idx:
int virtio_queue_empty(VirtQueue *vq)
{
bool empty;
...
if (vq->shadow_avail_idx != vq->last_avail_idx) {
return 0;
}
rcu_read_lock();
empty = vring_avail_idx(vq) == vq->last_avail_idx;
rcu_read_unlock();
return empty;
but since the IOMMU region has been disabled we get a bogus value (0
usually), which causes virtio_queue_empty() to falsely report that
there are entries to be processed, which causes errors such as:
"virtio: zero sized buffers are not allowed"
or
"virtio-blk missing headers"
and puts the device in an error state.
This patch works around the issue by introducing virtio_set_disabled(),
which sets a 'disabled' flag to bypass checks like virtio_queue_empty()
when bus-mastering is disabled. Since we'd check this flag at all the
same sites as vdev->broken, we replace those checks with an inline
function which checks for either vdev->broken or vdev->disabled.
The 'disabled' flag is only migrated when set, which should be fairly
rare, but to maintain migration compatibility we disable its use for
older machine types. Users requiring the use of the flag in conjunction
with older machine types can set it explicitly as a virtio-device
option.
NOTES:
- This leaves some other oddities in play, like the fact that
DRIVER_OK also gets unset in response to bus-mastering being
disabled, but not restored (however the device seems to continue
working)
- Similarly, we disable the host notifier via
virtio_bus_stop_ioeventfd(), which seems to move the handling out
of virtio-blk dataplane and back into the main IO thread, and it
ends up staying there till a reset (but otherwise continues working
normally)
Cc: David Gibson <david@gibson.dropbear.id.au>,
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Message-Id: <20191120005003.27035-1-mdroth@linux.vnet.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-11-20 03:50:03 +03:00
|
|
|
|
|
|
|
/**
 * virtio_set_disabled() - set or clear the device's 'disabled' flag
 * @vdev: the VirtIO device
 * @disable: new value for the flag
 *
 * The flag lets vring processing be bypassed while bus-mastering is
 * disabled.  It is only updated when @vdev->use_disabled_flag is set, which
 * keeps migration compatibility with older machine types; otherwise this
 * call does nothing.
 */
static inline void virtio_set_disabled(VirtIODevice *vdev, bool disable)
{
    if (vdev->use_disabled_flag) {
        vdev->disabled = disable;
    }
}
|
|
|
|
|
|
|
|
/**
 * virtio_device_disabled() - check if the device must not be processed
 * @vdev: the VirtIO device
 *
 * Returns: true if either @vdev->disabled or @vdev->broken is set.  The
 * result is marked unlikely() as a branch hint.
 */
static inline bool virtio_device_disabled(VirtIODevice *vdev)
{
    return unlikely(vdev->disabled || vdev->broken);
}
|
|
|
|
|
2020-07-07 13:54:45 +03:00
|
|
|
/*
 * Legacy (pre-1.0) mode queries, implemented outside this header.
 * NOTE(review): exact semantics are not visible here; confirm against the
 * implementation before relying on them.
 */
bool virtio_legacy_allowed(VirtIODevice *vdev);
bool virtio_legacy_check_disabled(VirtIODevice *vdev);
|
2020-07-07 13:54:45 +03:00
|
|
|
|
2024-04-04 21:56:11 +03:00
|
|
|
QEMUBH *virtio_bh_new_guarded_full(DeviceState *dev,
|
|
|
|
QEMUBHFunc *cb, void *opaque,
|
|
|
|
const char *name);
|
|
|
|
#define virtio_bh_new_guarded(dev, cb, opaque) \
|
|
|
|
virtio_bh_new_guarded_full((dev), (cb), (opaque), (stringify(cb)))
|
|
|
|
|
2008-12-04 22:38:57 +03:00
|
|
|
#endif
|