diff --git a/MAINTAINERS b/MAINTAINERS index f0e30b5248..1c56d45b16 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1554,7 +1554,8 @@ F: include/hw/virtio/virtio-input.h F: contrib/vhost-user-input/* virtio-serial -M: Amit Shah +M: Laurent Vivier +R: Amit Shah S: Supported F: hw/char/virtio-serial-bus.c F: hw/char/virtio-console.c @@ -1563,7 +1564,8 @@ F: tests/virtio-console-test.c F: tests/virtio-serial-test.c virtio-rng -M: Amit Shah +M: Laurent Vivier +R: Amit Shah S: Supported F: hw/virtio/virtio-rng.c F: include/hw/virtio/virtio-rng.h diff --git a/backends/vhost-user.c b/backends/vhost-user.c index 0a13506c98..2bf3406525 100644 --- a/backends/vhost-user.c +++ b/backends/vhost-user.c @@ -46,7 +46,7 @@ vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev, b->vdev = vdev; b->dev.nvqs = nvqs; - b->dev.vqs = g_new(struct vhost_virtqueue, nvqs); + b->dev.vqs = g_new0(struct vhost_virtqueue, nvqs); ret = vhost_dev_init(&b->dev, &b->vhost_user, VHOST_BACKEND_TYPE_USER, 0); if (ret < 0) { diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt index b531cacd35..362e99109e 100644 --- a/docs/nvdimm.txt +++ b/docs/nvdimm.txt @@ -171,6 +171,35 @@ guest software that this vNVDIMM device contains a region that cannot accept persistent writes. In result, for example, the guest Linux NVDIMM driver, marks such vNVDIMM device as read-only. +Backend File Setup Example +-------------------------- + +Here are two examples showing how to setup these persistent backends on +linux using the tool ndctl [3]. + +A. DAX device + +Use the following command to set up /dev/dax0.0 so that the entirety of +namespace0.0 can be exposed as an emulated NVDIMM to the guest: + + ndctl create-namespace -f -e namespace0.0 -m devdax + +The /dev/dax0.0 could be used directly in "mem-path" option. + +B. DAX file + +Individual files on a DAX host file system can be exposed as emulated +NVDIMMS. First an fsdax block device is created, partitioned, and then +mounted with the "dax" mount option: + + ndctl create-namespace -f -e namespace0.0 -m fsdax + (partition /dev/pmem0 with name pmem0p1) + mount -o dax /dev/pmem0p1 /mnt + (create or copy a disk image file with qemu-img(1), cp(1), or dd(1) + in /mnt) + +Then the new file in /mnt could be used in "mem-path" option. + NVDIMM Persistence ------------------ @@ -212,3 +241,5 @@ References https://www.snia.org/sites/default/files/technical_work/final/NVMProgrammingModel_v1.2.pdf [2] Persistent Memory Development Kit (PMDK), formerly known as NVML project, home page: http://pmem.io/pmdk/ +[3] ndctl-create-namespace - provision or reconfigure a namespace + http://pmem.io/ndctl/ndctl-create-namespace.html diff --git a/docs/virtio-pmem.rst b/docs/virtio-pmem.rst new file mode 100644 index 0000000000..e77881b26f --- /dev/null +++ b/docs/virtio-pmem.rst @@ -0,0 +1,75 @@ + +======================== +QEMU virtio pmem +======================== + + This document explains the setup and usage of the virtio pmem device + which is available since QEMU v4.1.0. + + The virtio pmem device is a paravirtualized persistent memory device + on regular (i.e non-NVDIMM) storage. + +Usecase +-------- + + Virtio pmem allows to bypass the guest page cache and directly use + host page cache. This reduces guest memory footprint as the host can + make efficient memory reclaim decisions under memory pressure. + +o How does virtio-pmem compare to the nvdimm emulation supported by QEMU? + + NVDIMM emulation on regular (i.e. non-NVDIMM) host storage does not + persist the guest writes as there are no defined semantics in the device + specification. The virtio pmem device provides guest write persistence + on non-NVDIMM host storage. + +virtio pmem usage +----------------- + + A virtio pmem device backed by a memory-backend-file can be created on + the QEMU command line as in the following example: + + -object memory-backend-file,id=mem1,share,mem-path=./virtio_pmem.img,size=4G + -device virtio-pmem-pci,memdev=mem1,id=nv1 + + where: + - "object memory-backend-file,id=mem1,share,mem-path=, size=" + creates a backend file with the specified size. + + - "device virtio-pmem-pci,id=nvdimm1,memdev=mem1" creates a virtio pmem + pci device whose storage is provided by above memory backend device. + + Multiple virtio pmem devices can be created if multiple pairs of "-object" + and "-device" are provided. + +Hotplug +------- + +Virtio pmem devices can be hotplugged via the QEMU monitor. First, the +memory backing has to be added via 'object_add'; afterwards, the virtio +pmem device can be added via 'device_add'. + +For example, the following commands add another 4GB virtio pmem device to +the guest: + + (qemu) object_add memory-backend-file,id=mem2,share=on,mem-path=virtio_pmem2.img,size=4G + (qemu) device_add virtio-pmem-pci,id=virtio_pmem2,memdev=mem2 + +Guest Data Persistence +---------------------- + + Guest data persistence on non-NVDIMM requires guest userspace applications + to perform fsync/msync. This is different from a real nvdimm backend where + no additional fsync/msync is required. This is to persist guest writes in + host backing file which otherwise remains in host page cache and there is + risk of losing the data in case of power failure. + + With virtio pmem device, MAP_SYNC mmap flag is not supported. This provides + a hint to application to perform fsync for write persistence. + +Limitations +------------ +- Real nvdimm device backend is not supported. +- virtio pmem hotunplug is not supported. +- ACPI NVDIMM features like regions/namespaces are not supported. +- ndctl command is not supported. diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 0b8c5dfeab..63da9bb619 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -421,7 +421,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) } s->inflight = g_new0(struct vhost_inflight, 1); - s->vqs = g_new(struct vhost_virtqueue, s->num_queues); + s->vqs = g_new0(struct vhost_virtqueue, s->num_queues); s->watch = 0; s->connected = false; diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 60d66c2f39..cbad6c1d55 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -237,6 +237,23 @@ HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev) return NULL; } +bool qdev_hotplug_allowed(DeviceState *dev, Error **errp) +{ + MachineState *machine; + MachineClass *mc; + Object *m_obj = qdev_get_machine(); + + if (object_dynamic_cast(m_obj, TYPE_MACHINE)) { + machine = MACHINE(m_obj); + mc = MACHINE_GET_CLASS(machine); + if (mc->hotplug_allowed) { + return mc->hotplug_allowed(machine, dev, errp); + } + } + + return true; +} + HotplugHandler *qdev_get_bus_hotplug_handler(DeviceState *dev) { if (dev->parent_bus) { diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 75ca6f9c70..f1de8fdb75 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -35,6 +35,7 @@ #include "hw/i386/x86-iommu.h" #include "hw/pci-host/q35.h" #include "sysemu/kvm.h" +#include "sysemu/sysemu.h" #include "hw/i386/apic_internal.h" #include "kvm_i386.h" #include "migration/vmstate.h" @@ -64,6 +65,13 @@ static void vtd_address_space_refresh_all(IntelIOMMUState *s); static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n); +static void vtd_panic_require_caching_mode(void) +{ + error_report("We need to set caching-mode=on for intel-iommu to enable " + "device assignment with IOMMU protection."); + exit(1); +} + static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val, uint64_t wmask, uint64_t w1cmask) { @@ -2928,12 +2936,6 @@ static void vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); IntelIOMMUState *s = vtd_as->iommu_state; - if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) { - error_report("We need to set caching-mode=on for intel-iommu to enable " - "device assignment with IOMMU protection."); - exit(1); - } - /* Update per-address-space notifier flags */ vtd_as->notifier_flags = new; @@ -3699,6 +3701,32 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) return true; } +static int vtd_machine_done_notify_one(Object *child, void *unused) +{ + IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default()); + + /* + * We hard-coded here because vfio-pci is the only special case + * here. Let's be more elegant in the future when we can, but so + * far there seems to be no better way. + */ + if (object_dynamic_cast(child, "vfio-pci") && !iommu->caching_mode) { + vtd_panic_require_caching_mode(); + } + + return 0; +} + +static void vtd_machine_done_hook(Notifier *notifier, void *unused) +{ + object_child_foreach_recursive(object_get_root(), + vtd_machine_done_notify_one, NULL); +} + +static Notifier vtd_machine_done_notify = { + .notify = vtd_machine_done_hook, +}; + static void vtd_realize(DeviceState *dev, Error **errp) { MachineState *ms = MACHINE(qdev_get_machine()); @@ -3744,6 +3772,7 @@ static void vtd_realize(DeviceState *dev, Error **errp) pci_setup_iommu(bus, vtd_host_dma_iommu, dev); /* Pseudo address space under root PCI bus. */ pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC); + qemu_add_machine_init_done_notifier(&vtd_machine_done_notify); } static void vtd_class_init(ObjectClass *klass, void *data) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index e5b59ff3c5..bcda50efcc 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -2756,6 +2756,26 @@ static void x86_nmi(NMIState *n, int cpu_index, Error **errp) } } + +static bool pc_hotplug_allowed(MachineState *ms, DeviceState *dev, Error **errp) +{ + X86IOMMUState *iommu = x86_iommu_get_default(); + IntelIOMMUState *intel_iommu; + + if (iommu && + object_dynamic_cast((Object *)iommu, TYPE_INTEL_IOMMU_DEVICE) && + object_dynamic_cast((Object *)dev, "vfio-pci")) { + intel_iommu = INTEL_IOMMU_DEVICE(iommu); + if (!intel_iommu->caching_mode) { + error_setg(errp, "Device assignment is not allowed without " + "enabling caching-mode=on for Intel IOMMU."); + return false; + } + } + + return true; +} + static void pc_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -2780,6 +2800,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->pvh_enabled = true; assert(!mc->get_hotplug_handler); mc->get_hotplug_handler = pc_get_hotplug_handler; + mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = pc_cpu_index_to_props; mc->get_default_cpu_node_id = pc_get_default_cpu_node_id; mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids; diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index eccc795f28..3d5ca0f667 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -50,14 +50,24 @@ OBJECT_CHECK(VirtIOMMIOProxy, (obj), TYPE_VIRTIO_MMIO) #define VIRT_MAGIC 0x74726976 /* 'virt' */ -#define VIRT_VERSION 1 +#define VIRT_VERSION 2 +#define VIRT_VERSION_LEGACY 1 #define VIRT_VENDOR 0x554D4551 /* 'QEMU' */ +typedef struct VirtIOMMIOQueue { + uint16_t num; + bool enabled; + uint32_t desc[2]; + uint32_t avail[2]; + uint32_t used[2]; +} VirtIOMMIOQueue; + typedef struct { /* Generic */ SysBusDevice parent_obj; MemoryRegion iomem; qemu_irq irq; + bool legacy; /* Guest accessible state needing migration and reset */ uint32_t host_features_sel; uint32_t guest_features_sel; @@ -65,6 +75,9 @@ typedef struct { /* virtio-bus */ VirtioBusState bus; bool format_transport_address; + /* Fields only used for non-legacy (v2) devices */ + uint32_t guest_features[2]; + VirtIOMMIOQueue vqs[VIRTIO_QUEUE_MAX]; } VirtIOMMIOProxy; static bool virtio_mmio_ioeventfd_enabled(DeviceState *d) @@ -118,7 +131,11 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size) case VIRTIO_MMIO_MAGIC_VALUE: return VIRT_MAGIC; case VIRTIO_MMIO_VERSION: - return VIRT_VERSION; + if (proxy->legacy) { + return VIRT_VERSION_LEGACY; + } else { + return VIRT_VERSION; + } case VIRTIO_MMIO_VENDOR_ID: return VIRT_VENDOR; default: @@ -149,28 +166,64 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size) case VIRTIO_MMIO_MAGIC_VALUE: return VIRT_MAGIC; case VIRTIO_MMIO_VERSION: - return VIRT_VERSION; + if (proxy->legacy) { + return VIRT_VERSION_LEGACY; + } else { + return VIRT_VERSION; + } case VIRTIO_MMIO_DEVICE_ID: return vdev->device_id; case VIRTIO_MMIO_VENDOR_ID: return VIRT_VENDOR; case VIRTIO_MMIO_DEVICE_FEATURES: - if (proxy->host_features_sel) { - return 0; + if (proxy->legacy) { + if (proxy->host_features_sel) { + return 0; + } else { + return vdev->host_features; + } + } else { + VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + return (vdev->host_features & ~vdc->legacy_features) + >> (32 * proxy->host_features_sel); } - return vdev->host_features; case VIRTIO_MMIO_QUEUE_NUM_MAX: if (!virtio_queue_get_num(vdev, vdev->queue_sel)) { return 0; } return VIRTQUEUE_MAX_SIZE; case VIRTIO_MMIO_QUEUE_PFN: + if (!proxy->legacy) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: read from legacy register (0x%" + HWADDR_PRIx ") in non-legacy mode\n", + __func__, offset); + return 0; + } return virtio_queue_get_addr(vdev, vdev->queue_sel) >> proxy->guest_page_shift; + case VIRTIO_MMIO_QUEUE_READY: + if (proxy->legacy) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: read from non-legacy register (0x%" + HWADDR_PRIx ") in legacy mode\n", + __func__, offset); + return 0; + } + return proxy->vqs[vdev->queue_sel].enabled; case VIRTIO_MMIO_INTERRUPT_STATUS: return atomic_read(&vdev->isr); case VIRTIO_MMIO_STATUS: return vdev->status; + case VIRTIO_MMIO_CONFIG_GENERATION: + if (proxy->legacy) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: read from non-legacy register (0x%" + HWADDR_PRIx ") in legacy mode\n", + __func__, offset); + return 0; + } + return vdev->generation; case VIRTIO_MMIO_DEVICE_FEATURES_SEL: case VIRTIO_MMIO_DRIVER_FEATURES: case VIRTIO_MMIO_DRIVER_FEATURES_SEL: @@ -180,12 +233,20 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size) case VIRTIO_MMIO_QUEUE_ALIGN: case VIRTIO_MMIO_QUEUE_NOTIFY: case VIRTIO_MMIO_INTERRUPT_ACK: + case VIRTIO_MMIO_QUEUE_DESC_LOW: + case VIRTIO_MMIO_QUEUE_DESC_HIGH: + case VIRTIO_MMIO_QUEUE_AVAIL_LOW: + case VIRTIO_MMIO_QUEUE_AVAIL_HIGH: + case VIRTIO_MMIO_QUEUE_USED_LOW: + case VIRTIO_MMIO_QUEUE_USED_HIGH: qemu_log_mask(LOG_GUEST_ERROR, - "%s: read of write-only register\n", - __func__); + "%s: read of write-only register (0x%" HWADDR_PRIx ")\n", + __func__, offset); return 0; default: - qemu_log_mask(LOG_GUEST_ERROR, "%s: bad register offset\n", __func__); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: bad register offset (0x%" HWADDR_PRIx ")\n", + __func__, offset); return 0; } return 0; @@ -232,17 +293,41 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, } switch (offset) { case VIRTIO_MMIO_DEVICE_FEATURES_SEL: - proxy->host_features_sel = value; + if (value) { + proxy->host_features_sel = 1; + } else { + proxy->host_features_sel = 0; + } break; case VIRTIO_MMIO_DRIVER_FEATURES: - if (!proxy->guest_features_sel) { - virtio_set_features(vdev, value); + if (proxy->legacy) { + if (proxy->guest_features_sel) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: attempt to write guest features with " + "guest_features_sel > 0 in legacy mode\n", + __func__); + } else { + virtio_set_features(vdev, value); + } + } else { + proxy->guest_features[proxy->guest_features_sel] = value; } break; case VIRTIO_MMIO_DRIVER_FEATURES_SEL: - proxy->guest_features_sel = value; + if (value) { + proxy->guest_features_sel = 1; + } else { + proxy->guest_features_sel = 0; + } break; case VIRTIO_MMIO_GUEST_PAGE_SIZE: + if (!proxy->legacy) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: write to legacy register (0x%" + HWADDR_PRIx ") in non-legacy mode\n", + __func__, offset); + return; + } proxy->guest_page_shift = ctz32(value); if (proxy->guest_page_shift > 31) { proxy->guest_page_shift = 0; @@ -256,15 +341,31 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, break; case VIRTIO_MMIO_QUEUE_NUM: trace_virtio_mmio_queue_write(value, VIRTQUEUE_MAX_SIZE); - virtio_queue_set_num(vdev, vdev->queue_sel, value); - /* Note: only call this function for legacy devices */ - virtio_queue_update_rings(vdev, vdev->queue_sel); + if (proxy->legacy) { + virtio_queue_set_num(vdev, vdev->queue_sel, value); + virtio_queue_update_rings(vdev, vdev->queue_sel); + } else { + proxy->vqs[vdev->queue_sel].num = value; + } break; case VIRTIO_MMIO_QUEUE_ALIGN: - /* Note: this is only valid for legacy devices */ + if (!proxy->legacy) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: write to legacy register (0x%" + HWADDR_PRIx ") in non-legacy mode\n", + __func__, offset); + return; + } virtio_queue_set_align(vdev, vdev->queue_sel, value); break; case VIRTIO_MMIO_QUEUE_PFN: + if (!proxy->legacy) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: write to legacy register (0x%" + HWADDR_PRIx ") in non-legacy mode\n", + __func__, offset); + return; + } if (value == 0) { virtio_reset(vdev); } else { @@ -272,6 +373,29 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, value << proxy->guest_page_shift); } break; + case VIRTIO_MMIO_QUEUE_READY: + if (proxy->legacy) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: write to non-legacy register (0x%" + HWADDR_PRIx ") in legacy mode\n", + __func__, offset); + return; + } + if (value) { + virtio_queue_set_num(vdev, vdev->queue_sel, + proxy->vqs[vdev->queue_sel].num); + virtio_queue_set_rings(vdev, vdev->queue_sel, + ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 | + proxy->vqs[vdev->queue_sel].desc[0], + ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 | + proxy->vqs[vdev->queue_sel].avail[0], + ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 | + proxy->vqs[vdev->queue_sel].used[0]); + proxy->vqs[vdev->queue_sel].enabled = 1; + } else { + proxy->vqs[vdev->queue_sel].enabled = 0; + } + break; case VIRTIO_MMIO_QUEUE_NOTIFY: if (value < VIRTIO_QUEUE_MAX) { virtio_queue_notify(vdev, value); @@ -286,6 +410,12 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, virtio_mmio_stop_ioeventfd(proxy); } + if (!proxy->legacy && (value & VIRTIO_CONFIG_S_FEATURES_OK)) { + virtio_set_features(vdev, + ((uint64_t)proxy->guest_features[1]) << 32 | + proxy->guest_features[0]); + } + virtio_set_status(vdev, value & 0xff); if (value & VIRTIO_CONFIG_S_DRIVER_OK) { @@ -296,6 +426,66 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, virtio_reset(vdev); } break; + case VIRTIO_MMIO_QUEUE_DESC_LOW: + if (proxy->legacy) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: write to non-legacy register (0x%" + HWADDR_PRIx ") in legacy mode\n", + __func__, offset); + return; + } + proxy->vqs[vdev->queue_sel].desc[0] = value; + break; + case VIRTIO_MMIO_QUEUE_DESC_HIGH: + if (proxy->legacy) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: write to non-legacy register (0x%" + HWADDR_PRIx ") in legacy mode\n", + __func__, offset); + return; + } + proxy->vqs[vdev->queue_sel].desc[1] = value; + break; + case VIRTIO_MMIO_QUEUE_AVAIL_LOW: + if (proxy->legacy) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: write to non-legacy register (0x%" + HWADDR_PRIx ") in legacy mode\n", + __func__, offset); + return; + } + proxy->vqs[vdev->queue_sel].avail[0] = value; + break; + case VIRTIO_MMIO_QUEUE_AVAIL_HIGH: + if (proxy->legacy) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: write to non-legacy register (0x%" + HWADDR_PRIx ") in legacy mode\n", + __func__, offset); + return; + } + proxy->vqs[vdev->queue_sel].avail[1] = value; + break; + case VIRTIO_MMIO_QUEUE_USED_LOW: + if (proxy->legacy) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: write to non-legacy register (0x%" + HWADDR_PRIx ") in legacy mode\n", + __func__, offset); + return; + } + proxy->vqs[vdev->queue_sel].used[0] = value; + break; + case VIRTIO_MMIO_QUEUE_USED_HIGH: + if (proxy->legacy) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: write to non-legacy register (0x%" + HWADDR_PRIx ") in legacy mode\n", + __func__, offset); + return; + } + proxy->vqs[vdev->queue_sel].used[1] = value; + break; case VIRTIO_MMIO_MAGIC_VALUE: case VIRTIO_MMIO_VERSION: case VIRTIO_MMIO_DEVICE_ID: @@ -303,20 +493,29 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, case VIRTIO_MMIO_DEVICE_FEATURES: case VIRTIO_MMIO_QUEUE_NUM_MAX: case VIRTIO_MMIO_INTERRUPT_STATUS: + case VIRTIO_MMIO_CONFIG_GENERATION: qemu_log_mask(LOG_GUEST_ERROR, - "%s: write to readonly register\n", - __func__); + "%s: write to read-only register (0x%" HWADDR_PRIx ")\n", + __func__, offset); break; default: - qemu_log_mask(LOG_GUEST_ERROR, "%s: bad register offset\n", __func__); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: bad register offset (0x%" HWADDR_PRIx ")\n", + __func__, offset); } } +static const MemoryRegionOps virtio_legacy_mem_ops = { + .read = virtio_mmio_read, + .write = virtio_mmio_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + static const MemoryRegionOps virtio_mem_ops = { .read = virtio_mmio_read, .write = virtio_mmio_write, - .endianness = DEVICE_NATIVE_ENDIAN, + .endianness = DEVICE_LITTLE_ENDIAN, }; static void virtio_mmio_update_irq(DeviceState *opaque, uint16_t vector) @@ -352,15 +551,90 @@ static void virtio_mmio_save_config(DeviceState *opaque, QEMUFile *f) qemu_put_be32(f, proxy->guest_page_shift); } +static const VMStateDescription vmstate_virtio_mmio_queue_state = { + .name = "virtio_mmio/queue_state", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT16(num, VirtIOMMIOQueue), + VMSTATE_BOOL(enabled, VirtIOMMIOQueue), + VMSTATE_UINT32_ARRAY(desc, VirtIOMMIOQueue, 2), + VMSTATE_UINT32_ARRAY(avail, VirtIOMMIOQueue, 2), + VMSTATE_UINT32_ARRAY(used, VirtIOMMIOQueue, 2), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_virtio_mmio_state_sub = { + .name = "virtio_mmio/state", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(guest_features, VirtIOMMIOProxy, 2), + VMSTATE_STRUCT_ARRAY(vqs, VirtIOMMIOProxy, VIRTIO_QUEUE_MAX, 0, + vmstate_virtio_mmio_queue_state, + VirtIOMMIOQueue), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_virtio_mmio = { + .name = "virtio_mmio", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * []) { + &vmstate_virtio_mmio_state_sub, + NULL + } +}; + +static void virtio_mmio_save_extra_state(DeviceState *opaque, QEMUFile *f) +{ + VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque); + + vmstate_save_state(f, &vmstate_virtio_mmio, proxy, NULL); +} + +static int virtio_mmio_load_extra_state(DeviceState *opaque, QEMUFile *f) +{ + VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque); + + return vmstate_load_state(f, &vmstate_virtio_mmio, proxy, 1); +} + +static bool virtio_mmio_has_extra_state(DeviceState *opaque) +{ + VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque); + + return !proxy->legacy; +} + static void virtio_mmio_reset(DeviceState *d) { VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); + int i; virtio_mmio_stop_ioeventfd(proxy); virtio_bus_reset(&proxy->bus); proxy->host_features_sel = 0; proxy->guest_features_sel = 0; proxy->guest_page_shift = 0; + + if (!proxy->legacy) { + proxy->guest_features[0] = proxy->guest_features[1] = 0; + + for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { + proxy->vqs[i].enabled = 0; + proxy->vqs[i].num = 0; + proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0; + proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0; + proxy->vqs[i].used[0] = proxy->vqs[i].used[1] = 0; + } + } } static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign, @@ -423,11 +697,22 @@ assign_error: return r; } +static void virtio_mmio_pre_plugged(DeviceState *d, Error **errp) +{ + VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + + if (!proxy->legacy) { + virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1); + } +} + /* virtio-mmio device */ static Property virtio_mmio_properties[] = { DEFINE_PROP_BOOL("format_transport_address", VirtIOMMIOProxy, format_transport_address, true), + DEFINE_PROP_BOOL("force-legacy", VirtIOMMIOProxy, legacy, true), DEFINE_PROP_END_OF_LIST(), }; @@ -439,8 +724,15 @@ static void virtio_mmio_realizefn(DeviceState *d, Error **errp) qbus_create_inplace(&proxy->bus, sizeof(proxy->bus), TYPE_VIRTIO_MMIO_BUS, d, NULL); sysbus_init_irq(sbd, &proxy->irq); - memory_region_init_io(&proxy->iomem, OBJECT(d), &virtio_mem_ops, proxy, - TYPE_VIRTIO_MMIO, 0x200); + if (proxy->legacy) { + memory_region_init_io(&proxy->iomem, OBJECT(d), + &virtio_legacy_mem_ops, proxy, + TYPE_VIRTIO_MMIO, 0x200); + } else { + memory_region_init_io(&proxy->iomem, OBJECT(d), + &virtio_mem_ops, proxy, + TYPE_VIRTIO_MMIO, 0x200); + } sysbus_init_mmio(sbd, &proxy->iomem); } @@ -511,9 +803,13 @@ static void virtio_mmio_bus_class_init(ObjectClass *klass, void *data) k->notify = virtio_mmio_update_irq; k->save_config = virtio_mmio_save_config; k->load_config = virtio_mmio_load_config; + k->save_extra_state = virtio_mmio_save_extra_state; + k->load_extra_state = virtio_mmio_load_extra_state; + k->has_extra_state = virtio_mmio_has_extra_state; k->set_guest_notifiers = virtio_mmio_set_guest_notifiers; k->ioeventfd_enabled = virtio_mmio_ioeventfd_enabled; k->ioeventfd_assign = virtio_mmio_ioeventfd_assign; + k->pre_plugged = virtio_mmio_pre_plugged; k->has_variable_vring_alignment = true; bus_class->max_dev = 1; bus_class->get_dev_path = virtio_mmio_bus_get_dev_path; diff --git a/include/hw/boards.h b/include/hw/boards.h index 2289536e48..be18a5c032 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -166,6 +166,13 @@ typedef struct { * The function pointer to hook different machine specific functions for * parsing "smp-opts" from QemuOpts to MachineState::CpuTopology and more * machine specific topology fields, such as smp_dies for PCMachine. + * @hotplug_allowed: + * If the hook is provided, then it'll be called for each device + * hotplug to check whether the device hotplug is allowed. Return + * true to grant allowance or false to reject the hotplug. When + * false is returned, an error must be set to show the reason of + * the rejection. If the hook is not provided, all hotplug will be + * allowed. */ struct MachineClass { /*< private >*/ @@ -224,6 +231,8 @@ struct MachineClass { HotplugHandler *(*get_hotplug_handler)(MachineState *machine, DeviceState *dev); + bool (*hotplug_allowed)(MachineState *state, DeviceState *dev, + Error **errp); CpuInstanceProperties (*cpu_index_to_instance_props)(MachineState *machine, unsigned cpu_index); const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine); diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index de70b7a19a..aa123f88cb 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -280,6 +280,7 @@ void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id, int required_for_version); HotplugHandler *qdev_get_bus_hotplug_handler(DeviceState *dev); HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev); +bool qdev_hotplug_allowed(DeviceState *dev, Error **errp); /** * qdev_get_hotplug_handler: Get handler responsible for device wiring * diff --git a/qdev-monitor.c b/qdev-monitor.c index 8fe5c2cad2..148df9cacf 100644 --- a/qdev-monitor.c +++ b/qdev-monitor.c @@ -615,6 +615,13 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) /* create device */ dev = DEVICE(object_new(driver)); + /* Check whether the hotplug is allowed by the machine */ + if (qdev_hotplug && !qdev_hotplug_allowed(dev, &err)) { + /* Error must be set in the machine hook */ + assert(err); + goto err_del_dev; + } + if (bus) { qdev_set_parent_bus(dev, bus); } else if (qdev_hotplug && !qdev_get_machine_hotplug_handler(dev)) {