VFIO update 2021-03-16

  * Fix "listerner" typo (Zenghui Yu)
 
  * Inclusive language and MAINTAINERS update (Philippe Mathieu-Daudé)
 
  * vIOMMU unmap notifier fixes (Eric Auger)
 
  * Migration fixes and optimizations (Shenming Lu)
 
  * Use host page size for dirty bitmap (Kunkun Jiang)
 
  * Use log_global_start/stop to switch dirty tracking (Keqian Zhu)
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.14 (GNU/Linux)
 
 iQIcBAABAgAGBQJgUOPeAAoJECObm247sIsi8iwQAKytv1LP4r4dqQVjXRk6nl1h
 ITiFEj9gtN5EMZo6xqyH6F9pUYCHy+jziG/HYF3uA8+eyzFK9hCQnXVupFVahNCq
 CU1nsYDOemUwyS7rbhPhyZMfyt9oqoqvU5biXerztdLaxxL/w+Nf1V8Q50NnjGwO
 N65Ahrhr6gm7T++VFgvP4z7tI1ktFsgny2l9/I5OL1uDeSWStg+XA0YMQIhjfCEo
 C/qm4nnsfpI1K4B9O4suZAwLimsQD68qwoF1jlCfanpHCddutdpqMAMmWwee5igG
 pkfa/aohCQ1w2vXK1sGnFYDMIT3OdOXRwBIpCom50qdiGXtScTim2tdXr2uG50Ba
 BEjAn+MAjRIonzrm4RXv1lWMbdYlHCvMG4EJyv59WdWYRFGoDzP5Xs0Z97lcjvMT
 GQmWuCbyC6CHbTvnHroLs+5iQyWHnTS9tdWl0dixCdK4uzAzldm8OAqGKtN9c9bS
 xBvc8peQV/Kq4KBowfz+5YlrDMWQodB9GTtBQyjw3Ugr7aJFZU0cUdqfitsxAMVS
 cb4jTUQEKzGvTShZ7KuvJRQNB2JSHdFwTouZ24gKESPubHnnIIlUhDFwEjYGDU1x
 4SoMEwxtJ2kxK9Z20v2q1qiABmRmuNHXuCsysZqeHGeeUqa3fT5NmxUXWKwBTJ+R
 Ob69aL8usd+1RETSRpkj
 =wpJY
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20210316.0' into staging

VFIO update 2021-03-16

 * Fix "listerner" typo (Zenghui Yu)

 * Inclusive language and MAINTAINERS update (Philippe Mathieu-Daudé)

 * vIOMMU unmap notifier fixes (Eric Auger)

 * Migration fixes and optimizations (Shenming Lu)

 * Use host page size for dirty bitmap (Kunkun Jiang)

 * Use log_global_start/stop to switch dirty tracking (Keqian Zhu)

# gpg: Signature made Tue 16 Mar 2021 16:59:10 GMT
# gpg:                using RSA key 239B9B6E3BB08B22
# gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>" [full]
# gpg:                 aka "Alex Williamson <alex@shazbot.org>" [full]
# gpg:                 aka "Alex Williamson <alwillia@redhat.com>" [full]
# gpg:                 aka "Alex Williamson <alex.l.williamson@gmail.com>" [full]
# Primary key fingerprint: 42F6 C04E 540B D1A9 9E7B  8A90 239B 9B6E 3BB0 8B22

* remotes/awilliam/tags/vfio-update-20210316.0:
  vfio/migrate: Move switch of dirty tracking into vfio_memory_listener
  vfio: Support host translation granule size
  vfio: Avoid disabling and enabling vectors repeatedly in VFIO migration
  vfio: Set the priority of the VFIO VM state change handler explicitly
  vfio: Move the saving of the config space to the right place in VFIO migration
  spapr_iommu: Fix vhost integration regression
  vfio: Do not register any IOMMU_NOTIFIER_DEVIOTLB_UNMAP notifier
  MAINTAINERS: Cover docs/igd-assign.txt in VFIO section
  hw/vfio/pci-quirks: Replace the word 'blacklist'
  vfio: Fix vfio_listener_log_sync function name typo

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2021-03-17 19:30:13 +00:00
commit 2255564fd2
8 changed files with 119 additions and 95 deletions

View File

@ -1822,6 +1822,7 @@ M: Alex Williamson <alex.williamson@redhat.com>
S: Supported
F: hw/vfio/*
F: include/hw/vfio/
F: docs/igd-assign.txt
vfio-ccw
M: Cornelia Huck <cohuck@redhat.com>

View File

@ -212,6 +212,11 @@ static int spapr_tce_notify_flag_changed(IOMMUMemoryRegion *iommu,
{
struct SpaprTceTable *tbl = container_of(iommu, SpaprTceTable, iommu);
if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) {
error_setg(errp, "spart_tce does not support dev-iotlb yet");
return -EINVAL;
}
if (old == IOMMU_NOTIFIER_NONE && new != IOMMU_NOTIFIER_NONE) {
spapr_tce_set_need_vfio(tbl, true);
} else if (old != IOMMU_NOTIFIER_NONE && new == IOMMU_NOTIFIER_NONE) {

View File

@ -311,7 +311,7 @@ bool vfio_mig_active(void)
return true;
}
static bool vfio_devices_all_saving(VFIOContainer *container)
static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
{
VFIOGroup *group;
VFIODevice *vbasedev;
@ -329,13 +329,8 @@ static bool vfio_devices_all_saving(VFIOContainer *container)
return false;
}
if (migration->device_state & VFIO_DEVICE_STATE_SAVING) {
if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF)
&& (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) {
return false;
}
continue;
} else {
if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF)
&& (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) {
return false;
}
}
@ -378,7 +373,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
{
struct vfio_iommu_type1_dma_unmap *unmap;
struct vfio_bitmap *bitmap;
uint64_t pages = TARGET_PAGE_ALIGN(size) >> TARGET_PAGE_BITS;
uint64_t pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size;
int ret;
unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap));
@ -390,12 +385,12 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
bitmap = (struct vfio_bitmap *)&unmap->data;
/*
* cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of
* TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap_pgsize to
* TARGET_PAGE_SIZE.
* cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of
* qemu_real_host_page_size to mark those dirty. Hence set bitmap_pgsize
* to qemu_real_host_page_size.
*/
bitmap->pgsize = TARGET_PAGE_SIZE;
bitmap->pgsize = qemu_real_host_page_size;
bitmap->size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
BITS_PER_BYTE;
@ -674,16 +669,17 @@ static void vfio_listener_region_add(MemoryListener *listener,
return;
}
if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
(section->offset_within_region & ~TARGET_PAGE_MASK))) {
if (unlikely((section->offset_within_address_space &
~qemu_real_host_page_mask) !=
(section->offset_within_region & ~qemu_real_host_page_mask))) {
error_report("%s received unaligned region", __func__);
return;
}
iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
llend = int128_make64(section->offset_within_address_space);
llend = int128_add(llend, section->size);
llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
if (int128_ge(int128_make64(iova), llend)) {
return;
@ -787,7 +783,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
MEMTXATTRS_UNSPECIFIED);
iommu_notifier_init(&giommu->n, vfio_iommu_map_notify,
IOMMU_NOTIFIER_ALL,
IOMMU_NOTIFIER_IOTLB_EVENTS,
section->offset_within_region,
int128_get64(llend),
iommu_idx);
@ -892,8 +888,9 @@ static void vfio_listener_region_del(MemoryListener *listener,
return;
}
if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
(section->offset_within_region & ~TARGET_PAGE_MASK))) {
if (unlikely((section->offset_within_address_space &
~qemu_real_host_page_mask) !=
(section->offset_within_region & ~qemu_real_host_page_mask))) {
error_report("%s received unaligned region", __func__);
return;
}
@ -921,10 +918,10 @@ static void vfio_listener_region_del(MemoryListener *listener,
*/
}
iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
llend = int128_make64(section->offset_within_address_space);
llend = int128_add(llend, section->size);
llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
if (int128_ge(int128_make64(iova), llend)) {
return;
@ -987,6 +984,40 @@ static void vfio_listener_region_del(MemoryListener *listener,
}
}
/*
 * Issue VFIO_IOMMU_DIRTY_PAGES on the container's fd to switch dirty page
 * tracking on (@start == true) or off (@start == false).
 *
 * A failing ioctl is only logged through error_report(); nothing is
 * returned to the caller.
 */
static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
{
    struct vfio_iommu_type1_dirty_bitmap dirty = {
        .argsz = sizeof(dirty),
        .flags = start ? VFIO_IOMMU_DIRTY_PAGES_FLAG_START
                       : VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP,
    };

    if (ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty) == 0) {
        return;
    }

    /* errno is read here, before any other call can overwrite it. */
    error_report("Failed to set dirty tracking flag 0x%x errno: %d",
                 dirty.flags, errno);
}
/*
 * MemoryListener log_global_start hook: recover the VFIOContainer that
 * embeds @listener and ask it to start dirty page tracking.
 */
static void vfio_listener_log_global_start(MemoryListener *listener)
{
    vfio_set_dirty_page_tracking(
        container_of(listener, VFIOContainer, listener), true);
}
/*
 * MemoryListener log_global_stop hook: recover the VFIOContainer that
 * embeds @listener and ask it to stop dirty page tracking.
 */
static void vfio_listener_log_global_stop(MemoryListener *listener)
{
    vfio_set_dirty_page_tracking(
        container_of(listener, VFIOContainer, listener), false);
}
static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
uint64_t size, ram_addr_t ram_addr)
{
@ -1004,13 +1035,13 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
range->size = size;
/*
* cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of
* TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap's pgsize to
* TARGET_PAGE_SIZE.
* cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of
* qemu_real_host_page_size to mark those dirty. Hence set bitmap's pgsize
* to qemu_real_host_page_size.
*/
range->bitmap.pgsize = TARGET_PAGE_SIZE;
range->bitmap.pgsize = qemu_real_host_page_size;
pages = TARGET_PAGE_ALIGN(range->size) >> TARGET_PAGE_BITS;
pages = REAL_HOST_PAGE_ALIGN(range->size) / qemu_real_host_page_size;
range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
BITS_PER_BYTE;
range->bitmap.data = g_try_malloc0(range->bitmap.size);
@ -1114,11 +1145,11 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container,
section->offset_within_region;
return vfio_get_dirty_bitmap(container,
TARGET_PAGE_ALIGN(section->offset_within_address_space),
int128_get64(section->size), ram_addr);
REAL_HOST_PAGE_ALIGN(section->offset_within_address_space),
int128_get64(section->size), ram_addr);
}
static void vfio_listerner_log_sync(MemoryListener *listener,
static void vfio_listener_log_sync(MemoryListener *listener,
MemoryRegionSection *section)
{
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
@ -1128,7 +1159,7 @@ static void vfio_listerner_log_sync(MemoryListener *listener,
return;
}
if (vfio_devices_all_saving(container)) {
if (vfio_devices_all_dirty_tracking(container)) {
vfio_sync_dirty_bitmap(container, section);
}
}
@ -1136,7 +1167,9 @@ static void vfio_listerner_log_sync(MemoryListener *listener,
static const MemoryListener vfio_memory_listener = {
.region_add = vfio_listener_region_add,
.region_del = vfio_listener_region_del,
.log_sync = vfio_listerner_log_sync,
.log_global_start = vfio_listener_log_global_start,
.log_global_stop = vfio_listener_log_global_stop,
.log_sync = vfio_listener_log_sync,
};
static void vfio_listener_release(VFIOContainer *container)
@ -1655,10 +1688,10 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container,
header);
/*
* cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of
* TARGET_PAGE_SIZE to mark those dirty.
* cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of
* qemu_real_host_page_size to mark those dirty.
*/
if (cap_mig->pgsize_bitmap & TARGET_PAGE_SIZE) {
if (cap_mig->pgsize_bitmap & qemu_real_host_page_size) {
container->dirty_pages_supported = true;
container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size;
container->dirty_pgsizes = cap_mig->pgsize_bitmap;

View File

@ -395,40 +395,10 @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
return qemu_file_get_error(f);
}
static int vfio_set_dirty_page_tracking(VFIODevice *vbasedev, bool start)
{
int ret;
VFIOMigration *migration = vbasedev->migration;
VFIOContainer *container = vbasedev->group->container;
struct vfio_iommu_type1_dirty_bitmap dirty = {
.argsz = sizeof(dirty),
};
if (start) {
if (migration->device_state & VFIO_DEVICE_STATE_SAVING) {
dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
} else {
return -EINVAL;
}
} else {
dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP;
}
ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
if (ret) {
error_report("Failed to set dirty tracking flag 0x%x errno: %d",
dirty.flags, errno);
return -errno;
}
return ret;
}
static void vfio_migration_cleanup(VFIODevice *vbasedev)
{
VFIOMigration *migration = vbasedev->migration;
vfio_set_dirty_page_tracking(vbasedev, false);
if (migration->region.mmaps) {
vfio_region_unmap(&migration->region);
}
@ -469,11 +439,6 @@ static int vfio_save_setup(QEMUFile *f, void *opaque)
return ret;
}
ret = vfio_set_dirty_page_tracking(vbasedev, true);
if (ret) {
return ret;
}
qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
ret = qemu_file_get_error(f);
@ -575,11 +540,6 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
return ret;
}
ret = vfio_save_device_config_state(f, opaque);
if (ret) {
return ret;
}
ret = vfio_update_pending(vbasedev);
if (ret) {
return ret;
@ -620,6 +580,19 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
return ret;
}
/*
 * save_state handler: write the device config state to @f by calling
 * vfio_save_device_config_state().
 *
 * @opaque is the VFIODevice being saved. On failure the error is logged
 * with the device name and recorded on @f via qemu_file_set_error(); the
 * function itself returns nothing.
 */
static void vfio_save_state(QEMUFile *f, void *opaque)
{
    VFIODevice *vdev = opaque;
    int err = vfio_save_device_config_state(f, opaque);

    if (!err) {
        return;
    }

    error_report("%s: Failed to save device config space",
                 vdev->name);
    qemu_file_set_error(f, err);
}
static int vfio_load_setup(QEMUFile *f, void *opaque)
{
VFIODevice *vbasedev = opaque;
@ -670,11 +643,7 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
switch (data) {
case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
{
ret = vfio_load_device_config_state(f, opaque);
if (ret) {
return ret;
}
break;
return vfio_load_device_config_state(f, opaque);
}
case VFIO_MIG_FLAG_DEV_SETUP_STATE:
{
@ -720,6 +689,7 @@ static SaveVMHandlers savevm_vfio_handlers = {
.save_live_pending = vfio_save_pending,
.save_live_iterate = vfio_save_iterate,
.save_live_complete_precopy = vfio_save_complete_precopy,
.save_state = vfio_save_state,
.load_setup = vfio_load_setup,
.load_cleanup = vfio_load_cleanup,
.load_state = vfio_load_state,
@ -857,7 +827,8 @@ static int vfio_migration_init(VFIODevice *vbasedev,
register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
vbasedev);
migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev,
vfio_vmstate_change,
vbasedev);
migration->migration_state.notify = vfio_migration_state_notifier;
add_migration_state_change_notifier(&migration->migration_state);

View File

@ -44,19 +44,19 @@
static const struct {
uint32_t vendor;
uint32_t device;
} romblacklist[] = {
} rom_denylist[] = {
{ 0x14e4, 0x168e }, /* Broadcom BCM 57810 */
};
bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev)
bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev)
{
int i;
for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) {
if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) {
trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name,
romblacklist[i].vendor,
romblacklist[i].device);
for (i = 0 ; i < ARRAY_SIZE(rom_denylist); i++) {
if (vfio_pci_is(vdev, rom_denylist[i].vendor, rom_denylist[i].device)) {
trace_vfio_quirk_rom_in_denylist(vdev->vbasedev.name,
rom_denylist[i].vendor,
rom_denylist[i].device);
return true;
}
}

View File

@ -569,6 +569,9 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
static void vfio_msix_enable(VFIOPCIDevice *vdev)
{
PCIDevice *pdev = &vdev->pdev;
unsigned int nr, max_vec = 0;
vfio_disable_interrupts(vdev);
vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->msix->entries);
@ -587,11 +590,22 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev)
* triggering to userspace, then immediately release the vector, leaving
* the physical device with no vectors enabled, but MSI-X enabled, just
* like the guest view.
* If there are already unmasked vectors (in migration resume phase and
* some guest startups) which will be enabled soon, we can allocate all
* of them here to avoid inefficiently disabling and enabling vectors
* repeatedly later.
*/
vfio_msix_vector_do_use(&vdev->pdev, 0, NULL, NULL);
vfio_msix_vector_release(&vdev->pdev, 0);
if (!pdev->msix_function_masked) {
for (nr = 0; nr < msix_nr_vectors_allocated(pdev); nr++) {
if (!msix_is_masked(pdev, nr)) {
max_vec = nr;
}
}
}
vfio_msix_vector_do_use(pdev, max_vec, NULL, NULL);
vfio_msix_vector_release(pdev, max_vec);
if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
if (msix_set_vector_notifiers(pdev, vfio_msix_vector_use,
vfio_msix_vector_release, NULL)) {
error_report("vfio: msix_set_vector_notifiers failed");
}
@ -900,7 +914,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
/* Since pci handles romfile, just print a message and return */
if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) {
if (vfio_opt_rom_in_denylist(vdev) && vdev->pdev.romfile) {
warn_report("Device at %s is known to cause system instability"
" issues during option rom execution",
vdev->vbasedev.name);
@ -927,7 +941,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
return;
}
if (vfio_blacklist_opt_rom(vdev)) {
if (vfio_opt_rom_in_denylist(vdev)) {
if (dev->opts && qemu_opt_get(dev->opts, "rombar")) {
warn_report("Device at %s is known to cause system instability"
" issues during option rom execution",

View File

@ -197,7 +197,7 @@ void vfio_pci_write_config(PCIDevice *pdev,
uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size);
void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size);
bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev);
bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev);
void vfio_vga_quirk_setup(VFIOPCIDevice *vdev);
void vfio_vga_quirk_exit(VFIOPCIDevice *vdev);
void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev);

View File

@ -49,7 +49,7 @@ vfio_pci_emulated_sub_vendor_id(const char *name, uint16_t val) "%s 0x%04x"
vfio_pci_emulated_sub_device_id(const char *name, uint16_t val) "%s 0x%04x"
# pci-quirks.c
vfio_quirk_rom_blacklisted(const char *name, uint16_t vid, uint16_t did) "%s %04x:%04x"
vfio_quirk_rom_in_denylist(const char *name, uint16_t vid, uint16_t did) "%s %04x:%04x"
vfio_quirk_generic_window_address_write(const char *name, const char * region_name, uint64_t data) "%s %s 0x%"PRIx64
vfio_quirk_generic_window_data_read(const char *name, const char * region_name, uint64_t data) "%s %s 0x%"PRIx64
vfio_quirk_generic_window_data_write(const char *name, const char * region_name, uint64_t data) "%s %s 0x%"PRIx64