diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 63831eab78..811502dbc9 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1325,11 +1325,96 @@ static int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) return ret; } +typedef struct VFIODirtyRanges { + hwaddr min32; + hwaddr max32; + hwaddr min64; + hwaddr max64; +} VFIODirtyRanges; + +typedef struct VFIODirtyRangesListener { + VFIOContainer *container; + VFIODirtyRanges ranges; + MemoryListener listener; +} VFIODirtyRangesListener; + +static void vfio_dirty_tracking_update(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIODirtyRangesListener *dirty = container_of(listener, + VFIODirtyRangesListener, + listener); + VFIODirtyRanges *range = &dirty->ranges; + hwaddr iova, end, *min, *max; + + if (!vfio_listener_valid_section(section, "tracking_update") || + !vfio_get_section_iova_range(dirty->container, section, + &iova, &end, NULL)) { + return; + } + + /* + * The address space passed to the dirty tracker is reduced to two ranges: + * one for 32-bit DMA ranges, and another one for 64-bit DMA ranges. + * The underlying reports of dirty will query a sub-interval of each of + * these ranges. + * + * The purpose of the dual range handling is to handle known cases of big + * holes in the address space, like the x86 AMD 1T hole. The alternative + * would be an IOVATree but that has a much bigger runtime overhead and + * unnecessary complexity. + */ + min = (end <= UINT32_MAX) ? &range->min32 : &range->min64; + max = (end <= UINT32_MAX) ? &range->max32 : &range->max64; + + if (*min > iova) { + *min = iova; + } + if (*max < end) { + *max = end; + } + + trace_vfio_device_dirty_tracking_update(iova, end, *min, *max); + return; +} + +static const MemoryListener vfio_dirty_tracking_listener = { + .name = "vfio-tracking", + .region_add = vfio_dirty_tracking_update, +}; + +static void vfio_dirty_tracking_init(VFIOContainer *container, + VFIODirtyRanges *ranges) +{ + VFIODirtyRangesListener dirty; + + memset(&dirty, 0, sizeof(dirty)); + dirty.ranges.min32 = UINT32_MAX; + dirty.ranges.min64 = UINT64_MAX; + dirty.listener = vfio_dirty_tracking_listener; + dirty.container = container; + + memory_listener_register(&dirty.listener, + container->space->as); + + *ranges = dirty.ranges; + + /* + * The memory listener is synchronous, and used to calculate the range + * to dirty tracking. Unregister it after we are done as we are not + * interested in any follow-up updates. + */ + memory_listener_unregister(&dirty.listener); +} + static void vfio_listener_log_global_start(MemoryListener *listener) { VFIOContainer *container = container_of(listener, VFIOContainer, listener); + VFIODirtyRanges ranges; int ret; + vfio_dirty_tracking_init(container, &ranges); + ret = vfio_set_dirty_page_tracking(container, true); if (ret) { vfio_set_migration_error(ret); diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 7173e6a5c7..dd9fd7b9bd 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -103,6 +103,7 @@ vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA" vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 +vfio_device_dirty_tracking_update(uint64_t start, uint64_t end, uint64_t min, uint64_t max) "section 0x%"PRIx64" - 0x%"PRIx64" -> update [0x%"PRIx64" - 0x%"PRIx64"]" vfio_disconnect_container(int fd) "close container->fd=%d" vfio_put_group(int fd) "close group->fd=%d" vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u"