virtio-iommu: Add bypass mode support to assigned device
Currently assigned devices can not work in virtio-iommu bypass mode. Guest driver fails to probe the device due to DMA failure. And the reason is because of lacking GPA -> HPA mappings when VM is created. Add a root container memory region to hold both bypass memory region and iommu memory region, so the switch between them is supported just like the implementation in virtual VT-d. Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> Message-Id: <20220613061010.2674054-2-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
This commit is contained in:
parent
b595d6272e
commit
90519b9053
@ -124,6 +124,7 @@ virtio_iommu_remap(const char *name, uint64_t virt_start, uint64_t virt_end, uin
|
||||
virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t new) "mr=%s old_mask=0x%"PRIx64" new_mask=0x%"PRIx64
|
||||
virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s"
|
||||
virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s"
|
||||
virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
|
||||
|
||||
# virtio-mem.c
|
||||
virtio_mem_send_response(uint16_t type) "type=%" PRIu16
|
||||
|
@ -69,6 +69,77 @@ static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
|
||||
return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
|
||||
}
|
||||
|
||||
static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev)
|
||||
{
|
||||
uint32_t sid;
|
||||
bool bypassed;
|
||||
VirtIOIOMMU *s = sdev->viommu;
|
||||
VirtIOIOMMUEndpoint *ep;
|
||||
|
||||
sid = virtio_iommu_get_bdf(sdev);
|
||||
|
||||
qemu_mutex_lock(&s->mutex);
|
||||
/* need to check bypass before system reset */
|
||||
if (!s->endpoints) {
|
||||
bypassed = s->config.bypass;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
|
||||
if (!ep || !ep->domain) {
|
||||
bypassed = s->config.bypass;
|
||||
} else {
|
||||
bypassed = ep->domain->bypass;
|
||||
}
|
||||
|
||||
unlock:
|
||||
qemu_mutex_unlock(&s->mutex);
|
||||
return bypassed;
|
||||
}
|
||||
|
||||
/* Return whether the device is using IOMMU translation. */
|
||||
static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev)
|
||||
{
|
||||
bool use_remapping;
|
||||
|
||||
assert(sdev);
|
||||
|
||||
use_remapping = !virtio_iommu_device_bypassed(sdev);
|
||||
|
||||
trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus),
|
||||
PCI_SLOT(sdev->devfn),
|
||||
PCI_FUNC(sdev->devfn),
|
||||
use_remapping);
|
||||
|
||||
/* Turn off first then on the other */
|
||||
if (use_remapping) {
|
||||
memory_region_set_enabled(&sdev->bypass_mr, false);
|
||||
memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true);
|
||||
} else {
|
||||
memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false);
|
||||
memory_region_set_enabled(&sdev->bypass_mr, true);
|
||||
}
|
||||
|
||||
return use_remapping;
|
||||
}
|
||||
|
||||
static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s)
|
||||
{
|
||||
GHashTableIter iter;
|
||||
IOMMUPciBus *iommu_pci_bus;
|
||||
int i;
|
||||
|
||||
g_hash_table_iter_init(&iter, s->as_by_busptr);
|
||||
while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
|
||||
for (i = 0; i < PCI_DEVFN_MAX; i++) {
|
||||
if (!iommu_pci_bus->pbdev[i]) {
|
||||
continue;
|
||||
}
|
||||
virtio_iommu_switch_address_space(iommu_pci_bus->pbdev[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The bus number is used for lookup when SID based operations occur.
|
||||
* In that case we lazily populate the IOMMUPciBus array from the bus hash
|
||||
@ -213,6 +284,7 @@ static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value,
|
||||
static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
|
||||
{
|
||||
VirtIOIOMMUDomain *domain = ep->domain;
|
||||
IOMMUDevice *sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
|
||||
|
||||
if (!ep->domain) {
|
||||
return;
|
||||
@ -221,6 +293,7 @@ static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
|
||||
ep->iommu_mr);
|
||||
QLIST_REMOVE(ep, next);
|
||||
ep->domain = NULL;
|
||||
virtio_iommu_switch_address_space(sdev);
|
||||
}
|
||||
|
||||
static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
|
||||
@ -323,12 +396,39 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
|
||||
|
||||
trace_virtio_iommu_init_iommu_mr(name);
|
||||
|
||||
memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX);
|
||||
address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU);
|
||||
|
||||
/*
|
||||
* Build the IOMMU disabled container with aliases to the
|
||||
* shared MRs. Note that aliasing to a shared memory region
|
||||
* could help the memory API to detect same FlatViews so we
|
||||
* can have devices to share the same FlatView when in bypass
|
||||
* mode. (either by not configuring virtio-iommu driver or with
|
||||
* "iommu=pt"). It will greatly reduce the total number of
|
||||
* FlatViews of the system hence VM runs faster.
|
||||
*/
|
||||
memory_region_init_alias(&sdev->bypass_mr, OBJECT(s),
|
||||
"system", get_system_memory(), 0,
|
||||
memory_region_size(get_system_memory()));
|
||||
|
||||
memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
|
||||
TYPE_VIRTIO_IOMMU_MEMORY_REGION,
|
||||
OBJECT(s), name,
|
||||
UINT64_MAX);
|
||||
address_space_init(&sdev->as,
|
||||
MEMORY_REGION(&sdev->iommu_mr), TYPE_VIRTIO_IOMMU);
|
||||
|
||||
/*
|
||||
* Hook both the containers under the root container, we
|
||||
* switch between iommu & bypass MRs by enable/disable
|
||||
* corresponding sub-containers
|
||||
*/
|
||||
memory_region_add_subregion_overlap(&sdev->root, 0,
|
||||
MEMORY_REGION(&sdev->iommu_mr),
|
||||
0);
|
||||
memory_region_add_subregion_overlap(&sdev->root, 0,
|
||||
&sdev->bypass_mr, 0);
|
||||
|
||||
virtio_iommu_switch_address_space(sdev);
|
||||
g_free(name);
|
||||
}
|
||||
return &sdev->as;
|
||||
@ -342,6 +442,7 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
|
||||
uint32_t flags = le32_to_cpu(req->flags);
|
||||
VirtIOIOMMUDomain *domain;
|
||||
VirtIOIOMMUEndpoint *ep;
|
||||
IOMMUDevice *sdev;
|
||||
|
||||
trace_virtio_iommu_attach(domain_id, ep_id);
|
||||
|
||||
@ -375,6 +476,8 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
|
||||
QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);
|
||||
|
||||
ep->domain = domain;
|
||||
sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
|
||||
virtio_iommu_switch_address_space(sdev);
|
||||
|
||||
/* Replay domain mappings on the associated memory region */
|
||||
g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb,
|
||||
@ -887,6 +990,7 @@ static void virtio_iommu_set_config(VirtIODevice *vdev,
|
||||
return;
|
||||
}
|
||||
dev_config->bypass = in_config->bypass;
|
||||
virtio_iommu_switch_address_space_all(dev);
|
||||
}
|
||||
|
||||
trace_virtio_iommu_set_config(in_config->bypass);
|
||||
@ -1026,6 +1130,8 @@ static void virtio_iommu_system_reset(void *opaque)
|
||||
* system reset
|
||||
*/
|
||||
s->config.bypass = s->boot_bypass;
|
||||
virtio_iommu_switch_address_space_all(s);
|
||||
|
||||
}
|
||||
|
||||
static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
|
||||
@ -1041,6 +1147,11 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
|
||||
virtio_iommu_handle_command);
|
||||
s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);
|
||||
|
||||
/*
|
||||
* config.bypass is needed to get initial address space early, such as
|
||||
* in vfio realize
|
||||
*/
|
||||
s->config.bypass = s->boot_bypass;
|
||||
s->config.page_size_mask = TARGET_PAGE_MASK;
|
||||
s->config.input_range.end = UINT64_MAX;
|
||||
s->config.domain_range.end = UINT32_MAX;
|
||||
|
@ -37,6 +37,8 @@ typedef struct IOMMUDevice {
|
||||
int devfn;
|
||||
IOMMUMemoryRegion iommu_mr;
|
||||
AddressSpace as;
|
||||
MemoryRegion root; /* The root container of the device */
|
||||
MemoryRegion bypass_mr; /* The alias of shared memory MR */
|
||||
} IOMMUDevice;
|
||||
|
||||
typedef struct IOMMUPciBus {
|
||||
|
Loading…
Reference in New Issue
Block a user