intel_iommu: enable remote IOTLB
This patch is based on Aviv Ben-David (<bd.aviv@gmail.com>)'s patch upstream: "IOMMU: enable intel_iommu map and unmap notifiers" https://lists.gnu.org/archive/html/qemu-devel/2016-11/msg01453.html However, I removed/fixed some content and added my own code. Instead of calling translate() on every page for IOTLB invalidations (which is slower), we walk the pages when needed and notify in a hook function. This patch enables vfio devices for VT-d emulation. And, since we already have vhost DMAR support via device-iotlb, a natural benefit of this patch is that VT-d enabled vhost can now work even without the ATS capability, though more tests are needed. Signed-off-by: Aviv Ben-David <bdaviv@cs.technion.ac.il> Reviewed-by: Jason Wang <jasowang@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: "Michael S. Tsirkin" <mst@redhat.com> Signed-off-by: Peter Xu <peterx@redhat.com> Message-Id: <1491562755-23867-10-git-send-email-peterx@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
This commit is contained in:
parent
558e0024a4
commit
dd4d607e40
@ -806,7 +806,8 @@ next:
|
||||
* @private: private data for the hook function
|
||||
*/
|
||||
static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end,
|
||||
vtd_page_walk_hook hook_fn, void *private)
|
||||
vtd_page_walk_hook hook_fn, void *private,
|
||||
bool notify_unmap)
|
||||
{
|
||||
dma_addr_t addr = vtd_get_slpt_base_from_context(ce);
|
||||
uint32_t level = vtd_get_level_from_context_entry(ce);
|
||||
@ -821,7 +822,7 @@ static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end,
|
||||
}
|
||||
|
||||
return vtd_page_walk_level(addr, start, end, hook_fn, private,
|
||||
level, true, true, false);
|
||||
level, true, true, notify_unmap);
|
||||
}
|
||||
|
||||
/* Map a device to its corresponding domain (context-entry) */
|
||||
@ -1038,6 +1039,15 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
|
||||
s->intr_root, s->intr_size);
|
||||
}
|
||||
|
||||
/*
 * Replay the mappings of every address space found on the vIOMMU's
 * notifiers_list.
 *
 * @s: the IOMMU device state whose notifier list is walked
 */
static void vtd_iommu_replay_all(IntelIOMMUState *s)
{
    IntelIOMMUNotifierNode *cur;

    QLIST_FOREACH(cur, &s->notifiers_list, next) {
        VTDAddressSpace *as = cur->vtd_as;

        memory_region_iommu_replay_all(&as->iommu);
    }
}
|
||||
|
||||
static void vtd_context_global_invalidate(IntelIOMMUState *s)
|
||||
{
|
||||
trace_vtd_inv_desc_cc_global();
|
||||
@ -1045,6 +1055,14 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
|
||||
if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
|
||||
vtd_reset_context_cache(s);
|
||||
}
|
||||
/*
|
||||
* From VT-d spec 6.5.2.1, a global context entry invalidation
|
||||
* should be followed by an IOTLB global invalidation, so we should
|
||||
* be safe even without this. However, let's replay the region as
|
||||
* well to be safer, and go back here when we need finer tunes for
|
||||
* VT-d emulation codes.
|
||||
*/
|
||||
vtd_iommu_replay_all(s);
|
||||
}
|
||||
|
||||
|
||||
@ -1111,6 +1129,16 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
|
||||
trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
|
||||
VTD_PCI_FUNC(devfn_it));
|
||||
vtd_as->context_cache_entry.context_cache_gen = 0;
|
||||
/*
|
||||
* So a device is moving out of (or moving into) a
|
||||
* domain, a replay() suits here to notify all the
|
||||
* IOMMU_NOTIFIER_MAP registers about this change.
|
||||
* This does no harm even if we have no such
|
||||
* notifier registered - the IOMMU notification
|
||||
* framework will skip MAP notifications if that
|
||||
* happened.
|
||||
*/
|
||||
memory_region_iommu_replay_all(&vtd_as->iommu);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1152,12 +1180,53 @@ static void vtd_iotlb_global_invalidate(IntelIOMMUState *s)
|
||||
{
|
||||
trace_vtd_iotlb_reset("global invalidation recved");
|
||||
vtd_reset_iotlb(s);
|
||||
vtd_iommu_replay_all(s);
|
||||
}
|
||||
|
||||
/*
 * Domain-selective IOTLB invalidation.
 *
 * First removes from s->iotlb every entry that vtd_hash_remove_by_domain()
 * selects for @domain_id.  Then, for each address space on the notifier
 * list whose context entry can be fetched and whose domain id equals
 * @domain_id, replays the mappings of its IOMMU region.
 *
 * @s:         the IOMMU device state
 * @domain_id: the domain being invalidated
 */
static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
{
    IntelIOMMUNotifierNode *cur;

    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
                                &domain_id);

    QLIST_FOREACH(cur, &s->notifiers_list, next) {
        VTDAddressSpace *as = cur->vtd_as;
        VTDContextEntry ce;

        /* A non-zero return means the context entry lookup failed. */
        if (vtd_dev_to_context_entry(s, pci_bus_num(as->bus),
                                     as->devfn, &ce)) {
            continue;
        }

        /* Device currently belongs to a different domain. */
        if (VTD_CONTEXT_ENTRY_DID(ce.hi) != domain_id) {
            continue;
        }

        memory_region_iommu_replay_all(&as->iommu);
    }
}
|
||||
|
||||
/*
 * Page-walk hook for page-selective invalidation.
 *
 * @entry:   the IOTLB entry produced by the page walk
 * @private: the MemoryRegion (IOMMU region) whose notifiers get @entry
 *
 * Returns 0 unconditionally.
 */
static int vtd_page_invalidate_notify_hook(IOMMUTLBEntry *entry,
                                           void *private)
{
    MemoryRegion *iommu_mr = private;

    memory_region_notify_iommu(iommu_mr, *entry);

    return 0;
}
|
||||
|
||||
/*
 * Notify registered IOMMU notifiers about a page-selective invalidation.
 *
 * For every address space on the notifier list whose context entry can be
 * fetched and whose domain id equals @domain_id, the guest page table is
 * walked over [addr, addr + 2^am pages) and each walked entry is passed
 * to vtd_page_invalidate_notify_hook() with unmap notification enabled.
 *
 * @s:         the IOMMU device state
 * @domain_id: the domain the invalidation targets
 * @addr:      start address of the invalidated range
 * @am:        address mask; the range spans (1 << am) pages
 */
static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
                                             uint16_t domain_id, hwaddr addr,
                                             uint8_t am)
{
    IntelIOMMUNotifierNode *node;
    /*
     * Do the shift in 64 bits: "1 << am" is an int shift and is undefined
     * once am reaches the width of int.  The end address does not depend
     * on the node, so compute it once.
     */
    hwaddr end = addr + (1ULL << am) * VTD_PAGE_SIZE;

    QLIST_FOREACH(node, &(s->notifiers_list), next) {
        VTDAddressSpace *vtd_as = node->vtd_as;
        VTDContextEntry ce;
        int ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
                                           vtd_as->devfn, &ce);

        if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
            vtd_page_walk(&ce, addr, end,
                          vtd_page_invalidate_notify_hook,
                          (void *)&vtd_as->iommu, true);
        }
    }
}
|
||||
|
||||
static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
|
||||
@ -1170,6 +1239,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
|
||||
info.addr = addr;
|
||||
info.mask = ~((1 << am) - 1);
|
||||
g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
|
||||
vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
|
||||
}
|
||||
|
||||
/* Flush IOTLB
|
||||
@ -2187,15 +2257,33 @@ static void vtd_iommu_notify_flag_changed(MemoryRegion *iommu,
|
||||
IOMMUNotifierFlag new)
|
||||
{
|
||||
VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
|
||||
IntelIOMMUState *s = vtd_as->iommu_state;
|
||||
IntelIOMMUNotifierNode *node = NULL;
|
||||
IntelIOMMUNotifierNode *next_node = NULL;
|
||||
|
||||
if (new & IOMMU_NOTIFIER_MAP) {
|
||||
error_report("Device at bus %s addr %02x.%d requires iommu "
|
||||
"notifier which is currently not supported by "
|
||||
"intel-iommu emulation",
|
||||
vtd_as->bus->qbus.name, PCI_SLOT(vtd_as->devfn),
|
||||
PCI_FUNC(vtd_as->devfn));
|
||||
if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) {
|
||||
error_report("We need to set cache_mode=1 for intel-iommu to enable "
|
||||
"device assignment with IOMMU protection.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (old == IOMMU_NOTIFIER_NONE) {
|
||||
node = g_malloc0(sizeof(*node));
|
||||
node->vtd_as = vtd_as;
|
||||
QLIST_INSERT_HEAD(&s->notifiers_list, node, next);
|
||||
return;
|
||||
}
|
||||
|
||||
/* update notifier node with new flags */
|
||||
QLIST_FOREACH_SAFE(node, &s->notifiers_list, next, next_node) {
|
||||
if (node->vtd_as == vtd_as) {
|
||||
if (new == IOMMU_NOTIFIER_NONE) {
|
||||
QLIST_REMOVE(node, next);
|
||||
g_free(node);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static const VMStateDescription vtd_vmstate = {
|
||||
@ -2613,6 +2701,74 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
|
||||
return vtd_dev_as;
|
||||
}
|
||||
|
||||
/* Unmap the whole range in the notifier's scope. */
|
||||
/*
 * Send a single UNMAP notification covering the notifier's whole range.
 *
 * The range [n->start, n->end) is clamped to VTD_ADDRESS_SIZE, its size is
 * rounded up to a power of two (capped at 1 << VTD_MGAW) so that it can be
 * expressed as an address mask, and the resulting IOMMU_NONE entry is
 * delivered to @n only.
 *
 * @as: the VT-d address space the notifier is attached to (used for tracing)
 * @n:  the notifier to send the unmap to
 */
static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
{
    IOMMUTLBEntry entry;
    hwaddr size;
    hwaddr start = n->start;
    hwaddr end = n->end;

    /*
     * Note: all the code in this function assumes that IOVA bits are
     * no more than VTD_MGAW bits (which is restricted by the VT-d
     * spec); otherwise we would need to consider 64-bit overflow.
     */

    if (end > VTD_ADDRESS_SIZE) {
        /*
         * No need to unmap regions that are bigger than the whole
         * VT-d supported address space size.
         */
        end = VTD_ADDRESS_SIZE;
    }

    assert(start <= end);
    size = end - start;

    if (ctpop64(size) != 1) {
        /*
         * This size cannot form a correct mask. Enlarge it to the
         * smallest power of two that covers it.
         *
         * Named "order" rather than "n" so that it does not shadow
         * the notifier parameter.
         */
        int order = 64 - clz64(size);

        if (order > VTD_MGAW) {
            /* should not happen, but in case it happens, limit it */
            order = VTD_MGAW;
        }
        size = 1ULL << order;
    }

    entry.target_as = &address_space_memory;
    /* Align the iova down to the (possibly enlarged) size */
    entry.iova = start & ~(size - 1);
    /* This field is meaningless for unmap */
    entry.translated_addr = 0;
    entry.perm = IOMMU_NONE;
    entry.addr_mask = size - 1;

    trace_vtd_as_unmap_whole(pci_bus_num(as->bus),
                             VTD_PCI_SLOT(as->devfn),
                             VTD_PCI_FUNC(as->devfn),
                             entry.iova, size);

    memory_region_notify_one(n, &entry);
}
|
||||
|
||||
/*
 * Issue a whole-range unmap to every notifier of every address space
 * found on the vIOMMU's notifiers_list.
 *
 * @s: the IOMMU device state
 */
static void vtd_address_space_unmap_all(IntelIOMMUState *s)
{
    IntelIOMMUNotifierNode *cur;

    QLIST_FOREACH(cur, &s->notifiers_list, next) {
        VTDAddressSpace *as = cur->vtd_as;
        IOMMUNotifier *notifier;

        IOMMU_NOTIFIER_FOREACH(notifier, &as->iommu) {
            vtd_address_space_unmap(as, notifier);
        }
    }
}
|
||||
|
||||
static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private)
|
||||
{
|
||||
memory_region_notify_one((IOMMUNotifier *)private, entry);
|
||||
@ -2626,16 +2782,19 @@ static void vtd_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n)
|
||||
uint8_t bus_n = pci_bus_num(vtd_as->bus);
|
||||
VTDContextEntry ce;
|
||||
|
||||
/*
|
||||
* The replay can be triggered by either an invalidation or a newly
|
||||
* created entry. No matter what, we release existing mappings
|
||||
* (it means flushing caches for UNMAP-only registers).
|
||||
*/
|
||||
vtd_address_space_unmap(vtd_as, n);
|
||||
|
||||
if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) {
|
||||
/*
|
||||
* Scanned a valid context entry, walk over the pages and
|
||||
* notify when needed.
|
||||
*/
|
||||
trace_vtd_replay_ce_valid(bus_n, PCI_SLOT(vtd_as->devfn),
|
||||
PCI_FUNC(vtd_as->devfn),
|
||||
VTD_CONTEXT_ENTRY_DID(ce.hi),
|
||||
ce.hi, ce.lo);
|
||||
vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n);
|
||||
vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false);
|
||||
} else {
|
||||
trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn),
|
||||
PCI_FUNC(vtd_as->devfn));
|
||||
@ -2754,6 +2913,11 @@ static void vtd_reset(DeviceState *dev)
|
||||
|
||||
VTD_DPRINTF(GENERAL, "");
|
||||
vtd_init(s);
|
||||
|
||||
/*
|
||||
* When device reset, throw away all mappings and external caches
|
||||
*/
|
||||
vtd_address_space_unmap_all(s);
|
||||
}
|
||||
|
||||
static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
|
||||
@ -2817,6 +2981,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
|
||||
return;
|
||||
}
|
||||
|
||||
QLIST_INIT(&s->notifiers_list);
|
||||
memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
|
||||
memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
|
||||
"intel_iommu", DMAR_REG_SIZE);
|
||||
|
@ -197,6 +197,7 @@
|
||||
#define VTD_DOMAIN_ID_MASK ((1UL << VTD_DOMAIN_ID_SHIFT) - 1)
|
||||
#define VTD_CAP_ND (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL)
|
||||
#define VTD_MGAW 39 /* Maximum Guest Address Width */
|
||||
#define VTD_ADDRESS_SIZE (1ULL << VTD_MGAW)
|
||||
#define VTD_CAP_MGAW (((VTD_MGAW - 1) & 0x3fULL) << 16)
|
||||
#define VTD_MAMV 18ULL
|
||||
#define VTD_CAP_MAMV (VTD_MAMV << 48)
|
||||
|
@ -37,6 +37,7 @@ vtd_page_walk_skip_read(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"P
|
||||
vtd_page_walk_skip_perm(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to perm empty"
|
||||
vtd_page_walk_skip_reserve(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to rsrv set"
|
||||
vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
|
||||
vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size) "Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64
|
||||
|
||||
# hw/i386/amd_iommu.c
|
||||
amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32
|
||||
|
@ -63,6 +63,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
|
||||
typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
|
||||
typedef struct VTDIrq VTDIrq;
|
||||
typedef struct VTD_MSIMessage VTD_MSIMessage;
|
||||
typedef struct IntelIOMMUNotifierNode IntelIOMMUNotifierNode;
|
||||
|
||||
/* Context-Entry */
|
||||
struct VTDContextEntry {
|
||||
@ -249,6 +250,11 @@ struct VTD_MSIMessage {
|
||||
/* When IR is enabled, all MSI/MSI-X data bits should be zero */
|
||||
#define VTD_IR_MSI_DATA (0)
|
||||
|
||||
struct IntelIOMMUNotifierNode {
|
||||
VTDAddressSpace *vtd_as;
|
||||
QLIST_ENTRY(IntelIOMMUNotifierNode) next;
|
||||
};
|
||||
|
||||
/* The iommu (DMAR) device state struct */
|
||||
struct IntelIOMMUState {
|
||||
X86IOMMUState x86_iommu;
|
||||
@ -286,6 +292,8 @@ struct IntelIOMMUState {
|
||||
MemoryRegionIOMMUOps iommu_ops;
|
||||
GHashTable *vtd_as_by_busptr; /* VTDBus objects indexed by PCIBus* reference */
|
||||
VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */
|
||||
/* list of registered notifiers */
|
||||
QLIST_HEAD(, IntelIOMMUNotifierNode) notifiers_list;
|
||||
|
||||
/* interrupt remapping */
|
||||
bool intr_enabled; /* Whether guest enabled IR */
|
||||
|
Loading…
Reference in New Issue
Block a user