virtio, pci: fixes
A couple of bugfixes. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> -----BEGIN PGP SIGNATURE----- iQEcBAABAgAGBQJdwrXTAAoJECgfDbjSjVRpLPMH/18Ktl9xtmm4mpWtunKFIHc2 w74Xvfah3IBkJMzuOpu3CltvHqS9nqK8/l89qESwhAsidad58meB5UJeTCXGBYZq E7m3BHX/NbpAPWozKj+HDE1G4Ik0OO+7SPNXLkxjx1zQ9g0fgzcCQSeYgyNxjQO1 v852aL9dyJVykHhOfhQQsT2JTJm9n0f1l/aMSJIkJQgG8xIEALSrT7Od6lqvQjz5 /CqFgFxM+h1iymXXEgaa6RlodzOAIsoQ8dJ51M43MauyxwAlY5wO4CTWRrL/1ctH zEBPYVzjiUTFANSgx0jmksO7Enisx17zbPwbrcUWFNkIAw4Y2A/jm0WHP2uPFcY= =Mw2o -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging virtio, pci: fixes A couple of bugfixes. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # gpg: Signature made Wed 06 Nov 2019 12:00:19 GMT # gpg: using RSA key 281F0DB8D28D5469 # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full] # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [full] # Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67 # Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469 * remotes/mst/tags/for_upstream: virtio: notify virtqueue via host notifier when available hw/i386: AMD-Vi IVRS DMA alias support pci: Use PCI aliases when determining device IOMMU address space Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
1c5880e785
@ -2517,12 +2517,105 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
|
||||
*/
|
||||
#define IOAPIC_SB_DEVID (uint64_t)PCI_BUILD_BDF(0, PCI_DEVFN(0x14, 0))
|
||||
|
||||
/*
|
||||
* Insert IVHD entry for device and recurse, insert alias, or insert range as
|
||||
* necessary for the PCI topology.
|
||||
*/
|
||||
static void
|
||||
insert_ivhd(PCIBus *bus, PCIDevice *dev, void *opaque)
|
||||
{
|
||||
GArray *table_data = opaque;
|
||||
uint32_t entry;
|
||||
|
||||
/* "Select" IVHD entry, type 0x2 */
|
||||
entry = PCI_BUILD_BDF(pci_bus_num(bus), dev->devfn) << 8 | 0x2;
|
||||
build_append_int_noprefix(table_data, entry, 4);
|
||||
|
||||
if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {
|
||||
PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
|
||||
uint8_t sec = pci_bus_num(sec_bus);
|
||||
uint8_t sub = dev->config[PCI_SUBORDINATE_BUS];
|
||||
|
||||
if (pci_bus_is_express(sec_bus)) {
|
||||
/*
|
||||
* Walk the bus if there are subordinates, otherwise use a range
|
||||
* to cover an entire leaf bus. We could potentially also use a
|
||||
* range for traversed buses, but we'd need to take care not to
|
||||
* create both Select and Range entries covering the same device.
|
||||
* This is easier and potentially more compact.
|
||||
*
|
||||
* An example bare metal system seems to use Select entries for
|
||||
* root ports without a slot (ie. built-ins) and Range entries
|
||||
* when there is a slot. The same system also only hard-codes
|
||||
* the alias range for an onboard PCIe-to-PCI bridge, apparently
|
||||
* making no effort to support nested bridges. We attempt to
|
||||
* be more thorough here.
|
||||
*/
|
||||
if (sec == sub) { /* leaf bus */
|
||||
/* "Start of Range" IVHD entry, type 0x3 */
|
||||
entry = PCI_BUILD_BDF(sec, PCI_DEVFN(0, 0)) << 8 | 0x3;
|
||||
build_append_int_noprefix(table_data, entry, 4);
|
||||
/* "End of Range" IVHD entry, type 0x4 */
|
||||
entry = PCI_BUILD_BDF(sub, PCI_DEVFN(31, 7)) << 8 | 0x4;
|
||||
build_append_int_noprefix(table_data, entry, 4);
|
||||
} else {
|
||||
pci_for_each_device(sec_bus, sec, insert_ivhd, table_data);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* If the secondary bus is conventional, then we need to create an
|
||||
* Alias range for everything downstream. The range covers the
|
||||
* first devfn on the secondary bus to the last devfn on the
|
||||
* subordinate bus. The alias target depends on legacy versus
|
||||
* express bridges, just as in pci_device_iommu_address_space().
|
||||
* DeviceIDa vs DeviceIDb as per the AMD IOMMU spec.
|
||||
*/
|
||||
uint16_t dev_id_a, dev_id_b;
|
||||
|
||||
dev_id_a = PCI_BUILD_BDF(sec, PCI_DEVFN(0, 0));
|
||||
|
||||
if (pci_is_express(dev) &&
|
||||
pcie_cap_get_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) {
|
||||
dev_id_b = dev_id_a;
|
||||
} else {
|
||||
dev_id_b = PCI_BUILD_BDF(pci_bus_num(bus), dev->devfn);
|
||||
}
|
||||
|
||||
/* "Alias Start of Range" IVHD entry, type 0x43, 8 bytes */
|
||||
build_append_int_noprefix(table_data, dev_id_a << 8 | 0x43, 4);
|
||||
build_append_int_noprefix(table_data, dev_id_b << 8 | 0x0, 4);
|
||||
|
||||
/* "End of Range" IVHD entry, type 0x4 */
|
||||
entry = PCI_BUILD_BDF(sub, PCI_DEVFN(31, 7)) << 8 | 0x4;
|
||||
build_append_int_noprefix(table_data, entry, 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* For all PCI host bridges, walk and insert IVHD entries */
|
||||
static int
|
||||
ivrs_host_bridges(Object *obj, void *opaque)
|
||||
{
|
||||
GArray *ivhd_blob = opaque;
|
||||
|
||||
if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) {
|
||||
PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus;
|
||||
|
||||
if (bus) {
|
||||
pci_for_each_device(bus, pci_bus_num(bus), insert_ivhd, ivhd_blob);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
build_amd_iommu(GArray *table_data, BIOSLinker *linker)
|
||||
{
|
||||
int ivhd_table_len = 28;
|
||||
int ivhd_table_len = 24;
|
||||
int iommu_start = table_data->len;
|
||||
AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
|
||||
GArray *ivhd_blob = g_array_new(false, true, 1);
|
||||
|
||||
/* IVRS header */
|
||||
acpi_data_push(table_data, sizeof(AcpiTableHeader));
|
||||
@ -2543,6 +2636,27 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker)
|
||||
(1UL << 7), /* PPRSup */
|
||||
1);
|
||||
|
||||
/*
|
||||
* A PCI bus walk, for each PCI host bridge, is necessary to create a
|
||||
* complete set of IVHD entries. Do this into a separate blob so that we
|
||||
* can calculate the total IVRS table length here and then append the new
|
||||
* blob further below. Fall back to an entry covering all devices, which
|
||||
* is sufficient when no aliases are present.
|
||||
*/
|
||||
object_child_foreach_recursive(object_get_root(),
|
||||
ivrs_host_bridges, ivhd_blob);
|
||||
|
||||
if (!ivhd_blob->len) {
|
||||
/*
|
||||
* Type 1 device entry reporting all devices
|
||||
* These are 4-byte device entries currently reporting the range of
|
||||
* Refer to Spec - Table 95:IVHD Device Entry Type Codes(4-byte)
|
||||
*/
|
||||
build_append_int_noprefix(ivhd_blob, 0x0000001, 4);
|
||||
}
|
||||
|
||||
ivhd_table_len += ivhd_blob->len;
|
||||
|
||||
/*
|
||||
* When interrupt remapping is supported, we add a special IVHD device
|
||||
* for type IO-APIC.
|
||||
@ -2550,6 +2664,7 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker)
|
||||
if (x86_iommu_ir_supported(x86_iommu_get_default())) {
|
||||
ivhd_table_len += 8;
|
||||
}
|
||||
|
||||
/* IVHD length */
|
||||
build_append_int_noprefix(table_data, ivhd_table_len, 2);
|
||||
/* DeviceID */
|
||||
@ -2569,12 +2684,10 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker)
|
||||
(1UL << 2) | /* GTSup */
|
||||
(1UL << 6), /* GASup */
|
||||
4);
|
||||
/*
|
||||
* Type 1 device entry reporting all devices
|
||||
* These are 4-byte device entries currently reporting the range of
|
||||
* Refer to Spec - Table 95:IVHD Device Entry Type Codes(4-byte)
|
||||
*/
|
||||
build_append_int_noprefix(table_data, 0x0000001, 4);
|
||||
|
||||
/* IVHD entries as found above */
|
||||
g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
|
||||
g_array_free(ivhd_blob, TRUE);
|
||||
|
||||
/*
|
||||
* Add a special IVHD device type.
|
||||
|
43
hw/pci/pci.c
43
hw/pci/pci.c
@ -2646,12 +2646,49 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
|
||||
{
|
||||
PCIBus *bus = pci_get_bus(dev);
|
||||
PCIBus *iommu_bus = bus;
|
||||
uint8_t devfn = dev->devfn;
|
||||
|
||||
while(iommu_bus && !iommu_bus->iommu_fn && iommu_bus->parent_dev) {
|
||||
iommu_bus = pci_get_bus(iommu_bus->parent_dev);
|
||||
while (iommu_bus && !iommu_bus->iommu_fn && iommu_bus->parent_dev) {
|
||||
PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev);
|
||||
|
||||
/*
|
||||
* The requester ID of the provided device may be aliased, as seen from
|
||||
* the IOMMU, due to topology limitations. The IOMMU relies on a
|
||||
* requester ID to provide a unique AddressSpace for devices, but
|
||||
* conventional PCI buses pre-date such concepts. Instead, the PCIe-
|
||||
* to-PCI bridge creates and accepts transactions on behalf of down-
|
||||
* stream devices. When doing so, all downstream devices are masked
|
||||
* (aliased) behind a single requester ID. The requester ID used
|
||||
* depends on the format of the bridge devices. Proper PCIe-to-PCI
|
||||
* bridges, with a PCIe capability indicating such, follow the
|
||||
* guidelines of chapter 2.3 of the PCIe-to-PCI/X bridge specification,
|
||||
* where the bridge uses the secondary bus as the bridge portion of the
|
||||
* requester ID and devfn of 00.0. For other bridges, typically those
|
||||
* found on the root complex such as the dmi-to-pci-bridge, we follow
|
||||
* the convention of typical bare-metal hardware, which uses the
|
||||
* requester ID of the bridge itself. There are device specific
|
||||
* exceptions to these rules, but these are the defaults that the
|
||||
* Linux kernel uses when determining DMA aliases itself and believed
|
||||
* to be true for the bare metal equivalents of the devices emulated
|
||||
* in QEMU.
|
||||
*/
|
||||
if (!pci_bus_is_express(iommu_bus)) {
|
||||
PCIDevice *parent = iommu_bus->parent_dev;
|
||||
|
||||
if (pci_is_express(parent) &&
|
||||
pcie_cap_get_type(parent) == PCI_EXP_TYPE_PCI_BRIDGE) {
|
||||
devfn = PCI_DEVFN(0, 0);
|
||||
bus = iommu_bus;
|
||||
} else {
|
||||
devfn = parent->devfn;
|
||||
bus = parent_bus;
|
||||
}
|
||||
}
|
||||
|
||||
iommu_bus = parent_bus;
|
||||
}
|
||||
if (iommu_bus && iommu_bus->iommu_fn) {
|
||||
return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, dev->devfn);
|
||||
return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn);
|
||||
}
|
||||
return &address_space_memory;
|
||||
}
|
||||
|
@ -288,6 +288,10 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign)
|
||||
k->ioeventfd_assign(proxy, notifier, n, false);
|
||||
}
|
||||
|
||||
if (r == 0) {
|
||||
virtio_queue_set_host_notifier_enabled(vq, assign);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -128,6 +128,7 @@ struct VirtQueue
|
||||
VirtIODevice *vdev;
|
||||
EventNotifier guest_notifier;
|
||||
EventNotifier host_notifier;
|
||||
bool host_notifier_enabled;
|
||||
QLIST_ENTRY(VirtQueue) node;
|
||||
};
|
||||
|
||||
@ -2271,7 +2272,7 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
|
||||
}
|
||||
|
||||
trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
|
||||
if (vq->handle_aio_output) {
|
||||
if (vq->host_notifier_enabled) {
|
||||
event_notifier_set(&vq->host_notifier);
|
||||
} else if (vq->handle_output) {
|
||||
vq->handle_output(vdev, vq);
|
||||
@ -3145,6 +3146,7 @@ void virtio_init(VirtIODevice *vdev, const char *name,
|
||||
vdev->vq[i].vector = VIRTIO_NO_VECTOR;
|
||||
vdev->vq[i].vdev = vdev;
|
||||
vdev->vq[i].queue_index = i;
|
||||
vdev->vq[i].host_notifier_enabled = false;
|
||||
}
|
||||
|
||||
vdev->name = name;
|
||||
@ -3436,6 +3438,11 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
|
||||
return &vq->host_notifier;
|
||||
}
|
||||
|
||||
/*
 * Record in @vq whether its host notifier is currently enabled.
 * Only the host_notifier_enabled flag is updated.
 */
void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
{
    vq->host_notifier_enabled = enabled;
}
|
||||
|
||||
int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
|
||||
MemoryRegion *mr, bool assign)
|
||||
{
|
||||
|
@ -312,6 +312,7 @@ int virtio_device_grab_ioeventfd(VirtIODevice *vdev);
|
||||
void virtio_device_release_ioeventfd(VirtIODevice *vdev);
|
||||
bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev);
|
||||
EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
|
||||
void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled);
|
||||
void virtio_queue_host_notifier_read(EventNotifier *n);
|
||||
void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
|
||||
VirtIOHandleAIOOutput handle_output);
|
||||
|
Loading…
Reference in New Issue
Block a user