From 07ceaf98800519ef9c5dc893af00f1fe1f9144e4 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 28 Apr 2015 11:14:02 -0600 Subject: [PATCH 1/3] vfio-pci: Further fix BAR size overflow In an analysis by Laszlo, the resulting type of our calculation for the end of the MSI-X table, and thus the start of memory after the table, is uint32_t. We're therefore not correctly preventing the corner case overflow that we intended to fix here where a BAR >=4G could place the MSI-X table to end exactly at the 4G boundary. The MSI-X table offset is defined by the hardware spec to 32bits, so we simply use a cast rather than changing data structure types. This scenario is purely theoretically, typically the MSI-X table is located at the front of the BAR. Reported-by: Laszlo Ersek Reviewed-by: Laszlo Ersek Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index cd15b20990..495f5fda8e 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2400,7 +2400,7 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) if (vdev->msix && vdev->msix->table_bar == nr) { uint64_t start; - start = HOST_PAGE_ALIGN(vdev->msix->table_offset + + start = HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); size = start < bar->region.size ? bar->region.size - start : 0; From c6d231e2fd3773ef9a566ca24962f2314cb78f73 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 28 Apr 2015 11:14:02 -0600 Subject: [PATCH 2/3] vfio-pci: Fix error path sign This is an impossible error path due to the fact that we're reading a kernel provided, rather than user provided link, which will certainly always fit in PATH_MAX. Currently it returns a fixed 26 char path plus %d group number, which typically maxes out at double digits. However, the caller of the initfn certainly expects a less-than zero return value on error, not just a non-zero value. Therefore we should correct the sign here. Reported-by: Laszlo Ersek Reviewed-by: Laszlo Ersek Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 495f5fda8e..576c677be3 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3358,7 +3358,7 @@ static int vfio_initfn(PCIDevice *pdev) len = readlink(path, iommu_group_path, sizeof(path)); if (len <= 0 || len >= sizeof(path)) { error_report("vfio: error no iommu_group for device"); - return len < 0 ? -errno : ENAMETOOLONG; + return len < 0 ? -errno : -ENAMETOOLONG; } iommu_group_path[len] = 0; From 5655f931abcfa5f100d12d021eaed606c2d4ef52 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 28 Apr 2015 11:14:02 -0600 Subject: [PATCH 3/3] vfio-pci: Reset workaround for AMD Bonaire and Hawaii GPUs Somehow these GPUs manage not to respond to a PCI bus reset, removing our primary mechanism for resetting graphics cards. The result is that these devices typically work well for a single VM boot. If the VM is rebooted or restarted, the guest driver is not able to init the card from the dirty state, resulting in a blue screen for Windows guests. The workaround is to use a device specific reset. This is not 100% reliable though since it depends on the incoming state of the device, but it substantially improves the usability of these devices in a VM. Credit to Alex Deucher for his guidance. Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 162 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 576c677be3..e0e339a534 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -154,6 +154,7 @@ typedef struct VFIOPCIDevice { PCIHostDeviceAddress host; EventNotifier err_notifier; EventNotifier req_notifier; + int (*resetfn)(struct VFIOPCIDevice *); uint32_t features; #define VFIO_FEATURE_ENABLE_VGA_BIT 0 #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT) @@ -3325,6 +3326,162 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) vdev->req_enabled = false; } +/* + * AMD Radeon PCI config reset, based on Linux: + * drivers/gpu/drm/radeon/ci_smc.c:ci_is_smc_running() + * drivers/gpu/drm/radeon/radeon_device.c:radeon_pci_config_reset + * drivers/gpu/drm/radeon/ci_smc.c:ci_reset_smc() + * drivers/gpu/drm/radeon/ci_smc.c:ci_stop_smc_clock() + * IDs: include/drm/drm_pciids.h + * Registers: http://cgit.freedesktop.org/~agd5f/linux/commit/?id=4e2aa447f6f0 + * + * Bonaire and Hawaii GPUs do not respond to a bus reset. This is a bug in the + * hardware that should be fixed on future ASICs. The symptom of this is that + * once the accerlated driver loads, Windows guests will bsod on subsequent + * attmpts to load the driver, such as after VM reset or shutdown/restart. To + * work around this, we do an AMD specific PCI config reset, followed by an SMC + * reset. The PCI config reset only works if SMC firmware is running, so we + * have a dependency on the state of the device as to whether this reset will + * be effective. There are still cases where we won't be able to kick the + * device into working, but this greatly improves the usability overall. The + * config reset magic is relatively common on AMD GPUs, but the setup and SMC + * poking is largely ASIC specific. + */ +static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev) +{ + uint32_t clk, pc_c; + + /* + * Registers 200h and 204h are index and data registers for acessing + * indirect configuration registers within the device. + */ + vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4); + clk = vfio_region_read(&vdev->bars[5].region, 0x204, 4); + vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000370, 4); + pc_c = vfio_region_read(&vdev->bars[5].region, 0x204, 4); + + return (!(clk & 1) && (0x20100 <= pc_c)); +} + +/* + * The scope of a config reset is controlled by a mode bit in the misc register + * and a fuse, exposed as a bit in another register. The fuse is the default + * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the forumula + * scope = !(misc ^ fuse), where the resulting scope is defined the same as + * the fuse. A truth table therefore tells us that if misc == fuse, we need + * to flip the value of the bit in the misc register. + */ +static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev) +{ + uint32_t misc, fuse; + bool a, b; + + vfio_region_write(&vdev->bars[5].region, 0x200, 0xc00c0000, 4); + fuse = vfio_region_read(&vdev->bars[5].region, 0x204, 4); + b = fuse & 64; + + vfio_region_write(&vdev->bars[5].region, 0x200, 0xc0000010, 4); + misc = vfio_region_read(&vdev->bars[5].region, 0x204, 4); + a = misc & 2; + + if (a == b) { + vfio_region_write(&vdev->bars[5].region, 0x204, misc ^ 2, 4); + vfio_region_read(&vdev->bars[5].region, 0x204, 4); /* flush */ + } +} + +static int vfio_radeon_reset(VFIOPCIDevice *vdev) +{ + PCIDevice *pdev = &vdev->pdev; + int i, ret = 0; + uint32_t data; + + /* Defer to a kernel implemented reset */ + if (vdev->vbasedev.reset_works) { + return -ENODEV; + } + + /* Enable only memory BAR access */ + vfio_pci_write_config(pdev, PCI_COMMAND, PCI_COMMAND_MEMORY, 2); + + /* Reset only works if SMC firmware is loaded and running */ + if (!vfio_radeon_smc_is_running(vdev)) { + ret = -EINVAL; + goto out; + } + + /* Make sure only the GFX function is reset */ + vfio_radeon_set_gfx_only_reset(vdev); + + /* AMD PCI config reset */ + vfio_pci_write_config(pdev, 0x7c, 0x39d5e86b, 4); + usleep(100); + + /* Read back the memory size to make sure we're out of reset */ + for (i = 0; i < 100000; i++) { + if (vfio_region_read(&vdev->bars[5].region, 0x5428, 4) != 0xffffffff) { + break; + } + usleep(1); + } + + /* Reset SMC */ + vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000000, 4); + data = vfio_region_read(&vdev->bars[5].region, 0x204, 4); + data |= 1; + vfio_region_write(&vdev->bars[5].region, 0x204, data, 4); + + /* Disable SMC clock */ + vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4); + data = vfio_region_read(&vdev->bars[5].region, 0x204, 4); + data |= 1; + vfio_region_write(&vdev->bars[5].region, 0x204, data, 4); + +out: + /* Restore PCI command register */ + vfio_pci_write_config(pdev, PCI_COMMAND, 0, 2); + + return ret; +} + +static void vfio_setup_resetfn(VFIOPCIDevice *vdev) +{ + PCIDevice *pdev = &vdev->pdev; + uint16_t vendor, device; + + vendor = pci_get_word(pdev->config + PCI_VENDOR_ID); + device = pci_get_word(pdev->config + PCI_DEVICE_ID); + + switch (vendor) { + case 0x1002: + switch (device) { + /* Bonaire */ + case 0x6649: /* Bonaire [FirePro W5100] */ + case 0x6650: + case 0x6651: + case 0x6658: /* Bonaire XTX [Radeon R7 260X] */ + case 0x665c: /* Bonaire XT [Radeon HD 7790/8770 / R9 260 OEM] */ + case 0x665d: /* Bonaire [Radeon R7 200 Series] */ + /* Hawaii */ + case 0x67A0: /* Hawaii XT GL [FirePro W9100] */ + case 0x67A1: /* Hawaii PRO GL [FirePro W8100] */ + case 0x67A2: + case 0x67A8: + case 0x67A9: + case 0x67AA: + case 0x67B0: /* Hawaii XT [Radeon R9 290X] */ + case 0x67B1: /* Hawaii PRO [Radeon R9 290] */ + case 0x67B8: + case 0x67B9: + case 0x67BA: + case 0x67BE: + vdev->resetfn = vfio_radeon_reset; + break; + } + break; + } +} + static int vfio_initfn(PCIDevice *pdev) { VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); @@ -3473,6 +3630,7 @@ static int vfio_initfn(PCIDevice *pdev) vfio_register_err_notifier(vdev); vfio_register_req_notifier(vdev); + vfio_setup_resetfn(vdev); return 0; @@ -3520,6 +3678,10 @@ static void vfio_pci_reset(DeviceState *dev) vfio_pci_pre_reset(vdev); + if (vdev->resetfn && !vdev->resetfn(vdev)) { + goto post_reset; + } + if (vdev->vbasedev.reset_works && (vdev->has_flr || !vdev->has_pm_reset) && !ioctl(vdev->vbasedev.fd, VFIO_DEVICE_RESET)) {