qemu/hw/vfio/pci.h
Alex Williamson 2b1dbd0d72 vfio/quirks: Enable ioeventfd quirks to be handled by vfio directly
With vfio ioeventfd support, we can program vfio-pci to perform a
specified BAR write when an eventfd is triggered.  This allows the
KVM ioeventfd to be wired directly to vfio-pci, entirely avoiding
userspace handling for these events.  On the same micro-benchmark
where the ioeventfd got us to almost 90% of performance versus
disabling the GeForce quirks, this gets us to within 95%.

Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-06-05 08:28:09 -06:00

202 lines
6.1 KiB
C

/*
* vfio based device assignment support - PCI devices
*
* Copyright Red Hat, Inc. 2012-2015
*
* Authors:
* Alex Williamson <alex.williamson@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*/
#ifndef HW_VFIO_VFIO_PCI_H
#define HW_VFIO_VFIO_PCI_H
#include "qemu-common.h"
#include "exec/memory.h"
#include "hw/pci/pci.h"
#include "hw/vfio/vfio-common.h"
#include "qemu/event_notifier.h"
#include "qemu/queue.h"
#include "qemu/timer.h"
#define PCI_ANY_ID (~0)
struct VFIOPCIDevice;
typedef struct VFIOIOEventFD {
QLIST_ENTRY(VFIOIOEventFD) next;
MemoryRegion *mr;
hwaddr addr;
unsigned size;
uint64_t data;
EventNotifier e;
VFIORegion *region;
hwaddr region_addr;
bool dynamic; /* Added runtime, removed on device reset */
bool vfio;
} VFIOIOEventFD;
typedef struct VFIOQuirk {
QLIST_ENTRY(VFIOQuirk) next;
void *data;
QLIST_HEAD(, VFIOIOEventFD) ioeventfds;
int nr_mem;
MemoryRegion *mem;
void (*reset)(struct VFIOPCIDevice *vdev, struct VFIOQuirk *quirk);
} VFIOQuirk;
typedef struct VFIOBAR {
VFIORegion region;
MemoryRegion *mr;
size_t size;
uint8_t type;
bool ioport;
bool mem64;
QLIST_HEAD(, VFIOQuirk) quirks;
} VFIOBAR;
typedef struct VFIOVGARegion {
MemoryRegion mem;
off_t offset;
int nr;
QLIST_HEAD(, VFIOQuirk) quirks;
} VFIOVGARegion;
typedef struct VFIOVGA {
off_t fd_offset;
int fd;
VFIOVGARegion region[QEMU_PCI_VGA_NUM_REGIONS];
} VFIOVGA;
typedef struct VFIOINTx {
bool pending; /* interrupt pending */
bool kvm_accel; /* set when QEMU bypass through KVM enabled */
uint8_t pin; /* which pin to pull for qemu_set_irq */
EventNotifier interrupt; /* eventfd triggered on interrupt */
EventNotifier unmask; /* eventfd for unmask on QEMU bypass */
PCIINTxRoute route; /* routing info for QEMU bypass */
uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */
QEMUTimer *mmap_timer; /* enable mmaps after periods w/o interrupts */
} VFIOINTx;
typedef struct VFIOMSIVector {
/*
* Two interrupt paths are configured per vector. The first, is only used
* for interrupts injected via QEMU. This is typically the non-accel path,
* but may also be used when we want QEMU to handle masking and pending
* bits. The KVM path bypasses QEMU and is therefore higher performance,
* but requires masking at the device. virq is used to track the MSI route
* through KVM, thus kvm_interrupt is only available when virq is set to a
* valid (>= 0) value.
*/
EventNotifier interrupt;
EventNotifier kvm_interrupt;
struct VFIOPCIDevice *vdev; /* back pointer to device */
int virq;
bool use;
} VFIOMSIVector;
enum {
VFIO_INT_NONE = 0,
VFIO_INT_INTx = 1,
VFIO_INT_MSI = 2,
VFIO_INT_MSIX = 3,
};
/* Cache of MSI-X setup */
typedef struct VFIOMSIXInfo {
uint8_t table_bar;
uint8_t pba_bar;
uint16_t entries;
uint32_t table_offset;
uint32_t pba_offset;
unsigned long *pending;
} VFIOMSIXInfo;
typedef struct VFIOPCIDevice {
PCIDevice pdev;
VFIODevice vbasedev;
VFIOINTx intx;
unsigned int config_size;
uint8_t *emulated_config_bits; /* QEMU emulated bits, little-endian */
off_t config_offset; /* Offset of config space region within device fd */
unsigned int rom_size;
off_t rom_offset; /* Offset of ROM region within device fd */
void *rom;
int msi_cap_size;
VFIOMSIVector *msi_vectors;
VFIOMSIXInfo *msix;
int nr_vectors; /* Number of MSI/MSIX vectors currently in use */
int interrupt; /* Current interrupt type */
VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
void *igd_opregion;
PCIHostDeviceAddress host;
EventNotifier err_notifier;
EventNotifier req_notifier;
int (*resetfn)(struct VFIOPCIDevice *);
uint32_t vendor_id;
uint32_t device_id;
uint32_t sub_vendor_id;
uint32_t sub_device_id;
uint32_t features;
#define VFIO_FEATURE_ENABLE_VGA_BIT 0
#define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
#define VFIO_FEATURE_ENABLE_REQ_BIT 1
#define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT)
#define VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT 2
#define VFIO_FEATURE_ENABLE_IGD_OPREGION \
(1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT)
OnOffAuto display;
int32_t bootindex;
uint32_t igd_gms;
OffAutoPCIBAR msix_relo;
uint8_t pm_cap;
uint8_t nv_gpudirect_clique;
bool pci_aer;
bool req_enabled;
bool has_flr;
bool has_pm_reset;
bool rom_read_failed;
bool no_kvm_intx;
bool no_kvm_msi;
bool no_kvm_msix;
bool no_geforce_quirks;
bool no_kvm_ioeventfd;
bool no_vfio_ioeventfd;
VFIODisplay *dpy;
} VFIOPCIDevice;
uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
void vfio_pci_write_config(PCIDevice *pdev,
uint32_t addr, uint32_t val, int len);
uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size);
void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size);
bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev);
void vfio_vga_quirk_setup(VFIOPCIDevice *vdev);
void vfio_vga_quirk_exit(VFIOPCIDevice *vdev);
void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev);
void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr);
void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr);
void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr);
void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev);
int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp);
void vfio_quirk_reset(VFIOPCIDevice *vdev);
extern const PropertyInfo qdev_prop_nv_gpudirect_clique;
int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp);
int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
struct vfio_region_info *info,
Error **errp);
void vfio_display_reset(VFIOPCIDevice *vdev);
int vfio_display_probe(VFIOPCIDevice *vdev, Error **errp);
void vfio_display_finalize(VFIOPCIDevice *vdev);
#endif /* HW_VFIO_VFIO_PCI_H */