65501a745d
This adds the core of the QEMU VFIO-based PCI device assignment driver. To make use of this driver, enable CONFIG_VFIO, CONFIG_VFIO_IOMMU_TYPE1, and CONFIG_VFIO_PCI in your host Linux kernel config. Load the vfio-pci module. To assign device 0000:05:00.0 to a guest, do the following: for dev in $(ls /sys/bus/pci/devices/0000:05:00.0/iommu_group/devices); do vendor=$(cat /sys/bus/pci/devices/$dev/vendor); device=$(cat /sys/bus/pci/devices/$dev/device); if [ -e /sys/bus/pci/devices/$dev/driver ]; then echo $dev > /sys/bus/pci/devices/$dev/driver/unbind; fi; echo $vendor $device > /sys/bus/pci/drivers/vfio-pci/new_id; done See Documentation/vfio.txt in the Linux kernel tree for further description of IOMMU groups and VFIO. Then launch qemu including the option: -device vfio-pci,host=0000:05:00.0 Legacy PCI interrupts (INTx) currently make use of a kludge where we trap BAR accesses and assume the access is in response to an interrupt, therefore de-asserting and unmasking the interrupt. It's not quite as targeted as using the EOI for this, but it's self-contained and seems to work across all architectures. The side-effect is a significant performance slow-down for devices in INTx mode. Some devices, like graphics cards, don't really use their interrupt, so this can be turned off with the x-intx=off option, which disables INTx altogether. This should be considered an experimental option until we refine this code. Both MSI and MSI-X are supported and avoid these issues. Signed-off-by: Alex Williamson <alex.williamson@redhat.com> Acked-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
115 lines
3.2 KiB
C
115 lines
3.2 KiB
C
/*
 * vfio based device assignment support
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Alex Williamson <alex.williamson@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */
|
|
|
|
#ifndef HW_VFIO_PCI_INT_H
|
|
#define HW_VFIO_PCI_INT_H
|
|
|
|
#include "qemu-common.h"
|
|
#include "qemu-queue.h"
|
|
#include "pci.h"
|
|
#include "event_notifier.h"
|
|
|
|
typedef struct VFIOBAR {
|
|
off_t fd_offset; /* offset of BAR within device fd */
|
|
int fd; /* device fd, allows us to pass VFIOBAR as opaque data */
|
|
MemoryRegion mem; /* slow, read/write access */
|
|
MemoryRegion mmap_mem; /* direct mapped access */
|
|
void *mmap;
|
|
size_t size;
|
|
uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
|
|
uint8_t nr; /* cache the BAR number for debug */
|
|
} VFIOBAR;
|
|
|
|
typedef struct VFIOINTx {
|
|
bool pending; /* interrupt pending */
|
|
bool kvm_accel; /* set when QEMU bypass through KVM enabled */
|
|
uint8_t pin; /* which pin to pull for qemu_set_irq */
|
|
EventNotifier interrupt; /* eventfd triggered on interrupt */
|
|
EventNotifier unmask; /* eventfd for unmask on QEMU bypass */
|
|
PCIINTxRoute route; /* routing info for QEMU bypass */
|
|
bool disabled;
|
|
char *intx;
|
|
} VFIOINTx;
|
|
|
|
struct VFIODevice;
|
|
|
|
typedef struct VFIOMSIVector {
|
|
EventNotifier interrupt; /* eventfd triggered on interrupt */
|
|
struct VFIODevice *vdev; /* back pointer to device */
|
|
int virq; /* KVM irqchip route for QEMU bypass */
|
|
bool use;
|
|
} VFIOMSIVector;
|
|
|
|
/*
 * Interrupt mode identifiers.
 * NOTE(review): presumably the values stored in VFIODevice.interrupt
 * ("Current interrupt type") - confirm against the users of that field.
 */
enum {
    VFIO_INT_NONE = 0,
    VFIO_INT_INTx = 1,
    VFIO_INT_MSI = 2,
    VFIO_INT_MSIX = 3,
};
|
|
|
|
struct VFIOGroup;
|
|
|
|
typedef struct VFIOContainer {
|
|
int fd; /* /dev/vfio/vfio, empowered by the attached groups */
|
|
struct {
|
|
/* enable abstraction to support various iommu backends */
|
|
union {
|
|
MemoryListener listener; /* Used by type1 iommu */
|
|
};
|
|
void (*release)(struct VFIOContainer *);
|
|
} iommu_data;
|
|
QLIST_HEAD(, VFIOGroup) group_list;
|
|
QLIST_ENTRY(VFIOContainer) next;
|
|
} VFIOContainer;
|
|
|
|
/* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */
|
|
typedef struct VFIOMSIXInfo {
|
|
uint8_t table_bar;
|
|
uint8_t pba_bar;
|
|
uint16_t entries;
|
|
uint32_t table_offset;
|
|
uint32_t pba_offset;
|
|
MemoryRegion mmap_mem;
|
|
void *mmap;
|
|
} VFIOMSIXInfo;
|
|
|
|
typedef struct VFIODevice {
|
|
PCIDevice pdev;
|
|
int fd;
|
|
VFIOINTx intx;
|
|
unsigned int config_size;
|
|
off_t config_offset; /* Offset of config space region within device fd */
|
|
unsigned int rom_size;
|
|
off_t rom_offset; /* Offset of ROM region within device fd */
|
|
int msi_cap_size;
|
|
VFIOMSIVector *msi_vectors;
|
|
VFIOMSIXInfo *msix;
|
|
int nr_vectors; /* Number of MSI/MSIX vectors currently in use */
|
|
int interrupt; /* Current interrupt type */
|
|
VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
|
|
PCIHostDeviceAddress host;
|
|
QLIST_ENTRY(VFIODevice) next;
|
|
struct VFIOGroup *group;
|
|
bool reset_works;
|
|
} VFIODevice;
|
|
|
|
typedef struct VFIOGroup {
|
|
int fd;
|
|
int groupid;
|
|
VFIOContainer *container;
|
|
QLIST_HEAD(, VFIODevice) device_list;
|
|
QLIST_ENTRY(VFIOGroup) next;
|
|
QLIST_ENTRY(VFIOGroup) container_next;
|
|
} VFIOGroup;
|
|
|
|
#endif /* HW_VFIO_PCI_INT_H */
|