122173a583
A user can create a SR-IOV device by specifying the PF with the sriov-pf property of the VFs. The VFs must be added before the PF. A user-creatable VF must have PCIDeviceClass::sriov_vf_user_creatable set. Such a VF cannot refer to the PF because it is created before the PF. A PF that user-creatable VFs can be attached calls pcie_sriov_pf_init_from_user_created_vfs() during realization and pcie_sriov_pf_exit() when exiting. Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-Id: <20240715-sriov-v5-5-3f5539093ffc@daynix.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
370 lines
11 KiB
C
370 lines
11 KiB
C
#ifndef QEMU_PCI_DEVICE_H
|
|
#define QEMU_PCI_DEVICE_H
|
|
|
|
#include "hw/pci/pci.h"
|
|
#include "hw/pci/pcie.h"
|
|
|
|
#define TYPE_PCI_DEVICE "pci-device"
|
|
typedef struct PCIDeviceClass PCIDeviceClass;
|
|
DECLARE_OBJ_CHECKERS(PCIDevice, PCIDeviceClass,
|
|
PCI_DEVICE, TYPE_PCI_DEVICE)
|
|
|
|
/*
 * Implemented by devices that can be plugged on CXL buses.  In the spec, this
 * is actually a "CXL Component", but we name it device to match the PCI naming.
 */
#define INTERFACE_CXL_DEVICE "cxl-device"

/* Implemented by devices that can be plugged on PCI Express buses */
#define INTERFACE_PCIE_DEVICE "pci-express-device"

/* Implemented by devices that can be plugged on Conventional PCI buses */
#define INTERFACE_CONVENTIONAL_PCI_DEVICE "conventional-pci-device"
|
|
|
|
struct PCIDeviceClass {
|
|
DeviceClass parent_class;
|
|
|
|
void (*realize)(PCIDevice *dev, Error **errp);
|
|
PCIUnregisterFunc *exit;
|
|
PCIConfigReadFunc *config_read;
|
|
PCIConfigWriteFunc *config_write;
|
|
|
|
uint16_t vendor_id;
|
|
uint16_t device_id;
|
|
uint8_t revision;
|
|
uint16_t class_id;
|
|
uint16_t subsystem_vendor_id; /* only for header type = 0 */
|
|
uint16_t subsystem_id; /* only for header type = 0 */
|
|
|
|
const char *romfile; /* rom bar */
|
|
|
|
bool sriov_vf_user_creatable;
|
|
};
|
|
|
|
/*
 * Tag stored in PCIReqIDCache::type describing what kind of requester ID
 * the cached device provides.
 * NOTE(review): the exact meaning of each value is defined by the code that
 * fills the cache, which is not visible in this header.
 */
enum PCIReqIDType {
    PCI_REQ_ID_INVALID = 0,     /* zero value: no valid type recorded */
    PCI_REQ_ID_BDF,
    PCI_REQ_ID_SECONDARY_BUS,
    PCI_REQ_ID_MAX,             /* one past the last valid type */
};
typedef enum PCIReqIDType PCIReqIDType;
|
|
|
|
struct PCIReqIDCache {
|
|
PCIDevice *dev;
|
|
PCIReqIDType type;
|
|
};
|
|
typedef struct PCIReqIDCache PCIReqIDCache;
|
|
|
|
struct PCIDevice {
|
|
DeviceState qdev;
|
|
bool partially_hotplugged;
|
|
bool enabled;
|
|
|
|
/* PCI config space */
|
|
uint8_t *config;
|
|
|
|
/*
|
|
* Used to enable config checks on load. Note that writable bits are
|
|
* never checked even if set in cmask.
|
|
*/
|
|
uint8_t *cmask;
|
|
|
|
/* Used to implement R/W bytes */
|
|
uint8_t *wmask;
|
|
|
|
/* Used to implement RW1C(Write 1 to Clear) bytes */
|
|
uint8_t *w1cmask;
|
|
|
|
/* Used to allocate config space for capabilities. */
|
|
uint8_t *used;
|
|
|
|
/* the following fields are read only */
|
|
int32_t devfn;
|
|
/*
|
|
* Cached device to fetch requester ID from, to avoid the PCI tree
|
|
* walking every time we invoke PCI request (e.g., MSI). For
|
|
* conventional PCI root complex, this field is meaningless.
|
|
*/
|
|
PCIReqIDCache requester_id_cache;
|
|
char name[64];
|
|
PCIIORegion io_regions[PCI_NUM_REGIONS];
|
|
AddressSpace bus_master_as;
|
|
MemoryRegion bus_master_container_region;
|
|
MemoryRegion bus_master_enable_region;
|
|
|
|
/* do not access the following fields */
|
|
PCIConfigReadFunc *config_read;
|
|
PCIConfigWriteFunc *config_write;
|
|
|
|
/* Legacy PCI VGA regions */
|
|
MemoryRegion *vga_regions[QEMU_PCI_VGA_NUM_REGIONS];
|
|
bool has_vga;
|
|
|
|
/* Current IRQ levels. Used internally by the generic PCI code. */
|
|
uint8_t irq_state;
|
|
|
|
/* Capability bits */
|
|
uint32_t cap_present;
|
|
|
|
/* Offset of MSI-X capability in config space */
|
|
uint8_t msix_cap;
|
|
|
|
/* MSI-X entries */
|
|
int msix_entries_nr;
|
|
|
|
/* Space to store MSIX table & pending bit array */
|
|
uint8_t *msix_table;
|
|
uint8_t *msix_pba;
|
|
|
|
/* May be used by INTx or MSI during interrupt notification */
|
|
void *irq_opaque;
|
|
|
|
MSITriggerFunc *msi_trigger;
|
|
MSIPrepareMessageFunc *msi_prepare_message;
|
|
MSIxPrepareMessageFunc *msix_prepare_message;
|
|
|
|
/* MemoryRegion container for msix exclusive BAR setup */
|
|
MemoryRegion msix_exclusive_bar;
|
|
/* Memory Regions for MSIX table and pending bit entries. */
|
|
MemoryRegion msix_table_mmio;
|
|
MemoryRegion msix_pba_mmio;
|
|
/* Reference-count for entries actually in use by driver. */
|
|
unsigned *msix_entry_used;
|
|
/* MSIX function mask set or MSIX disabled */
|
|
bool msix_function_masked;
|
|
/* Version id needed for VMState */
|
|
int32_t version_id;
|
|
|
|
/* Offset of MSI capability in config space */
|
|
uint8_t msi_cap;
|
|
|
|
/* PCI Express */
|
|
PCIExpressDevice exp;
|
|
|
|
/* SHPC */
|
|
SHPCDevice *shpc;
|
|
|
|
/* Location of option rom */
|
|
char *romfile;
|
|
uint32_t romsize;
|
|
bool has_rom;
|
|
MemoryRegion rom;
|
|
uint32_t rom_bar;
|
|
|
|
/* INTx routing notifier */
|
|
PCIINTxRoutingNotifier intx_routing_notifier;
|
|
|
|
/* MSI-X notifiers */
|
|
MSIVectorUseNotifier msix_vector_use_notifier;
|
|
MSIVectorReleaseNotifier msix_vector_release_notifier;
|
|
MSIVectorPollNotifier msix_vector_poll_notifier;
|
|
|
|
/* ID of standby device in net_failover pair */
|
|
char *failover_pair_id;
|
|
uint32_t acpi_index;
|
|
|
|
char *sriov_pf;
|
|
};
|
|
|
|
/*
 * pci_intx: zero-based INTx index of @pci_dev.
 *
 * Reads the PCI_INTERRUPT_PIN byte of the device's config space and
 * subtracts one, so a register value of 0 yields -1 (assuming
 * pci_get_byte() returns the raw byte value).
 */
static inline int pci_intx(PCIDevice *pci_dev)
{
    return pci_get_byte(pci_dev->config + PCI_INTERRUPT_PIN) - 1;
}
|
|
|
|
static inline int pci_is_cxl(const PCIDevice *d)
|
|
{
|
|
return d->cap_present & QEMU_PCIE_CAP_CXL;
|
|
}
|
|
|
|
static inline int pci_is_express(const PCIDevice *d)
|
|
{
|
|
return d->cap_present & QEMU_PCI_CAP_EXPRESS;
|
|
}
|
|
|
|
static inline int pci_is_express_downstream_port(const PCIDevice *d)
|
|
{
|
|
uint8_t type;
|
|
|
|
if (!pci_is_express(d) || !d->exp.exp_cap) {
|
|
return 0;
|
|
}
|
|
|
|
type = pcie_cap_get_type(d);
|
|
|
|
return type == PCI_EXP_TYPE_DOWNSTREAM || type == PCI_EXP_TYPE_ROOT_PORT;
|
|
}
|
|
|
|
static inline int pci_is_vf(const PCIDevice *d)
|
|
{
|
|
return d->sriov_pf || d->exp.sriov_vf.pf != NULL;
|
|
}
|
|
|
|
static inline uint32_t pci_config_size(const PCIDevice *d)
|
|
{
|
|
return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE;
|
|
}
|
|
|
|
/*
 * pci_get_bdf: build the bus/devfn identifier of @dev.
 *
 * Combines the number of the bus @dev sits on with @dev->devfn through
 * PCI_BUILD_BDF().
 */
static inline uint16_t pci_get_bdf(PCIDevice *dev)
{
    return PCI_BUILD_BDF(pci_bus_num(pci_get_bus(dev)), dev->devfn);
}
|
|
|
|
/*
 * pci_set_power: propagate a power state change to @pci_dev.
 *
 * @pci_dev: device being powered on or off
 * @state: new power state, forwarded to pci_set_enabled()
 *
 * Has no effect on VFs (see the comment in the body).
 */
static inline void pci_set_power(PCIDevice *pci_dev, bool state)
{
    /*
     * Don't change the enabled state of VFs when powering on/off the device.
     *
     * When powering on, VFs must not be enabled immediately but they must
     * wait until the guest configures SR-IOV.
     * When powering off, their corresponding PFs will be reset and disable
     * VFs.
     */
    if (!pci_is_vf(pci_dev)) {
        pci_set_enabled(pci_dev, state);
    }
}
|
|
|
|
/*
 * Requester ID of @dev; implemented outside this header.
 * NOTE(review): presumably served from PCIDevice::requester_id_cache --
 * confirm against the definition.
 */
uint16_t pci_requester_id(PCIDevice *dev);
|
|
|
|
/* DMA access functions */
|
|
/*
 * pci_get_address_space: address space used by the pci_dma_*() helpers.
 *
 * Returns a pointer to @dev's embedded bus_master_as; the pointer is valid
 * for as long as @dev is.
 */
static inline AddressSpace *pci_get_address_space(PCIDevice *dev)
{
    return &dev->bus_master_as;
}
|
|
|
|
/**
|
|
* pci_dma_rw: Read from or write to an address space from PCI device.
|
|
*
|
|
* Return a MemTxResult indicating whether the operation succeeded
|
|
* or failed (eg unassigned memory, device rejected the transaction,
|
|
* IOMMU fault).
|
|
*
|
|
* @dev: #PCIDevice doing the memory access
|
|
* @addr: address within the #PCIDevice address space
|
|
* @buf: buffer with the data transferred
|
|
* @len: the number of bytes to read or write
|
|
* @dir: indicates the transfer direction
|
|
*/
|
|
static inline MemTxResult pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
|
|
void *buf, dma_addr_t len,
|
|
DMADirection dir, MemTxAttrs attrs)
|
|
{
|
|
return dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
|
|
dir, attrs);
|
|
}
|
|
|
|
/**
|
|
* pci_dma_read: Read from an address space from PCI device.
|
|
*
|
|
* Return a MemTxResult indicating whether the operation succeeded
|
|
* or failed (eg unassigned memory, device rejected the transaction,
|
|
* IOMMU fault). Called within RCU critical section.
|
|
*
|
|
* @dev: #PCIDevice doing the memory access
|
|
* @addr: address within the #PCIDevice address space
|
|
* @buf: buffer with the data transferred
|
|
* @len: length of the data transferred
|
|
*/
|
|
static inline MemTxResult pci_dma_read(PCIDevice *dev, dma_addr_t addr,
|
|
void *buf, dma_addr_t len)
|
|
{
|
|
return pci_dma_rw(dev, addr, buf, len,
|
|
DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED);
|
|
}
|
|
|
|
/**
|
|
* pci_dma_write: Write to address space from PCI device.
|
|
*
|
|
* Return a MemTxResult indicating whether the operation succeeded
|
|
* or failed (eg unassigned memory, device rejected the transaction,
|
|
* IOMMU fault).
|
|
*
|
|
* @dev: #PCIDevice doing the memory access
|
|
* @addr: address within the #PCIDevice address space
|
|
* @buf: buffer with the data transferred
|
|
* @len: the number of bytes to write
|
|
*/
|
|
static inline MemTxResult pci_dma_write(PCIDevice *dev, dma_addr_t addr,
|
|
const void *buf, dma_addr_t len)
|
|
{
|
|
return pci_dma_rw(dev, addr, (void *) buf, len,
|
|
DMA_DIRECTION_FROM_DEVICE, MEMTXATTRS_UNSPECIFIED);
|
|
}
|
|
|
|
#define PCI_DMA_DEFINE_LDST(_l, _s, _bits) \
|
|
static inline MemTxResult ld##_l##_pci_dma(PCIDevice *dev, \
|
|
dma_addr_t addr, \
|
|
uint##_bits##_t *val, \
|
|
MemTxAttrs attrs) \
|
|
{ \
|
|
return ld##_l##_dma(pci_get_address_space(dev), addr, val, attrs); \
|
|
} \
|
|
static inline MemTxResult st##_s##_pci_dma(PCIDevice *dev, \
|
|
dma_addr_t addr, \
|
|
uint##_bits##_t val, \
|
|
MemTxAttrs attrs) \
|
|
{ \
|
|
return st##_s##_dma(pci_get_address_space(dev), addr, val, attrs); \
|
|
}
|
|
|
|
PCI_DMA_DEFINE_LDST(ub, b, 8);
|
|
PCI_DMA_DEFINE_LDST(uw_le, w_le, 16)
|
|
PCI_DMA_DEFINE_LDST(l_le, l_le, 32);
|
|
PCI_DMA_DEFINE_LDST(q_le, q_le, 64);
|
|
PCI_DMA_DEFINE_LDST(uw_be, w_be, 16)
|
|
PCI_DMA_DEFINE_LDST(l_be, l_be, 32);
|
|
PCI_DMA_DEFINE_LDST(q_be, q_be, 64);
|
|
|
|
#undef PCI_DMA_DEFINE_LDST
|
|
|
|
/**
|
|
* pci_dma_map: Map device PCI address space range into host virtual address
|
|
* @dev: #PCIDevice to be accessed
|
|
* @addr: address within that device's address space
|
|
* @plen: pointer to length of buffer; updated on return to indicate
|
|
* if only a subset of the requested range has been mapped
|
|
* @dir: indicates the transfer direction
|
|
*
|
|
* Return: A host pointer, or %NULL if the resources needed to
|
|
* perform the mapping are exhausted (in that case *@plen
|
|
* is set to zero).
|
|
*/
|
|
static inline void *pci_dma_map(PCIDevice *dev, dma_addr_t addr,
|
|
dma_addr_t *plen, DMADirection dir)
|
|
{
|
|
return dma_memory_map(pci_get_address_space(dev), addr, plen, dir,
|
|
MEMTXATTRS_UNSPECIFIED);
|
|
}
|
|
|
|
static inline void pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len,
|
|
DMADirection dir, dma_addr_t access_len)
|
|
{
|
|
dma_memory_unmap(pci_get_address_space(dev), buffer, len, dir, access_len);
|
|
}
|
|
|
|
/**
 * pci_dma_sglist_init: initialise a scatter/gather list for a PCI device
 * @qsg: list to initialise
 * @dev: #PCIDevice owning the list; also supplies the DMA address space
 * @alloc_hint: passed through to qemu_sglist_init()
 */
static inline void pci_dma_sglist_init(QEMUSGList *qsg, PCIDevice *dev,
                                       int alloc_hint)
{
    qemu_sglist_init(qsg, DEVICE(dev), alloc_hint, pci_get_address_space(dev));
}
|
|
|
|
/*
 * VMState description referenced by the VMSTATE_PCI_DEVICE*() field macros;
 * defined outside this header.
 */
extern const VMStateDescription vmstate_pci_device;
|
|
|
|
/*
 * VMSTATE_PCI_DEVICE: VMState field initialiser for a PCIDevice embedded by
 * value in @_state.
 *
 * @_field: name of the PCIDevice member
 * @_state: type of the structure containing @_field
 *
 * The field is described by vmstate_pci_device and flagged VMS_STRUCT.
 */
#define VMSTATE_PCI_DEVICE(_field, _state) {                         \
    .name       = (stringify(_field)),                               \
    .size       = sizeof(PCIDevice),                                 \
    .vmsd       = &vmstate_pci_device,                               \
    .flags      = VMS_STRUCT,                                        \
    .offset     = vmstate_offset_value(_state, _field, PCIDevice),   \
}
|
|
|
|
/*
 * VMSTATE_PCI_DEVICE_POINTER: VMState field initialiser for a PCIDevice
 * referenced through a pointer member of @_state.
 *
 * @_field: name of the PCIDevice pointer member
 * @_state: type of the structure containing @_field
 *
 * The field is described by vmstate_pci_device and flagged
 * VMS_STRUCT | VMS_POINTER.
 */
#define VMSTATE_PCI_DEVICE_POINTER(_field, _state) {                 \
    .name       = (stringify(_field)),                               \
    .size       = sizeof(PCIDevice),                                 \
    .vmsd       = &vmstate_pci_device,                               \
    .flags      = VMS_STRUCT | VMS_POINTER,                          \
    .offset     = vmstate_offset_pointer(_state, _field, PCIDevice), \
}
|
|
|
|
#endif
|