diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 1e6e0fcad9..855ab55aa4 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -35,6 +35,7 @@ * mdts=,vsl=, \ * zoned.zasl=, \ * zoned.auto_transition=, \ + * sriov_max_vfs= \ * subsys= * -device nvme-ns,drive=,bus=,nsid=,\ * zoned=, \ @@ -106,6 +107,12 @@ * transitioned to zone state closed for resource management purposes. * Defaults to 'on'. * + * - `sriov_max_vfs` + * Indicates the maximum number of PCIe virtual functions supported + * by the controller. The default value is 0. Specifying a non-zero value + * enables reporting of both SR-IOV and ARI capabilities by the NVMe device. + * Virtual function controllers will not report SR-IOV capability. + * * nvme namespace device parameters * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * - `shared` @@ -160,6 +167,7 @@ #include "sysemu/block-backend.h" #include "sysemu/hostmem.h" #include "hw/pci/msix.h" +#include "hw/pci/pcie_sriov.h" #include "migration/vmstate.h" #include "nvme.h" @@ -176,6 +184,9 @@ #define NVME_TEMPERATURE_CRITICAL 0x175 #define NVME_NUM_FW_SLOTS 1 #define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB) +#define NVME_MAX_VFS 127 +#define NVME_VF_OFFSET 0x1 +#define NVME_VF_STRIDE 1 #define NVME_GUEST_ERR(trace, fmt, ...) \ do { \ @@ -5888,6 +5899,10 @@ static void nvme_ctrl_reset(NvmeCtrl *n) g_free(event); } + if (!pci_is_vf(&n->parent_obj) && n->params.sriov_max_vfs) { + pcie_sriov_pf_disable_vfs(&n->parent_obj); + } + n->aer_queued = 0; n->outstanding_aers = 0; n->qs_created = false; @@ -6569,6 +6584,29 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp) error_setg(errp, "vsl must be non-zero"); return; } + + if (params->sriov_max_vfs) { + if (!n->subsys) { + error_setg(errp, "subsystem is required for the use of SR-IOV"); + return; + } + + if (params->sriov_max_vfs > NVME_MAX_VFS) { + error_setg(errp, "sriov_max_vfs must be between 0 and %d", + NVME_MAX_VFS); + return; + } + + if (params->cmb_size_mb) { + error_setg(errp, "CMB is not supported with SR-IOV"); + return; + } + + if (n->pmr.dev) { + error_setg(errp, "PMR is not supported with SR-IOV"); + return; + } + } } static void nvme_init_state(NvmeCtrl *n) @@ -6626,6 +6664,20 @@ static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev) memory_region_set_enabled(&n->pmr.dev->mr, false); } +static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset, + uint64_t bar_size) +{ + uint16_t vf_dev_id = n->params.use_intel_id ? + PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME; + + pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id, + n->params.sriov_max_vfs, n->params.sriov_max_vfs, + NVME_VF_OFFSET, NVME_VF_STRIDE); + + pcie_sriov_pf_init_vf_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size); +} + static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) { uint8_t *pci_conf = pci_dev->config; @@ -6640,7 +6692,7 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) if (n->params.use_intel_id) { pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL); - pci_config_set_device_id(pci_conf, 0x5845); + pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_INTEL_NVME); } else { pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT); pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_REDHAT_NVME); @@ -6648,6 +6700,9 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS); pcie_endpoint_cap_init(pci_dev, 0x80); + if (n->params.sriov_max_vfs) { + pcie_ari_init(pci_dev, 0x100, 1); + } bar_size = QEMU_ALIGN_UP(n->reg_size, 4 * KiB); msix_table_offset = bar_size; @@ -6666,8 +6721,12 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) n->reg_size); memory_region_add_subregion(&n->bar0, 0, &n->iomem); - pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | - PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0); + if (pci_is_vf(pci_dev)) { + pcie_sriov_vf_register_bar(pci_dev, 0, &n->bar0); + } else { + pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0); + } ret = msix_init(pci_dev, n->params.msix_qsize, &n->bar0, 0, msix_table_offset, &n->bar0, 0, msix_pba_offset, 0, &err); @@ -6688,6 +6747,10 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) nvme_init_pmr(n, pci_dev); } + if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) { + nvme_init_sriov(n, pci_dev, 0x120, bar_size); + } + return 0; } @@ -6838,6 +6901,16 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) NvmeCtrl *n = NVME(pci_dev); NvmeNamespace *ns; Error *local_err = NULL; + NvmeCtrl *pn = NVME(pcie_sriov_get_pf(pci_dev)); + + if (pci_is_vf(pci_dev)) { + /* + * VFs derive settings from the parent. PF's lifespan exceeds + * that of VF's, so it's safe to share params.serial. + */ + memcpy(&n->params, &pn->params, sizeof(NvmeParams)); + n->subsys = pn->subsys; + } nvme_check_constraints(n, &local_err); if (local_err) { @@ -6902,6 +6975,11 @@ static void nvme_exit(PCIDevice *pci_dev) if (n->pmr.dev) { host_memory_backend_set_mapped(n->pmr.dev, false); } + + if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) { + pcie_sriov_pf_exit(pci_dev); + } + msix_uninit(pci_dev, &n->bar0, &n->bar0); memory_region_del_subregion(&n->bar0, &n->iomem); } @@ -6926,6 +7004,7 @@ static Property nvme_props[] = { DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0), DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl, params.auto_transition_zones, true), + DEFINE_PROP_UINT8("sriov_max_vfs", NvmeCtrl, params.sriov_max_vfs, 0), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index e41771604f..7e4bbd5606 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -24,7 +24,7 @@ #include "block/nvme.h" -#define NVME_MAX_CONTROLLERS 32 +#define NVME_MAX_CONTROLLERS 256 #define NVME_MAX_NAMESPACES 256 #define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000) @@ -406,6 +406,7 @@ typedef struct NvmeParams { uint8_t zasl; bool auto_transition_zones; bool legacy_cmb; + uint8_t sriov_max_vfs; } NvmeParams; typedef struct NvmeCtrl { diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index 898083b86f..d5ddea558b 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -238,6 +238,7 @@ #define PCI_DEVICE_ID_INTEL_82801BA_11 0x244e #define PCI_DEVICE_ID_INTEL_82801D 0x24CD #define PCI_DEVICE_ID_INTEL_ESB_9 0x25ab +#define PCI_DEVICE_ID_INTEL_NVME 0x5845 #define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000 #define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010 #define PCI_DEVICE_ID_INTEL_82371SB_2 0x7020