diff --git a/MAINTAINERS b/MAINTAINERS index 0087e4f1c7..42e59143b7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2407,6 +2407,7 @@ F: docs/system/devices/virtio-snd.rst nvme M: Keith Busch M: Klaus Jensen +R: Jesper Devantier L: qemu-block@nongnu.org S: Supported F: hw/nvme/* diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst index 4ea957baed..d2b1ca9645 100644 --- a/docs/system/devices/nvme.rst +++ b/docs/system/devices/nvme.rst @@ -81,6 +81,13 @@ There are a number of parameters available: Set the UUID of the namespace. This will be reported as a "Namespace UUID" descriptor in the Namespace Identification Descriptor List. +``nguid`` + Set the NGUID of the namespace. This will be reported as a "Namespace Globally + Unique Identifier" descriptor in the Namespace Identification Descriptor List. + It is specified as a string of hexadecimal digits containing exactly 16 bytes + or "auto" for a random value. An optional '-' separator could be used to group + bytes. If not specified the NGUID will remain all zeros. + ``eui64`` Set the EUI-64 of the namespace. This will be reported as a "IEEE Extended Unique Identifier" descriptor in the Namespace Identification Descriptor List. diff --git a/hw/core/machine.c b/hw/core/machine.c index f64dc5c432..e483b34459 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -102,6 +102,7 @@ GlobalProperty hw_compat_5_2[] = { { "PIIX4_PM", "smm-compat", "on"}, { "virtio-blk-device", "report-discard-granularity", "off" }, { "virtio-net-pci-base", "vectors", "3"}, + { "nvme", "msix-exclusive-bar", "on"}, }; const size_t hw_compat_5_2_len = G_N_ELEMENTS(hw_compat_5_2); diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 76fe039704..036b15403a 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -2855,7 +2855,7 @@ static inline uint16_t nvme_check_copy_mcl(NvmeNamespace *ns, uint32_t nlb; nvme_copy_source_range_parse(iocb->ranges, idx, iocb->format, NULL, &nlb, NULL, NULL, NULL); - copy_len += nlb + 1; + copy_len += nlb; } if (copy_len > ns->id_ns.mcl) { @@ -5640,6 +5640,10 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req) NvmeIdNsDescr hdr; uint8_t v[NVME_NIDL_UUID]; } QEMU_PACKED uuid = {}; + struct { + NvmeIdNsDescr hdr; + uint8_t v[NVME_NIDL_NGUID]; + } QEMU_PACKED nguid = {}; struct { NvmeIdNsDescr hdr; uint64_t v; @@ -5668,6 +5672,14 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req) pos += sizeof(uuid); } + if (!nvme_nguid_is_null(&ns->params.nguid)) { + nguid.hdr.nidt = NVME_NIDT_NGUID; + nguid.hdr.nidl = NVME_NIDL_NGUID; + memcpy(nguid.v, ns->params.nguid.data, NVME_NIDL_NGUID); + memcpy(pos, &nguid, sizeof(nguid)); + pos += sizeof(nguid); + } + if (ns->params.eui64) { eui64.hdr.nidt = NVME_NIDT_EUI64; eui64.hdr.nidl = NVME_NIDL_EUI64; @@ -7798,6 +7810,11 @@ static bool nvme_check_params(NvmeCtrl *n, Error **errp) } if (n->pmr.dev) { + if (params->msix_exclusive_bar) { + error_setg(errp, "not enough BARs available to enable PMR"); + return false; + } + if (host_memory_backend_is_mapped(n->pmr.dev)) { error_setg(errp, "can't use already busy memdev: %s", object_get_canonical_path_component(OBJECT(n->pmr.dev))); @@ -8003,13 +8020,18 @@ static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev) memory_region_set_enabled(&n->pmr.dev->mr, false); } -static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs, - unsigned *msix_table_offset, - unsigned *msix_pba_offset) +static uint64_t nvme_mbar_size(unsigned total_queues, unsigned total_irqs, + unsigned *msix_table_offset, + unsigned *msix_pba_offset) { - uint64_t bar_size, msix_table_size, msix_pba_size; + uint64_t bar_size, msix_table_size; bar_size = sizeof(NvmeBar) + 2 * total_queues * NVME_DB_SIZE; + + if (total_irqs == 0) { + goto out; + } + bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB); if (msix_table_offset) { @@ -8024,11 +8046,10 @@ static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs, *msix_pba_offset = bar_size; } - msix_pba_size = QEMU_ALIGN_UP(total_irqs, 64) / 8; - bar_size += msix_pba_size; + bar_size += QEMU_ALIGN_UP(total_irqs, 64) / 8; - bar_size = pow2ceil(bar_size); - return bar_size; +out: + return pow2ceil(bar_size); } static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset) @@ -8036,7 +8057,7 @@ static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset) uint16_t vf_dev_id = n->params.use_intel_id ? PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME; NvmePriCtrlCap *cap = &n->pri_ctrl_cap; - uint64_t bar_size = nvme_bar_size(le16_to_cpu(cap->vqfrsm), + uint64_t bar_size = nvme_mbar_size(le16_to_cpu(cap->vqfrsm), le16_to_cpu(cap->vifrsm), NULL, NULL); @@ -8075,7 +8096,7 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) ERRP_GUARD(); uint8_t *pci_conf = pci_dev->config; uint64_t bar_size; - unsigned msix_table_offset, msix_pba_offset; + unsigned msix_table_offset = 0, msix_pba_offset = 0; int ret; pci_conf[PCI_INTERRUPT_PIN] = 1; @@ -8097,24 +8118,38 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) pcie_ari_init(pci_dev, 0x100); } - /* add one to max_ioqpairs to account for the admin queue pair */ - bar_size = nvme_bar_size(n->params.max_ioqpairs + 1, n->params.msix_qsize, - &msix_table_offset, &msix_pba_offset); - - memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size); - memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", - msix_table_offset); - memory_region_add_subregion(&n->bar0, 0, &n->iomem); - - if (pci_is_vf(pci_dev)) { - pcie_sriov_vf_register_bar(pci_dev, 0, &n->bar0); - } else { + if (n->params.msix_exclusive_bar && !pci_is_vf(pci_dev)) { + bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1, 0, NULL, NULL); + memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", + bar_size); pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | - PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0); + PCI_BASE_ADDRESS_MEM_TYPE_64, &n->iomem); + ret = msix_init_exclusive_bar(pci_dev, n->params.msix_qsize, 4, errp); + } else { + assert(n->params.msix_qsize >= 1); + + /* add one to max_ioqpairs to account for the admin queue pair */ + bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1, + n->params.msix_qsize, &msix_table_offset, + &msix_pba_offset); + + memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size); + memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", + msix_table_offset); + memory_region_add_subregion(&n->bar0, 0, &n->iomem); + + if (pci_is_vf(pci_dev)) { + pcie_sriov_vf_register_bar(pci_dev, 0, &n->bar0); + } else { + pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0); + } + + ret = msix_init(pci_dev, n->params.msix_qsize, + &n->bar0, 0, msix_table_offset, + &n->bar0, 0, msix_pba_offset, 0, errp); } - ret = msix_init(pci_dev, n->params.msix_qsize, - &n->bar0, 0, msix_table_offset, - &n->bar0, 0, msix_pba_offset, 0, errp); + if (ret == -ENOTSUP) { /* report that msix is not supported, but do not error out */ warn_report_err(*errp); @@ -8309,9 +8344,15 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) if (pci_is_vf(pci_dev)) { /* * VFs derive settings from the parent. PF's lifespan exceeds - * that of VF's, so it's safe to share params.serial. + * that of VF's. */ memcpy(&n->params, &pn->params, sizeof(NvmeParams)); + + /* + * Set PF's serial value to a new string memory to prevent 'serial' + * property object release of PF when a VF is removed from the system. + */ + n->params.serial = g_strdup(pn->params.serial); n->subsys = pn->subsys; } @@ -8412,6 +8453,8 @@ static Property nvme_props[] = { params.sriov_max_vi_per_vf, 0), DEFINE_PROP_UINT8("sriov_max_vq_per_vf", NvmeCtrl, params.sriov_max_vq_per_vf, 0), + DEFINE_PROP_BOOL("msix-exclusive-bar", NvmeCtrl, params.msix_exclusive_bar, + false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build index 1a6a2ca2f3..7d5caa53c2 100644 --- a/hw/nvme/meson.build +++ b/hw/nvme/meson.build @@ -1 +1 @@ -system_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c')) +system_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c', 'nguid.c')) \ No newline at end of file diff --git a/hw/nvme/nguid.c b/hw/nvme/nguid.c new file mode 100644 index 0000000000..829832bd9f --- /dev/null +++ b/hw/nvme/nguid.c @@ -0,0 +1,187 @@ +/* + * QEMU NVMe NGUID functions + * + * Copyright 2024 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include "qemu/osdep.h" +#include "qapi/visitor.h" +#include "qemu/ctype.h" +#include "nvme.h" + +#define NGUID_SEPARATOR '-' + +#define NGUID_VALUE_AUTO "auto" + +#define NGUID_FMT \ + "%02hhx%02hhx%02hhx%02hhx" \ + "%02hhx%02hhx%02hhx%02hhx" \ + "%02hhx%02hhx%02hhx%02hhx" \ + "%02hhx%02hhx%02hhx%02hhx" + +#define NGUID_STR_LEN (2 * NGUID_LEN + 1) + +bool nvme_nguid_is_null(const NvmeNGUID *nguid) +{ + static NvmeNGUID null_nguid; + return memcmp(nguid, &null_nguid, sizeof(NvmeNGUID)) == 0; +} + +static void nvme_nguid_generate(NvmeNGUID *out) +{ + int i; + uint32_t x; + + QEMU_BUILD_BUG_ON((NGUID_LEN % sizeof(x)) != 0); + + for (i = 0; i < NGUID_LEN; i += sizeof(x)) { + x = g_random_int(); + memcpy(&out->data[i], &x, sizeof(x)); + } +} + +/* + * The Linux Kernel typically prints the NGUID of an NVMe namespace using the + * same format as the UUID. For instance: + * + * $ cat /sys/class/block/nvme0n1/nguid + * e9accd3b-8390-4e13-167c-f0593437f57d + * + * When there is no UUID but there is NGUID the Kernel will print the NGUID as + * wwid and it won't use the UUID format: + * + * $ cat /sys/class/block/nvme0n1/wwid + * eui.e9accd3b83904e13167cf0593437f57d + * + * The NGUID has different fields compared to the UUID, so the grouping used in + * the UUID format has no relation with the 3 fields of the NGUID. + * + * This implementation won't expect a strict format as the UUID one and instead + * it will admit any string of hexadecimal digits. Byte groups could be created + * using the '-' separator. The number of bytes needs to be exactly 16 and the + * separator '-' has to be exactly in a byte boundary. The following are + * examples of accepted formats for the NGUID string: + * + * nguid="e9accd3b-8390-4e13-167c-f0593437f57d" + * nguid="e9accd3b83904e13167cf0593437f57d" + * nguid="FEDCBA9876543210-ABCDEF-0123456789" + */ +static bool nvme_nguid_is_valid(const char *str) +{ + int i; + int digit_count = 0; + + for (i = 0; i < strlen(str); i++) { + const char c = str[i]; + if (qemu_isxdigit(c)) { + digit_count++; + continue; + } + if (c == NGUID_SEPARATOR) { + /* + * We need to make sure the separator is in a byte boundary, the + * string does not start with the separator and they are not back to + * back "--". + */ + if ((i > 0) && (str[i - 1] != NGUID_SEPARATOR) && + (digit_count % 2) == 0) { + continue; + } + } + return false; + } + /* + * The string should have the correct byte length and not finish with the + * separator + */ + return (digit_count == (2 * NGUID_LEN)) && (str[i - 1] != NGUID_SEPARATOR); +} + +static int nvme_nguid_parse(const char *str, NvmeNGUID *nguid) +{ + uint8_t *id = &nguid->data[0]; + int ret = 0; + int i; + const char *ptr = str; + + if (!nvme_nguid_is_valid(str)) { + return -1; + } + + for (i = 0; i < NGUID_LEN; i++) { + ret = sscanf(ptr, "%02hhx", &id[i]); + if (ret != 1) { + return -1; + } + ptr += 2; + if (*ptr == NGUID_SEPARATOR) { + ptr++; + } + } + + return 0; +} + +/* + * When converted back to string this implementation will use a raw hex number + * with no separators, for instance: + * + * "e9accd3b83904e13167cf0593437f57d" + */ +static void nvme_nguid_stringify(const NvmeNGUID *nguid, char *out) +{ + const uint8_t *id = &nguid->data[0]; + snprintf(out, NGUID_STR_LEN, NGUID_FMT, + id[0], id[1], id[2], id[3], id[4], id[5], id[6], id[7], + id[8], id[9], id[10], id[11], id[12], id[13], id[14], id[15]); +} + +static void get_nguid(Object *obj, Visitor *v, const char *name, void *opaque, + Error **errp) +{ + Property *prop = opaque; + NvmeNGUID *nguid = object_field_prop_ptr(obj, prop); + char buffer[NGUID_STR_LEN]; + char *p = buffer; + + nvme_nguid_stringify(nguid, buffer); + + visit_type_str(v, name, &p, errp); +} + +static void set_nguid(Object *obj, Visitor *v, const char *name, void *opaque, + Error **errp) +{ + Property *prop = opaque; + NvmeNGUID *nguid = object_field_prop_ptr(obj, prop); + char *str; + + if (!visit_type_str(v, name, &str, errp)) { + return; + } + + if (!strcmp(str, NGUID_VALUE_AUTO)) { + nvme_nguid_generate(nguid); + } else if (nvme_nguid_parse(str, nguid) < 0) { + error_set_from_qdev_prop_error(errp, EINVAL, obj, name, str); + } + g_free(str); +} + +const PropertyInfo qdev_prop_nguid = { + .name = "str", + .description = + "NGUID or \"" NGUID_VALUE_AUTO "\" for random value", + .get = get_nguid, + .set = set_nguid, +}; diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index 0eabcf5cf5..ea8db175db 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -89,6 +89,7 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) id_ns->mcl = cpu_to_le32(ns->params.mcl); id_ns->msrc = ns->params.msrc; id_ns->eui64 = cpu_to_be64(ns->params.eui64); + memcpy(&id_ns->nguid, &ns->params.nguid.data, sizeof(id_ns->nguid)); ds = 31 - clz32(ns->blkconf.logical_block_size); ms = ns->params.ms; @@ -797,6 +798,7 @@ static Property nvme_ns_props[] = { DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, true), DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0), DEFINE_PROP_UUID_NODEFAULT("uuid", NvmeNamespace, params.uuid), + DEFINE_PROP_NGUID_NODEFAULT("nguid", NvmeNamespace, params.nguid), DEFINE_PROP_UINT64("eui64", NvmeNamespace, params.eui64, 0), DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0), DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0), diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 5f2ae7b28b..bed8191bd5 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -171,13 +171,27 @@ static const uint8_t nvme_fdp_evf_shifts[FDP_EVT_MAX] = { [FDP_EVT_RUH_IMPLICIT_RU_CHANGE] = 33, }; +#define NGUID_LEN 16 + +typedef struct { + uint8_t data[NGUID_LEN]; +} NvmeNGUID; + +bool nvme_nguid_is_null(const NvmeNGUID *nguid); + +extern const PropertyInfo qdev_prop_nguid; + +#define DEFINE_PROP_NGUID_NODEFAULT(_name, _state, _field) \ + DEFINE_PROP(_name, _state, _field, qdev_prop_nguid, NvmeNGUID) + typedef struct NvmeNamespaceParams { - bool detached; - bool shared; - uint32_t nsid; - QemuUUID uuid; - uint64_t eui64; - bool eui64_default; + bool detached; + bool shared; + uint32_t nsid; + QemuUUID uuid; + NvmeNGUID nguid; + uint64_t eui64; + bool eui64_default; uint16_t ms; uint8_t mset; @@ -522,6 +536,7 @@ typedef struct NvmeParams { uint16_t sriov_vi_flexible; uint8_t sriov_max_vq_per_vf; uint8_t sriov_max_vi_per_vf; + bool msix_exclusive_bar; } NvmeParams; typedef struct NvmeCtrl {