hw/nvme updates

-----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCgAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmXwj+wACgkQTeGvMW1P
 DelOsAf+Jg51zf3vtWpe4MS/WtULjSr5GtnXMJ5hkHS0WdKOiLW3P+pUZXbsohmh
 faVlYeCWptF1CFGfxBf4Trc7XzJy8J6W1YJEofs/07hIAnazo9pwk5shoVu4oiex
 HVsBg7/9y7DuiEEg1MRvVvW895cP60WmG1AqU63SYwrVgxZ51ZH0XNuyRhQeYC/6
 OSXJ3FDYu2iJQ58uEzGEwv8vhskIpEFTdz0J6gQVxIdzFBbuk87VgZo6pqwgfMBm
 /65K85TgFBT4SASc7a2iSUv+iAqSCA6Jdy0VWxCYCikiv5nuPCMCrlbvqcVp+i2B
 GKtgfFXhtgepxx6jmYd03EkRjCrxUA==
 =W3gg
 -----END PGP SIGNATURE-----

Merge tag 'nvme-next-pull-request' of https://gitlab.com/birkelund/qemu into staging

hw/nvme updates

# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCgAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmXwj+wACgkQTeGvMW1P
# DelOsAf+Jg51zf3vtWpe4MS/WtULjSr5GtnXMJ5hkHS0WdKOiLW3P+pUZXbsohmh
# faVlYeCWptF1CFGfxBf4Trc7XzJy8J6W1YJEofs/07hIAnazo9pwk5shoVu4oiex
# HVsBg7/9y7DuiEEg1MRvVvW895cP60WmG1AqU63SYwrVgxZ51ZH0XNuyRhQeYC/6
# OSXJ3FDYu2iJQ58uEzGEwv8vhskIpEFTdz0J6gQVxIdzFBbuk87VgZo6pqwgfMBm
# /65K85TgFBT4SASc7a2iSUv+iAqSCA6Jdy0VWxCYCikiv5nuPCMCrlbvqcVp+i2B
# GKtgfFXhtgepxx6jmYd03EkRjCrxUA==
# =W3gg
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 12 Mar 2024 17:25:00 GMT
# gpg:                using RSA key 522833AA75E2DCE6A24766C04DE1AF316D4F0DE9
# gpg: Good signature from "Klaus Jensen <its@irrelevant.dk>" [full]
# gpg:                 aka "Klaus Jensen <k.jensen@samsung.com>" [full]
# Primary key fingerprint: DDCA 4D9C 9EF9 31CC 3468  4272 63D5 6FC5 E55D A838
#      Subkey fingerprint: 5228 33AA 75E2 DCE6 A247  66C0 4DE1 AF31 6D4F 0DE9

* tag 'nvme-next-pull-request' of https://gitlab.com/birkelund/qemu:
  hw/nvme: add machine compatibility parameter to enable msix exclusive bar
  hw/nvme: generalize the mbar size helper
  hw/nvme: Add NVMe NGUID property
  MAINTAINERS: add Jesper as reviewer on hw/nvme
  hw/nvme: fix invalid check on mcl
  hw/nvme: separate 'serial' property for VFs

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2024-03-12 21:32:51 +00:00
commit 7e52d0b7c1
8 changed files with 291 additions and 35 deletions

View File

@ -2407,6 +2407,7 @@ F: docs/system/devices/virtio-snd.rst
nvme
M: Keith Busch <kbusch@kernel.org>
M: Klaus Jensen <its@irrelevant.dk>
R: Jesper Devantier <foss@defmacro.it>
L: qemu-block@nongnu.org
S: Supported
F: hw/nvme/*

View File

@ -81,6 +81,13 @@ There are a number of parameters available:
Set the UUID of the namespace. This will be reported as a "Namespace UUID"
descriptor in the Namespace Identification Descriptor List.
``nguid``
Set the NGUID of the namespace. This will be reported as a "Namespace Globally
Unique Identifier" descriptor in the Namespace Identification Descriptor List.
It is specified either as a string of hexadecimal digits encoding exactly 16
bytes, or as "auto" for a random value. An optional '-' separator may be used
to group bytes, provided it falls on a byte boundary. If not specified, the
NGUID will remain all zeros.
``eui64``
Set the EUI-64 of the namespace. This will be reported as an "IEEE Extended
Unique Identifier" descriptor in the Namespace Identification Descriptor List.

View File

@ -102,6 +102,7 @@ GlobalProperty hw_compat_5_2[] = {
{ "PIIX4_PM", "smm-compat", "on"},
{ "virtio-blk-device", "report-discard-granularity", "off" },
{ "virtio-net-pci-base", "vectors", "3"},
{ "nvme", "msix-exclusive-bar", "on"},
};
const size_t hw_compat_5_2_len = G_N_ELEMENTS(hw_compat_5_2);

View File

@ -2855,7 +2855,7 @@ static inline uint16_t nvme_check_copy_mcl(NvmeNamespace *ns,
uint32_t nlb;
nvme_copy_source_range_parse(iocb->ranges, idx, iocb->format, NULL,
&nlb, NULL, NULL, NULL);
copy_len += nlb + 1;
copy_len += nlb;
}
if (copy_len > ns->id_ns.mcl) {
@ -5640,6 +5640,10 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
NvmeIdNsDescr hdr;
uint8_t v[NVME_NIDL_UUID];
} QEMU_PACKED uuid = {};
struct {
NvmeIdNsDescr hdr;
uint8_t v[NVME_NIDL_NGUID];
} QEMU_PACKED nguid = {};
struct {
NvmeIdNsDescr hdr;
uint64_t v;
@ -5668,6 +5672,14 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
pos += sizeof(uuid);
}
if (!nvme_nguid_is_null(&ns->params.nguid)) {
nguid.hdr.nidt = NVME_NIDT_NGUID;
nguid.hdr.nidl = NVME_NIDL_NGUID;
memcpy(nguid.v, ns->params.nguid.data, NVME_NIDL_NGUID);
memcpy(pos, &nguid, sizeof(nguid));
pos += sizeof(nguid);
}
if (ns->params.eui64) {
eui64.hdr.nidt = NVME_NIDT_EUI64;
eui64.hdr.nidl = NVME_NIDL_EUI64;
@ -7798,6 +7810,11 @@ static bool nvme_check_params(NvmeCtrl *n, Error **errp)
}
if (n->pmr.dev) {
if (params->msix_exclusive_bar) {
error_setg(errp, "not enough BARs available to enable PMR");
return false;
}
if (host_memory_backend_is_mapped(n->pmr.dev)) {
error_setg(errp, "can't use already busy memdev: %s",
object_get_canonical_path_component(OBJECT(n->pmr.dev)));
@ -8003,13 +8020,18 @@ static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev)
memory_region_set_enabled(&n->pmr.dev->mr, false);
}
static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs,
unsigned *msix_table_offset,
unsigned *msix_pba_offset)
static uint64_t nvme_mbar_size(unsigned total_queues, unsigned total_irqs,
unsigned *msix_table_offset,
unsigned *msix_pba_offset)
{
uint64_t bar_size, msix_table_size, msix_pba_size;
uint64_t bar_size, msix_table_size;
bar_size = sizeof(NvmeBar) + 2 * total_queues * NVME_DB_SIZE;
if (total_irqs == 0) {
goto out;
}
bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB);
if (msix_table_offset) {
@ -8024,11 +8046,10 @@ static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs,
*msix_pba_offset = bar_size;
}
msix_pba_size = QEMU_ALIGN_UP(total_irqs, 64) / 8;
bar_size += msix_pba_size;
bar_size += QEMU_ALIGN_UP(total_irqs, 64) / 8;
bar_size = pow2ceil(bar_size);
return bar_size;
out:
return pow2ceil(bar_size);
}
static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
@ -8036,7 +8057,7 @@ static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
uint16_t vf_dev_id = n->params.use_intel_id ?
PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME;
NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
uint64_t bar_size = nvme_bar_size(le16_to_cpu(cap->vqfrsm),
uint64_t bar_size = nvme_mbar_size(le16_to_cpu(cap->vqfrsm),
le16_to_cpu(cap->vifrsm),
NULL, NULL);
@ -8075,7 +8096,7 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
ERRP_GUARD();
uint8_t *pci_conf = pci_dev->config;
uint64_t bar_size;
unsigned msix_table_offset, msix_pba_offset;
unsigned msix_table_offset = 0, msix_pba_offset = 0;
int ret;
pci_conf[PCI_INTERRUPT_PIN] = 1;
@ -8097,24 +8118,38 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
pcie_ari_init(pci_dev, 0x100);
}
/* add one to max_ioqpairs to account for the admin queue pair */
bar_size = nvme_bar_size(n->params.max_ioqpairs + 1, n->params.msix_qsize,
&msix_table_offset, &msix_pba_offset);
memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size);
memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
msix_table_offset);
memory_region_add_subregion(&n->bar0, 0, &n->iomem);
if (pci_is_vf(pci_dev)) {
pcie_sriov_vf_register_bar(pci_dev, 0, &n->bar0);
} else {
if (n->params.msix_exclusive_bar && !pci_is_vf(pci_dev)) {
bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1, 0, NULL, NULL);
memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
bar_size);
pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0);
PCI_BASE_ADDRESS_MEM_TYPE_64, &n->iomem);
ret = msix_init_exclusive_bar(pci_dev, n->params.msix_qsize, 4, errp);
} else {
assert(n->params.msix_qsize >= 1);
/* add one to max_ioqpairs to account for the admin queue pair */
bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1,
n->params.msix_qsize, &msix_table_offset,
&msix_pba_offset);
memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size);
memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
msix_table_offset);
memory_region_add_subregion(&n->bar0, 0, &n->iomem);
if (pci_is_vf(pci_dev)) {
pcie_sriov_vf_register_bar(pci_dev, 0, &n->bar0);
} else {
pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0);
}
ret = msix_init(pci_dev, n->params.msix_qsize,
&n->bar0, 0, msix_table_offset,
&n->bar0, 0, msix_pba_offset, 0, errp);
}
ret = msix_init(pci_dev, n->params.msix_qsize,
&n->bar0, 0, msix_table_offset,
&n->bar0, 0, msix_pba_offset, 0, errp);
if (ret == -ENOTSUP) {
/* report that msix is not supported, but do not error out */
warn_report_err(*errp);
@ -8309,9 +8344,15 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
if (pci_is_vf(pci_dev)) {
/*
* VFs derive settings from the parent. PF's lifespan exceeds
* that of VF's, so it's safe to share params.serial.
* that of VF's.
*/
memcpy(&n->params, &pn->params, sizeof(NvmeParams));
/*
* Set PF's serial value to a new string memory to prevent 'serial'
* property object release of PF when a VF is removed from the system.
*/
n->params.serial = g_strdup(pn->params.serial);
n->subsys = pn->subsys;
}
@ -8412,6 +8453,8 @@ static Property nvme_props[] = {
params.sriov_max_vi_per_vf, 0),
DEFINE_PROP_UINT8("sriov_max_vq_per_vf", NvmeCtrl,
params.sriov_max_vq_per_vf, 0),
DEFINE_PROP_BOOL("msix-exclusive-bar", NvmeCtrl, params.msix_exclusive_bar,
false),
DEFINE_PROP_END_OF_LIST(),
};

View File

@ -1 +1 @@
system_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c'))
system_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c', 'nguid.c'))

187
hw/nvme/nguid.c Normal file
View File

@ -0,0 +1,187 @@
/*
* QEMU NVMe NGUID functions
*
* Copyright 2024 Google LLC
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*/
#include "qemu/osdep.h"
#include "qapi/visitor.h"
#include "qemu/ctype.h"
#include "nvme.h"
#define NGUID_SEPARATOR '-'
#define NGUID_VALUE_AUTO "auto"
#define NGUID_FMT \
"%02hhx%02hhx%02hhx%02hhx" \
"%02hhx%02hhx%02hhx%02hhx" \
"%02hhx%02hhx%02hhx%02hhx" \
"%02hhx%02hhx%02hhx%02hhx"
#define NGUID_STR_LEN (2 * NGUID_LEN + 1)
/*
 * Report whether an NGUID is unset.
 *
 * Returns true when every byte of *nguid is zero, false as soon as a non-zero
 * byte is found.
 */
bool nvme_nguid_is_null(const NvmeNGUID *nguid)
{
    const uint8_t *bytes = (const uint8_t *)nguid;
    size_t idx;

    for (idx = 0; idx < sizeof(NvmeNGUID); idx++) {
        if (bytes[idx] != 0) {
            return false;
        }
    }

    return true;
}
/*
 * Fill *out with random bytes.
 *
 * The identifier is produced one 32-bit word at a time from g_random_int();
 * the build-time check guarantees that the NGUID length is a whole number of
 * such words, so the last copy never runs past the end of out->data.
 */
static void nvme_nguid_generate(NvmeNGUID *out)
{
    uint32_t word;
    uint8_t *dst = &out->data[0];
    uint8_t *const end = &out->data[0] + NGUID_LEN;

    QEMU_BUILD_BUG_ON((NGUID_LEN % sizeof(word)) != 0);

    while (dst < end) {
        word = g_random_int();
        memcpy(dst, &word, sizeof(word));
        dst += sizeof(word);
    }
}
/*
 * The Linux Kernel typically prints the NGUID of an NVMe namespace using the
 * same format as the UUID. For instance:
 *
 * $ cat /sys/class/block/nvme0n1/nguid
 * e9accd3b-8390-4e13-167c-f0593437f57d
 *
 * When there is no UUID but there is an NGUID, the Kernel prints the NGUID as
 * the wwid and does not use the UUID format:
 *
 * $ cat /sys/class/block/nvme0n1/wwid
 * eui.e9accd3b83904e13167cf0593437f57d
 *
 * The NGUID has different fields compared to the UUID, so the grouping used in
 * the UUID format has no relation to the 3 fields of the NGUID.
 *
 * This implementation therefore does not require the strict UUID layout and
 * accepts any string of hexadecimal digits. Bytes may be grouped using the '-'
 * separator. The string must encode exactly 16 bytes, and every '-' must sit
 * exactly on a byte boundary. The following are examples of accepted formats
 * for the NGUID string:
 *
 * nguid="e9accd3b-8390-4e13-167c-f0593437f57d"
 * nguid="e9accd3b83904e13167cf0593437f57d"
 * nguid="FEDCBA9876543210-ABCDEF-0123456789"
 *
 * Returns true when str is an acceptable NGUID string, false otherwise.
 */
static bool nvme_nguid_is_valid(const char *str)
{
    size_t i;
    int digit_count = 0;

    /*
     * Walk up to the terminating NUL instead of calling strlen() in the loop
     * condition, so the string is scanned only once.
     */
    for (i = 0; str[i] != '\0'; i++) {
        const char c = str[i];

        if (qemu_isxdigit(c)) {
            /*
             * Too many digits can never become valid again, so stop early;
             * this also keeps the counter bounded for arbitrarily long input.
             */
            if (++digit_count > (2 * NGUID_LEN)) {
                return false;
            }
            continue;
        }

        if (c == NGUID_SEPARATOR) {
            /*
             * We need to make sure the separator is on a byte boundary, the
             * string does not start with the separator and separators are not
             * back to back ("--").
             */
            if ((i > 0) && (str[i - 1] != NGUID_SEPARATOR) &&
                (digit_count % 2) == 0) {
                continue;
            }
        }

        return false;
    }

    /*
     * The string should have the correct byte length and not finish with the
     * separator. The digit-count test comes first, so str[i - 1] is only
     * evaluated for a non-empty string.
     */
    return (digit_count == (2 * NGUID_LEN)) && (str[i - 1] != NGUID_SEPARATOR);
}
/*
 * Convert an NGUID string into its binary form.
 *
 * str is first checked with nvme_nguid_is_valid(); *nguid is only written
 * once that check has passed. Each byte is read from two hexadecimal
 * characters, and a single separator following a byte is skipped.
 *
 * Returns 0 on success, -1 if the string is rejected or a byte cannot be
 * converted.
 */
static int nvme_nguid_parse(const char *str, NvmeNGUID *nguid)
{
    const char *cursor = str;
    uint8_t *byte = &nguid->data[0];
    uint8_t *const end = &nguid->data[0] + NGUID_LEN;

    if (!nvme_nguid_is_valid(str)) {
        return -1;
    }

    for (; byte < end; byte++) {
        if (sscanf(cursor, "%02hhx", byte) != 1) {
            return -1;
        }

        /* Step over the two digits just consumed, then an optional '-'. */
        cursor += 2;
        if (*cursor == NGUID_SEPARATOR) {
            cursor++;
        }
    }

    return 0;
}
/*
 * When converted back to a string, the NGUID is written as a raw lowercase
 * hexadecimal number with no separators, for instance:
 *
 * "e9accd3b83904e13167cf0593437f57d"
 *
 * out must have room for NGUID_STR_LEN characters, including the terminating
 * NUL.
 */
static void nvme_nguid_stringify(const NvmeNGUID *nguid, char *out)
{
    const uint8_t *id = &nguid->data[0];
    size_t i;

    /*
     * Emit two hex characters per byte. Every call NUL-terminates, and the
     * next call overwrites that terminator, so only the last one remains.
     */
    for (i = 0; i < sizeof(nguid->data); i++) {
        snprintf(out + 2 * i, NGUID_STR_LEN - 2 * i, "%02hhx", id[i]);
    }
}
/*
 * Property getter: render the NGUID field selected by the property in opaque
 * as a separator-less hex string and pass it to the visitor.
 */
static void get_nguid(Object *obj, Visitor *v, const char *name, void *opaque,
                      Error **errp)
{
    char text[NGUID_STR_LEN];
    char *text_ptr = text;
    Property *property = opaque;
    NvmeNGUID *value;

    value = object_field_prop_ptr(obj, property);
    nvme_nguid_stringify(value, text);

    /* visit_type_str() takes a char **, hence the separate pointer. */
    visit_type_str(v, name, &text_ptr, errp);
}
/*
 * Property setter: read a string from the visitor and store it in the NGUID
 * field selected by the property in opaque.
 *
 * The value NGUID_VALUE_AUTO requests a randomly generated NGUID; any other
 * value is parsed as a hex string. A string that fails to parse is reported
 * through errp as an EINVAL property error.
 */
static void set_nguid(Object *obj, Visitor *v, const char *name, void *opaque,
                      Error **errp)
{
    Property *property = opaque;
    NvmeNGUID *target = object_field_prop_ptr(obj, property);
    char *input;

    if (!visit_type_str(v, name, &input, errp)) {
        return;
    }

    if (strcmp(input, NGUID_VALUE_AUTO) == 0) {
        nvme_nguid_generate(target);
    } else {
        if (nvme_nguid_parse(input, target) < 0) {
            error_set_from_qdev_prop_error(errp, EINVAL, obj, name, input);
        }
    }

    /* The string was handed to us by visit_type_str(); release it here. */
    g_free(input);
}
/*
 * Property descriptor for NGUID-valued fields. Values are exchanged with the
 * visitor as strings, using get_nguid() and set_nguid() as the accessors.
 */
const PropertyInfo qdev_prop_nguid = {
.name = "str",
.description =
"NGUID or \"" NGUID_VALUE_AUTO "\" for random value",
.get = get_nguid,
.set = set_nguid,
};

View File

@ -89,6 +89,7 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
id_ns->mcl = cpu_to_le32(ns->params.mcl);
id_ns->msrc = ns->params.msrc;
id_ns->eui64 = cpu_to_be64(ns->params.eui64);
memcpy(&id_ns->nguid, &ns->params.nguid.data, sizeof(id_ns->nguid));
ds = 31 - clz32(ns->blkconf.logical_block_size);
ms = ns->params.ms;
@ -797,6 +798,7 @@ static Property nvme_ns_props[] = {
DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, true),
DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
DEFINE_PROP_UUID_NODEFAULT("uuid", NvmeNamespace, params.uuid),
DEFINE_PROP_NGUID_NODEFAULT("nguid", NvmeNamespace, params.nguid),
DEFINE_PROP_UINT64("eui64", NvmeNamespace, params.eui64, 0),
DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0),
DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0),

View File

@ -171,13 +171,27 @@ static const uint8_t nvme_fdp_evf_shifts[FDP_EVT_MAX] = {
[FDP_EVT_RUH_IMPLICIT_RU_CHANGE] = 33,
};
/* Size in bytes of a Namespace Globally Unique Identifier (NGUID). */
#define NGUID_LEN 16
/* Raw NGUID value, stored as a plain byte array. */
typedef struct {
uint8_t data[NGUID_LEN];
} NvmeNGUID;
/* Returns true when every byte of the given NGUID is zero. */
bool nvme_nguid_is_null(const NvmeNGUID *nguid);
/* Property type used for NvmeNGUID fields. */
extern const PropertyInfo qdev_prop_nguid;
/*
 * Declare a property _name backed by the NvmeNGUID member _field of _state.
 * No default value is passed to DEFINE_PROP here.
 */
#define DEFINE_PROP_NGUID_NODEFAULT(_name, _state, _field) \
DEFINE_PROP(_name, _state, _field, qdev_prop_nguid, NvmeNGUID)
typedef struct NvmeNamespaceParams {
bool detached;
bool shared;
uint32_t nsid;
QemuUUID uuid;
uint64_t eui64;
bool eui64_default;
bool detached;
bool shared;
uint32_t nsid;
QemuUUID uuid;
NvmeNGUID nguid;
uint64_t eui64;
bool eui64_default;
uint16_t ms;
uint8_t mset;
@ -522,6 +536,7 @@ typedef struct NvmeParams {
uint16_t sriov_vi_flexible;
uint8_t sriov_max_vq_per_vf;
uint8_t sriov_max_vi_per_vf;
bool msix_exclusive_bar;
} NvmeParams;
typedef struct NvmeCtrl {