Emulated NVMe device updates

* deallocate or unwritten logical block error feature (me)
   * dataset management command (me)
   * compare command (Gollu Appalanaidu)
   * namespace types (Niklas Cassel)
   * zoned namespaces (Dmitry Fomichev)
   * smart critical warning toggle (Zhenwei Pi)
   * allow cmb and pmr to coexist (me)
   * pmr rds/wds support (Naveen Nagar)
   * cmb v1.4 logic (Padmakar Kalghatgi)
 
 And a lot of smaller fixes from Gollu Appalanaidu and Minwoo Im.
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmAiON4ACgkQTeGvMW1P
 DenhxQf/WzoiO5bXmvZeRv+IHoIn5GhJ1NxLRYO5MX0diswh0BBwUmIscbEDMe1b
 GsD6rpd9YfEO80L/sEGqgV09HT+6e8YwDsNNZBTAsUNRVx0WxckgcGWcrzNNA9Nc
 WEl0Q8si5USSQ1C7djplLXdR6p4pbA/gIk6AjNIo3q2VK1ZqCBhQGESGEfGgrAXW
 xWo8C1V8dnKdxUYI2blbti44sElHZJ6jcF5N3Xmv0UUa1WL0hh0u6qr7IbCZe1kO
 SUFWMIGLF+1C35MUyWpgCjCn5cUdnTA0s/SLEuWtDlNYRhRRh0D6LZviVTZi38Wx
 6Cxg/bRkSlcKo1/jswwYcAaH7qQ4Eg==
 =NB9D
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/nvme/tags/nvme-next-pull-request' into staging

Emulated NVMe device updates

  * deallocate or unwritten logical block error feature (me)
  * dataset management command (me)
  * compare command (Gollu Appalanaidu)
  * namespace types (Niklas Cassel)
  * zoned namespaces (Dmitry Fomichev)
  * smart critical warning toggle (Zhenwei Pi)
  * allow cmb and pmr to coexist (me)
  * pmr rds/wds support (Naveen Nagar)
  * cmb v1.4 logic (Padmakar Kalghatgi)

And a lot of smaller fixes from Gollu Appalanaidu and Minwoo Im.

# gpg: Signature made Tue 09 Feb 2021 07:25:18 GMT
# gpg:                using RSA key 522833AA75E2DCE6A24766C04DE1AF316D4F0DE9
# gpg: Good signature from "Klaus Jensen <its@irrelevant.dk>" [unknown]
# gpg:                 aka "Klaus Jensen <k.jensen@samsung.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: DDCA 4D9C 9EF9 31CC 3468  4272 63D5 6FC5 E55D A838
#      Subkey fingerprint: 5228 33AA 75E2 DCE6 A247  66C0 4DE1 AF31 6D4F 0DE9

* remotes/nvme/tags/nvme-next-pull-request: (56 commits)
  hw/block/nvme: refactor the logic for zone write checks
  hw/block/nvme: fix zone boundary check for append
  hw/block/nvme: fix wrong parameter name 'cross_read'
  hw/block/nvme: align with existing style
  hw/block/nvme: fix set feature save field check
  hw/block/nvme: fix set feature for error recovery
  hw/block/nvme: error if drive less than a zone size
  hw/block/nvme: lift cmb restrictions
  hw/block/nvme: bump to v1.4
  hw/block/nvme: move cmb logic to v1.4
  hw/block/nvme: add PMR RDS/WDS support
  hw/block/nvme: disable PMR at boot up
  hw/block/nvme: remove redundant zeroing of PMR registers
  hw/block/nvme: rename PMR/CMB shift/mask fields
  hw/block/nvme: allow cmb and pmr to coexist
  hw/block/nvme: move msix table and pba to BAR 0
  hw/block/nvme: indicate CMB support through controller capabilities register
  hw/block/nvme: fix 64 bit register hi/lo split writes
  hw/block/nvme: add size to mmio read/write trace events
  hw/block/nvme: trigger async event during injecting smart warning
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2021-02-09 13:24:37 +00:00
commit 1214d55d1c
6 changed files with 2823 additions and 333 deletions

View File

@ -16,6 +16,7 @@
#include "qemu/units.h"
#include "qemu/cutils.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
#include "hw/block/block.h"
#include "hw/pci/pci.h"
#include "sysemu/sysemu.h"
@ -25,28 +26,47 @@
#include "hw/qdev-properties.h"
#include "hw/qdev-core.h"
#include "trace.h"
#include "nvme.h"
#include "nvme-ns.h"
static void nvme_ns_init(NvmeNamespace *ns)
#define MIN_DISCARD_GRANULARITY (4 * KiB)
static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
{
BlockDriverInfo bdi;
NvmeIdNs *id_ns = &ns->id_ns;
int lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
int npdg;
if (blk_get_flags(ns->blkconf.blk) & BDRV_O_UNMAP) {
ns->id_ns.dlfeat = 0x9;
}
id_ns->lbaf[lba_index].ds = 31 - clz32(ns->blkconf.logical_block_size);
id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(ns));
ns->csi = NVME_CSI_NVM;
/* no thin provisioning */
id_ns->ncap = id_ns->nsze;
id_ns->nuse = id_ns->ncap;
/* support DULBE and I/O optimization fields */
id_ns->nsfeat |= (0x4 | 0x10);
npdg = ns->blkconf.discard_granularity / ns->blkconf.logical_block_size;
if (bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi) >= 0 &&
bdi.cluster_size > ns->blkconf.discard_granularity) {
npdg = bdi.cluster_size / ns->blkconf.logical_block_size;
}
id_ns->npda = id_ns->npdg = npdg - 1;
return 0;
}
static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp)
{
bool read_only;
@ -59,19 +79,225 @@ static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
return -1;
}
if (ns->blkconf.discard_granularity == -1) {
ns->blkconf.discard_granularity =
MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY);
}
ns->size = blk_getlength(ns->blkconf.blk);
if (ns->size < 0) {
error_setg_errno(errp, -ns->size, "could not get blockdev size");
return -1;
}
if (blk_enable_write_cache(ns->blkconf.blk)) {
n->features.vwc = 0x1;
return 0;
}
static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp)
{
uint64_t zone_size, zone_cap;
uint32_t lbasz = ns->blkconf.logical_block_size;
/* Make sure that the values of ZNS properties are sane */
if (ns->params.zone_size_bs) {
zone_size = ns->params.zone_size_bs;
} else {
zone_size = NVME_DEFAULT_ZONE_SIZE;
}
if (ns->params.zone_cap_bs) {
zone_cap = ns->params.zone_cap_bs;
} else {
zone_cap = zone_size;
}
if (zone_cap > zone_size) {
error_setg(errp, "zone capacity %"PRIu64"B exceeds "
"zone size %"PRIu64"B", zone_cap, zone_size);
return -1;
}
if (zone_size < lbasz) {
error_setg(errp, "zone size %"PRIu64"B too small, "
"must be at least %"PRIu32"B", zone_size, lbasz);
return -1;
}
if (zone_cap < lbasz) {
error_setg(errp, "zone capacity %"PRIu64"B too small, "
"must be at least %"PRIu32"B", zone_cap, lbasz);
return -1;
}
/*
* Save the main zone geometry values to avoid
* calculating them later again.
*/
ns->zone_size = zone_size / lbasz;
ns->zone_capacity = zone_cap / lbasz;
ns->num_zones = ns->size / lbasz / ns->zone_size;
/* Do a few more sanity checks of ZNS properties */
if (!ns->num_zones) {
error_setg(errp,
"insufficient drive capacity, must be at least the size "
"of one zone (%"PRIu64"B)", zone_size);
return -1;
}
if (ns->params.max_open_zones > ns->num_zones) {
error_setg(errp,
"max_open_zones value %u exceeds the number of zones %u",
ns->params.max_open_zones, ns->num_zones);
return -1;
}
if (ns->params.max_active_zones > ns->num_zones) {
error_setg(errp,
"max_active_zones value %u exceeds the number of zones %u",
ns->params.max_active_zones, ns->num_zones);
return -1;
}
if (ns->params.zd_extension_size) {
if (ns->params.zd_extension_size & 0x3f) {
error_setg(errp,
"zone descriptor extension size must be a multiple of 64B");
return -1;
}
if ((ns->params.zd_extension_size >> 6) > 0xff) {
error_setg(errp, "zone descriptor extension size is too large");
return -1;
}
}
return 0;
}
static void nvme_ns_zoned_init_state(NvmeNamespace *ns)
{
uint64_t start = 0, zone_size = ns->zone_size;
uint64_t capacity = ns->num_zones * zone_size;
NvmeZone *zone;
int i;
ns->zone_array = g_new0(NvmeZone, ns->num_zones);
if (ns->params.zd_extension_size) {
ns->zd_extensions = g_malloc0(ns->params.zd_extension_size *
ns->num_zones);
}
QTAILQ_INIT(&ns->exp_open_zones);
QTAILQ_INIT(&ns->imp_open_zones);
QTAILQ_INIT(&ns->closed_zones);
QTAILQ_INIT(&ns->full_zones);
zone = ns->zone_array;
for (i = 0; i < ns->num_zones; i++, zone++) {
if (start + zone_size > capacity) {
zone_size = capacity - start;
}
zone->d.zt = NVME_ZONE_TYPE_SEQ_WRITE;
nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY);
zone->d.za = 0;
zone->d.zcap = ns->zone_capacity;
zone->d.zslba = start;
zone->d.wp = start;
zone->w_ptr = start;
start += zone_size;
}
ns->zone_size_log2 = 0;
if (is_power_of_2(ns->zone_size)) {
ns->zone_size_log2 = 63 - clz64(ns->zone_size);
}
}
static void nvme_ns_init_zoned(NvmeNamespace *ns, int lba_index)
{
NvmeIdNsZoned *id_ns_z;
nvme_ns_zoned_init_state(ns);
id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned));
/* MAR/MOR are zeroes-based, 0xffffffff means no limit */
id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1);
id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1);
id_ns_z->zoc = 0;
id_ns_z->ozcs = ns->params.cross_zone_read ? 0x01 : 0x00;
id_ns_z->lbafe[lba_index].zsze = cpu_to_le64(ns->zone_size);
id_ns_z->lbafe[lba_index].zdes =
ns->params.zd_extension_size >> 6; /* Units of 64B */
ns->csi = NVME_CSI_ZONED;
ns->id_ns.nsze = cpu_to_le64(ns->num_zones * ns->zone_size);
ns->id_ns.ncap = ns->id_ns.nsze;
ns->id_ns.nuse = ns->id_ns.ncap;
/*
* The device uses the BDRV_BLOCK_ZERO flag to determine the "deallocated"
* status of logical blocks. Since the spec defines that logical blocks
* SHALL be deallocated when then zone is in the Empty or Offline states,
* we can only support DULBE if the zone size is a multiple of the
* calculated NPDG.
*/
if (ns->zone_size % (ns->id_ns.npdg + 1)) {
warn_report("the zone size (%"PRIu64" blocks) is not a multiple of "
"the calculated deallocation granularity (%d blocks); "
"DULBE support disabled",
ns->zone_size, ns->id_ns.npdg + 1);
ns->id_ns.nsfeat &= ~0x4;
}
ns->id_ns_zoned = id_ns_z;
}
static void nvme_clear_zone(NvmeNamespace *ns, NvmeZone *zone)
{
uint8_t state;
zone->w_ptr = zone->d.wp;
state = nvme_get_zone_state(zone);
if (zone->d.wp != zone->d.zslba ||
(zone->d.za & NVME_ZA_ZD_EXT_VALID)) {
if (state != NVME_ZONE_STATE_CLOSED) {
trace_pci_nvme_clear_ns_close(state, zone->d.zslba);
nvme_set_zone_state(zone, NVME_ZONE_STATE_CLOSED);
}
nvme_aor_inc_active(ns);
QTAILQ_INSERT_HEAD(&ns->closed_zones, zone, entry);
} else {
trace_pci_nvme_clear_ns_reset(state, zone->d.zslba);
nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY);
}
}
/*
* Close all the zones that are currently open.
*/
static void nvme_zoned_ns_shutdown(NvmeNamespace *ns)
{
NvmeZone *zone, *next;
QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) {
QTAILQ_REMOVE(&ns->closed_zones, zone, entry);
nvme_aor_dec_active(ns);
nvme_clear_zone(ns, zone);
}
QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) {
QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry);
nvme_aor_dec_open(ns);
nvme_aor_dec_active(ns);
nvme_clear_zone(ns, zone);
}
QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) {
QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry);
nvme_aor_dec_open(ns);
nvme_aor_dec_active(ns);
nvme_clear_zone(ns, zone);
}
assert(ns->nr_open_zones == 0);
}
static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
{
if (!ns->blkconf.blk) {
@ -82,20 +308,25 @@ static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
return 0;
}
int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
int nvme_ns_setup(NvmeNamespace *ns, Error **errp)
{
if (nvme_ns_check_constraints(ns, errp)) {
return -1;
}
if (nvme_ns_init_blk(n, ns, errp)) {
if (nvme_ns_init_blk(ns, errp)) {
return -1;
}
nvme_ns_init(ns);
if (nvme_register_namespace(n, ns, errp)) {
if (nvme_ns_init(ns, errp)) {
return -1;
}
if (ns->params.zoned) {
if (nvme_ns_zoned_check_calc_geometry(ns, errp) != 0) {
return -1;
}
nvme_ns_init_zoned(ns, 0);
}
return 0;
}
@ -105,9 +336,21 @@ void nvme_ns_drain(NvmeNamespace *ns)
blk_drain(ns->blkconf.blk);
}
void nvme_ns_flush(NvmeNamespace *ns)
void nvme_ns_shutdown(NvmeNamespace *ns)
{
blk_flush(ns->blkconf.blk);
if (ns->params.zoned) {
nvme_zoned_ns_shutdown(ns);
}
}
void nvme_ns_cleanup(NvmeNamespace *ns)
{
if (ns->params.zoned) {
g_free(ns->id_ns_zoned);
g_free(ns->zone_array);
g_free(ns->zd_extensions);
}
}
static void nvme_ns_realize(DeviceState *dev, Error **errp)
@ -117,16 +360,37 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
NvmeCtrl *n = NVME(s->parent);
Error *local_err = NULL;
if (nvme_ns_setup(n, ns, &local_err)) {
if (nvme_ns_setup(ns, &local_err)) {
error_propagate_prepend(errp, local_err,
"could not setup namespace: ");
return;
}
if (nvme_register_namespace(n, ns, errp)) {
error_propagate_prepend(errp, local_err,
"could not register namespace: ");
return;
}
}
static Property nvme_ns_props[] = {
DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false),
DEFINE_PROP_SIZE("zoned.zone_size", NvmeNamespace, params.zone_size_bs,
NVME_DEFAULT_ZONE_SIZE),
DEFINE_PROP_SIZE("zoned.zone_capacity", NvmeNamespace, params.zone_cap_bs,
0),
DEFINE_PROP_BOOL("zoned.cross_read", NvmeNamespace,
params.cross_zone_read, false),
DEFINE_PROP_UINT32("zoned.max_active", NvmeNamespace,
params.max_active_zones, 0),
DEFINE_PROP_UINT32("zoned.max_open", NvmeNamespace,
params.max_open_zones, 0),
DEFINE_PROP_UINT32("zoned.descr_ext_size", NvmeNamespace,
params.zd_extension_size, 0),
DEFINE_PROP_END_OF_LIST(),
};

View File

@ -19,8 +19,23 @@
#define NVME_NS(obj) \
OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
typedef struct NvmeZone {
NvmeZoneDescr d;
uint64_t w_ptr;
QTAILQ_ENTRY(NvmeZone) entry;
} NvmeZone;
typedef struct NvmeNamespaceParams {
uint32_t nsid;
QemuUUID uuid;
bool zoned;
bool cross_zone_read;
uint64_t zone_size_bs;
uint64_t zone_cap_bs;
uint32_t max_active_zones;
uint32_t max_open_zones;
uint32_t zd_extension_size;
} NvmeNamespaceParams;
typedef struct NvmeNamespace {
@ -29,8 +44,28 @@ typedef struct NvmeNamespace {
int32_t bootindex;
int64_t size;
NvmeIdNs id_ns;
const uint32_t *iocs;
uint8_t csi;
NvmeIdNsZoned *id_ns_zoned;
NvmeZone *zone_array;
QTAILQ_HEAD(, NvmeZone) exp_open_zones;
QTAILQ_HEAD(, NvmeZone) imp_open_zones;
QTAILQ_HEAD(, NvmeZone) closed_zones;
QTAILQ_HEAD(, NvmeZone) full_zones;
uint32_t num_zones;
uint64_t zone_size;
uint64_t zone_capacity;
uint32_t zone_size_log2;
uint8_t *zd_extensions;
int32_t nr_open_zones;
int32_t nr_active_zones;
NvmeNamespaceParams params;
struct {
uint32_t err_rec;
} features;
} NvmeNamespace;
static inline uint32_t nvme_nsid(NvmeNamespace *ns)
@ -67,8 +102,81 @@ static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
typedef struct NvmeCtrl NvmeCtrl;
int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
{
return zone->d.zs >> 4;
}
static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
{
zone->d.zs = state << 4;
}
static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
{
return zone->d.zslba + ns->zone_size;
}
static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
{
return zone->d.zslba + zone->d.zcap;
}
static inline bool nvme_wp_is_valid(NvmeZone *zone)
{
uint8_t st = nvme_get_zone_state(zone);
return st != NVME_ZONE_STATE_FULL &&
st != NVME_ZONE_STATE_READ_ONLY &&
st != NVME_ZONE_STATE_OFFLINE;
}
static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
uint32_t zone_idx)
{
return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
}
static inline void nvme_aor_inc_open(NvmeNamespace *ns)
{
assert(ns->nr_open_zones >= 0);
if (ns->params.max_open_zones) {
ns->nr_open_zones++;
assert(ns->nr_open_zones <= ns->params.max_open_zones);
}
}
static inline void nvme_aor_dec_open(NvmeNamespace *ns)
{
if (ns->params.max_open_zones) {
assert(ns->nr_open_zones > 0);
ns->nr_open_zones--;
}
assert(ns->nr_open_zones >= 0);
}
static inline void nvme_aor_inc_active(NvmeNamespace *ns)
{
assert(ns->nr_active_zones >= 0);
if (ns->params.max_active_zones) {
ns->nr_active_zones++;
assert(ns->nr_active_zones <= ns->params.max_active_zones);
}
}
static inline void nvme_aor_dec_active(NvmeNamespace *ns)
{
if (ns->params.max_active_zones) {
assert(ns->nr_active_zones > 0);
ns->nr_active_zones--;
assert(ns->nr_active_zones >= ns->nr_open_zones);
}
assert(ns->nr_active_zones >= 0);
}
int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
void nvme_ns_drain(NvmeNamespace *ns);
void nvme_ns_flush(NvmeNamespace *ns);
void nvme_ns_shutdown(NvmeNamespace *ns);
void nvme_ns_cleanup(NvmeNamespace *ns);
#endif /* NVME_NS_H */

File diff suppressed because it is too large Load Diff

View File

@ -6,6 +6,9 @@
#define NVME_MAX_NAMESPACES 256
#define NVME_DEFAULT_ZONE_SIZE (128 * MiB)
#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB)
typedef struct NvmeParams {
char *serial;
uint32_t num_queues; /* deprecated since 5.1 */
@ -16,6 +19,8 @@ typedef struct NvmeParams {
uint32_t aer_max_queued;
uint8_t mdts;
bool use_intel_id;
uint32_t zasl_bs;
bool legacy_cmb;
} NvmeParams;
typedef struct NvmeAsyncEvent {
@ -28,6 +33,7 @@ typedef struct NvmeRequest {
struct NvmeNamespace *ns;
BlockAIOCB *aiocb;
uint16_t status;
void *opaque;
NvmeCqe cqe;
NvmeCmd cmd;
BlockAcctCookie acct;
@ -59,7 +65,12 @@ static inline const char *nvme_io_opc_str(uint8_t opc)
case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH";
case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE";
case NVME_CMD_READ: return "NVME_NVM_CMD_READ";
case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE";
case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES";
case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM";
case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND";
case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV";
case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND";
default: return "NVME_NVM_CMD_UNKNOWN";
}
}
@ -111,13 +122,12 @@ typedef struct NvmeFeatureVal {
uint16_t temp_thresh_low;
};
uint32_t async_config;
uint32_t vwc;
} NvmeFeatureVal;
typedef struct NvmeCtrl {
PCIDevice parent_obj;
MemoryRegion bar0;
MemoryRegion iomem;
MemoryRegion ctrl_mem;
NvmeBar bar;
NvmeParams params;
NvmeBus bus;
@ -133,20 +143,33 @@ typedef struct NvmeCtrl {
uint32_t num_namespaces;
uint32_t max_q_ents;
uint8_t outstanding_aers;
uint8_t *cmbuf;
uint32_t irq_status;
uint64_t host_timestamp; /* Timestamp sent by the host */
uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */
uint64_t starttime_ms;
uint16_t temperature;
uint8_t smart_critical_warning;
HostMemoryBackend *pmrdev;
struct {
MemoryRegion mem;
uint8_t *buf;
bool cmse;
hwaddr cba;
} cmb;
struct {
HostMemoryBackend *dev;
bool cmse;
hwaddr cba;
} pmr;
uint8_t aer_mask;
NvmeRequest **aer_reqs;
QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
int aer_queued;
uint8_t zasl;
NvmeNamespace namespace;
NvmeNamespace *namespaces[NVME_MAX_NAMESPACES];
NvmeSQueue **sq;

View File

@ -40,16 +40,27 @@ pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2,
pci_nvme_map_sgl(uint16_t cid, uint8_t typ, uint64_t len) "cid %"PRIu16" type 0x%"PRIx8" len %"PRIu64""
pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
pci_nvme_rw(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
pci_nvme_write_zeroes(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32""
pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d"
pci_nvme_dsm(uint16_t cid, uint32_t nsid, uint32_t nr, uint32_t attr) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu32" attr 0x%"PRIx32""
pci_nvme_dsm_deallocate(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32""
pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32""
pci_nvme_compare_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64""
pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
pci_nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16""
pci_nvme_identify_ctrl(void) "identify controller"
pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8""
pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32""
pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8""
pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32""
pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8""
pci_nvme_identify_cmd_set(void) "identify i/o command set"
pci_nvme_identify_ns_descr_list(uint32_t ns) "nsid %"PRIu32""
pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64""
pci_nvme_getfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t sel, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" sel 0x%"PRIx8" cdw11 0x%"PRIx32""
@ -69,8 +80,8 @@ pci_nvme_enqueue_event_noqueue(int queued) "queued %d"
pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16""
pci_nvme_mmio_read(uint64_t addr) "addr 0x%"PRIx64""
pci_nvme_mmio_write(uint64_t addr, uint64_t data) "addr 0x%"PRIx64" data 0x%"PRIx64""
pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d"
pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d"
pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16""
pci_nvme_mmio_doorbell_sq(uint16_t sqid, uint16_t new_tail) "sqid %"PRIu16" new_tail %"PRIu16""
pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
@ -85,6 +96,15 @@ pci_nvme_mmio_start_success(void) "setting controller enable bit succeeded"
pci_nvme_mmio_stopped(void) "cleared controller enable bit"
pci_nvme_mmio_shutdown_set(void) "shutdown bit set"
pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_reset_zone(uint64_t slba, uint32_t zone_idx, int all) "reset zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_offline_zone(uint64_t slba, uint32_t zone_idx, int all) "offline zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_set_descriptor_extension(uint64_t slba, uint32_t zone_idx) "set zone descriptor extension, slba=%"PRIu64", idx=%"PRIu32""
pci_nvme_zd_extension_set(uint32_t zone_idx) "set descriptor extension for zone_idx=%"PRIu32""
pci_nvme_clear_ns_close(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Closed state"
pci_nvme_clear_ns_reset(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Empty state"
# nvme traces for error conditions
pci_nvme_err_mdts(uint16_t cid, size_t len) "cid %"PRIu16" len %zu"
@ -102,6 +122,25 @@ pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PR
pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8""
pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8""
pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64""
pci_nvme_err_invalid_log_page_offset(uint64_t ofs, uint64_t size) "must be <= %"PRIu64", got %"PRIu64""
pci_nvme_err_cmb_invalid_cba(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64""
pci_nvme_err_cmb_not_enabled(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64""
pci_nvme_err_unaligned_zone_cmd(uint8_t action, uint64_t slba, uint64_t zslba) "unaligned zone op 0x%"PRIx32", got slba=%"PRIu64", zslba=%"PRIu64""
pci_nvme_err_invalid_zone_state_transition(uint8_t action, uint64_t slba, uint8_t attrs) "action=0x%"PRIx8", slba=%"PRIu64", attrs=0x%"PRIx32""
pci_nvme_err_write_not_at_wp(uint64_t slba, uint64_t zone, uint64_t wp) "writing at slba=%"PRIu64", zone=%"PRIu64", but wp=%"PRIu64""
pci_nvme_err_append_not_at_start(uint64_t slba, uint64_t zone) "appending at slba=%"PRIu64", but zone=%"PRIu64""
pci_nvme_err_zone_is_full(uint64_t zslba) "zslba 0x%"PRIx64""
pci_nvme_err_zone_is_read_only(uint64_t zslba) "zslba 0x%"PRIx64""
pci_nvme_err_zone_is_offline(uint64_t zslba) "zslba 0x%"PRIx64""
pci_nvme_err_zone_boundary(uint64_t slba, uint32_t nlb, uint64_t zcap) "lba 0x%"PRIx64" nlb %"PRIu32" zcap 0x%"PRIx64""
pci_nvme_err_zone_invalid_write(uint64_t slba, uint64_t wp) "lba 0x%"PRIx64" wp 0x%"PRIx64""
pci_nvme_err_zone_write_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16""
pci_nvme_err_zone_read_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16""
pci_nvme_err_append_too_large(uint64_t slba, uint32_t nlb, uint8_t zasl) "slba=%"PRIu64", nlb=%"PRIu32", zasl=%"PRIu8""
pci_nvme_err_insuff_active_res(uint32_t max_active) "max_active=%"PRIu32" zone limit exceeded"
pci_nvme_err_insuff_open_res(uint32_t max_open) "max_open=%"PRIu32" zone limit exceeded"
pci_nvme_err_zd_extension_map_error(uint32_t zone_idx) "can't map descriptor extension for zone_idx=%"PRIu32""
pci_nvme_err_invalid_iocsci(uint32_t idx) "unsupported command set combination index %"PRIu32""
pci_nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16""
pci_nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16""
pci_nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16""
@ -134,7 +173,9 @@ pci_nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_
pci_nvme_err_startfail_css(uint8_t css) "nvme_start_ctrl failed because invalid command set selected:%u"
pci_nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero"
pci_nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero"
pci_nvme_err_startfail_zasl_too_small(uint32_t zasl, uint32_t pagesz) "nvme_start_ctrl failed because zone append size limit %"PRIu32" is too small, needs to be >= %"PRIu32""
pci_nvme_err_startfail(void) "setting controller enable bit failed"
pci_nvme_err_invalid_mgmt_action(uint8_t action) "action=0x%"PRIx8""
# Traces for undefined behavior
pci_nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64""
@ -158,6 +199,7 @@ pci_nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for
pci_nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring"
pci_nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring"
pci_nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring"
pci_nvme_ub_unknown_css_value(void) "unknown value in cc.css field"
# xen-block.c
xen_block_realize(const char *type, uint32_t disk, uint32_t partition) "%s d%up%u"

View File

@ -15,14 +15,19 @@ typedef struct QEMU_PACKED NvmeBar {
uint64_t acq;
uint32_t cmbloc;
uint32_t cmbsz;
uint8_t padding[3520]; /* not used by QEMU */
uint32_t bpinfo;
uint32_t bprsel;
uint64_t bpmbl;
uint64_t cmbmsc;
uint32_t cmbsts;
uint8_t rsvd92[3492];
uint32_t pmrcap;
uint32_t pmrctl;
uint32_t pmrsts;
uint32_t pmrebs;
uint32_t pmrswtp;
uint64_t pmrmsc;
uint8_t reserved[484];
uint8_t css[484];
} NvmeBar;
enum NvmeCapShift {
@ -35,7 +40,8 @@ enum NvmeCapShift {
CAP_CSS_SHIFT = 37,
CAP_MPSMIN_SHIFT = 48,
CAP_MPSMAX_SHIFT = 52,
CAP_PMR_SHIFT = 56,
CAP_PMRS_SHIFT = 56,
CAP_CMBS_SHIFT = 57,
};
enum NvmeCapMask {
@ -48,7 +54,8 @@ enum NvmeCapMask {
CAP_CSS_MASK = 0xff,
CAP_MPSMIN_MASK = 0xf,
CAP_MPSMAX_MASK = 0xf,
CAP_PMR_MASK = 0x1,
CAP_PMRS_MASK = 0x1,
CAP_CMBS_MASK = 0x1,
};
#define NVME_CAP_MQES(cap) (((cap) >> CAP_MQES_SHIFT) & CAP_MQES_MASK)
@ -60,6 +67,8 @@ enum NvmeCapMask {
#define NVME_CAP_CSS(cap) (((cap) >> CAP_CSS_SHIFT) & CAP_CSS_MASK)
#define NVME_CAP_MPSMIN(cap)(((cap) >> CAP_MPSMIN_SHIFT) & CAP_MPSMIN_MASK)
#define NVME_CAP_MPSMAX(cap)(((cap) >> CAP_MPSMAX_SHIFT) & CAP_MPSMAX_MASK)
#define NVME_CAP_PMRS(cap) (((cap) >> CAP_PMRS_SHIFT) & CAP_PMRS_MASK)
#define NVME_CAP_CMBS(cap) (((cap) >> CAP_CMBS_SHIFT) & CAP_CMBS_MASK)
#define NVME_CAP_SET_MQES(cap, val) (cap |= (uint64_t)(val & CAP_MQES_MASK) \
<< CAP_MQES_SHIFT)
@ -79,11 +88,14 @@ enum NvmeCapMask {
<< CAP_MPSMIN_SHIFT)
#define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & CAP_MPSMAX_MASK)\
<< CAP_MPSMAX_SHIFT)
#define NVME_CAP_SET_PMRS(cap, val) (cap |= (uint64_t)(val & CAP_PMR_MASK)\
<< CAP_PMR_SHIFT)
#define NVME_CAP_SET_PMRS(cap, val) (cap |= (uint64_t)(val & CAP_PMRS_MASK) \
<< CAP_PMRS_SHIFT)
#define NVME_CAP_SET_CMBS(cap, val) (cap |= (uint64_t)(val & CAP_CMBS_MASK) \
<< CAP_CMBS_SHIFT)
enum NvmeCapCss {
NVME_CAP_CSS_NVM = 1 << 0,
NVME_CAP_CSS_CSI_SUPP = 1 << 6,
NVME_CAP_CSS_ADMIN_ONLY = 1 << 7,
};
@ -117,9 +129,25 @@ enum NvmeCcMask {
enum NvmeCcCss {
NVME_CC_CSS_NVM = 0x0,
NVME_CC_CSS_CSI = 0x6,
NVME_CC_CSS_ADMIN_ONLY = 0x7,
};
#define NVME_SET_CC_EN(cc, val) \
(cc |= (uint32_t)((val) & CC_EN_MASK) << CC_EN_SHIFT)
#define NVME_SET_CC_CSS(cc, val) \
(cc |= (uint32_t)((val) & CC_CSS_MASK) << CC_CSS_SHIFT)
#define NVME_SET_CC_MPS(cc, val) \
(cc |= (uint32_t)((val) & CC_MPS_MASK) << CC_MPS_SHIFT)
#define NVME_SET_CC_AMS(cc, val) \
(cc |= (uint32_t)((val) & CC_AMS_MASK) << CC_AMS_SHIFT)
#define NVME_SET_CC_SHN(cc, val) \
(cc |= (uint32_t)((val) & CC_SHN_MASK) << CC_SHN_SHIFT)
#define NVME_SET_CC_IOSQES(cc, val) \
(cc |= (uint32_t)((val) & CC_IOSQES_MASK) << CC_IOSQES_SHIFT)
#define NVME_SET_CC_IOCQES(cc, val) \
(cc |= (uint32_t)((val) & CC_IOCQES_MASK) << CC_IOCQES_SHIFT)
enum NvmeCstsShift {
CSTS_RDY_SHIFT = 0,
CSTS_CFS_SHIFT = 1,
@ -163,24 +191,63 @@ enum NvmeAqaMask {
enum NvmeCmblocShift {
CMBLOC_BIR_SHIFT = 0,
CMBLOC_CQMMS_SHIFT = 3,
CMBLOC_CQPDS_SHIFT = 4,
CMBLOC_CDPMLS_SHIFT = 5,
CMBLOC_CDPCILS_SHIFT = 6,
CMBLOC_CDMMMS_SHIFT = 7,
CMBLOC_CQDA_SHIFT = 8,
CMBLOC_OFST_SHIFT = 12,
};
enum NvmeCmblocMask {
CMBLOC_BIR_MASK = 0x7,
CMBLOC_CQMMS_MASK = 0x1,
CMBLOC_CQPDS_MASK = 0x1,
CMBLOC_CDPMLS_MASK = 0x1,
CMBLOC_CDPCILS_MASK = 0x1,
CMBLOC_CDMMMS_MASK = 0x1,
CMBLOC_CQDA_MASK = 0x1,
CMBLOC_OFST_MASK = 0xfffff,
};
#define NVME_CMBLOC_BIR(cmbloc) ((cmbloc >> CMBLOC_BIR_SHIFT) & \
CMBLOC_BIR_MASK)
#define NVME_CMBLOC_OFST(cmbloc)((cmbloc >> CMBLOC_OFST_SHIFT) & \
CMBLOC_OFST_MASK)
#define NVME_CMBLOC_BIR(cmbloc) \
((cmbloc >> CMBLOC_BIR_SHIFT) & CMBLOC_BIR_MASK)
#define NVME_CMBLOC_CQMMS(cmbloc) \
((cmbloc >> CMBLOC_CQMMS_SHIFT) & CMBLOC_CQMMS_MASK)
#define NVME_CMBLOC_CQPDS(cmbloc) \
((cmbloc >> CMBLOC_CQPDS_SHIFT) & CMBLOC_CQPDS_MASK)
#define NVME_CMBLOC_CDPMLS(cmbloc) \
((cmbloc >> CMBLOC_CDPMLS_SHIFT) & CMBLOC_CDPMLS_MASK)
#define NVME_CMBLOC_CDPCILS(cmbloc) \
((cmbloc >> CMBLOC_CDPCILS_SHIFT) & CMBLOC_CDPCILS_MASK)
#define NVME_CMBLOC_CDMMMS(cmbloc) \
((cmbloc >> CMBLOC_CDMMMS_SHIFT) & CMBLOC_CDMMMS_MASK)
#define NVME_CMBLOC_CQDA(cmbloc) \
((cmbloc >> CMBLOC_CQDA_SHIFT) & CMBLOC_CQDA_MASK)
#define NVME_CMBLOC_OFST(cmbloc) \
((cmbloc >> CMBLOC_OFST_SHIFT) & CMBLOC_OFST_MASK)
#define NVME_CMBLOC_SET_BIR(cmbloc, val) \
(cmbloc |= (uint64_t)(val & CMBLOC_BIR_MASK) << CMBLOC_BIR_SHIFT)
#define NVME_CMBLOC_SET_CQMMS(cmbloc, val) \
(cmbloc |= (uint64_t)(val & CMBLOC_CQMMS_MASK) << CMBLOC_CQMMS_SHIFT)
#define NVME_CMBLOC_SET_CQPDS(cmbloc, val) \
(cmbloc |= (uint64_t)(val & CMBLOC_CQPDS_MASK) << CMBLOC_CQPDS_SHIFT)
#define NVME_CMBLOC_SET_CDPMLS(cmbloc, val) \
(cmbloc |= (uint64_t)(val & CMBLOC_CDPMLS_MASK) << CMBLOC_CDPMLS_SHIFT)
#define NVME_CMBLOC_SET_CDPCILS(cmbloc, val) \
(cmbloc |= (uint64_t)(val & CMBLOC_CDPCILS_MASK) << CMBLOC_CDPCILS_SHIFT)
#define NVME_CMBLOC_SET_CDMMMS(cmbloc, val) \
(cmbloc |= (uint64_t)(val & CMBLOC_CDMMMS_MASK) << CMBLOC_CDMMMS_SHIFT)
#define NVME_CMBLOC_SET_CQDA(cmbloc, val) \
(cmbloc |= (uint64_t)(val & CMBLOC_CQDA_MASK) << CMBLOC_CQDA_SHIFT)
#define NVME_CMBLOC_SET_OFST(cmbloc, val) \
(cmbloc |= (uint64_t)(val & CMBLOC_OFST_MASK) << CMBLOC_OFST_SHIFT)
#define NVME_CMBMSMC_SET_CRE (cmbmsc, val) \
(cmbmsc |= (uint64_t)(val & CMBLOC_OFST_MASK) << CMBMSC_CRE_SHIFT)
enum NvmeCmbszShift {
CMBSZ_SQS_SHIFT = 0,
CMBSZ_CQS_SHIFT = 1,
@ -227,6 +294,46 @@ enum NvmeCmbszMask {
#define NVME_CMBSZ_GETSIZE(cmbsz) \
(NVME_CMBSZ_SZ(cmbsz) * (1 << (12 + 4 * NVME_CMBSZ_SZU(cmbsz))))
enum NvmeCmbmscShift {
CMBMSC_CRE_SHIFT = 0,
CMBMSC_CMSE_SHIFT = 1,
CMBMSC_CBA_SHIFT = 12,
};
enum NvmeCmbmscMask {
CMBMSC_CRE_MASK = 0x1,
CMBMSC_CMSE_MASK = 0x1,
CMBMSC_CBA_MASK = ((1ULL << 52) - 1),
};
#define NVME_CMBMSC_CRE(cmbmsc) \
((cmbmsc >> CMBMSC_CRE_SHIFT) & CMBMSC_CRE_MASK)
#define NVME_CMBMSC_CMSE(cmbmsc) \
((cmbmsc >> CMBMSC_CMSE_SHIFT) & CMBMSC_CMSE_MASK)
#define NVME_CMBMSC_CBA(cmbmsc) \
((cmbmsc >> CMBMSC_CBA_SHIFT) & CMBMSC_CBA_MASK)
#define NVME_CMBMSC_SET_CRE(cmbmsc, val) \
(cmbmsc |= (uint64_t)(val & CMBMSC_CRE_MASK) << CMBMSC_CRE_SHIFT)
#define NVME_CMBMSC_SET_CMSE(cmbmsc, val) \
(cmbmsc |= (uint64_t)(val & CMBMSC_CMSE_MASK) << CMBMSC_CMSE_SHIFT)
#define NVME_CMBMSC_SET_CBA(cmbmsc, val) \
(cmbmsc |= (uint64_t)(val & CMBMSC_CBA_MASK) << CMBMSC_CBA_SHIFT)
enum NvmeCmbstsShift {
CMBSTS_CBAI_SHIFT = 0,
};
enum NvmeCmbstsMask {
CMBSTS_CBAI_MASK = 0x1,
};
#define NVME_CMBSTS_CBAI(cmbsts) \
((cmbsts >> CMBSTS_CBAI_SHIFT) & CMBSTS_CBAI_MASK)
#define NVME_CMBSTS_SET_CBAI(cmbsts, val) \
(cmbsts |= (uint64_t)(val & CMBSTS_CBAI_MASK) << CMBSTS_CBAI_SHIFT)
enum NvmePmrcapShift {
PMRCAP_RDS_SHIFT = 3,
PMRCAP_WDS_SHIFT = 4,
@ -472,6 +579,9 @@ enum NvmeIoCommands {
NVME_CMD_COMPARE = 0x05,
NVME_CMD_WRITE_ZEROES = 0x08,
NVME_CMD_DSM = 0x09,
NVME_CMD_ZONE_MGMT_SEND = 0x79,
NVME_CMD_ZONE_MGMT_RECV = 0x7a,
NVME_CMD_ZONE_APPEND = 0x7d,
};
typedef struct QEMU_PACKED NvmeDeleteQ {
@ -540,8 +650,13 @@ typedef struct QEMU_PACKED NvmeIdentify {
uint64_t rsvd2[2];
uint64_t prp1;
uint64_t prp2;
uint32_t cns;
uint32_t rsvd11[5];
uint8_t cns;
uint8_t rsvd10;
uint16_t ctrlid;
uint16_t nvmsetid;
uint8_t rsvd11;
uint8_t csi;
uint32_t rsvd12[4];
} NvmeIdentify;
typedef struct QEMU_PACKED NvmeRwCmd {
@ -632,9 +747,13 @@ typedef struct QEMU_PACKED NvmeAerResult {
uint8_t resv;
} NvmeAerResult;
typedef struct QEMU_PACKED NvmeZonedResult {
uint64_t slba;
} NvmeZonedResult;
typedef struct QEMU_PACKED NvmeCqe {
uint32_t result;
uint32_t rsvd;
uint32_t dw1;
uint16_t sq_head;
uint16_t sq_id;
uint16_t cid;
@ -662,6 +781,8 @@ enum NvmeStatusCodes {
NVME_SGL_DESCR_TYPE_INVALID = 0x0011,
NVME_INVALID_USE_OF_CMB = 0x0012,
NVME_INVALID_PRP_OFFSET = 0x0013,
NVME_CMD_SET_CMB_REJECTED = 0x002b,
NVME_INVALID_CMD_SET = 0x002c,
NVME_LBA_RANGE = 0x0080,
NVME_CAP_EXCEEDED = 0x0081,
NVME_NS_NOT_READY = 0x0082,
@ -686,6 +807,14 @@ enum NvmeStatusCodes {
NVME_CONFLICTING_ATTRS = 0x0180,
NVME_INVALID_PROT_INFO = 0x0181,
NVME_WRITE_TO_RO = 0x0182,
NVME_ZONE_BOUNDARY_ERROR = 0x01b8,
NVME_ZONE_FULL = 0x01b9,
NVME_ZONE_READ_ONLY = 0x01ba,
NVME_ZONE_OFFLINE = 0x01bb,
NVME_ZONE_INVALID_WRITE = 0x01bc,
NVME_ZONE_TOO_MANY_ACTIVE = 0x01bd,
NVME_ZONE_TOO_MANY_OPEN = 0x01be,
NVME_ZONE_INVAL_TRANSITION = 0x01bf,
NVME_WRITE_FAULT = 0x0280,
NVME_UNRECOVERED_READ = 0x0281,
NVME_E2E_GUARD_ERROR = 0x0282,
@ -693,6 +822,7 @@ enum NvmeStatusCodes {
NVME_E2E_REF_ERROR = 0x0284,
NVME_CMP_FAILURE = 0x0285,
NVME_ACCESS_DENIED = 0x0286,
NVME_DULB = 0x0287,
NVME_MORE = 0x2000,
NVME_DNR = 0x4000,
NVME_NO_COMPLETE = 0xffff,
@ -743,18 +873,37 @@ typedef struct QEMU_PACKED NvmeSmartLog {
uint8_t reserved2[320];
} NvmeSmartLog;
#define NVME_SMART_WARN_MAX 6
enum NvmeSmartWarn {
NVME_SMART_SPARE = 1 << 0,
NVME_SMART_TEMPERATURE = 1 << 1,
NVME_SMART_RELIABILITY = 1 << 2,
NVME_SMART_MEDIA_READ_ONLY = 1 << 3,
NVME_SMART_FAILED_VOLATILE_MEDIA = 1 << 4,
NVME_SMART_PMR_UNRELIABLE = 1 << 5,
};
typedef struct NvmeEffectsLog {
uint32_t acs[256];
uint32_t iocs[256];
uint8_t resv[2048];
} NvmeEffectsLog;
enum {
NVME_CMD_EFF_CSUPP = 1 << 0,
NVME_CMD_EFF_LBCC = 1 << 1,
NVME_CMD_EFF_NCC = 1 << 2,
NVME_CMD_EFF_NIC = 1 << 3,
NVME_CMD_EFF_CCC = 1 << 4,
NVME_CMD_EFF_CSE_MASK = 3 << 16,
NVME_CMD_EFF_UUID_SEL = 1 << 19,
};
enum NvmeLogIdentifier {
NVME_LOG_ERROR_INFO = 0x01,
NVME_LOG_SMART_INFO = 0x02,
NVME_LOG_FW_SLOT_INFO = 0x03,
NVME_LOG_CMD_EFFECTS = 0x05,
};
typedef struct QEMU_PACKED NvmePSD {
@ -771,11 +920,19 @@ typedef struct QEMU_PACKED NvmePSD {
#define NVME_IDENTIFY_DATA_SIZE 4096
enum {
NVME_ID_CNS_NS = 0x0,
NVME_ID_CNS_CTRL = 0x1,
NVME_ID_CNS_NS_ACTIVE_LIST = 0x2,
NVME_ID_CNS_NS_DESCR_LIST = 0x3,
enum NvmeIdCns {
NVME_ID_CNS_NS = 0x00,
NVME_ID_CNS_CTRL = 0x01,
NVME_ID_CNS_NS_ACTIVE_LIST = 0x02,
NVME_ID_CNS_NS_DESCR_LIST = 0x03,
NVME_ID_CNS_CS_NS = 0x05,
NVME_ID_CNS_CS_CTRL = 0x06,
NVME_ID_CNS_CS_NS_ACTIVE_LIST = 0x07,
NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
NVME_ID_CNS_NS_PRESENT = 0x11,
NVME_ID_CNS_CS_NS_PRESENT_LIST = 0x1a,
NVME_ID_CNS_CS_NS_PRESENT = 0x1b,
NVME_ID_CNS_IO_COMMAND_SET = 0x1c,
};
typedef struct QEMU_PACKED NvmeIdCtrl {
@ -794,7 +951,8 @@ typedef struct QEMU_PACKED NvmeIdCtrl {
uint32_t rtd3e;
uint32_t oaes;
uint32_t ctratt;
uint8_t rsvd100[12];
uint8_t rsvd100[11];
uint8_t cntrltype;
uint8_t fguid[16];
uint8_t rsvd128[128];
uint16_t oacs;
@ -845,6 +1003,11 @@ typedef struct QEMU_PACKED NvmeIdCtrl {
uint8_t vs[1024];
} NvmeIdCtrl;
typedef struct NvmeIdCtrlZoned {
uint8_t zasl;
uint8_t rsvd1[4095];
} NvmeIdCtrlZoned;
enum NvmeIdCtrlOacs {
NVME_OACS_SECURITY = 1 << 0,
NVME_OACS_FORMAT = 1 << 1,
@ -867,6 +1030,7 @@ enum NvmeIdCtrlFrmw {
enum NvmeIdCtrlLpa {
NVME_LPA_NS_SMART = 1 << 0,
NVME_LPA_CSE = 1 << 1,
NVME_LPA_EXTENDED = 1 << 2,
};
@ -909,6 +1073,9 @@ enum NvmeIdCtrlLpa {
#define NVME_AEC_NS_ATTR(aec) ((aec >> 8) & 0x1)
#define NVME_AEC_FW_ACTIVATION(aec) ((aec >> 9) & 0x1)
#define NVME_ERR_REC_TLER(err_rec) (err_rec & 0xffff)
#define NVME_ERR_REC_DULBE(err_rec) (err_rec & 0x10000)
enum NvmeFeatureIds {
NVME_ARBITRATION = 0x1,
NVME_POWER_MANAGEMENT = 0x2,
@ -922,6 +1089,7 @@ enum NvmeFeatureIds {
NVME_WRITE_ATOMICITY = 0xa,
NVME_ASYNCHRONOUS_EVENT_CONF = 0xb,
NVME_TIMESTAMP = 0xe,
NVME_COMMAND_SET_PROFILE = 0x19,
NVME_SOFTWARE_PROGRESS_MARKER = 0x80,
NVME_FID_MAX = 0x100,
};
@ -968,6 +1136,12 @@ typedef struct QEMU_PACKED NvmeLBAF {
uint8_t rp;
} NvmeLBAF;
typedef struct QEMU_PACKED NvmeLBAFE {
uint64_t zsze;
uint8_t zdes;
uint8_t rsvd9[7];
} NvmeLBAFE;
#define NVME_NSID_BROADCAST 0xffffffff
typedef struct QEMU_PACKED NvmeIdNs {
@ -992,7 +1166,12 @@ typedef struct QEMU_PACKED NvmeIdNs {
uint16_t nabspf;
uint16_t noiob;
uint8_t nvmcap[16];
uint8_t rsvd64[40];
uint16_t npwg;
uint16_t npwa;
uint16_t npdg;
uint16_t npda;
uint16_t nows;
uint8_t rsvd74[30];
uint8_t nguid[16];
uint64_t eui64;
NvmeLBAF lbaf[16];
@ -1006,18 +1185,40 @@ typedef struct QEMU_PACKED NvmeIdNsDescr {
uint8_t rsvd2[2];
} NvmeIdNsDescr;
enum {
NVME_NIDT_EUI64_LEN = 8,
NVME_NIDT_NGUID_LEN = 16,
NVME_NIDT_UUID_LEN = 16,
enum NvmeNsIdentifierLength {
NVME_NIDL_EUI64 = 8,
NVME_NIDL_NGUID = 16,
NVME_NIDL_UUID = 16,
NVME_NIDL_CSI = 1,
};
enum NvmeNsIdentifierType {
NVME_NIDT_EUI64 = 0x1,
NVME_NIDT_NGUID = 0x2,
NVME_NIDT_UUID = 0x3,
NVME_NIDT_EUI64 = 0x01,
NVME_NIDT_NGUID = 0x02,
NVME_NIDT_UUID = 0x03,
NVME_NIDT_CSI = 0x04,
};
enum NvmeCsi {
NVME_CSI_NVM = 0x00,
NVME_CSI_ZONED = 0x02,
};
#define NVME_SET_CSI(vec, csi) (vec |= (uint8_t)(1 << (csi)))
typedef struct QEMU_PACKED NvmeIdNsZoned {
uint16_t zoc;
uint16_t ozcs;
uint32_t mar;
uint32_t mor;
uint32_t rrl;
uint32_t frl;
uint8_t rsvd20[2796];
NvmeLBAFE lbafe[16];
uint8_t rsvd3072[768];
uint8_t vs[256];
} NvmeIdNsZoned;
/*Deallocate Logical Block Features*/
#define NVME_ID_NS_DLFEAT_GUARD_CRC(dlfeat) ((dlfeat) & 0x10)
#define NVME_ID_NS_DLFEAT_WRITE_ZEROES(dlfeat) ((dlfeat) & 0x08)
@ -1029,6 +1230,7 @@ enum NvmeNsIdentifierType {
#define NVME_ID_NS_NSFEAT_THIN(nsfeat) ((nsfeat & 0x1))
#define NVME_ID_NS_NSFEAT_DULBE(nsfeat) ((nsfeat >> 2) & 0x1)
#define NVME_ID_NS_FLBAS_EXTENDED(flbas) ((flbas >> 4) & 0x1)
#define NVME_ID_NS_FLBAS_INDEX(flbas) ((flbas & 0xf))
#define NVME_ID_NS_MC_SEPARATE(mc) ((mc >> 1) & 0x1)
@ -1049,10 +1251,76 @@ enum NvmeIdNsDps {
DPS_FIRST_EIGHT = 8,
};
enum NvmeZoneAttr {
NVME_ZA_FINISHED_BY_CTLR = 1 << 0,
NVME_ZA_FINISH_RECOMMENDED = 1 << 1,
NVME_ZA_RESET_RECOMMENDED = 1 << 2,
NVME_ZA_ZD_EXT_VALID = 1 << 7,
};
typedef struct QEMU_PACKED NvmeZoneReportHeader {
uint64_t nr_zones;
uint8_t rsvd[56];
} NvmeZoneReportHeader;
enum NvmeZoneReceiveAction {
NVME_ZONE_REPORT = 0,
NVME_ZONE_REPORT_EXTENDED = 1,
};
enum NvmeZoneReportType {
NVME_ZONE_REPORT_ALL = 0,
NVME_ZONE_REPORT_EMPTY = 1,
NVME_ZONE_REPORT_IMPLICITLY_OPEN = 2,
NVME_ZONE_REPORT_EXPLICITLY_OPEN = 3,
NVME_ZONE_REPORT_CLOSED = 4,
NVME_ZONE_REPORT_FULL = 5,
NVME_ZONE_REPORT_READ_ONLY = 6,
NVME_ZONE_REPORT_OFFLINE = 7,
};
enum NvmeZoneType {
NVME_ZONE_TYPE_RESERVED = 0x00,
NVME_ZONE_TYPE_SEQ_WRITE = 0x02,
};
enum NvmeZoneSendAction {
NVME_ZONE_ACTION_RSD = 0x00,
NVME_ZONE_ACTION_CLOSE = 0x01,
NVME_ZONE_ACTION_FINISH = 0x02,
NVME_ZONE_ACTION_OPEN = 0x03,
NVME_ZONE_ACTION_RESET = 0x04,
NVME_ZONE_ACTION_OFFLINE = 0x05,
NVME_ZONE_ACTION_SET_ZD_EXT = 0x10,
};
typedef struct QEMU_PACKED NvmeZoneDescr {
uint8_t zt;
uint8_t zs;
uint8_t za;
uint8_t rsvd3[5];
uint64_t zcap;
uint64_t zslba;
uint64_t wp;
uint8_t rsvd32[32];
} NvmeZoneDescr;
typedef enum NvmeZoneState {
NVME_ZONE_STATE_RESERVED = 0x00,
NVME_ZONE_STATE_EMPTY = 0x01,
NVME_ZONE_STATE_IMPLICITLY_OPEN = 0x02,
NVME_ZONE_STATE_EXPLICITLY_OPEN = 0x03,
NVME_ZONE_STATE_CLOSED = 0x04,
NVME_ZONE_STATE_READ_ONLY = 0x0D,
NVME_ZONE_STATE_FULL = 0x0E,
NVME_ZONE_STATE_OFFLINE = 0x0F,
} NvmeZoneState;
static inline void _nvme_check_size(void)
{
QEMU_BUILD_BUG_ON(sizeof(NvmeBar) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeAerResult) != 4);
QEMU_BUILD_BUG_ON(sizeof(NvmeZonedResult) != 8);
QEMU_BUILD_BUG_ON(sizeof(NvmeCqe) != 16);
QEMU_BUILD_BUG_ON(sizeof(NvmeDsmRange) != 16);
QEMU_BUILD_BUG_ON(sizeof(NvmeCmd) != 64);
@ -1066,9 +1334,15 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512);
QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLog) != 512);
QEMU_BUILD_BUG_ON(sizeof(NvmeEffectsLog) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrl) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrlZoned) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeLBAF) != 4);
QEMU_BUILD_BUG_ON(sizeof(NvmeLBAFE) != 16);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdNs) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsZoned) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeSglDescriptor) != 16);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsDescr) != 4);
QEMU_BUILD_BUG_ON(sizeof(NvmeZoneDescr) != 64);
}
#endif