nvme queue

-----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCgAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmb7nokACgkQTeGvMW1P
 Del+2gf/YefiiYSL540C2QeYRwMFd6xFKKWYRRJoaARyLAoqInVdLiBql527Oov8
 rgDQq+D0XXP15CNDvfAZ59a36h1bAW79QCfKEUMSbP8GPeqb5pOSRfvYJSwnG1YX
 SC70vKLOrBhzxYiQYSOhLNKdbUM00OUyf2xibu0zk84UpkXtzSR4h/byFnQIHwEV
 /uUh4+cxY6eQK1Tfk/f66FEJLuJTchFOMVswYolDMezu2vJmToWHju/kpy2ugvaC
 +WEUEti8kL66/B399u1uwAad2OejC1Jf4qMFcFQJ9Cs9RV4HTC9byolceJE+1R0V
 CZt1SxvBNdK/ihs1iTjP7fInPqdYKw==
 =16tX
 -----END PGP SIGNATURE-----

Merge tag 'pull-nvme-20241001' of https://gitlab.com/birkelund/qemu into staging

nvme queue

# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCgAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmb7nokACgkQTeGvMW1P
# Del+2gf/YefiiYSL540C2QeYRwMFd6xFKKWYRRJoaARyLAoqInVdLiBql527Oov8
# rgDQq+D0XXP15CNDvfAZ59a36h1bAW79QCfKEUMSbP8GPeqb5pOSRfvYJSwnG1YX
# SC70vKLOrBhzxYiQYSOhLNKdbUM00OUyf2xibu0zk84UpkXtzSR4h/byFnQIHwEV
# /uUh4+cxY6eQK1Tfk/f66FEJLuJTchFOMVswYolDMezu2vJmToWHju/kpy2ugvaC
# +WEUEti8kL66/B399u1uwAad2OejC1Jf4qMFcFQJ9Cs9RV4HTC9byolceJE+1R0V
# CZt1SxvBNdK/ihs1iTjP7fInPqdYKw==
# =16tX
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 01 Oct 2024 08:02:33 BST
# gpg:                using RSA key 522833AA75E2DCE6A24766C04DE1AF316D4F0DE9
# gpg: Good signature from "Klaus Jensen <its@irrelevant.dk>" [full]
# gpg:                 aka "Klaus Jensen <k.jensen@samsung.com>" [full]
# Primary key fingerprint: DDCA 4D9C 9EF9 31CC 3468  4272 63D5 6FC5 E55D A838
#      Subkey fingerprint: 5228 33AA 75E2 DCE6 A247  66C0 4DE1 AF31 6D4F 0DE9

* tag 'pull-nvme-20241001' of https://gitlab.com/birkelund/qemu:
  hw/nvme: add atomic write support
  hw/nvme: add knob for CTRATT.MEM
  hw/nvme: support CTRATT.MEM
  hw/nvme: clear masked events from the aer queue
  hw/nvme: report id controller metadata sgl support

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2024-10-01 11:34:07 +01:00
commit 718780d204
3 changed files with 205 additions and 8 deletions

View File

@ -40,6 +40,9 @@
* sriov_vi_flexible=<N[optional]> \
* sriov_max_vi_per_vf=<N[optional]> \
* sriov_max_vq_per_vf=<N[optional]> \
* atomic.dn=<on|off[optional]>, \
* atomic.awun=<N[optional]>, \
* atomic.awupf=<N[optional]>, \
* subsys=<subsys_id>
* -device nvme-ns,drive=<drive_id>,bus=<bus_name>,nsid=<nsid>,\
* zoned=<true|false[optional]>, \
@ -254,6 +257,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
[NVME_ERROR_RECOVERY] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS,
[NVME_VOLATILE_WRITE_CACHE] = NVME_FEAT_CAP_CHANGE,
[NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE,
[NVME_WRITE_ATOMICITY] = NVME_FEAT_CAP_CHANGE,
[NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE,
[NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE,
[NVME_HOST_BEHAVIOR_SUPPORT] = NVME_FEAT_CAP_CHANGE,
@ -1649,9 +1653,16 @@ static void nvme_smart_event(NvmeCtrl *n, uint8_t event)
static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
{
NvmeAsyncEvent *event, *next;
n->aer_mask &= ~(1 << event_type);
if (!QTAILQ_EMPTY(&n->aer_queue)) {
nvme_process_aers(n);
QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) {
if (event->result.event_type == event_type) {
QTAILQ_REMOVE(&n->aer_queue, event, entry);
n->aer_queued--;
g_free(event);
}
}
}
@ -2646,6 +2657,7 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req)
uint64_t slba = le64_to_cpu(rw->slba);
uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
size_t len = nvme_l2b(ns, nlb);
size_t data_len = len;
int64_t offset = nvme_l2b(ns, slba);
uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
uint32_t reftag = le32_to_cpu(rw->reftag);
@ -2665,7 +2677,11 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req)
}
}
if (len > n->page_size << n->params.vsl) {
if (nvme_ns_ext(ns) && !(NVME_ID_CTRL_CTRATT_MEM(n->id_ctrl.ctratt))) {
data_len += nvme_m2b(ns, nlb);
}
if (data_len > (n->page_size << n->params.vsl)) {
return NVME_INVALID_FIELD | NVME_DNR;
}
@ -3406,7 +3422,11 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
len += nvme_m2b(ns, nlb);
}
status = nvme_check_mdts(n, len);
if (NVME_ID_CTRL_CTRATT_MEM(n->id_ctrl.ctratt)) {
status = nvme_check_mdts(n, data_len);
} else {
status = nvme_check_mdts(n, len);
}
if (status) {
return status;
}
@ -3583,7 +3603,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
BlockBackend *blk = ns->blkconf.blk;
uint16_t status;
if (nvme_ns_ext(ns)) {
if (nvme_ns_ext(ns) && !(NVME_ID_CTRL_CTRATT_MEM(n->id_ctrl.ctratt))) {
mapped_size += nvme_m2b(ns, nlb);
if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
@ -3695,7 +3715,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
BlockBackend *blk = ns->blkconf.blk;
uint16_t status;
if (nvme_ns_ext(ns)) {
if (nvme_ns_ext(ns) && !(NVME_ID_CTRL_CTRATT_MEM(n->id_ctrl.ctratt))) {
mapped_size += nvme_m2b(ns, nlb);
if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
@ -6293,8 +6313,10 @@ defaults:
if (ret) {
return ret;
}
goto out;
break;
case NVME_WRITE_ATOMICITY:
result = n->dn;
break;
default:
result = nvme_feature_default[fid];
@ -6378,6 +6400,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
uint8_t save = NVME_SETFEAT_SAVE(dw10);
uint16_t status;
int i;
NvmeIdCtrl *id = &n->id_ctrl;
NvmeAtomic *atomic = &n->atomic;
trace_pci_nvme_setfeat(nvme_cid(req), nsid, fid, save, dw11);
@ -6530,6 +6554,22 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
return NVME_CMD_SEQ_ERROR | NVME_DNR;
case NVME_FDP_EVENTS:
return nvme_set_feature_fdp_events(n, ns, req);
case NVME_WRITE_ATOMICITY:
n->dn = 0x1 & dw11;
if (n->dn) {
atomic->atomic_max_write_size = le16_to_cpu(id->awupf) + 1;
} else {
atomic->atomic_max_write_size = le16_to_cpu(id->awun) + 1;
}
if (atomic->atomic_max_write_size == 1) {
atomic->atomic_writes = 0;
} else {
atomic->atomic_writes = 1;
}
break;
default:
return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR;
}
@ -7227,6 +7267,81 @@ static void nvme_update_sq_tail(NvmeSQueue *sq)
trace_pci_nvme_update_sq_tail(sq->sqid, sq->tail);
}
#define NVME_ATOMIC_NO_START 0
#define NVME_ATOMIC_START_ATOMIC 1
#define NVME_ATOMIC_START_NONATOMIC 2
/*
 * Decide whether an I/O command may be started given the currently
 * outstanding requests on all I/O submission queues.
 *
 * Returns NVME_ATOMIC_START_ATOMIC when the command may start and must be
 * tracked as an atomic write, NVME_ATOMIC_START_NONATOMIC when it may start
 * untracked, or NVME_ATOMIC_NO_START when it overlaps a conflicting
 * in-flight request and must be retried later.
 */
static int nvme_atomic_write_check(NvmeCtrl *n, NvmeCmd *cmd,
    NvmeAtomic *atomic)
{
    NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb);
    uint64_t elba = slba + nlb;
    bool cmd_atomic_wr = true;
    int i;

    /*
     * Reads are never tracked as atomic writes; writes larger than the
     * currently effective atomic write size execute non-atomically.
     *
     * Fix: compare the byte-swapped block count (nlb, converted above via
     * le16_to_cpu) rather than the raw little-endian rw->nlb, which is
     * wrong on big-endian hosts.
     */
    if ((cmd->opcode == NVME_CMD_READ) || ((cmd->opcode == NVME_CMD_WRITE) &&
        ((nlb + 1) > atomic->atomic_max_write_size))) {
        cmd_atomic_wr = false;
    }

    /*
     * Walk the queues to see if there are any atomic conflicts.
     */
    for (i = 1; i < n->params.max_ioqpairs + 1; i++) {
        NvmeSQueue *sq;
        NvmeRequest *req;
        NvmeRwCmd *req_rw;
        uint64_t req_slba;
        uint32_t req_nlb;
        uint64_t req_elba;

        sq = n->sq[i];
        if (!sq) {
            continue;
        }

        /*
         * Walk all the requests on a given queue.
         */
        QTAILQ_FOREACH(req, &sq->out_req_list, entry) {
            req_rw = (NvmeRwCmd *)&req->cmd;

            /* Only reads/writes on the same namespace can conflict. */
            if (((req_rw->opcode == NVME_CMD_WRITE) ||
                 (req_rw->opcode == NVME_CMD_READ)) &&
                (cmd->nsid == req->ns->params.nsid)) {
                req_slba = le64_to_cpu(req_rw->slba);
                req_nlb = (uint32_t)le16_to_cpu(req_rw->nlb);
                req_elba = req_slba + req_nlb;

                if (cmd_atomic_wr) {
                    /* An atomic write must not overlap any in-flight I/O. */
                    if ((elba >= req_slba) && (slba <= req_elba)) {
                        return NVME_ATOMIC_NO_START;
                    }
                } else {
                    /* A non-atomic I/O must not overlap an atomic write. */
                    if (req->atomic_write && ((elba >= req_slba) &&
                        (slba <= req_elba))) {
                        return NVME_ATOMIC_NO_START;
                    }
                }
            }
        }
    }

    if (cmd_atomic_wr) {
        return NVME_ATOMIC_START_ATOMIC;
    }

    return NVME_ATOMIC_START_NONATOMIC;
}
/*
 * Return the controller's atomic write state when atomic writes are
 * enabled, or NULL otherwise. The command argument is currently not
 * inspected.
 */
static NvmeAtomic *nvme_get_atomic(NvmeCtrl *n, NvmeCmd *cmd)
{
    return n->atomic.atomic_writes ? &n->atomic : NULL;
}
static void nvme_process_sq(void *opaque)
{
NvmeSQueue *sq = opaque;
@ -7243,6 +7358,9 @@ static void nvme_process_sq(void *opaque)
}
while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
NvmeAtomic *atomic;
bool cmd_is_atomic;
addr = sq->dma_addr + (sq->head << NVME_SQES);
if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
trace_pci_nvme_err_addr_read(addr);
@ -7250,6 +7368,26 @@ static void nvme_process_sq(void *opaque)
stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
break;
}
atomic = nvme_get_atomic(n, &cmd);
cmd_is_atomic = false;
if (sq->sqid && atomic) {
int ret;
ret = nvme_atomic_write_check(n, &cmd, atomic);
switch (ret) {
case NVME_ATOMIC_NO_START:
qemu_bh_schedule(sq->bh);
return;
case NVME_ATOMIC_START_ATOMIC:
cmd_is_atomic = true;
break;
case NVME_ATOMIC_START_NONATOMIC:
default:
break;
}
}
nvme_inc_sq_head(sq);
req = QTAILQ_FIRST(&sq->req_list);
@ -7259,6 +7397,10 @@ static void nvme_process_sq(void *opaque)
req->cqe.cid = cmd.cid;
memcpy(&req->cmd, &cmd, sizeof(NvmeCmd));
if (sq->sqid && atomic) {
req->atomic_write = cmd_is_atomic;
}
status = sq->sqid ? nvme_io_cmd(n, req) :
nvme_admin_cmd(n, req);
if (status != NVME_NO_COMPLETE) {
@ -7362,6 +7504,8 @@ static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst)
n->outstanding_aers = 0;
n->qs_created = false;
n->dn = n->params.atomic_dn; /* Set Disable Normal */
nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize);
if (pci_is_vf(pci_dev)) {
@ -8138,6 +8282,8 @@ static void nvme_init_state(NvmeCtrl *n)
NvmeSecCtrlEntry *list = n->sec_ctrl_list;
NvmeSecCtrlEntry *sctrl;
PCIDevice *pci = PCI_DEVICE(n);
NvmeAtomic *atomic = &n->atomic;
NvmeIdCtrl *id = &n->id_ctrl;
uint8_t max_vfs;
int i;
@ -8195,6 +8341,29 @@ static void nvme_init_state(NvmeCtrl *n)
cpu_to_le16(n->params.sriov_max_vi_per_vf) :
cap->vifrt / MAX(max_vfs, 1);
}
/* Atomic Write */
id->awun = cpu_to_le16(n->params.atomic_awun);
id->awupf = cpu_to_le16(n->params.atomic_awupf);
n->dn = n->params.atomic_dn;
if (id->awun || id->awupf) {
if (id->awupf > id->awun) {
id->awupf = 0;
}
if (n->dn) {
atomic->atomic_max_write_size = id->awupf + 1;
} else {
atomic->atomic_max_write_size = id->awun + 1;
}
if (atomic->atomic_max_write_size == 1) {
atomic->atomic_writes = 0;
} else {
atomic->atomic_writes = 1;
}
}
}
static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
@ -8476,7 +8645,11 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
id->cntlid = cpu_to_le16(n->cntlid);
id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR);
ctratt = NVME_CTRATT_ELBAS;
if (n->params.ctratt.mem) {
ctratt |= NVME_CTRATT_MEM;
}
id->rab = 6;
@ -8536,7 +8709,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0 | NVME_OCFS_COPY_FORMAT_1 |
NVME_OCFS_COPY_FORMAT_2 | NVME_OCFS_COPY_FORMAT_3);
id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN);
id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN |
NVME_CTRL_SGLS_MPTR_SGL);
nvme_init_subnqn(n);
@ -8734,6 +8908,10 @@ static Property nvme_props[] = {
false),
DEFINE_PROP_UINT16("mqes", NvmeCtrl, params.mqes, 0x7ff),
DEFINE_PROP_UINT16("spdm_port", PCIDevice, spdm_port, 0),
DEFINE_PROP_BOOL("ctratt.mem", NvmeCtrl, params.ctratt.mem, false),
DEFINE_PROP_BOOL("atomic.dn", NvmeCtrl, params.atomic_dn, 0),
DEFINE_PROP_UINT16("atomic.awun", NvmeCtrl, params.atomic_awun, 0),
DEFINE_PROP_UINT16("atomic.awupf", NvmeCtrl, params.atomic_awupf, 0),
DEFINE_PROP_END_OF_LIST(),
};

View File

@ -220,6 +220,11 @@ typedef struct NvmeNamespaceParams {
} fdp;
} NvmeNamespaceParams;
/*
 * Per-controller atomic write state, derived from the AWUN/AWUPF identify
 * fields and the Disable Normal (DN) setting (see the Write Atomicity
 * feature handling in nvme_set_feature and nvme_init_state).
 */
typedef struct NvmeAtomic {
uint32_t atomic_max_write_size; /* largest write, in logical blocks, honored atomically */
bool atomic_writes; /* true when atomic write tracking is active */
} NvmeAtomic;
typedef struct NvmeNamespace {
DeviceState parent_obj;
BlockConf blkconf;
@ -421,6 +426,7 @@ typedef struct NvmeRequest {
NvmeCmd cmd;
BlockAcctCookie acct;
NvmeSg sg;
bool atomic_write;
QTAILQ_ENTRY(NvmeRequest)entry;
} NvmeRequest;
@ -538,6 +544,14 @@ typedef struct NvmeParams {
uint32_t sriov_max_vq_per_vf;
uint32_t sriov_max_vi_per_vf;
bool msix_exclusive_bar;
struct {
bool mem;
} ctratt;
uint16_t atomic_awun;
uint16_t atomic_awupf;
bool atomic_dn;
} NvmeParams;
typedef struct NvmeCtrl {
@ -619,6 +633,8 @@ typedef struct NvmeCtrl {
uint16_t vqrfap;
uint16_t virfap;
} next_pri_ctrl_cap; /* These override pri_ctrl_cap after reset */
uint32_t dn; /* Disable Normal */
NvmeAtomic atomic;
} NvmeCtrl;
typedef enum NvmeResetType {

View File

@ -1182,6 +1182,7 @@ enum NvmeIdCtrlOaes {
enum NvmeIdCtrlCtratt {
NVME_CTRATT_ENDGRPS = 1 << 4,
NVME_CTRATT_ELBAS = 1 << 15,
NVME_CTRATT_MEM = 1 << 16,
NVME_CTRATT_FDPS = 1 << 19,
};
@ -1285,6 +1286,8 @@ enum NvmeNsAttachmentOperation {
#define NVME_ERR_REC_TLER(err_rec) (err_rec & 0xffff)
#define NVME_ERR_REC_DULBE(err_rec) (err_rec & 0x10000)
#define NVME_ID_CTRL_CTRATT_MEM(ctratt) (ctratt & NVME_CTRATT_MEM)
enum NvmeFeatureIds {
NVME_ARBITRATION = 0x1,
NVME_POWER_MANAGEMENT = 0x2,