hw/block/nvme: add broadcast nsid support for flush command

Add support for using the broadcast nsid to issue a flush on all
namespaces through a single command.

Signed-off-by: Gollu Appalanaidu <anaidu.gollu@samsung.com>
Reviewed-by: Klaus Jensen <k.jensen@samsung.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
This commit is contained in:
Gollu Appalanaidu 2021-01-25 15:09:24 +05:30 committed by Klaus Jensen
parent 594a2b742b
commit c94973288c
3 changed files with 127 additions and 7 deletions

View File

@ -1466,6 +1466,41 @@ static void nvme_rw_cb(void *opaque, int ret)
nvme_enqueue_req_completion(nvme_cq(req), req); nvme_enqueue_req_completion(nvme_cq(req), req);
} }
/*
 * Per-namespace context for one flush issued by a broadcast Flush command.
 * nvme_flush() allocates one of these for every namespace it flushes and
 * passes it as the opaque argument to nvme_aio_flush_cb(), which frees it.
 */
struct nvme_aio_flush_ctx {
/* request that the per-namespace flush belongs to */
NvmeRequest *req;
/* namespace whose block backend is being flushed */
NvmeNamespace *ns;
/* accounting cookie started at submission and finished in the callback */
BlockAcctCookie acct;
};
/*
 * Completion callback for a single per-namespace flush issued on behalf of a
 * broadcast Flush command.
 *
 * @opaque: the struct nvme_aio_flush_ctx allocated for this flush; it is
 *          freed here.
 * @ret:    result of the flush; zero is treated as success, anything else
 *          as failure.
 *
 * The count of outstanding flushes is kept in req->opaque (reinterpreted as a
 * uintptr_t). The request is queued for completion only by the callback that
 * brings that count down to zero.
 */
static void nvme_aio_flush_cb(void *opaque, int ret)
{
    struct nvme_aio_flush_ctx *flush = opaque;
    NvmeRequest *req = flush->req;
    uintptr_t *pending = (uintptr_t *)&req->opaque;
    BlockBackend *backend = flush->ns->blkconf.blk;
    BlockAcctStats *acct_stats = blk_get_stats(backend);

    trace_pci_nvme_aio_flush_cb(nvme_cid(req), blk_name(backend));

    if (ret) {
        /* record the failure in the stats and on the request */
        block_acct_failed(acct_stats, &flush->acct);
        nvme_aio_err(req, ret);
    } else {
        block_acct_done(acct_stats, &flush->acct);
    }

    *pending -= 1;
    g_free(flush);

    /* only the last outstanding flush completes the request */
    if (*pending == 0) {
        nvme_enqueue_req_completion(nvme_cq(req), req);
    }
}
static void nvme_aio_discard_cb(void *opaque, int ret) static void nvme_aio_discard_cb(void *opaque, int ret)
{ {
NvmeRequest *req = opaque; NvmeRequest *req = opaque;
@ -1949,10 +1984,56 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
/*
 * Handle the Flush command.
 *
 * For a specific NSID, a single flush is submitted on that namespace's block
 * backend and completed through nvme_rw_cb(). For the broadcast NSID, one
 * flush is submitted per existing namespace, each completed through
 * nvme_aio_flush_cb().
 *
 * Returns NVME_NO_COMPLETE while flushes are outstanding,
 * NVME_INVALID_FIELD | NVME_DNR if the given NSID has no namespace, or
 * req->status if a broadcast flush left nothing outstanding.
 */
static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
{
    uint32_t nsid = le32_to_cpu(req->cmd.nsid);
    /* req->opaque doubles as the counter of outstanding flushes */
    uintptr_t *pending = (uintptr_t *)&req->opaque;

    trace_pci_nvme_flush(nvme_cid(req), nsid);

    if (nsid != NVME_NSID_BROADCAST) {
        BlockBackend *blk;

        req->ns = nvme_ns(n, nsid);
        if (unlikely(!req->ns)) {
            return NVME_INVALID_FIELD | NVME_DNR;
        }

        blk = req->ns->blkconf.blk;
        block_acct_start(blk_get_stats(blk), &req->acct, 0, BLOCK_ACCT_FLUSH);
        req->aiocb = blk_aio_flush(blk, nvme_rw_cb, req);

        return NVME_NO_COMPLETE;
    }

    /*
     * Start the counter at 1 (see comment in nvme_dsm) so that it cannot
     * reach zero in a callback before every flush has been submitted.
     */
    *pending = 1;

    for (int nsidx = 1; nsidx <= n->num_namespaces; nsidx++) {
        NvmeNamespace *ns = nvme_ns(n, nsidx);
        struct nvme_aio_flush_ctx *ctx;

        if (ns) {
            ctx = g_new(struct nvme_aio_flush_ctx, 1);
            ctx->req = req;
            ctx->ns = ns;

            *pending += 1;

            block_acct_start(blk_get_stats(ns->blkconf.blk), &ctx->acct, 0,
                             BLOCK_ACCT_FLUSH);
            blk_aio_flush(ns->blkconf.blk, nvme_aio_flush_cb, ctx);
        }
    }

    /* drop the extra count taken by the 1-initialization */
    *pending -= 1;

    return *pending ? NVME_NO_COMPLETE : req->status;
}
static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
@ -2608,6 +2689,29 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
return NVME_INVALID_NSID | NVME_DNR; return NVME_INVALID_NSID | NVME_DNR;
} }
/*
* In the base NVM command set, Flush may apply to all namespaces
* (indicated by NSID being set to 0xFFFFFFFF). But if that feature is used
* along with TP 4056 (Namespace Types), its semantics become ambiguous.
*
* If NSID is indeed set to 0xFFFFFFFF, we simply cannot associate the
* opcode with a specific command since we cannot determine a unique I/O
* command set. Opcode 0x0 could mean something other than a
* Flush-equivalent operation in another command set. If it DOES have
* completely different semantics there, does an NSID of 0xFFFFFFFF then
* mean "for all namespaces, apply whatever command set specific command
* that uses the 0x0 opcode"? Or does it mean "for all namespaces, apply
* whatever command that uses the 0x0 opcode if, and only if, it allows
* NSID to be 0xFFFFFFFF"?
*
* Anyway (and luckily), for now, we do not care about this since the
* device only supports namespace types that include the NVM Flush command
* (NVM and Zoned), so always do an NVM Flush.
*/
if (req->cmd.opcode == NVME_CMD_FLUSH) {
return nvme_flush(n, req);
}
req->ns = nvme_ns(n, nsid); req->ns = nvme_ns(n, nsid);
if (unlikely(!req->ns)) { if (unlikely(!req->ns)) {
return NVME_INVALID_FIELD | NVME_DNR; return NVME_INVALID_FIELD | NVME_DNR;
@ -2619,8 +2723,6 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
} }
switch (req->cmd.opcode) { switch (req->cmd.opcode) {
case NVME_CMD_FLUSH:
return nvme_flush(n, req);
case NVME_CMD_WRITE_ZEROES: case NVME_CMD_WRITE_ZEROES:
return nvme_write_zeroes(n, req); return nvme_write_zeroes(n, req);
case NVME_CMD_ZONE_APPEND: case NVME_CMD_ZONE_APPEND:
@ -4750,7 +4852,15 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
NVME_ONCS_FEATURES | NVME_ONCS_DSM | NVME_ONCS_FEATURES | NVME_ONCS_DSM |
NVME_ONCS_COMPARE | NVME_ONCS_COPY); NVME_ONCS_COMPARE | NVME_ONCS_COPY);
id->vwc = (0x2 << 1) | 0x1; /*
* NOTE: If this device ever supports a command set that does NOT use 0x0
* as a Flush-equivalent operation, support for the broadcast NSID in Flush
* should probably be removed.
*
* See comment in nvme_io_cmd.
*/
id->vwc = NVME_VWC_NSID_BROADCAST_SUPPORT | NVME_VWC_PRESENT;
id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0); id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0);
id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN | id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN |
NVME_CTRL_SGLS_BITBUCKET); NVME_CTRL_SGLS_BITBUCKET);

View File

@ -40,6 +40,7 @@ pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2,
pci_nvme_map_sgl(uint16_t cid, uint8_t typ, uint64_t len) "cid %"PRIu16" type 0x%"PRIx8" len %"PRIu64"" pci_nvme_map_sgl(uint16_t cid, uint8_t typ, uint64_t len) "cid %"PRIu16" type 0x%"PRIx8" len %"PRIu64""
pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
pci_nvme_flush(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
@ -55,6 +56,7 @@ pci_nvme_compare_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16"" pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16"" pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64"" pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64""
pci_nvme_aio_flush_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""

View File

@ -1062,6 +1062,14 @@ enum NvmeIdCtrlOcfs {
NVME_OCFS_COPY_FORMAT_0 = 1 << 0, NVME_OCFS_COPY_FORMAT_0 = 1 << 0,
}; };
/*
 * Values for the Identify Controller VWC field.
 *
 * Bit 0 is the "present" flag; the NSID_BROADCAST values occupy bits 2:1
 * (each is a 2-bit code shifted left by one). The meaning of each code is
 * taken from the enumerator names here; confirm against the NVMe
 * specification before relying on it.
 */
enum NvmeIdctrlVwc {
    NVME_VWC_PRESENT                   = 0x1, /* bit 0 */
    NVME_VWC_NSID_BROADCAST_NO_SUPPORT = 0x0, /* bits 2:1 = 00b */
    NVME_VWC_NSID_BROADCAST_RESERVED   = 0x2, /* bits 2:1 = 01b */
    NVME_VWC_NSID_BROADCAST_CTRL_SPEC  = 0x4, /* bits 2:1 = 10b */
    NVME_VWC_NSID_BROADCAST_SUPPORT    = 0x6, /* bits 2:1 = 11b */
};
enum NvmeIdCtrlFrmw { enum NvmeIdCtrlFrmw {
NVME_FRMW_SLOT1_RO = 1 << 0, NVME_FRMW_SLOT1_RO = 1 << 0,
}; };