hw/block/nvme: refactor aio submission
This pulls the block layer aio submission/completion code into common functions. For completions, it additionally maps an AIO error to a status code based on the opcode: Unrecovered Read Error for reads, Write Fault for flush, write and write zeroes, and Internal Device Error for anything else.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
This commit is contained in:
parent
e2f79209cd
commit
6a09a3d737
136
hw/block/nvme.c
136
hw/block/nvme.c
@ -614,30 +614,110 @@ static inline uint16_t nvme_check_bounds(NvmeCtrl *n, NvmeNamespace *ns,
|
|||||||
static void nvme_rw_cb(void *opaque, int ret)
|
static void nvme_rw_cb(void *opaque, int ret)
|
||||||
{
|
{
|
||||||
NvmeRequest *req = opaque;
|
NvmeRequest *req = opaque;
|
||||||
NvmeSQueue *sq = req->sq;
|
NvmeCtrl *n = nvme_ctrl(req);
|
||||||
NvmeCtrl *n = sq->ctrl;
|
|
||||||
NvmeCQueue *cq = n->cq[sq->cqid];
|
|
||||||
|
|
||||||
trace_pci_nvme_rw_cb(nvme_cid(req));
|
BlockBackend *blk = n->conf.blk;
|
||||||
|
BlockAcctCookie *acct = &req->acct;
|
||||||
|
BlockAcctStats *stats = blk_get_stats(blk);
|
||||||
|
|
||||||
|
Error *local_err = NULL;
|
||||||
|
|
||||||
|
trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk));
|
||||||
|
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
block_acct_done(blk_get_stats(n->conf.blk), &req->acct);
|
block_acct_done(stats, acct);
|
||||||
req->status = NVME_SUCCESS;
|
req->status = NVME_SUCCESS;
|
||||||
} else {
|
} else {
|
||||||
block_acct_failed(blk_get_stats(n->conf.blk), &req->acct);
|
uint16_t status;
|
||||||
req->status = NVME_INTERNAL_DEV_ERROR;
|
|
||||||
|
block_acct_failed(stats, acct);
|
||||||
|
|
||||||
|
switch (req->cmd.opcode) {
|
||||||
|
case NVME_CMD_READ:
|
||||||
|
status = NVME_UNRECOVERED_READ;
|
||||||
|
break;
|
||||||
|
case NVME_CMD_FLUSH:
|
||||||
|
case NVME_CMD_WRITE:
|
||||||
|
case NVME_CMD_WRITE_ZEROES:
|
||||||
|
status = NVME_WRITE_FAULT;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
status = NVME_INTERNAL_DEV_ERROR;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
trace_pci_nvme_err_aio(nvme_cid(req), strerror(ret), status);
|
||||||
|
|
||||||
|
error_setg_errno(&local_err, -ret, "aio failed");
|
||||||
|
error_report_err(local_err);
|
||||||
|
|
||||||
|
req->status = status;
|
||||||
}
|
}
|
||||||
|
|
||||||
nvme_enqueue_req_completion(cq, req);
|
nvme_enqueue_req_completion(nvme_cq(req), req);
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint16_t nvme_do_aio(BlockBackend *blk, int64_t offset, size_t len,
|
||||||
|
NvmeRequest *req)
|
||||||
|
{
|
||||||
|
BlockAcctCookie *acct = &req->acct;
|
||||||
|
BlockAcctStats *stats = blk_get_stats(blk);
|
||||||
|
|
||||||
|
bool is_write = false;
|
||||||
|
|
||||||
|
trace_pci_nvme_do_aio(nvme_cid(req), req->cmd.opcode,
|
||||||
|
nvme_io_opc_str(req->cmd.opcode), blk_name(blk),
|
||||||
|
offset, len);
|
||||||
|
|
||||||
|
switch (req->cmd.opcode) {
|
||||||
|
case NVME_CMD_FLUSH:
|
||||||
|
block_acct_start(stats, acct, 0, BLOCK_ACCT_FLUSH);
|
||||||
|
req->aiocb = blk_aio_flush(blk, nvme_rw_cb, req);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case NVME_CMD_WRITE_ZEROES:
|
||||||
|
block_acct_start(stats, acct, len, BLOCK_ACCT_WRITE);
|
||||||
|
req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len,
|
||||||
|
BDRV_REQ_MAY_UNMAP, nvme_rw_cb,
|
||||||
|
req);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case NVME_CMD_WRITE:
|
||||||
|
is_write = true;
|
||||||
|
|
||||||
|
/* fallthrough */
|
||||||
|
|
||||||
|
case NVME_CMD_READ:
|
||||||
|
block_acct_start(stats, acct, len,
|
||||||
|
is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
|
||||||
|
|
||||||
|
if (req->qsg.sg) {
|
||||||
|
if (is_write) {
|
||||||
|
req->aiocb = dma_blk_write(blk, &req->qsg, offset,
|
||||||
|
BDRV_SECTOR_SIZE, nvme_rw_cb, req);
|
||||||
|
} else {
|
||||||
|
req->aiocb = dma_blk_read(blk, &req->qsg, offset,
|
||||||
|
BDRV_SECTOR_SIZE, nvme_rw_cb, req);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (is_write) {
|
||||||
|
req->aiocb = blk_aio_pwritev(blk, offset, &req->iov, 0,
|
||||||
|
nvme_rw_cb, req);
|
||||||
|
} else {
|
||||||
|
req->aiocb = blk_aio_preadv(blk, offset, &req->iov, 0,
|
||||||
|
nvme_rw_cb, req);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NVME_NO_COMPLETE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
|
static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
|
||||||
{
|
{
|
||||||
block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
|
return nvme_do_aio(n->conf.blk, 0, 0, req);
|
||||||
BLOCK_ACCT_FLUSH);
|
|
||||||
req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req);
|
|
||||||
|
|
||||||
return NVME_NO_COMPLETE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
|
static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
|
||||||
@ -658,11 +738,7 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
|
return nvme_do_aio(n->conf.blk, offset, count, req);
|
||||||
BLOCK_ACCT_WRITE);
|
|
||||||
req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, offset, count,
|
|
||||||
BDRV_REQ_MAY_UNMAP, nvme_rw_cb, req);
|
|
||||||
return NVME_NO_COMPLETE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
|
static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
|
||||||
@ -674,8 +750,8 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
|
|||||||
|
|
||||||
uint64_t data_size = nvme_l2b(ns, nlb);
|
uint64_t data_size = nvme_l2b(ns, nlb);
|
||||||
uint64_t data_offset = nvme_l2b(ns, slba);
|
uint64_t data_offset = nvme_l2b(ns, slba);
|
||||||
int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
|
enum BlockAcctType acct = req->cmd.opcode == NVME_CMD_WRITE ?
|
||||||
enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
|
BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
|
||||||
uint16_t status;
|
uint16_t status;
|
||||||
|
|
||||||
trace_pci_nvme_rw(nvme_cid(req), nvme_io_opc_str(rw->opcode), nlb,
|
trace_pci_nvme_rw(nvme_cid(req), nvme_io_opc_str(rw->opcode), nlb,
|
||||||
@ -698,25 +774,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
|
|||||||
goto invalid;
|
goto invalid;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (req->qsg.nsg > 0) {
|
return nvme_do_aio(n->conf.blk, data_offset, data_size, req);
|
||||||
block_acct_start(blk_get_stats(n->conf.blk), &req->acct, req->qsg.size,
|
|
||||||
acct);
|
|
||||||
req->aiocb = is_write ?
|
|
||||||
dma_blk_write(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE,
|
|
||||||
nvme_rw_cb, req) :
|
|
||||||
dma_blk_read(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE,
|
|
||||||
nvme_rw_cb, req);
|
|
||||||
} else {
|
|
||||||
block_acct_start(blk_get_stats(n->conf.blk), &req->acct, req->iov.size,
|
|
||||||
acct);
|
|
||||||
req->aiocb = is_write ?
|
|
||||||
blk_aio_pwritev(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb,
|
|
||||||
req) :
|
|
||||||
blk_aio_preadv(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb,
|
|
||||||
req);
|
|
||||||
}
|
|
||||||
|
|
||||||
return NVME_NO_COMPLETE;
|
|
||||||
|
|
||||||
invalid:
|
invalid:
|
||||||
block_acct_invalid(blk_get_stats(n->conf.blk), acct);
|
block_acct_invalid(blk_get_stats(n->conf.blk), acct);
|
||||||
|
@ -171,4 +171,18 @@ static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns)
|
|||||||
return n->ns_size >> nvme_ns_lbads(ns);
|
return n->ns_size >> nvme_ns_lbads(ns);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
|
||||||
|
{
|
||||||
|
NvmeSQueue *sq = req->sq;
|
||||||
|
NvmeCtrl *n = sq->ctrl;
|
||||||
|
|
||||||
|
return n->cq[sq->cqid];
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
|
||||||
|
{
|
||||||
|
NvmeSQueue *sq = req->sq;
|
||||||
|
return sq->ctrl;
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* HW_NVME_H */
|
#endif /* HW_NVME_H */
|
||||||
|
@ -39,8 +39,9 @@ pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2,
|
|||||||
pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
|
pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
|
||||||
pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
|
pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
|
||||||
pci_nvme_rw(uint16_t cid, const char *verb, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" '%s' nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
|
pci_nvme_rw(uint16_t cid, const char *verb, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" '%s' nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
|
||||||
pci_nvme_rw_cb(uint16_t cid) "cid %"PRIu16""
|
pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
|
||||||
pci_nvme_write_zeroes(uint16_t cid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" slba %"PRIu64" nlb %"PRIu32""
|
pci_nvme_write_zeroes(uint16_t cid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" slba %"PRIu64" nlb %"PRIu32""
|
||||||
|
pci_nvme_do_aio(uint16_t cid, uint8_t opc, const char *opname, const char *blkname, int64_t offset, size_t len) "cid %"PRIu16" opc 0x%"PRIx8" opname '%s' blk '%s' offset %"PRId64" len %zu"
|
||||||
pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
|
pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
|
||||||
pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
|
pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
|
||||||
pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
|
pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
|
||||||
@ -89,6 +90,7 @@ pci_nvme_err_mdts(uint16_t cid, size_t len) "cid %"PRIu16" len %zu"
|
|||||||
pci_nvme_err_addr_read(uint64_t addr) "addr 0x%"PRIx64""
|
pci_nvme_err_addr_read(uint64_t addr) "addr 0x%"PRIx64""
|
||||||
pci_nvme_err_addr_write(uint64_t addr) "addr 0x%"PRIx64""
|
pci_nvme_err_addr_write(uint64_t addr) "addr 0x%"PRIx64""
|
||||||
pci_nvme_err_cfs(void) "controller fatal status"
|
pci_nvme_err_cfs(void) "controller fatal status"
|
||||||
|
pci_nvme_err_aio(uint16_t cid, const char *errname, uint16_t status) "cid %"PRIu16" err '%s' status 0x%"PRIx16""
|
||||||
pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
|
pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
|
||||||
pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64""
|
pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64""
|
||||||
pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
|
pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
|
||||||
|
Loading…
Reference in New Issue
Block a user