hw/block/nvme: try to deal with the iov/qsg duality
Introduce NvmeSg and try to deal with that pesky qsg/iov duality that
haunts all the memory-related functions.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
parent 569dbe19c4
commit f80a1c331a
hw/block/nvme.c | 191
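For orientation, here is a minimal sketch (not part of the patch) of how a request is expected to flow through the new NvmeSg helpers introduced by the diff below. The function name example_map_and_read and its argument list are illustrative only; the real call sites are nvme_map_prp()/nvme_map_sgl() and nvme_read()/nvme_do_write().

/* Illustrative sketch only -- not part of this commit. It strings together
 * the helpers added below to show the intended call pattern. */
static uint16_t example_map_and_read(NvmeCtrl *n, BlockBackend *blk,
                                     int64_t offset, hwaddr addr, size_t len,
                                     NvmeRequest *req)
{
    uint16_t status;

    /* Choose the DMA (QEMUSGList) or CMB/PMR (QEMUIOVector) representation
     * once, up front, based on where the address points. */
    nvme_sg_init(n, &req->sg, nvme_addr_is_dma(n, addr));

    /* nvme_map_addr() now takes a single NvmeSg instead of a qsg/iov pair. */
    status = nvme_map_addr(n, &req->sg, addr, len);
    if (status) {
        nvme_sg_unmap(&req->sg);
        return status;
    }

    /* nvme_blk_read() dispatches on NVME_SG_DMA internally. */
    nvme_blk_read(blk, offset, nvme_rw_cb, req);

    return NVME_NO_COMPLETE;
}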
@@ -441,15 +441,31 @@ static void nvme_req_clear(NvmeRequest *req)
     req->status = NVME_SUCCESS;
 }
 
-static void nvme_req_exit(NvmeRequest *req)
+static inline void nvme_sg_init(NvmeCtrl *n, NvmeSg *sg, bool dma)
 {
-    if (req->qsg.sg) {
-        qemu_sglist_destroy(&req->qsg);
+    if (dma) {
+        pci_dma_sglist_init(&sg->qsg, &n->parent_obj, 0);
+        sg->flags = NVME_SG_DMA;
+    } else {
+        qemu_iovec_init(&sg->iov, 0);
     }
 
-    if (req->iov.iov) {
-        qemu_iovec_destroy(&req->iov);
+    sg->flags |= NVME_SG_ALLOC;
+}
+
+static inline void nvme_sg_unmap(NvmeSg *sg)
+{
+    if (!(sg->flags & NVME_SG_ALLOC)) {
+        return;
+    }
+
+    if (sg->flags & NVME_SG_DMA) {
+        qemu_sglist_destroy(&sg->qsg);
+    } else {
+        qemu_iovec_destroy(&sg->iov);
     }
+
+    memset(sg, 0x0, sizeof(*sg));
 }
 
 static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
@@ -486,8 +502,7 @@ static uint16_t nvme_map_addr_pmr(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
     return NVME_SUCCESS;
 }
 
-static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
-                              hwaddr addr, size_t len)
+static uint16_t nvme_map_addr(NvmeCtrl *n, NvmeSg *sg, hwaddr addr, size_t len)
 {
     bool cmb = false, pmr = false;
 
@@ -504,38 +519,31 @@ static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
     }
 
     if (cmb || pmr) {
-        if (qsg && qsg->sg) {
+        if (sg->flags & NVME_SG_DMA) {
             return NVME_INVALID_USE_OF_CMB | NVME_DNR;
         }
 
-        assert(iov);
-
-        if (!iov->iov) {
-            qemu_iovec_init(iov, 1);
-        }
-
         if (cmb) {
-            return nvme_map_addr_cmb(n, iov, addr, len);
+            return nvme_map_addr_cmb(n, &sg->iov, addr, len);
         } else {
-            return nvme_map_addr_pmr(n, iov, addr, len);
+            return nvme_map_addr_pmr(n, &sg->iov, addr, len);
         }
     }
 
-    if (iov && iov->iov) {
+    if (!(sg->flags & NVME_SG_DMA)) {
         return NVME_INVALID_USE_OF_CMB | NVME_DNR;
     }
 
-    assert(qsg);
-
-    if (!qsg->sg) {
-        pci_dma_sglist_init(qsg, &n->parent_obj, 1);
-    }
-
-    qemu_sglist_add(qsg, addr, len);
+    qemu_sglist_add(&sg->qsg, addr, len);
 
     return NVME_SUCCESS;
 }
 
+static inline bool nvme_addr_is_dma(NvmeCtrl *n, hwaddr addr)
+{
+    return !(nvme_addr_is_cmb(n, addr) || nvme_addr_is_pmr(n, addr));
+}
+
 static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
                              uint32_t len, NvmeRequest *req)
 {
@@ -545,20 +553,13 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
     uint16_t status;
     int ret;
 
-    QEMUSGList *qsg = &req->qsg;
-    QEMUIOVector *iov = &req->iov;
-
     trace_pci_nvme_map_prp(trans_len, len, prp1, prp2, num_prps);
 
-    if (nvme_addr_is_cmb(n, prp1) || (nvme_addr_is_pmr(n, prp1))) {
-        qemu_iovec_init(iov, num_prps);
-    } else {
-        pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
-    }
+    nvme_sg_init(n, &req->sg, nvme_addr_is_dma(n, prp1));
 
-    status = nvme_map_addr(n, qsg, iov, prp1, trans_len);
+    status = nvme_map_addr(n, &req->sg, prp1, trans_len);
     if (status) {
-        return status;
+        goto unmap;
     }
 
     len -= trans_len;
@@ -573,7 +574,8 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
         ret = nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
         if (ret) {
             trace_pci_nvme_err_addr_read(prp2);
-            return NVME_DATA_TRAS_ERROR;
+            status = NVME_DATA_TRAS_ERROR;
+            goto unmap;
         }
         while (len != 0) {
             uint64_t prp_ent = le64_to_cpu(prp_list[i]);
@@ -581,7 +583,8 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
             if (i == n->max_prp_ents - 1 && len > n->page_size) {
                 if (unlikely(prp_ent & (n->page_size - 1))) {
                     trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
-                    return NVME_INVALID_PRP_OFFSET | NVME_DNR;
+                    status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
+                    goto unmap;
                 }
 
                 i = 0;
@@ -591,20 +594,22 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
                                      prp_trans);
                 if (ret) {
                     trace_pci_nvme_err_addr_read(prp_ent);
-                    return NVME_DATA_TRAS_ERROR;
+                    status = NVME_DATA_TRAS_ERROR;
+                    goto unmap;
                 }
                 prp_ent = le64_to_cpu(prp_list[i]);
             }
 
             if (unlikely(prp_ent & (n->page_size - 1))) {
                 trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
-                return NVME_INVALID_PRP_OFFSET | NVME_DNR;
+                status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
+                goto unmap;
             }
 
             trans_len = MIN(len, n->page_size);
-            status = nvme_map_addr(n, qsg, iov, prp_ent, trans_len);
+            status = nvme_map_addr(n, &req->sg, prp_ent, trans_len);
             if (status) {
-                return status;
+                goto unmap;
             }
 
             len -= trans_len;
@@ -613,24 +618,28 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
         } else {
             if (unlikely(prp2 & (n->page_size - 1))) {
                 trace_pci_nvme_err_invalid_prp2_align(prp2);
-                return NVME_INVALID_PRP_OFFSET | NVME_DNR;
+                status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
+                goto unmap;
             }
-            status = nvme_map_addr(n, qsg, iov, prp2, len);
+            status = nvme_map_addr(n, &req->sg, prp2, len);
             if (status) {
-                return status;
+                goto unmap;
             }
         }
     }
 
     return NVME_SUCCESS;
+
+unmap:
+    nvme_sg_unmap(&req->sg);
+    return status;
 }
 
 /*
  * Map 'nsgld' data descriptors from 'segment'. The function will subtract the
  * number of bytes mapped in len.
  */
-static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList *qsg,
-                                  QEMUIOVector *iov,
+static uint16_t nvme_map_sgl_data(NvmeCtrl *n, NvmeSg *sg,
                                   NvmeSglDescriptor *segment, uint64_t nsgld,
                                   size_t *len, NvmeRequest *req)
 {
@@ -688,7 +697,7 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList *qsg,
             return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
         }
 
-        status = nvme_map_addr(n, qsg, iov, addr, trans_len);
+        status = nvme_map_addr(n, sg, addr, trans_len);
         if (status) {
             return status;
         }
@@ -700,9 +709,8 @@ next:
     return NVME_SUCCESS;
 }
 
-static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
-                             NvmeSglDescriptor sgl, size_t len,
-                             NvmeRequest *req)
+static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl,
+                             size_t len, NvmeRequest *req)
 {
     /*
      * Read the segment in chunks of 256 descriptors (one 4k page) to avoid
@@ -725,12 +733,14 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
 
     trace_pci_nvme_map_sgl(nvme_cid(req), NVME_SGL_TYPE(sgl.type), len);
 
+    nvme_sg_init(n, sg, nvme_addr_is_dma(n, addr));
+
     /*
      * If the entire transfer can be described with a single data block it can
      * be mapped directly.
     */
     if (NVME_SGL_TYPE(sgl.type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) {
-        status = nvme_map_sgl_data(n, qsg, iov, sgld, 1, &len, req);
+        status = nvme_map_sgl_data(n, sg, sgld, 1, &len, req);
         if (status) {
             goto unmap;
         }
@@ -768,7 +778,7 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
             goto unmap;
         }
 
-        status = nvme_map_sgl_data(n, qsg, iov, segment, SEG_CHUNK_SIZE,
+        status = nvme_map_sgl_data(n, sg, segment, SEG_CHUNK_SIZE,
                                    &len, req);
         if (status) {
             goto unmap;
@@ -795,7 +805,7 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
         switch (NVME_SGL_TYPE(last_sgld->type)) {
         case NVME_SGL_DESCR_TYPE_DATA_BLOCK:
         case NVME_SGL_DESCR_TYPE_BIT_BUCKET:
-            status = nvme_map_sgl_data(n, qsg, iov, segment, nsgld, &len, req);
+            status = nvme_map_sgl_data(n, sg, segment, nsgld, &len, req);
             if (status) {
                 goto unmap;
             }
@@ -822,7 +832,7 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
          * Do not map the last descriptor; it will be a Segment or Last Segment
          * descriptor and is handled by the next iteration.
          */
-        status = nvme_map_sgl_data(n, qsg, iov, segment, nsgld - 1, &len, req);
+        status = nvme_map_sgl_data(n, sg, segment, nsgld - 1, &len, req);
         if (status) {
             goto unmap;
         }
@@ -838,14 +848,7 @@ out:
     return NVME_SUCCESS;
 
 unmap:
-    if (iov->iov) {
-        qemu_iovec_destroy(iov);
-    }
-
-    if (qsg->sg) {
-        qemu_sglist_destroy(qsg);
-    }
-
+    nvme_sg_unmap(sg);
     return status;
 }
 
@@ -866,8 +869,7 @@ static uint16_t nvme_map_dptr(NvmeCtrl *n, size_t len, NvmeRequest *req)
             return NVME_INVALID_FIELD | NVME_DNR;
         }
 
-        return nvme_map_sgl(n, &req->qsg, &req->iov, req->cmd.dptr.sgl, len,
-                            req);
+        return nvme_map_sgl(n, &req->sg, req->cmd.dptr.sgl, len, req);
     default:
         return NVME_INVALID_FIELD;
     }
@@ -883,16 +885,13 @@ static uint16_t nvme_dma(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
         return status;
     }
 
-    /* assert that only one of qsg and iov carries data */
-    assert((req->qsg.nsg > 0) != (req->iov.niov > 0));
-
-    if (req->qsg.nsg > 0) {
+    if (req->sg.flags & NVME_SG_DMA) {
         uint64_t residual;
 
         if (dir == DMA_DIRECTION_TO_DEVICE) {
-            residual = dma_buf_write(ptr, len, &req->qsg);
+            residual = dma_buf_write(ptr, len, &req->sg.qsg);
         } else {
-            residual = dma_buf_read(ptr, len, &req->qsg);
+            residual = dma_buf_read(ptr, len, &req->sg.qsg);
         }
 
         if (unlikely(residual)) {
@@ -903,9 +902,9 @@ static uint16_t nvme_dma(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
         size_t bytes;
 
         if (dir == DMA_DIRECTION_TO_DEVICE) {
-            bytes = qemu_iovec_to_buf(&req->iov, 0, ptr, len);
+            bytes = qemu_iovec_to_buf(&req->sg.iov, 0, ptr, len);
         } else {
-            bytes = qemu_iovec_from_buf(&req->iov, 0, ptr, len);
+            bytes = qemu_iovec_from_buf(&req->sg.iov, 0, ptr, len);
         }
 
         if (unlikely(bytes != len)) {
@@ -917,6 +916,32 @@ static uint16_t nvme_dma(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
     return status;
 }
 
+static inline void nvme_blk_read(BlockBackend *blk, int64_t offset,
+                                 BlockCompletionFunc *cb, NvmeRequest *req)
+{
+    assert(req->sg.flags & NVME_SG_ALLOC);
+
+    if (req->sg.flags & NVME_SG_DMA) {
+        req->aiocb = dma_blk_read(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE,
+                                  cb, req);
+    } else {
+        req->aiocb = blk_aio_preadv(blk, offset, &req->sg.iov, 0, cb, req);
+    }
+}
+
+static inline void nvme_blk_write(BlockBackend *blk, int64_t offset,
+                                  BlockCompletionFunc *cb, NvmeRequest *req)
+{
+    assert(req->sg.flags & NVME_SG_ALLOC);
+
+    if (req->sg.flags & NVME_SG_DMA) {
+        req->aiocb = dma_blk_write(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE,
+                                   cb, req);
+    } else {
+        req->aiocb = blk_aio_pwritev(blk, offset, &req->sg.iov, 0, cb, req);
+    }
+}
+
 static void nvme_post_cqes(void *opaque)
 {
     NvmeCQueue *cq = opaque;
@@ -947,7 +972,7 @@ static void nvme_post_cqes(void *opaque)
         }
         QTAILQ_REMOVE(&cq->req_list, req, entry);
         nvme_inc_cq_tail(cq);
-        nvme_req_exit(req);
+        nvme_sg_unmap(&req->sg);
         QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
     }
     if (cq->tail != cq->head) {
@@ -1644,14 +1669,14 @@ static void nvme_copy_in_complete(NvmeRequest *req)
         zone->w_ptr += ctx->nlb;
     }
 
-    qemu_iovec_init(&req->iov, 1);
-    qemu_iovec_add(&req->iov, ctx->bounce, nvme_l2b(ns, ctx->nlb));
+    qemu_iovec_init(&req->sg.iov, 1);
+    qemu_iovec_add(&req->sg.iov, ctx->bounce, nvme_l2b(ns, ctx->nlb));
 
     block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0,
                      BLOCK_ACCT_WRITE);
 
     req->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_l2b(ns, sdlba),
-                                 &req->iov, 0, nvme_copy_cb, req);
+                                 &req->sg.iov, 0, nvme_copy_cb, req);
 
     return;
 
@@ -2087,13 +2112,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
 
     block_acct_start(blk_get_stats(blk), &req->acct, data_size,
                      BLOCK_ACCT_READ);
-    if (req->qsg.sg) {
-        req->aiocb = dma_blk_read(blk, &req->qsg, data_offset,
-                                  BDRV_SECTOR_SIZE, nvme_rw_cb, req);
-    } else {
-        req->aiocb = blk_aio_preadv(blk, data_offset, &req->iov, 0,
-                                    nvme_rw_cb, req);
-    }
+    nvme_blk_read(blk, data_offset, nvme_rw_cb, req);
     return NVME_NO_COMPLETE;
 
 invalid:
@@ -2173,13 +2192,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
 
         block_acct_start(blk_get_stats(blk), &req->acct, data_size,
                          BLOCK_ACCT_WRITE);
-        if (req->qsg.sg) {
-            req->aiocb = dma_blk_write(blk, &req->qsg, data_offset,
-                                       BDRV_SECTOR_SIZE, nvme_rw_cb, req);
-        } else {
-            req->aiocb = blk_aio_pwritev(blk, data_offset, &req->iov, 0,
-                                         nvme_rw_cb, req);
-        }
+        nvme_blk_write(blk, data_offset, nvme_rw_cb, req);
     } else {
         req->aiocb = blk_aio_pwrite_zeroes(blk, data_offset, data_size,
                                            BDRV_REQ_MAY_UNMAP, nvme_rw_cb,
hw/block/nvme.h
@@ -29,6 +29,20 @@ typedef struct NvmeAsyncEvent {
     NvmeAerResult result;
 } NvmeAsyncEvent;
 
+enum {
+    NVME_SG_ALLOC = 1 << 0,
+    NVME_SG_DMA = 1 << 1,
+};
+
+typedef struct NvmeSg {
+    int flags;
+
+    union {
+        QEMUSGList qsg;
+        QEMUIOVector iov;
+    };
+} NvmeSg;
+
 typedef struct NvmeRequest {
     struct NvmeSQueue *sq;
     struct NvmeNamespace *ns;
@@ -38,8 +52,7 @@ typedef struct NvmeRequest {
     NvmeCqe cqe;
     NvmeCmd cmd;
     BlockAcctCookie acct;
-    QEMUSGList qsg;
-    QEMUIOVector iov;
+    NvmeSg sg;
     QTAILQ_ENTRY(NvmeRequest)entry;
 } NvmeRequest;