hw/nvme updates

performance improvements by Jinhao
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* shadow doorbells
* ioeventfd

plus some misc fixes (Darren, Niklas).

-----BEGIN PGP SIGNATURE-----

iQEzBAABCAAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmLRKGwACgkQTeGvMW1P
Deki7Af9Hg0ltW9RyxzUtYB5hwaMpgrHHcViBoLK8mt7wa5hh5luFb1P3/+yltUG
LU/cws93mq3jDy30dKnVa5+xugDmuEy470OxjJPCivLEpV6qpONulp+iHFIKim4N
kPXX8K1R4XVTVvCFFpmub6GUCFZpXRVW9uPAAL96BzaSjEK7K+5H3boJ7HfT5YUY
Tx9LuPQUcIUHViF/4wNU0Sqx15PoOOjHqSnA3EjCDCscqPkbhaoEoyI5Pk+BMxzf
tElNh/ffP5x0BSaKOofdtW+iHaxlSgPJ6IA0W9dwXJyRCvoaa9near2iGXDa6PEA
bRpQpudzIkL3Swfgcm4D+N7NQbCSOg==
=Wg5B
-----END PGP SIGNATURE-----

Merge tag 'nvme-next-pull-request' of git://git.infradead.org/qemu-nvme into staging

# gpg: Signature made Fri 15 Jul 2022 09:42:20 BST
# gpg:                using RSA key 522833AA75E2DCE6A24766C04DE1AF316D4F0DE9
# gpg: Good signature from "Klaus Jensen <its@irrelevant.dk>" [unknown]
# gpg:                 aka "Klaus Jensen <k.jensen@samsung.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: DDCA 4D9C 9EF9 31CC 3468 4272 63D5 6FC5 E55D A838
#      Subkey fingerprint: 5228 33AA 75E2 DCE6 A247 66C0 4DE1 AF31 6D4F 0DE9

* tag 'nvme-next-pull-request' of git://git.infradead.org/qemu-nvme:
  hw/nvme: Use ioeventfd to handle doorbell updates
  nvme: Fix misleading macro when mixed with ternary operator
  hw/nvme: force nvme-ns param 'shared' to false if no nvme-subsys node
  hw/nvme: fix example serial in documentation
  hw/nvme: Add trace events for shadow doorbell buffer
  hw/nvme: Implement shadow doorbell buffer support

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit 0ebf76aae5
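For reference, and not part of this series: the changes below repeatedly hard-code per-queue byte offsets of the form (qid << 3) for submission queue tails and (qid << 3) + (1 << 2) for completion queue heads. These come from the NVMe doorbell register layout, sketched here in illustrative C (this controller reports CAP.DSTRD == 0, which collapses the general formula to those constants):

    #include <stdint.h>

    /*
     * NVMe doorbell layout: doorbell registers start at BAR0 offset 0x1000
     * with a stride of (4 << CAP.DSTRD) bytes.  With DSTRD == 0, the SQ y
     * tail doorbell lands at 0x1000 + (y << 3) and the CQ y head doorbell
     * at 0x1000 + (y << 3) + 4.  The shadow doorbell and EventIdx buffers
     * added in this series reuse the same per-queue offsets, minus the
     * 0x1000 base.
     */
    static inline uint64_t nvme_sq_tail_doorbell(uint32_t qid, uint32_t dstrd)
    {
        return 0x1000 + (2 * (uint64_t)qid) * (4u << dstrd);
    }

    static inline uint64_t nvme_cq_head_doorbell(uint32_t qid, uint32_t dstrd)
    {
        return 0x1000 + (2 * (uint64_t)qid + 1) * (4u << dstrd);
    }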
@@ -104,8 +104,8 @@ multipath I/O.
 .. code-block:: console

    -device nvme-subsys,id=nvme-subsys-0,nqn=subsys0
-   -device nvme,serial=a,subsys=nvme-subsys-0
-   -device nvme,serial=b,subsys=nvme-subsys-0
+   -device nvme,serial=deadbeef,subsys=nvme-subsys-0
+   -device nvme,serial=deadbeef,subsys=nvme-subsys-0

 This will create an NVM subsystem with two controllers. Having controllers
 linked to an ``nvme-subsys`` device allows additional ``nvme-ns`` parameters:
 hw/nvme/ctrl.c | 233
@@ -264,6 +264,7 @@ static const uint32_t nvme_cse_acs[256] = {
     [NVME_ADM_CMD_ASYNC_EV_REQ]     = NVME_CMD_EFF_CSUPP,
     [NVME_ADM_CMD_NS_ATTACHMENT]    = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
     [NVME_ADM_CMD_VIRT_MNGMT]       = NVME_CMD_EFF_CSUPP,
+    [NVME_ADM_CMD_DBBUF_CONFIG]     = NVME_CMD_EFF_CSUPP,
     [NVME_ADM_CMD_FORMAT_NVM]       = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
 };

@@ -1330,6 +1331,13 @@ static inline void nvme_blk_write(BlockBackend *blk, int64_t offset,
     }
 }

+static void nvme_update_cq_head(NvmeCQueue *cq)
+{
+    pci_dma_read(&cq->ctrl->parent_obj, cq->db_addr, &cq->head,
+                 sizeof(cq->head));
+    trace_pci_nvme_shadow_doorbell_cq(cq->cqid, cq->head);
+}
+
 static void nvme_post_cqes(void *opaque)
 {
     NvmeCQueue *cq = opaque;
@@ -1342,6 +1350,10 @@ static void nvme_post_cqes(void *opaque)
         NvmeSQueue *sq;
         hwaddr addr;

+        if (n->dbbuf_enabled) {
+            nvme_update_cq_head(cq);
+        }
+
         if (nvme_cq_full(cq)) {
             break;
         }
@@ -1388,7 +1400,14 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)

     QTAILQ_REMOVE(&req->sq->out_req_list, req, entry);
     QTAILQ_INSERT_TAIL(&cq->req_list, req, entry);
-    timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+
+    if (req->sq->ioeventfd_enabled) {
+        /* Post CQE directly since we are in main loop thread */
+        nvme_post_cqes(cq);
+    } else {
+        /* Schedule the timer to post CQE later since we are in vcpu thread */
+        timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+    }
 }

 static void nvme_process_aers(void *opaque)
@@ -4214,10 +4233,82 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
     return NVME_INVALID_OPCODE | NVME_DNR;
 }

+static void nvme_cq_notifier(EventNotifier *e)
+{
+    NvmeCQueue *cq = container_of(e, NvmeCQueue, notifier);
+    NvmeCtrl *n = cq->ctrl;
+
+    event_notifier_test_and_clear(&cq->notifier);
+
+    nvme_update_cq_head(cq);
+
+    if (cq->tail == cq->head) {
+        if (cq->irq_enabled) {
+            n->cq_pending--;
+        }
+
+        nvme_irq_deassert(n, cq);
+    }
+
+    nvme_post_cqes(cq);
+}
+
+static int nvme_init_cq_ioeventfd(NvmeCQueue *cq)
+{
+    NvmeCtrl *n = cq->ctrl;
+    uint16_t offset = (cq->cqid << 3) + (1 << 2);
+    int ret;
+
+    ret = event_notifier_init(&cq->notifier, 0);
+    if (ret < 0) {
+        return ret;
+    }
+
+    event_notifier_set_handler(&cq->notifier, nvme_cq_notifier);
+    memory_region_add_eventfd(&n->iomem,
+                              0x1000 + offset, 4, false, 0, &cq->notifier);
+
+    return 0;
+}
+
+static void nvme_sq_notifier(EventNotifier *e)
+{
+    NvmeSQueue *sq = container_of(e, NvmeSQueue, notifier);
+
+    event_notifier_test_and_clear(&sq->notifier);
+
+    nvme_process_sq(sq);
+}
+
+static int nvme_init_sq_ioeventfd(NvmeSQueue *sq)
+{
+    NvmeCtrl *n = sq->ctrl;
+    uint16_t offset = sq->sqid << 3;
+    int ret;
+
+    ret = event_notifier_init(&sq->notifier, 0);
+    if (ret < 0) {
+        return ret;
+    }
+
+    event_notifier_set_handler(&sq->notifier, nvme_sq_notifier);
+    memory_region_add_eventfd(&n->iomem,
+                              0x1000 + offset, 4, false, 0, &sq->notifier);
+
+    return 0;
+}
+
 static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
 {
+    uint16_t offset = sq->sqid << 3;
+
     n->sq[sq->sqid] = NULL;
     timer_free(sq->timer);
+    if (sq->ioeventfd_enabled) {
+        memory_region_del_eventfd(&n->iomem,
+                                  0x1000 + offset, 4, false, 0, &sq->notifier);
+        event_notifier_cleanup(&sq->notifier);
+    }
     g_free(sq->io_req);
     if (sq->sqid) {
         g_free(sq);
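A note on the eventfd registration in the two init functions above (an annotated restatement of the call added by the patch, not new code): with match_data set to false, any 4-byte guest write to that BAR0 offset signals the notifier, the written value is dropped, and the handler re-reads the queue state from the shadow doorbell buffer instead.

    /* Annotated restatement of the call in nvme_init_sq_ioeventfd(). */
    memory_region_add_eventfd(&n->iomem,
                              0x1000 + offset, /* SQ tail doorbell; offset == sqid << 3  */
                              4,               /* doorbell writes are 32 bits wide       */
                              false,           /* match_data: fire for any written value */
                              0,               /* data: ignored when match_data is false */
                              &sq->notifier);  /* handled by nvme_sq_notifier() in the
                                                * main loop, which calls nvme_process_sq() */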
@@ -4287,6 +4378,17 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
     }
     sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq);

+    if (n->dbbuf_enabled) {
+        sq->db_addr = n->dbbuf_dbs + (sqid << 3);
+        sq->ei_addr = n->dbbuf_eis + (sqid << 3);
+
+        if (n->params.ioeventfd && sq->sqid != 0) {
+            if (!nvme_init_sq_ioeventfd(sq)) {
+                sq->ioeventfd_enabled = true;
+            }
+        }
+    }
+
     assert(n->cq[cqid]);
     cq = n->cq[cqid];
     QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry);
@@ -4588,8 +4690,15 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)

 static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
 {
+    uint16_t offset = (cq->cqid << 3) + (1 << 2);
+
     n->cq[cq->cqid] = NULL;
     timer_free(cq->timer);
+    if (cq->ioeventfd_enabled) {
+        memory_region_del_eventfd(&n->iomem,
+                                  0x1000 + offset, 4, false, 0, &cq->notifier);
+        event_notifier_cleanup(&cq->notifier);
+    }
     if (msix_enabled(&n->parent_obj)) {
         msix_vector_unuse(&n->parent_obj, cq->vector);
     }
@@ -4645,6 +4754,16 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
     cq->head = cq->tail = 0;
     QTAILQ_INIT(&cq->req_list);
     QTAILQ_INIT(&cq->sq_list);
+    if (n->dbbuf_enabled) {
+        cq->db_addr = n->dbbuf_dbs + (cqid << 3) + (1 << 2);
+        cq->ei_addr = n->dbbuf_eis + (cqid << 3) + (1 << 2);
+
+        if (n->params.ioeventfd && cqid != 0) {
+            if (!nvme_init_cq_ioeventfd(cq)) {
+                cq->ioeventfd_enabled = true;
+            }
+        }
+    }
     n->cq[cqid] = cq;
     cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
 }
@@ -5988,6 +6107,64 @@ static uint16_t nvme_virt_mngmt(NvmeCtrl *n, NvmeRequest *req)
     }
 }

+static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req)
+{
+    uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1);
+    uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2);
+    int i;
+
+    /* Address should be page aligned */
+    if (dbs_addr & (n->page_size - 1) || eis_addr & (n->page_size - 1)) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    /* Save shadow buffer base addr for use during queue creation */
+    n->dbbuf_dbs = dbs_addr;
+    n->dbbuf_eis = eis_addr;
+    n->dbbuf_enabled = true;
+
+    for (i = 0; i < n->params.max_ioqpairs + 1; i++) {
+        NvmeSQueue *sq = n->sq[i];
+        NvmeCQueue *cq = n->cq[i];
+
+        if (sq) {
+            /*
+             * CAP.DSTRD is 0, so offset of ith sq db_addr is (i<<3)
+             * nvme_process_db() uses this hard-coded way to calculate
+             * doorbell offsets. Be consistent with that here.
+             */
+            sq->db_addr = dbs_addr + (i << 3);
+            sq->ei_addr = eis_addr + (i << 3);
+            pci_dma_write(&n->parent_obj, sq->db_addr, &sq->tail,
+                          sizeof(sq->tail));
+
+            if (n->params.ioeventfd && sq->sqid != 0) {
+                if (!nvme_init_sq_ioeventfd(sq)) {
+                    sq->ioeventfd_enabled = true;
+                }
+            }
+        }
+
+        if (cq) {
+            /* CAP.DSTRD is 0, so offset of ith cq db_addr is (i<<3)+(1<<2) */
+            cq->db_addr = dbs_addr + (i << 3) + (1 << 2);
+            cq->ei_addr = eis_addr + (i << 3) + (1 << 2);
+            pci_dma_write(&n->parent_obj, cq->db_addr, &cq->head,
+                          sizeof(cq->head));
+
+            if (n->params.ioeventfd && cq->cqid != 0) {
+                if (!nvme_init_cq_ioeventfd(cq)) {
+                    cq->ioeventfd_enabled = true;
+                }
+            }
+        }
+    }
+
+    trace_pci_nvme_dbbuf_config(dbs_addr, eis_addr);
+
+    return NVME_SUCCESS;
+}
+
 static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
 {
     trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode,
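For context, a hypothetical guest-side sketch (not QEMU code and not part of this series): a driver that wants shadow doorbells allocates two page-aligned DMA buffers, points PRP1 at the shadow doorbell page and PRP2 at the EventIdx page, and issues admin opcode 0x7c; afterwards it writes queue tails and heads into the shadow page at the (qid << 3) / (qid << 3) + 4 slots that nvme_dbbuf_config() mirrors above. The helper and type names below are made up.

    #include <stdint.h>
    #include <string.h>

    #define NVME_ADM_CMD_DBBUF_CONFIG 0x7c
    #define NVME_PAGE_SIZE            4096

    /* Hypothetical admin-command view; only the fields used here. */
    struct nvme_dbbuf_cmd {
        uint8_t  opcode;  /* 0x7c */
        uint64_t prp1;    /* physical address of the shadow doorbell page */
        uint64_t prp2;    /* physical address of the EventIdx page */
    };

    /* Assumed to be provided by the surrounding driver. */
    extern int submit_admin_cmd(const struct nvme_dbbuf_cmd *cmd);

    static int enable_shadow_doorbells(void *db_page, uint64_t db_phys,
                                       void *ei_page, uint64_t ei_phys)
    {
        struct nvme_dbbuf_cmd cmd = {
            .opcode = NVME_ADM_CMD_DBBUF_CONFIG,
            .prp1   = db_phys,   /* must be page aligned, as checked above */
            .prp2   = ei_phys,   /* must be page aligned, as checked above */
        };

        memset(db_page, 0, NVME_PAGE_SIZE);
        memset(ei_page, 0, NVME_PAGE_SIZE);

        /* After this succeeds, write SQ tails to db_page + (qid << 3) and
         * CQ heads to db_page + (qid << 3) + 4 instead of ringing the MMIO
         * doorbells unconditionally. */
        return submit_admin_cmd(&cmd);
    }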
@@ -6032,6 +6209,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
         return nvme_ns_attachment(n, req);
     case NVME_ADM_CMD_VIRT_MNGMT:
         return nvme_virt_mngmt(n, req);
+    case NVME_ADM_CMD_DBBUF_CONFIG:
+        return nvme_dbbuf_config(n, req);
     case NVME_ADM_CMD_FORMAT_NVM:
         return nvme_format(n, req);
     default:
@@ -6041,6 +6220,20 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
     return NVME_INVALID_OPCODE | NVME_DNR;
 }

+static void nvme_update_sq_eventidx(const NvmeSQueue *sq)
+{
+    pci_dma_write(&sq->ctrl->parent_obj, sq->ei_addr, &sq->tail,
+                  sizeof(sq->tail));
+    trace_pci_nvme_eventidx_sq(sq->sqid, sq->tail);
+}
+
+static void nvme_update_sq_tail(NvmeSQueue *sq)
+{
+    pci_dma_read(&sq->ctrl->parent_obj, sq->db_addr, &sq->tail,
+                 sizeof(sq->tail));
+    trace_pci_nvme_shadow_doorbell_sq(sq->sqid, sq->tail);
+}
+
 static void nvme_process_sq(void *opaque)
 {
     NvmeSQueue *sq = opaque;
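The helpers above only publish the device's EventIdx; what the host does with it is outside this patch. Typically a guest driver skips the MMIO doorbell write when the device has already advanced past its last ring, using a wrap-safe check along these lines (illustrative only, modeled on the virtio-style event index convention; not code from this series):

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * Return true if the doorbell still needs to be rung: the device's
     * published event index has not yet caught up with the entries written
     * since the last MMIO doorbell write.  All arithmetic is modulo 2^16,
     * so queue index wrap-around is handled naturally.
     */
    static bool nvme_dbbuf_need_event(uint16_t event_idx, uint16_t new_idx,
                                      uint16_t old_idx)
    {
        return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old_idx);
    }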
@@ -6052,6 +6245,10 @@ static void nvme_process_sq(void *opaque)
     NvmeCmd cmd;
     NvmeRequest *req;

+    if (n->dbbuf_enabled) {
+        nvme_update_sq_tail(sq);
+    }
+
     while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
         addr = sq->dma_addr + sq->head * n->sqe_size;
         if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
@@ -6075,6 +6272,11 @@ static void nvme_process_sq(void *opaque)
             req->status = status;
             nvme_enqueue_req_completion(cq, req);
         }
+
+        if (n->dbbuf_enabled) {
+            nvme_update_sq_eventidx(sq);
+            nvme_update_sq_tail(sq);
+        }
     }
 }

@@ -6184,6 +6386,10 @@ static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst)
     stl_le_p(&n->bar.intms, 0);
     stl_le_p(&n->bar.intmc, 0);
     stl_le_p(&n->bar.cc, 0);
+
+    n->dbbuf_dbs = 0;
+    n->dbbuf_eis = 0;
+    n->dbbuf_enabled = false;
 }

 static void nvme_ctrl_shutdown(NvmeCtrl *n)
@@ -6694,6 +6900,10 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)

         start_sqs = nvme_cq_full(cq) ? 1 : 0;
         cq->head = new_head;
+        if (!qid && n->dbbuf_enabled) {
+            pci_dma_write(&n->parent_obj, cq->db_addr, &cq->head,
+                          sizeof(cq->head));
+        }
         if (start_sqs) {
             NvmeSQueue *sq;
             QTAILQ_FOREACH(sq, &cq->sq_list, entry) {
@@ -6751,6 +6961,23 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
         trace_pci_nvme_mmio_doorbell_sq(sq->sqid, new_tail);

         sq->tail = new_tail;
+        if (!qid && n->dbbuf_enabled) {
+            /*
+             * The spec states "the host shall also update the controller's
+             * corresponding doorbell property to match the value of that entry
+             * in the Shadow Doorbell buffer."
+             *
+             * Since this context is currently a VM trap, we can safely enforce
+             * the requirement from the device side in case the host is
+             * misbehaving.
+             *
+             * Note, we shouldn't have to do this, but various drivers
+             * including ones that run on Linux, are not updating Admin Queues,
+             * so we can't trust reading it for an appropriate sq tail.
+             */
+            pci_dma_write(&n->parent_obj, sq->db_addr, &sq->tail,
+                          sizeof(sq->tail));
+        }
         timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
     }
 }
@@ -7231,7 +7458,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)

     id->mdts = n->params.mdts;
     id->ver = cpu_to_le32(NVME_SPEC_VER);
-    id->oacs = cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT);
+    id->oacs =
+        cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT | NVME_OACS_DBBUF);
     id->cntrltype = 0x1;

     /*
@@ -7436,6 +7664,7 @@ static Property nvme_props[] = {
     DEFINE_PROP_UINT8("vsl", NvmeCtrl, params.vsl, 7),
     DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false),
     DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false),
+    DEFINE_PROP_BOOL("ioeventfd", NvmeCtrl, params.ioeventfd, true),
     DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0),
     DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl,
                      params.auto_transition_zones, true),
@@ -546,6 +546,8 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
     int i;

     if (!n->subsys) {
+        /* If no subsys, the ns cannot be attached to more than one ctrl. */
+        ns->params.shared = false;
         if (ns->params.detached) {
             error_setg(errp, "detached requires that the nvme device is "
                        "linked to an nvme-subsys device");
@@ -341,6 +341,7 @@ static inline const char *nvme_adm_opc_str(uint8_t opc)
     case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
     case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
     case NVME_ADM_CMD_VIRT_MNGMT:       return "NVME_ADM_CMD_VIRT_MNGMT";
+    case NVME_ADM_CMD_DBBUF_CONFIG:     return "NVME_ADM_CMD_DBBUF_CONFIG";
     case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
     default:                            return "NVME_ADM_CMD_UNKNOWN";
     }
@@ -372,7 +373,11 @@ typedef struct NvmeSQueue {
     uint32_t tail;
     uint32_t size;
     uint64_t dma_addr;
+    uint64_t db_addr;
+    uint64_t ei_addr;
     QEMUTimer *timer;
+    EventNotifier notifier;
+    bool ioeventfd_enabled;
     NvmeRequest *io_req;
     QTAILQ_HEAD(, NvmeRequest) req_list;
     QTAILQ_HEAD(, NvmeRequest) out_req_list;
@@ -389,7 +394,11 @@ typedef struct NvmeCQueue {
     uint32_t vector;
     uint32_t size;
     uint64_t dma_addr;
+    uint64_t db_addr;
+    uint64_t ei_addr;
     QEMUTimer *timer;
+    EventNotifier notifier;
+    bool ioeventfd_enabled;
     QTAILQ_HEAD(, NvmeSQueue) sq_list;
     QTAILQ_HEAD(, NvmeRequest) req_list;
 } NvmeCQueue;
@@ -412,6 +421,7 @@ typedef struct NvmeParams {
     uint8_t zasl;
     bool auto_transition_zones;
     bool legacy_cmb;
+    bool ioeventfd;
     uint8_t sriov_max_vfs;
     uint16_t sriov_vq_flexible;
     uint16_t sriov_vi_flexible;
@@ -445,6 +455,9 @@ typedef struct NvmeCtrl {
     uint8_t smart_critical_warning;
     uint32_t conf_msix_qsize;
     uint32_t conf_ioqpairs;
+    uint64_t dbbuf_dbs;
+    uint64_t dbbuf_eis;
+    bool dbbuf_enabled;

     struct {
         MemoryRegion mem;
@@ -3,6 +3,7 @@ pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
 pci_nvme_irq_pin(void) "pulsing IRQ pin"
 pci_nvme_irq_masked(void) "IRQ is masked"
 pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64""
+pci_nvme_dbbuf_config(uint64_t dbs_addr, uint64_t eis_addr) "dbs_addr=0x%"PRIx64" eis_addr=0x%"PRIx64""
 pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
 pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
 pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d"
@@ -83,6 +84,8 @@ pci_nvme_enqueue_event_noqueue(int queued) "queued %d"
 pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
 pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
 pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint32_t dw0, uint32_t dw1, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" dw0 0x%"PRIx32" dw1 0x%"PRIx32" status 0x%"PRIx16""
+pci_nvme_eventidx_cq(uint16_t cqid, uint16_t new_eventidx) "cqid %"PRIu16" new_eventidx %"PRIu16""
+pci_nvme_eventidx_sq(uint16_t sqid, uint16_t new_eventidx) "sqid %"PRIu16" new_eventidx %"PRIu16""
 pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d"
 pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d"
 pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16""
@@ -99,6 +102,8 @@ pci_nvme_mmio_start_success(void) "setting controller enable bit succeeded"
 pci_nvme_mmio_stopped(void) "cleared controller enable bit"
 pci_nvme_mmio_shutdown_set(void) "shutdown bit set"
 pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
+pci_nvme_shadow_doorbell_cq(uint16_t cqid, uint16_t new_shadow_doorbell) "cqid %"PRIu16" new_shadow_doorbell %"PRIu16""
+pci_nvme_shadow_doorbell_sq(uint16_t sqid, uint16_t new_shadow_doorbell) "sqid %"PRIu16" new_shadow_doorbell %"PRIu16""
 pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
 pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
 pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
@@ -98,28 +98,28 @@ enum NvmeCapMask {
 #define NVME_CAP_PMRS(cap) (((cap) >> CAP_PMRS_SHIFT) & CAP_PMRS_MASK)
 #define NVME_CAP_CMBS(cap) (((cap) >> CAP_CMBS_SHIFT) & CAP_CMBS_MASK)

-#define NVME_CAP_SET_MQES(cap, val) (cap |= (uint64_t)(val & CAP_MQES_MASK) \
-                                                          << CAP_MQES_SHIFT)
+#define NVME_CAP_SET_MQES(cap, val) \
+    ((cap) |= (uint64_t)((val) & CAP_MQES_MASK) << CAP_MQES_SHIFT)
-#define NVME_CAP_SET_CQR(cap, val) (cap |= (uint64_t)(val & CAP_CQR_MASK) \
-                                                          << CAP_CQR_SHIFT)
+#define NVME_CAP_SET_CQR(cap, val) \
+    ((cap) |= (uint64_t)((val) & CAP_CQR_MASK) << CAP_CQR_SHIFT)
-#define NVME_CAP_SET_AMS(cap, val) (cap |= (uint64_t)(val & CAP_AMS_MASK) \
-                                                          << CAP_AMS_SHIFT)
+#define NVME_CAP_SET_AMS(cap, val) \
+    ((cap) |= (uint64_t)((val) & CAP_AMS_MASK) << CAP_AMS_SHIFT)
-#define NVME_CAP_SET_TO(cap, val) (cap |= (uint64_t)(val & CAP_TO_MASK) \
-                                                          << CAP_TO_SHIFT)
+#define NVME_CAP_SET_TO(cap, val) \
+    ((cap) |= (uint64_t)((val) & CAP_TO_MASK) << CAP_TO_SHIFT)
-#define NVME_CAP_SET_DSTRD(cap, val) (cap |= (uint64_t)(val & CAP_DSTRD_MASK) \
-                                                          << CAP_DSTRD_SHIFT)
+#define NVME_CAP_SET_DSTRD(cap, val) \
+    ((cap) |= (uint64_t)((val) & CAP_DSTRD_MASK) << CAP_DSTRD_SHIFT)
-#define NVME_CAP_SET_NSSRS(cap, val) (cap |= (uint64_t)(val & CAP_NSSRS_MASK) \
-                                                          << CAP_NSSRS_SHIFT)
+#define NVME_CAP_SET_NSSRS(cap, val) \
+    ((cap) |= (uint64_t)((val) & CAP_NSSRS_MASK) << CAP_NSSRS_SHIFT)
-#define NVME_CAP_SET_CSS(cap, val) (cap |= (uint64_t)(val & CAP_CSS_MASK) \
-                                                          << CAP_CSS_SHIFT)
+#define NVME_CAP_SET_CSS(cap, val) \
+    ((cap) |= (uint64_t)((val) & CAP_CSS_MASK) << CAP_CSS_SHIFT)
-#define NVME_CAP_SET_MPSMIN(cap, val) (cap |= (uint64_t)(val & CAP_MPSMIN_MASK)\
-                                                          << CAP_MPSMIN_SHIFT)
+#define NVME_CAP_SET_MPSMIN(cap, val) \
+    ((cap) |= (uint64_t)((val) & CAP_MPSMIN_MASK) << CAP_MPSMIN_SHIFT)
-#define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & CAP_MPSMAX_MASK)\
-                                                          << CAP_MPSMAX_SHIFT)
+#define NVME_CAP_SET_MPSMAX(cap, val) \
+    ((cap) |= (uint64_t)((val) & CAP_MPSMAX_MASK) << CAP_MPSMAX_SHIFT)
-#define NVME_CAP_SET_PMRS(cap, val) (cap |= (uint64_t)(val & CAP_PMRS_MASK) \
-                                                          << CAP_PMRS_SHIFT)
+#define NVME_CAP_SET_PMRS(cap, val) \
+    ((cap) |= (uint64_t)((val) & CAP_PMRS_MASK) << CAP_PMRS_SHIFT)
-#define NVME_CAP_SET_CMBS(cap, val) (cap |= (uint64_t)(val & CAP_CMBS_MASK) \
-                                                          << CAP_CMBS_SHIFT)
+#define NVME_CAP_SET_CMBS(cap, val) \
+    ((cap) |= (uint64_t)((val) & CAP_CMBS_MASK) << CAP_CMBS_SHIFT)

 enum NvmeCapCss {
     NVME_CAP_CSS_NVM = 1 << 0,
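Why the extra parentheses matter: with the old form, passing a ternary expression as val lets ?: bind looser than &, so the mask ends up applying to only one branch of the conditional. A minimal, self-contained illustration (made-up MASK/SHIFT values, not the real CAP_* constants):

    #include <assert.h>
    #include <stdint.h>

    #define MASK  0x1
    #define SHIFT 1

    /* Old style: `val` is pasted unparenthesized in front of `& MASK`. */
    #define SET_OLD(reg, val) ((reg) |= (uint64_t)(val & MASK) << SHIFT)
    /* New style, as in this patch: `(val)` is grouped before masking.  */
    #define SET_NEW(reg, val) ((reg) |= (uint64_t)((val) & MASK) << SHIFT)

    int main(void)
    {
        uint64_t a = 0, b = 0;
        int wide = 1;

        SET_OLD(a, wide ? 3 : 0); /* expands to (wide ? 3 : (0 & MASK)): mask skipped, a == 6 */
        SET_NEW(b, wide ? 3 : 0); /* expands to ((wide ? 3 : 0) & MASK): masked,       b == 2 */

        assert(a == 6);
        assert(b == 2);
        return 0;
    }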
@@ -596,6 +596,7 @@ enum NvmeAdminCommands {
     NVME_ADM_CMD_DOWNLOAD_FW    = 0x11,
     NVME_ADM_CMD_NS_ATTACHMENT  = 0x15,
     NVME_ADM_CMD_VIRT_MNGMT     = 0x1c,
+    NVME_ADM_CMD_DBBUF_CONFIG   = 0x7c,
     NVME_ADM_CMD_FORMAT_NVM     = 0x80,
     NVME_ADM_CMD_SECURITY_SEND  = 0x81,
     NVME_ADM_CMD_SECURITY_RECV  = 0x82,
@@ -1141,6 +1142,7 @@ enum NvmeIdCtrlOacs {
     NVME_OACS_FORMAT    = 1 << 1,
     NVME_OACS_FW        = 1 << 2,
     NVME_OACS_NS_MGMT   = 1 << 3,
+    NVME_OACS_DBBUF     = 1 << 8,
 };

 enum NvmeIdCtrlOncs {