From 625dc38a28c0f6345adfdd4eb0b89b340a420ac0 Mon Sep 17 00:00:00 2001
From: Augustin Cavalier
Date: Sun, 22 Mar 2020 15:59:49 -0400
Subject: [PATCH] libnvme: Rework qpairs to lock themselves.

They were theoretically guarded by the controller lock, but it appears
nvme_ns bypassed that, meaning that if ns_read was executed at the same
time as qpair_poll, unpredictable races could occur. This solves that by
making the qpairs guarded by their own mutex, which also has the
advantage of poll() being executable on more than one qpair at a time.

Seems to fix the KDLs in #15123 (and maybe other NVMe tickets), though
the I/O corruptions remain.
---
 .../kernel/drivers/disk/nvme/libnvme/nvme.h   |  2 +-
 .../drivers/disk/nvme/libnvme/nvme_ctrlr.c    | 22 -------------
 .../drivers/disk/nvme/libnvme/nvme_internal.h | 15 +++++----
 .../drivers/disk/nvme/libnvme/nvme_qpair.c    | 33 ++++++++++++++++++-
 .../kernel/drivers/disk/nvme/nvme_disk.cpp    | 10 ++----
 5 files changed, 44 insertions(+), 38 deletions(-)

diff --git a/src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme.h b/src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme.h
index 9e958ac161..767488861e 100644
--- a/src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme.h
+++ b/src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme.h
@@ -769,7 +769,7 @@ extern int nvme_ioqp_submit_cmd(struct nvme_qpair *qpair,
  *
  * @sa nvme_cmd_cb
  */
-extern unsigned int nvme_ioqp_poll(struct nvme_qpair *qpair,
+extern unsigned int nvme_qpair_poll(struct nvme_qpair *qpair,
 				   unsigned int max_completions);
 
 /**
diff --git a/src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme_ctrlr.c b/src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme_ctrlr.c
index 1c0c9abd60..49382b9cd0 100644
--- a/src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme_ctrlr.c
+++ b/src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme_ctrlr.c
@@ -1517,36 +1517,14 @@ int nvme_ioqp_submit_cmd(struct nvme_qpair *qpair,
 		       void *buf, size_t len,
 		       nvme_cmd_cb cb_fn, void *cb_arg)
 {
-	struct nvme_ctrlr *ctrlr = qpair->ctrlr;
 	struct nvme_request *req;
 	int ret = ENOMEM;
 
-	pthread_mutex_lock(&ctrlr->lock);
-
 	req = nvme_request_allocate_contig(qpair, buf, len, cb_fn, cb_arg);
 	if (req) {
 		memcpy(&req->cmd, cmd, sizeof(req->cmd));
 		ret = nvme_qpair_submit_request(qpair, req);
 	}
 
-	pthread_mutex_unlock(&ctrlr->lock);
-
-	return ret;
-}
-
-/*
- * Poll for completion of NVMe commands submitted to the
- * specified I/O queue pair.
- */
-unsigned int nvme_ioqp_poll(struct nvme_qpair *qpair,
-			    unsigned int max_completions)
-{
-	struct nvme_ctrlr *ctrlr = qpair->ctrlr;
-	int ret;
-
-	pthread_mutex_lock(&ctrlr->lock);
-	ret = nvme_qpair_poll(qpair, max_completions);
-	pthread_mutex_unlock(&ctrlr->lock);
-
 	return ret;
 }
diff --git a/src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme_internal.h b/src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme_internal.h
index 3388102353..0bfc544dd0 100644
--- a/src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme_internal.h
+++ b/src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme_internal.h
@@ -339,6 +339,10 @@ nvme_static_assert((offsetof(struct nvme_tracker, u.sgl) & 7) == 0,
 		   "SGL must be Qword aligned");
 
 struct nvme_qpair {
+	/*
+	 * Guards access to this structure.
+	 */
+	pthread_mutex_t		lock;
 
 	volatile uint32_t	*sq_tdbl;
 	volatile uint32_t	*cq_hdbl;
@@ -543,17 +547,16 @@ struct nvme_ctrlr {
 	nvme_aer_cb		aer_cb_fn;
 	void			*aer_cb_arg;
 
-	/*
-	 * Guards access to the controller itself, including admin queues.
-	 */
-	pthread_mutex_t		lock;
-
-
 	/*
 	 * Admin queue pair.
 	 */
 	struct nvme_qpair	adminq;
 
+	/*
+	 * Guards access to the controller itself.
+	 */
+	pthread_mutex_t		lock;
+
 	/*
 	 * Identify Controller data.
 	 */
diff --git a/src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme_qpair.c b/src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme_qpair.c
index cb407824ad..33854d42a4 100644
--- a/src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme_qpair.c
+++ b/src/add-ons/kernel/drivers/disk/nvme/libnvme/nvme_qpair.c
@@ -843,6 +843,8 @@ int nvme_qpair_construct(struct nvme_ctrlr *ctrlr, struct nvme_qpair *qpair,
 	nvme_assert(entries != 0, "Invalid number of entries\n");
 	nvme_assert(trackers != 0, "Invalid trackers\n");
 
+	pthread_mutex_init(&qpair->lock, NULL);
+
 	qpair->entries = entries;
 	qpair->trackers = trackers;
 	qpair->qprio = qprio;
@@ -973,9 +975,10 @@ void nvme_qpair_destroy(struct nvme_qpair *qpair)
 	}
 
 	nvme_request_pool_destroy(qpair);
+	pthread_mutex_destroy(&qpair->lock);
 }
 
-bool nvme_qpair_enabled(struct nvme_qpair *qpair)
+static bool nvme_qpair_enabled(struct nvme_qpair *qpair)
 {
 	if (!qpair->enabled && !qpair->ctrlr->resetting)
 		nvme_qpair_enable(qpair);
@@ -1022,6 +1025,8 @@ int nvme_qpair_submit_request(struct nvme_qpair *qpair,
 		return ret;
 	}
 
+	pthread_mutex_lock(&qpair->lock);
+
 	tr = LIST_FIRST(&qpair->free_tr);
 	if (tr == NULL || !qpair->enabled) {
 		/*
@@ -1062,9 +1067,15 @@ int nvme_qpair_submit_request(struct nvme_qpair *qpair,
 	if (ret == 0)
 		nvme_qpair_submit_tracker(qpair, tr);
 
+	pthread_mutex_unlock(&qpair->lock);
+
 	return ret;
 }
 
+/*
+ * Poll for completion of NVMe commands submitted to the
+ * specified I/O queue pair.
+ */
 unsigned int nvme_qpair_poll(struct nvme_qpair *qpair,
 			     unsigned int max_completions)
 {
@@ -1090,6 +1101,8 @@ unsigned int nvme_qpair_poll(struct nvme_qpair *qpair,
 		 */
 		max_completions = qpair->entries - 1;
 
+	pthread_mutex_lock(&qpair->lock);
+
 	while (1) {
 		cpl = &qpair->cpl[qpair->cq_head];
 
@@ -1117,11 +1130,15 @@ unsigned int nvme_qpair_poll(struct nvme_qpair *qpair,
 	if (num_completions > 0)
 		nvme_mmio_write_4(qpair->cq_hdbl, qpair->cq_head);
 
+	pthread_mutex_unlock(&qpair->lock);
+
 	return num_completions;
 }
 
 void nvme_qpair_reset(struct nvme_qpair *qpair)
 {
+	pthread_mutex_lock(&qpair->lock);
+
 	qpair->sq_tail = qpair->cq_head = 0;
 
 	/*
@@ -1134,22 +1151,32 @@ void nvme_qpair_reset(struct nvme_qpair *qpair)
 
 	memset(qpair->cmd, 0, qpair->entries * sizeof(struct nvme_cmd));
 	memset(qpair->cpl, 0, qpair->entries * sizeof(struct nvme_cpl));
+
+	pthread_mutex_unlock(&qpair->lock);
 }
 
 void nvme_qpair_enable(struct nvme_qpair *qpair)
 {
+	pthread_mutex_lock(&qpair->lock);
+
 	if (nvme_qpair_is_io_queue(qpair))
 		_nvme_qpair_io_qpair_enable(qpair);
 	else
 		_nvme_qpair_admin_qpair_enable(qpair);
+
+	pthread_mutex_unlock(&qpair->lock);
 }
 
 void nvme_qpair_disable(struct nvme_qpair *qpair)
 {
+	pthread_mutex_lock(&qpair->lock);
+
 	if (nvme_qpair_is_io_queue(qpair))
 		_nvme_qpair_io_qpair_disable(qpair);
 	else
 		_nvme_qpair_admin_qpair_disable(qpair);
+
+	pthread_mutex_unlock(&qpair->lock);
 }
 
 void nvme_qpair_fail(struct nvme_qpair *qpair)
@@ -1157,6 +1184,8 @@ void nvme_qpair_fail(struct nvme_qpair *qpair)
 	struct nvme_tracker *tr;
 	struct nvme_request *req;
 
+	pthread_mutex_lock(&qpair->lock);
+
 	while (!STAILQ_EMPTY(&qpair->queued_req)) {
 		nvme_notice("Failing queued I/O command\n");
 
@@ -1182,5 +1211,7 @@ void nvme_qpair_fail(struct nvme_qpair *qpair)
 					1, true);
 	}
 
+
+	pthread_mutex_unlock(&qpair->lock);
 }
 
diff --git a/src/add-ons/kernel/drivers/disk/nvme/nvme_disk.cpp b/src/add-ons/kernel/drivers/disk/nvme/nvme_disk.cpp
index b2d891c555..2e5b449f08 100644
--- a/src/add-ons/kernel/drivers/disk/nvme/nvme_disk.cpp
+++ b/src/add-ons/kernel/drivers/disk/nvme/nvme_disk.cpp
@@ -87,7 +87,6 @@ typedef struct {
 
 	struct qpair_info {
 		struct nvme_qpair*	qpair;
-		mutex				mtx;
 	} qpairs[NVME_MAX_QPAIRS];
 	uint32					qpair_count;
 	uint32					next_qpair;
@@ -238,7 +237,6 @@ nvme_disk_init_device(void* _info, void** _cookie)
 		if (info->qpairs[i].qpair == NULL)
 			break;
 
-		mutex_init(&info->qpairs[i].mtx, "qpair mutex");
 		info->qpair_count++;
 	}
 	if (info->qpair_count == 0) {
@@ -394,13 +392,13 @@ await_status(nvme_disk_driver_info* info, struct nvme_qpair* qpair, status_t& st
 	while (status == EINPROGRESS) {
 		info->interrupt.Add(&entry);
 
-		nvme_ioqp_poll(qpair, 0);
+		nvme_qpair_poll(qpair, 0);
 
 		if (status != EINPROGRESS)
 			return;
 
 		entry.Wait();
 
-		nvme_ioqp_poll(qpair, 0);
+		nvme_qpair_poll(qpair, 0);
 	}
 }
@@ -415,7 +413,6 @@ do_nvme_io(nvme_disk_driver_info* info, off_t rounded_pos, void* buffer,
 	status_t status = EINPROGRESS;
 
 	qpair_info* qpinfo = get_next_qpair(info);
-	mutex_lock(&qpinfo->mtx);
 	int ret = -1;
 	if (write) {
 		ret = nvme_ns_write(info->ns, qpinfo->qpair, buffer,
@@ -426,7 +423,6 @@ do_nvme_io(nvme_disk_driver_info* info, off_t rounded_pos, void* buffer,
 			rounded_pos / block_size, *rounded_len / block_size,
 			(nvme_cmd_cb)disk_io_callback, &status, 0);
 	}
-	mutex_unlock(&qpinfo->mtx);
 	if (ret != 0) {
 		TRACE_ERROR("attempt to queue %s I/O at %" B_PRIdOFF " of %" B_PRIuSIZE
 			" bytes failed!\n", write ? "write" : "read", rounded_pos, *rounded_len);
@@ -573,10 +569,8 @@ nvme_disk_flush(nvme_disk_driver_info* info)
 	status_t status = EINPROGRESS;
 
 	qpair_info* qpinfo = get_next_qpair(info);
-	mutex_lock(&qpinfo->mtx);
 	int ret = nvme_ns_flush(info->ns, qpinfo->qpair,
 		(nvme_cmd_cb)disk_io_callback, &status);
-	mutex_unlock(&qpinfo->mtx);
 
 	if (ret != 0)
 		return ret;
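
The locking model the patch switches to, reduced to a standalone sketch. This is not part of the patch and does not use the real libnvme types: the demo_qpair struct and demo_qpair_* functions are hypothetical names with simplified bookkeeping standing in for the trackers and doorbell writes. It only illustrates that the mutex now lives in the queue pair itself (initialized in construct, taken around submit and poll, destroyed with the qpair) rather than in the controller.

/*
 * Sketch of per-qpair locking: each queue pair owns its mutex, so a
 * submission and a completion poll on the same qpair serialize against
 * each other, while two different qpairs can be polled concurrently.
 */
#include <pthread.h>
#include <stdio.h>

struct demo_qpair {
	pthread_mutex_t lock;     /* guards all fields below */
	unsigned int    sq_tail;  /* stand-in for the submission queue tail */
	unsigned int    cq_head;  /* stand-in for the completion queue head */
	unsigned int    inflight; /* commands submitted but not yet completed */
};

static void demo_qpair_construct(struct demo_qpair *qp)
{
	/* As in nvme_qpair_construct(): the qpair initializes its own lock. */
	pthread_mutex_init(&qp->lock, NULL);
	qp->sq_tail = qp->cq_head = qp->inflight = 0;
}

static void demo_qpair_destroy(struct demo_qpair *qp)
{
	/* As in nvme_qpair_destroy(): the lock dies with the qpair. */
	pthread_mutex_destroy(&qp->lock);
}

static int demo_qpair_submit(struct demo_qpair *qp)
{
	/* As in nvme_qpair_submit_request(): only this qpair's lock is taken
	 * around the queue manipulation, not a controller-wide lock. */
	pthread_mutex_lock(&qp->lock);
	qp->sq_tail++;
	qp->inflight++;
	pthread_mutex_unlock(&qp->lock);
	return 0;
}

static unsigned int demo_qpair_poll(struct demo_qpair *qp)
{
	unsigned int completed = 0;

	/* As in nvme_qpair_poll(): the same per-qpair lock serializes
	 * completion processing against submissions on this qpair only. */
	pthread_mutex_lock(&qp->lock);
	while (qp->inflight > 0) {
		qp->cq_head++;
		qp->inflight--;
		completed++;
	}
	pthread_mutex_unlock(&qp->lock);
	return completed;
}

int main(void)
{
	struct demo_qpair qp;

	demo_qpair_construct(&qp);
	demo_qpair_submit(&qp);
	printf("completed %u command(s)\n", demo_qpair_poll(&qp));
	demo_qpair_destroy(&qp);
	return 0;
}

Because the lock is owned by the queue pair, threads polling different qpairs never contend, which is the concurrency gain the commit message mentions, while a submit and a poll racing on the same qpair (the ns_read vs. qpair_poll case) are now serialized.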