libnvme: Rework qpairs to lock themselves.

They were theoretically guarded by the controller lock, but the nvme_ns
functions bypassed it, so if nvme_ns_read executed at the same time as
a qpair poll, unpredictable races could occur. This change instead
guards each qpair with its own mutex, which also has the advantage
that poll() can run on more than one qpair at a time.

Seems to fix the KDLs in #15123 (and maybe other NVMe tickets),
though the I/O corruptions remain.
Augustin Cavalier 2020-03-22 15:59:49 -04:00
parent f1e5a6c914
commit 625dc38a28
5 changed files with 44 additions and 38 deletions
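
To illustrate the new scheme, here is a minimal self-contained sketch (not libnvme code; qpair_submit/qpair_poll and the ring fields are stand-ins): submit and poll on the same qpair serialize against each other, while two threads may poll two different qpairs in parallel.

/*
 * Illustrative model of per-qpair locking, as introduced by this commit.
 * Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>

struct qpair {
	pthread_mutex_t lock;	/* guards this qpair only */
	int sq_tail, cq_head;	/* stand-ins for the real ring state */
};

static void qpair_submit(struct qpair *qp)
{
	pthread_mutex_lock(&qp->lock);
	qp->sq_tail++;		/* ring manipulation is race-free now */
	pthread_mutex_unlock(&qp->lock);
}

static void qpair_poll(struct qpair *qp)
{
	pthread_mutex_lock(&qp->lock);
	qp->cq_head++;		/* completion reaping takes the same lock */
	pthread_mutex_unlock(&qp->lock);
}

static void *worker(void *arg)
{
	qpair_submit(arg);
	qpair_poll(arg);
	return NULL;
}

int main(void)
{
	struct qpair a = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };
	struct qpair b = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };
	pthread_t t1, t2;

	/* No shared controller lock: these two can run in parallel. */
	pthread_create(&t1, NULL, worker, &a);
	pthread_create(&t2, NULL, worker, &b);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);

	printf("a: %d/%d, b: %d/%d\n", a.sq_tail, a.cq_head, b.sq_tail, b.cq_head);
	return 0;
}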


@@ -769,7 +769,7 @@ extern int nvme_ioqp_submit_cmd(struct nvme_qpair *qpair,
  *
  * @sa nvme_cmd_cb
  */
-extern unsigned int nvme_ioqp_poll(struct nvme_qpair *qpair,
+extern unsigned int nvme_qpair_poll(struct nvme_qpair *qpair,
 				   unsigned int max_completions);
 
 /**


@@ -1517,36 +1517,14 @@ int nvme_ioqp_submit_cmd(struct nvme_qpair *qpair,
 			 void *buf, size_t len,
 			 nvme_cmd_cb cb_fn, void *cb_arg)
 {
-	struct nvme_ctrlr *ctrlr = qpair->ctrlr;
 	struct nvme_request *req;
 	int ret = ENOMEM;
 
-	pthread_mutex_lock(&ctrlr->lock);
-
 	req = nvme_request_allocate_contig(qpair, buf, len, cb_fn, cb_arg);
 	if (req) {
 		memcpy(&req->cmd, cmd, sizeof(req->cmd));
 		ret = nvme_qpair_submit_request(qpair, req);
 	}
 
-	pthread_mutex_unlock(&ctrlr->lock);
-
 	return ret;
 }
 
-/*
- * Poll for completion of NVMe commands submitted to the
- * specified I/O queue pair.
- */
-unsigned int nvme_ioqp_poll(struct nvme_qpair *qpair,
-			    unsigned int max_completions)
-{
-	struct nvme_ctrlr *ctrlr = qpair->ctrlr;
-	int ret;
-
-	pthread_mutex_lock(&ctrlr->lock);
-	ret = nvme_qpair_poll(qpair, max_completions);
-	pthread_mutex_unlock(&ctrlr->lock);
-
-	return ret;
-}


@@ -339,6 +339,10 @@ nvme_static_assert((offsetof(struct nvme_tracker, u.sgl) & 7) == 0,
 		   "SGL must be Qword aligned");
 
 struct nvme_qpair {
+	/*
+	 * Guards access to this structure.
+	 */
+	pthread_mutex_t		lock;
 
 	volatile uint32_t	*sq_tdbl;
 	volatile uint32_t	*cq_hdbl;
@@ -543,17 +547,16 @@ struct nvme_ctrlr {
 	nvme_aer_cb		aer_cb_fn;
 	void			*aer_cb_arg;
 
+	/*
+	 * Guards access to the controller itself, including admin queues.
+	 */
+	pthread_mutex_t		lock;
+
 	/*
 	 * Admin queue pair.
 	 */
 	struct nvme_qpair	adminq;
 
-	/*
-	 * Guards access to the controller itself.
-	 */
-	pthread_mutex_t		lock;
-
 	/*
 	 * Identify Controller data.
 	 */


@@ -843,6 +843,8 @@ int nvme_qpair_construct(struct nvme_ctrlr *ctrlr, struct nvme_qpair *qpair,
 	nvme_assert(entries != 0, "Invalid number of entries\n");
 	nvme_assert(trackers != 0, "Invalid trackers\n");
 
+	pthread_mutex_init(&qpair->lock, NULL);
+
 	qpair->entries = entries;
 	qpair->trackers = trackers;
 	qpair->qprio = qprio;
@@ -973,9 +975,10 @@ void nvme_qpair_destroy(struct nvme_qpair *qpair)
 	}
 
 	nvme_request_pool_destroy(qpair);
+	pthread_mutex_destroy(&qpair->lock);
 }
 
-bool nvme_qpair_enabled(struct nvme_qpair *qpair)
+static bool nvme_qpair_enabled(struct nvme_qpair *qpair)
 {
 	if (!qpair->enabled && !qpair->ctrlr->resetting)
 		nvme_qpair_enable(qpair);
@@ -1022,6 +1025,8 @@ int nvme_qpair_submit_request(struct nvme_qpair *qpair,
 		return ret;
 	}
 
+	pthread_mutex_lock(&qpair->lock);
+
 	tr = LIST_FIRST(&qpair->free_tr);
 	if (tr == NULL || !qpair->enabled) {
 		/*
@@ -1062,9 +1067,15 @@ int nvme_qpair_submit_request(struct nvme_qpair *qpair,
 	if (ret == 0)
 		nvme_qpair_submit_tracker(qpair, tr);
 
+	pthread_mutex_unlock(&qpair->lock);
+
 	return ret;
 }
 
+/*
+ * Poll for completion of NVMe commands submitted to the
+ * specified I/O queue pair.
+ */
 unsigned int nvme_qpair_poll(struct nvme_qpair *qpair,
 			     unsigned int max_completions)
 {
@@ -1090,6 +1101,8 @@ unsigned int nvme_qpair_poll(struct nvme_qpair *qpair,
 	 */
 		max_completions = qpair->entries - 1;
 
+	pthread_mutex_lock(&qpair->lock);
+
 	while (1) {
 		cpl = &qpair->cpl[qpair->cq_head];
@@ -1117,11 +1130,15 @@ unsigned int nvme_qpair_poll(struct nvme_qpair *qpair,
 	if (num_completions > 0)
 		nvme_mmio_write_4(qpair->cq_hdbl, qpair->cq_head);
 
+	pthread_mutex_unlock(&qpair->lock);
+
 	return num_completions;
 }
 
 void nvme_qpair_reset(struct nvme_qpair *qpair)
 {
+	pthread_mutex_lock(&qpair->lock);
+
 	qpair->sq_tail = qpair->cq_head = 0;
 
 	/*
@@ -1134,22 +1151,32 @@ void nvme_qpair_reset(struct nvme_qpair *qpair)
 	memset(qpair->cmd, 0, qpair->entries * sizeof(struct nvme_cmd));
 	memset(qpair->cpl, 0, qpair->entries * sizeof(struct nvme_cpl));
+
+	pthread_mutex_unlock(&qpair->lock);
 }
 
 void nvme_qpair_enable(struct nvme_qpair *qpair)
 {
+	pthread_mutex_lock(&qpair->lock);
+
 	if (nvme_qpair_is_io_queue(qpair))
 		_nvme_qpair_io_qpair_enable(qpair);
 	else
 		_nvme_qpair_admin_qpair_enable(qpair);
+
+	pthread_mutex_unlock(&qpair->lock);
 }
 
 void nvme_qpair_disable(struct nvme_qpair *qpair)
 {
+	pthread_mutex_lock(&qpair->lock);
+
 	if (nvme_qpair_is_io_queue(qpair))
 		_nvme_qpair_io_qpair_disable(qpair);
 	else
 		_nvme_qpair_admin_qpair_disable(qpair);
+
+	pthread_mutex_unlock(&qpair->lock);
 }
 
 void nvme_qpair_fail(struct nvme_qpair *qpair)
@@ -1157,6 +1184,8 @@ void nvme_qpair_fail(struct nvme_qpair *qpair)
 	struct nvme_tracker *tr;
 	struct nvme_request *req;
 
+	pthread_mutex_lock(&qpair->lock);
+
 	while (!STAILQ_EMPTY(&qpair->queued_req)) {
 		nvme_notice("Failing queued I/O command\n");
@@ -1182,5 +1211,7 @@ void nvme_qpair_fail(struct nvme_qpair *qpair)
 			1, true);
 	}
+
+	pthread_mutex_unlock(&qpair->lock);
 }


@@ -87,7 +87,6 @@ typedef struct {
 	struct qpair_info {
 		struct nvme_qpair*	qpair;
-		mutex				mtx;
 	} qpairs[NVME_MAX_QPAIRS];
 	uint32				qpair_count;
 	uint32				next_qpair;
@@ -238,7 +237,6 @@ nvme_disk_init_device(void* _info, void** _cookie)
 		if (info->qpairs[i].qpair == NULL)
 			break;
 
-		mutex_init(&info->qpairs[i].mtx, "qpair mutex");
 		info->qpair_count++;
 	}
 	if (info->qpair_count == 0) {
@@ -394,13 +392,13 @@ await_status(nvme_disk_driver_info* info, struct nvme_qpair* qpair, status_t& st
 	while (status == EINPROGRESS) {
 		info->interrupt.Add(&entry);
-		nvme_ioqp_poll(qpair, 0);
+		nvme_qpair_poll(qpair, 0);
 
 		if (status != EINPROGRESS)
 			return;
 
 		entry.Wait();
-		nvme_ioqp_poll(qpair, 0);
+		nvme_qpair_poll(qpair, 0);
 	}
 }
@@ -415,7 +413,6 @@ do_nvme_io(nvme_disk_driver_info* info, off_t rounded_pos, void* buffer,
 	status_t status = EINPROGRESS;
 
 	qpair_info* qpinfo = get_next_qpair(info);
-	mutex_lock(&qpinfo->mtx);
 	int ret = -1;
 	if (write) {
 		ret = nvme_ns_write(info->ns, qpinfo->qpair, buffer,
@@ -426,7 +423,6 @@ do_nvme_io(nvme_disk_driver_info* info, off_t rounded_pos, void* buffer,
 			rounded_pos / block_size, *rounded_len / block_size,
 			(nvme_cmd_cb)disk_io_callback, &status, 0);
 	}
-	mutex_unlock(&qpinfo->mtx);
 	if (ret != 0) {
 		TRACE_ERROR("attempt to queue %s I/O at %" B_PRIdOFF " of %" B_PRIuSIZE
 			" bytes failed!\n", write ? "write" : "read", rounded_pos, *rounded_len);
@@ -573,10 +569,8 @@ nvme_disk_flush(nvme_disk_driver_info* info)
 	status_t status = EINPROGRESS;
 
 	qpair_info* qpinfo = get_next_qpair(info);
-	mutex_lock(&qpinfo->mtx);
 	int ret = nvme_ns_flush(info->ns, qpinfo->qpair,
 		(nvme_cmd_cb)disk_io_callback, &status);
-	mutex_unlock(&qpinfo->mtx);
 	if (ret != 0)
 		return ret;