nvme_disk: Allocate up to 8 qpairs and round-robin them for I/O.

Before this commit, only one qpair was allocated, and it was
locked for the duration of each transfer. Now we allocate more
qpairs if the device supports them, and lock only around the
actual call to read(), so multiple reads can be queued and
executed simultaneously.

This is likely a significant performance improvement, even more
so than it would be for SCSI, as SSDs can read from as many
sectors at once as bandwidth allows.
Augustin Cavalier 2019-04-17 19:44:23 -04:00
parent 16525505c6
commit 872c8209b0
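
For context, here is a minimal, standalone sketch of the round-robin pattern the
diff below introduces: an atomic counter picks the next queue, and only that
queue's lock is held around submission, so submissions on different queues can
proceed in parallel. The names and types here (Queue, Device, get_next_queue,
std::atomic, std::mutex) are illustrative stand-ins, not the driver's actual
Haiku kernel APIs (atomic_add(), mutex_lock()).

// Illustrative sketch only -- not driver code.
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <mutex>

struct Queue {
	std::mutex lock;	// protects submission on this queue only
	int id;
};

struct Device {
	static const uint32_t kMaxQueues = 8;
	Queue queues[kMaxQueues];
	uint32_t queue_count = 0;
	std::atomic<uint32_t> next_queue{0};
};

// Pick the next queue in round-robin order; the atomic increment makes the
// choice safe without taking any device-wide lock.
static Queue* get_next_queue(Device& dev)
{
	return &dev.queues[dev.next_queue.fetch_add(1) % dev.queue_count];
}

int main()
{
	Device dev;
	dev.queue_count = 4;
	for (uint32_t i = 0; i < dev.queue_count; i++)
		dev.queues[i].id = (int)i;

	// Each request grabs only its queue's lock for the submission call.
	for (int i = 0; i < 8; i++) {
		Queue* q = get_next_queue(dev);
		std::lock_guard<std::mutex> guard(q->lock);
		std::printf("request %d -> queue %d\n", i, q->id);
	}
	return 0;
}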


@@ -27,6 +27,7 @@ extern "C" {
 #else
 #	define TRACE(x...) ;
 #endif
+#define TRACE_ALWAYS(x...)	dprintf("nvme_disk: " x)
 #define TRACE_ERROR(x...)	dprintf("\33[33mnvme_disk:\33[0m " x)
 #define CALLED()			TRACE("CALLED %s\n", __PRETTY_FUNCTION__)
@@ -62,6 +63,8 @@ static const uint8 kDriveIcon[] = {
 #define NVME_DISK_DEVICE_MODULE_NAME "drivers/disk/nvme_disk/device_v1"
 #define NVME_DISK_DEVICE_ID_GENERATOR "nvme_disk/device_id"
 
+#define NVME_MAX_QPAIRS		(8)
+
 static device_manager_info* sDeviceManager;
@@ -76,9 +79,14 @@ typedef struct {
 	uint32				block_size;
 	status_t			media_status;
 
-	struct nvme_qpair*	qpair;
-	mutex				qpair_mtx;
+	struct qpair_info {
+		struct nvme_qpair*	qpair;
+		mutex				mtx;
+	}					qpairs[NVME_MAX_QPAIRS];
+	uint32				qpair_count;
+	uint32				next_qpair;
 } nvme_disk_driver_info;
+typedef nvme_disk_driver_info::qpair_info qpair_info;
 
 typedef struct {
@@ -185,6 +193,8 @@ nvme_disk_init_device(void* _info, void** _cookie)
 		return err;
 	}
 
+	TRACE_ALWAYS("attached to NVMe device \"%s (%s)\n", cstat.mn, cstat.sn);
+
 	// TODO: export more than just the first namespace!
 	info->ns = nvme_ns_open(info->ctrlr, cstat.ns_ids[0]);
 	if (info->ns == NULL) {
@@ -205,15 +215,21 @@ nvme_disk_init_device(void* _info, void** _cookie)
 	TRACE("capacity: %" B_PRIu64 ", block_size %" B_PRIu32 "\n",
 		info->capacity, info->block_size);
 
-	// allocate a qpair
-	// TODO: allocate more than one qpair
-	info->qpair = nvme_ioqp_get(info->ctrlr, (enum nvme_qprio)0, 0);
-	if (info->qpair == NULL) {
-		TRACE_ERROR("failed to allocate qpair!\n");
-		return B_ERROR;
-	}
-	mutex_init(&info->qpair_mtx, "qpair mtx");
+	// allocate qpairs
+	info->qpair_count = info->next_qpair = 0;
+	for (uint32 i = 0; i < NVME_MAX_QPAIRS && i < cstat.io_qpairs; i++) {
+		info->qpairs[i].qpair = nvme_ioqp_get(info->ctrlr,
+			(enum nvme_qprio)0, 0);
+		if (info->qpairs[i].qpair == NULL)
+			break;
+		mutex_init(&info->qpairs[i].mtx, "qpair mutex");
+		info->qpair_count++;
+	}
+	if (info->qpair_count == 0) {
+		TRACE_ERROR("failed to allocate qpairs!\n");
+		return B_NO_MEMORY;
+	}
 
 	*_cookie = info;
 	return B_OK;
@@ -270,8 +286,16 @@ nvme_disk_free(void* cookie)
 // #pragma mark - I/O functions
 
 
+static qpair_info*
+get_next_qpair(nvme_disk_driver_info* info)
+{
+	return &info->qpairs[atomic_add((int32*)&info->next_qpair, 1)
+		% info->qpair_count];
+}
+
+
 static void
-disk_read_callback(status_t* status, const struct nvme_cpl* cpl)
+disk_io_callback(status_t* status, const struct nvme_cpl* cpl)
 {
 	*status = nvme_cpl_is_error(cpl) ? B_IO_ERROR : B_OK;
 }
@@ -313,16 +337,23 @@ nvme_disk_read(void* cookie, off_t pos, void* buffer, size_t* length)
 	}
 
 	// Actually perform the read.
-	MutexLocker _(handle->info->qpair_mtx);
-	int ret = nvme_ns_read(handle->info->ns, handle->info->qpair,
-		buffer, rounded_pos / block_size, rounded_len / block_size,
-		(nvme_cmd_cb)disk_read_callback, &status, 0);
+	qpair_info* qpinfo = get_next_qpair(handle->info);
+	mutex_lock(&qpinfo->mtx);
+	int ret = nvme_ns_read(handle->info->ns, qpinfo->qpair, buffer,
+		rounded_pos / block_size, rounded_len / block_size,
+		(nvme_cmd_cb)disk_io_callback, &status, 0);
+	mutex_unlock(&qpinfo->mtx);
 	if (ret != 0)
 		return ret;
 
 	while (status == EINPROGRESS) {
-		nvme_ioqp_poll(handle->info->qpair, 1);
-		snooze(5);
+		// nvme_ioqp_poll uses locking internally on the entire device,
+		// not just this qpair, so it is entirely possible that it could
+		// return 0 (i.e. no completions processed) and yet our status
+		// changed, because some other thread processed the completion
+		// before we got to it. So, recheck it before sleeping.
+		if (nvme_ioqp_poll(qpinfo->qpair, 0) == 0 && status == EINPROGRESS)
+			snooze(5);
 	}
 
 	if (status != B_OK)