nvme_disk: Allocate up to 8 qpairs and round-robin them for I/O.

Before this commit, only one qpair was allocated, and it was
locked for the duration of each transfer. Now we allocate more
qpairs if the device supports them, and lock only around the
actual call to read(), so multiple reads can be queued and
executed simultaneously.

This is likely a significant performance improvement, even more
so than it would be for SCSI, as SSDs can read from as many
sectors at once as bandwidth allows.
Augustin Cavalier 2019-04-17 19:44:23 -04:00
parent 16525505c6
commit 872c8209b0
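
For context, here is a minimal, standalone sketch of the round-robin pattern the
diff below introduces: an atomic counter picks the next queue, and only that
queue's lock is held around submission, so submissions on different queues can
proceed in parallel. The names and types here (Queue, Device, get_next_queue,
std::atomic, std::mutex) are illustrative stand-ins, not the driver's actual
Haiku kernel APIs (atomic_add(), mutex_lock()).

// Illustrative sketch only -- not driver code.
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <mutex>

struct Queue {
	std::mutex lock;	// protects submission on this queue only
	int id;
};

struct Device {
	static const uint32_t kMaxQueues = 8;
	Queue queues[kMaxQueues];
	uint32_t queue_count = 0;
	std::atomic<uint32_t> next_queue{0};
};

// Pick the next queue in round-robin order; the atomic increment makes the
// choice safe without taking any device-wide lock.
static Queue* get_next_queue(Device& dev)
{
	return &dev.queues[dev.next_queue.fetch_add(1) % dev.queue_count];
}

int main()
{
	Device dev;
	dev.queue_count = 4;
	for (uint32_t i = 0; i < dev.queue_count; i++)
		dev.queues[i].id = (int)i;

	// Each request grabs only its queue's lock for the submission call.
	for (int i = 0; i < 8; i++) {
		Queue* q = get_next_queue(dev);
		std::lock_guard<std::mutex> guard(q->lock);
		std::printf("request %d -> queue %d\n", i, q->id);
	}
	return 0;
}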


@@ -27,6 +27,7 @@ extern "C" {
 #else
 #	define TRACE(x...) ;
 #endif
+#define TRACE_ALWAYS(x...)	dprintf("nvme_disk: " x)
 #define TRACE_ERROR(x...)	dprintf("\33[33mnvme_disk:\33[0m " x)
 #define CALLED()			TRACE("CALLED %s\n", __PRETTY_FUNCTION__)
@@ -62,6 +63,8 @@ static const uint8 kDriveIcon[] = {
 #define NVME_DISK_DEVICE_MODULE_NAME "drivers/disk/nvme_disk/device_v1"
 #define NVME_DISK_DEVICE_ID_GENERATOR "nvme_disk/device_id"
 
+#define NVME_MAX_QPAIRS		(8)
+
 static device_manager_info* sDeviceManager;
@@ -76,9 +79,14 @@ typedef struct {
 	uint32				block_size;
 	status_t			media_status;
 
-	struct nvme_qpair*	qpair;
-	mutex				qpair_mtx;
+	struct qpair_info {
+		struct nvme_qpair*	qpair;
+		mutex				mtx;
+	}					qpairs[NVME_MAX_QPAIRS];
+	uint32				qpair_count;
+	uint32				next_qpair;
 } nvme_disk_driver_info;
+typedef nvme_disk_driver_info::qpair_info qpair_info;
 
 typedef struct {
@@ -185,6 +193,8 @@ nvme_disk_init_device(void* _info, void** _cookie)
 		return err;
 	}
 
+	TRACE_ALWAYS("attached to NVMe device \"%s (%s)\n", cstat.mn, cstat.sn);
+
 	// TODO: export more than just the first namespace!
 	info->ns = nvme_ns_open(info->ctrlr, cstat.ns_ids[0]);
 	if (info->ns == NULL) {
@@ -205,15 +215,21 @@ nvme_disk_init_device(void* _info, void** _cookie)
 	TRACE("capacity: %" B_PRIu64 ", block_size %" B_PRIu32 "\n",
 		info->capacity, info->block_size);
 
-	// allocate a qpair
-	// TODO: allocate more than one qpair
-	info->qpair = nvme_ioqp_get(info->ctrlr, (enum nvme_qprio)0, 0);
-	if (info->qpair == NULL) {
-		TRACE_ERROR("failed to allocate qpair!\n");
-		return B_ERROR;
-	}
-	mutex_init(&info->qpair_mtx, "qpair mtx");
+	// allocate qpairs
+	info->qpair_count = info->next_qpair = 0;
+	for (uint32 i = 0; i < NVME_MAX_QPAIRS && i < cstat.io_qpairs; i++) {
+		info->qpairs[i].qpair = nvme_ioqp_get(info->ctrlr,
+			(enum nvme_qprio)0, 0);
+		if (info->qpairs[i].qpair == NULL)
+			break;
+		mutex_init(&info->qpairs[i].mtx, "qpair mutex");
+		info->qpair_count++;
+	}
+	if (info->qpair_count == 0) {
+		TRACE_ERROR("failed to allocate qpairs!\n");
+		return B_NO_MEMORY;
+	}
 
 	*_cookie = info;
 	return B_OK;
@@ -270,8 +286,16 @@ nvme_disk_free(void* cookie)
 // #pragma mark - I/O functions
 
 
+static qpair_info*
+get_next_qpair(nvme_disk_driver_info* info)
+{
+	return &info->qpairs[atomic_add((int32*)&info->next_qpair, 1)
+		% info->qpair_count];
+}
+
+
 static void
-disk_read_callback(status_t* status, const struct nvme_cpl* cpl)
+disk_io_callback(status_t* status, const struct nvme_cpl* cpl)
 {
 	*status = nvme_cpl_is_error(cpl) ? B_IO_ERROR : B_OK;
 }
@@ -313,16 +337,23 @@ nvme_disk_read(void* cookie, off_t pos, void* buffer, size_t* length)
 	}
 
 	// Actually perform the read.
-	MutexLocker _(handle->info->qpair_mtx);
-	int ret = nvme_ns_read(handle->info->ns, handle->info->qpair,
-		buffer, rounded_pos / block_size, rounded_len / block_size,
-		(nvme_cmd_cb)disk_read_callback, &status, 0);
+	qpair_info* qpinfo = get_next_qpair(handle->info);
+	mutex_lock(&qpinfo->mtx);
+	int ret = nvme_ns_read(handle->info->ns, qpinfo->qpair, buffer,
+		rounded_pos / block_size, rounded_len / block_size,
+		(nvme_cmd_cb)disk_io_callback, &status, 0);
+	mutex_unlock(&qpinfo->mtx);
 	if (ret != 0)
 		return ret;
 
 	while (status == EINPROGRESS) {
-		nvme_ioqp_poll(handle->info->qpair, 1);
-		snooze(5);
+		// nvme_ioqp_poll uses locking internally on the entire device,
+		// not just this qpair, so it is entirely possible that it could
+		// return 0 (i.e. no completions processed) and yet our status
+		// changed, because some other thread processed the completion
+		// before we got to it. So, recheck it before sleeping.
+		if (nvme_ioqp_poll(qpinfo->qpair, 0) == 0 && status == EINPROGRESS)
+			snooze(5);
 	}
 
 	if (status != B_OK)