From 2bd1323ab88a8f1d422960ac89b39f32eeef38b6 Mon Sep 17 00:00:00 2001
From: Augustin Cavalier
Date: Wed, 17 Apr 2019 23:15:06 -0400
Subject: [PATCH] nvme_disk: Respect the maximum transfer size by segmenting I/O.

On VirtualBox, the reported maximum transfer size is 2MB; but all I/O >= 753KB
fails in an identical way to the "maximum size" failures. In other news,
there are multiple open tickets in the VirtualBox tracker about Linux
systems failing to boot off NVMe...

I tested with a hacked-in maximum segment size of 752KB and everything
seemed to work just fine. Writing a HPKG to a FAT partition on NVMe,
rebooting, and then reading back the HPKG showed it had the same
sha256sum as the one stored in /system did. So this is working!
---
Reviewer notes (changes relative to the patch as originally posted):

 * do_nvme_segmented_io(): documented the contract. A request of length 0
   returns B_OK up front, a max_transfer_size of 0 no longer divides by
   zero (the request is issued unsegmented instead, as before this patch),
   and the segment counter is a size_t rather than an int32.
 * nvme_disk_read() / nvme_disk_write(): rounded_len is cast to off_t
   before the subtraction, so both std::max() arguments have the same
   type and the clamp at 0 operates on a signed value.
 * The header names in the first hunk were missing from the text this
   patch was reconstructed from. <algorithm> is restored on the added line
   because it declares std::min/std::max; the context lines are left as
   found and need their targets filled in before this can be applied.
 * Hunk line counts and the diffstat are updated accordingly; the blob ids
   on the "index" line still refer to the original revision.

 .../kernel/drivers/disk/nvme/nvme_disk.cpp    | 94 ++++++++++++++++---
 1 file changed, 83 insertions(+), 11 deletions(-)

diff --git a/src/add-ons/kernel/drivers/disk/nvme/nvme_disk.cpp b/src/add-ons/kernel/drivers/disk/nvme/nvme_disk.cpp
index 8a61a688a4..9a69cb8346 100644
--- a/src/add-ons/kernel/drivers/disk/nvme/nvme_disk.cpp
+++ b/src/add-ons/kernel/drivers/disk/nvme/nvme_disk.cpp
@@ -10,6 +10,7 @@
 #include
 #include
 
+#include <algorithm>
 #include
 #include
 #include
@@ -78,6 +79,7 @@ typedef struct {
 
 	uint64					capacity;
 	uint32					block_size;
+	size_t					max_transfer_size;
 	status_t				media_status;
 
 	struct qpair_info {
@@ -195,6 +197,8 @@ nvme_disk_init_device(void* _info, void** _cookie)
 	}
 
 	TRACE_ALWAYS("attached to NVMe device \"%s (%s)\"\n", cstat.mn, cstat.sn);
+	TRACE_ALWAYS("\tmaximum transfer size: %" B_PRIuSIZE "\n", cstat.max_xfer_size);
+	TRACE_ALWAYS("\tqpair count: %d\n", cstat.io_qpairs);
 
 	// TODO: export more than just the first namespace!
 	info->ns = nvme_ns_open(info->ctrlr, cstat.ns_ids[0]);
@@ -212,6 +216,8 @@ nvme_disk_init_device(void* _info, void** _cookie)
 
 	// store capacity information
 	nvme_disk_set_capacity(info, nsstat.sectors, nsstat.sector_size);
+	info->max_transfer_size = ROUNDDOWN(cstat.max_xfer_size,
+		nsstat.sector_size);
 
 	TRACE("capacity: %" B_PRIu64 ", block_size %" B_PRIu32 "\n",
 		info->capacity, info->block_size);
@@ -350,6 +356,56 @@ do_nvme_io(nvme_disk_driver_info* info, off_t rounded_pos, void* buffer,
 }
 
 
+/*!	Transfers *rounded_len bytes starting at rounded_pos as a sequence of
+	do_nvme_io() calls of at most info->max_transfer_size bytes each.
+	If a segment fails, *rounded_len is set to the number of bytes the
+	preceding segments transferred; the result is then B_PARTIAL_READ or
+	B_PARTIAL_WRITE when that number is non-zero, and the status of the
+	failed segment otherwise.
+*/
+static status_t
+do_nvme_segmented_io(nvme_disk_driver_info* info, off_t rounded_pos,
+	void* buffer, size_t* rounded_len, bool write = false)
+{
+	// The max transfer size is already a multiple of the block size,
+	// so divide and iterate appropriately. In the case where the length
+	// is less than the maximum transfer size, we'll wind up with 0 in the
+	// division, and only one transfer to take care of.
+	const size_t total = *rounded_len;
+	if (total == 0)
+		return B_OK;
+
+	// A limit of 0 must not reach the division below; treat it as "no
+	// limit" and issue the whole request as a single transfer.
+	const size_t max_xfer = info->max_transfer_size != 0
+		? info->max_transfer_size : total;
+	size_t transfers = total / max_xfer;
+	if ((total % max_xfer) != 0)
+		transfers++;
+
+	size_t transferred = 0;
+	for (size_t i = 0; i < transfers; i++) {
+		size_t transfer_len = max_xfer;
+		// The last transfer will usually be smaller.
+		if (i == (transfers - 1))
+			transfer_len = total - transferred;
+
+		status_t status = do_nvme_io(info, rounded_pos, buffer,
+			&transfer_len, write);
+		if (status != B_OK) {
+			*rounded_len = transferred;
+			return transferred > 0 ? (write ? B_PARTIAL_WRITE : B_PARTIAL_READ)
+				: status;
+		}
+
+		transferred += transfer_len;
+		rounded_pos += transfer_len;
+		buffer = ((int8*)buffer) + transfer_len;
+	}
+	return B_OK;
+}
+
+
 static status_t
 nvme_disk_read(void* cookie, off_t pos, void* buffer, size_t* length)
 {
@@ -365,14 +421,20 @@ nvme_disk_read(void* cookie, off_t pos, void* buffer, size_t* length)
 		|| IS_USER_ADDRESS(buffer)) {
 		void* bounceBuffer = malloc(rounded_len);
 		MemoryDeleter _(bounceBuffer);
-		if (bounceBuffer == NULL)
+		if (bounceBuffer == NULL) {
+			*length = 0;
 			return B_NO_MEMORY;
+		}
 
 		status_t status = nvme_disk_read(cookie, rounded_pos, bounceBuffer,
 			&rounded_len);
 		if (status != B_OK) {
-			*length = 0;
-			return status;
+			// The "rounded_len" will be the actual transferred length, but
+			// of course it will contain the padding.
+			*length = std::min(*length, (size_t)std::max((off_t)0,
+				(off_t)rounded_len - (pos - rounded_pos)));
+			if (*length == 0)
+				return status;
 		}
 
 		void* offsetBuffer = ((int8*)bounceBuffer) + (pos - rounded_pos);
@@ -385,7 +447,7 @@ nvme_disk_read(void* cookie, off_t pos, void* buffer, size_t* length)
 
 	// If we got here, that means the arguments are already rounded to LBAs,
 	// so just do the I/O directly.
-	return do_nvme_io(handle->info, pos, buffer, length);
+	return do_nvme_segmented_io(handle->info, pos, buffer, length);
 }
 
 
@@ -402,8 +464,10 @@ nvme_disk_write(void* cookie, off_t pos, const void* buffer, size_t* length)
 		|| IS_USER_ADDRESS(buffer)) {
 		void* bounceBuffer = malloc(rounded_len);
 		MemoryDeleter _(bounceBuffer);
-		if (bounceBuffer == NULL)
+		if (bounceBuffer == NULL) {
+			*length = 0;
 			return B_NO_MEMORY;
+		}
 
 		// Since we rounded, we need to read in the first and last logical
 		// blocks before we copy our information to the bounce buffer.
@@ -411,14 +475,18 @@ nvme_disk_write(void* cookie, off_t pos, const void* buffer, size_t* length)
 		size_t readlen = block_size;
 		status_t status = do_nvme_io(handle->info, rounded_pos, bounceBuffer,
 			&readlen);
-		if (status != B_OK)
+		if (status != B_OK) {
+			*length = 0;
 			return status;
+		}
 		if (rounded_len > block_size) {
 			off_t offset = rounded_len - block_size;
 			status = do_nvme_io(handle->info, rounded_pos + offset,
 				((int8*)bounceBuffer) + offset, &readlen);
-			if (status != B_OK)
+			if (status != B_OK) {
+				*length = 0;
 				return status;
+			}
 		}
 
 		void* offsetBuffer = ((int8*)bounceBuffer) + (pos - rounded_pos);
@@ -426,19 +494,23 @@ nvme_disk_write(void* cookie, off_t pos, const void* buffer, size_t* length)
 			status = user_memcpy(offsetBuffer, buffer, *length);
 		else
 			memcpy(offsetBuffer, buffer, *length);
-		if (status != B_OK)
+		if (status != B_OK) {
+			*length = 0;
 			return status;
+		}
 
 		status = nvme_disk_write(cookie, rounded_pos, bounceBuffer,
 			&rounded_len);
-		if (status != B_OK)
-			*length = 0;
+		if (status != B_OK) {
+			*length = std::min(*length, (size_t)std::max((off_t)0,
+				(off_t)rounded_len - (pos - rounded_pos)));
+		}
 		return status;
 	}
 
 	// If we got here, that means the arguments are already rounded to LBAs,
 	// so just do the I/O directly.
-	return do_nvme_io(handle->info, pos, (void*)buffer, length, true);
+	return do_nvme_segmented_io(handle->info, pos, (void*)buffer, length, true);
 }
 
 