From 99086aa32333be5792b0f059ccbbe3c90be05ea6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Axel=20D=C3=B6rfler?= Date: Thu, 15 Aug 2013 01:34:59 +0200 Subject: [PATCH] trim: Target SCSI UNMAP command instead of WRITE SAME. * The UNMAP command is theoretically much faster, as it can get many block ranges instead of just a single range. * Furthermore, the ATA TRIM command resembles it much better. * Therefore, fs_trim_data now gets an array of ranges, and we use SCSI UNMAP to trim. * Updated BFS code to collect array ranges to fully support the new fs_trim_data possibilities. --- headers/os/drivers/Drivers.h | 9 ++- headers/private/drivers/scsi_cmds.h | 37 ++++++++- headers/private/drivers/scsi_periph.h | 6 +- headers/private/fs_shell/fssh_drivers.h | 9 ++- headers/private/kernel/util/fs_trim_support.h | 48 ++++++++++++ .../drivers/disk/scsi/scsi_disk/scsi_disk.cpp | 30 +++++--- .../file_systems/bfs/BlockAllocator.cpp | 75 +++++++++++++------ .../kernel/file_systems/bfs/BlockAllocator.h | 10 ++- .../file_systems/bfs/kernel_interface.cpp | 29 +++++-- .../kernel/generic/scsi_periph/Jamfile | 2 +- .../kernel/generic/scsi_periph/block.cpp | 49 ++++++++++-- .../generic/scsi_periph/scsi_periph_int.h | 2 +- src/bin/fstrim.cpp | 7 +- 13 files changed, 249 insertions(+), 64 deletions(-) create mode 100644 headers/private/kernel/util/fs_trim_support.h diff --git a/headers/os/drivers/Drivers.h b/headers/os/drivers/Drivers.h index ff4e1e0071..a854305b38 100644 --- a/headers/os/drivers/Drivers.h +++ b/headers/os/drivers/Drivers.h @@ -172,9 +172,12 @@ typedef struct { /* B_TRIM_DEVICE data structure */ typedef struct { - off_t offset; /* offset (in bytes) */ - off_t size; - off_t trimmed_size; /* filled on return */ + uint32 range_count; + uint64 trimmed_size; /* filled on return */ + struct range { + uint64 offset; /* offset (in bytes) */ + uint64 size; + } ranges[1]; } fs_trim_data; diff --git a/headers/private/drivers/scsi_cmds.h b/headers/private/drivers/scsi_cmds.h index 
ea4523302c..732583d9ba 100644
--- a/headers/private/drivers/scsi_cmds.h
+++ b/headers/private/drivers/scsi_cmds.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2004-2010, Haiku, Inc. All RightsReserved.
+ * Copyright 2004-2013, Haiku, Inc. All Rights Reserved.
  * Copyright 2002/03, Thomas Kurschel. All rights reserved.
  *
  * Distributed under the terms of the MIT License.
@@ -185,6 +185,8 @@
 #define SCSI_OP_WRITE_BUFFER					0x3b
 #define SCSI_OP_READ_BUFFER						0x3c
 #define SCSI_OP_CHANGE_DEFINITION				0x40
+#define SCSI_OP_WRITE_SAME_10					0x41
+#define SCSI_OP_UNMAP							0x42
 #define SCSI_OP_READ_SUB_CHANNEL				0x42
 #define SCSI_OP_READ_TOC						0x43
 #define SCSI_OP_PLAY_MSF						0x47
@@ -457,7 +459,38 @@ typedef struct scsi_cmd_wsame_16 {
 	);
 	uint8	control;
 } _PACKED scsi_cmd_wsame_16;
-	
+
+
+// UNMAP
+
+typedef struct scsi_cmd_unmap {
+	uint8	opcode;
+	LBITFIELD8_2(
+		anchor : 1,
+		_reserved1_7 : 7
+	);
+	uint32	_reserved1;
+	LBITFIELD8_2(
+		group_number : 5,
+		_reserved5_7 : 3
+	);
+	uint16	length;					// size of the parameter list (big endian)
+	uint8	control;
+} _PACKED scsi_cmd_unmap;
+
+struct scsi_unmap_block_descriptor {
+	uint64	lba;					// big endian
+	uint32	block_count;			// big endian
+	uint32	_reserved1;
+} _PACKED;
+
+struct scsi_unmap_parameter_list {
+	uint16	data_length;			// bytes following this field
+	uint16	block_data_length;		// bytes taken by all block descriptors
+	uint32	_reserved1;
+	struct scsi_unmap_block_descriptor blocks[1];
+} _PACKED;
+
 
 // REQUEST SENSE
 
diff --git a/headers/private/drivers/scsi_periph.h b/headers/private/drivers/scsi_periph.h
index b8795213c9..9be19ab86f 100644
--- a/headers/private/drivers/scsi_periph.h
+++ b/headers/private/drivers/scsi_periph.h
@@ -72,6 +72,10 @@ typedef struct scsi_periph_callbacks {
 	void (*media_changed)(periph_device_cookie cookie,
 		scsi_ccb *request);
 } scsi_periph_callbacks;
+typedef struct scsi_block_range {
+	uint64	offset;		// in bytes; converted to blocks by trim_device()
+	uint64	size;		// in bytes
+} scsi_block_range;
 
 // functions provided by this module
 typedef struct scsi_periph_interface {
@@ -119,7 +123,7 @@ typedef struct scsi_periph_interface {
 	err_res (*synchronize_cache)(scsi_periph_device device,
 		scsi_ccb *request);
 	status_t (*trim_device)(scsi_periph_device_info *device, scsi_ccb *request,
-		uint64 offset, uint64 numBlocks);
+		scsi_block_range* ranges, uint32 rangeCount);
 
 	// *** removable media ***
 	// to be called when a medium change is detected to block subsequent commands
diff --git a/headers/private/fs_shell/fssh_drivers.h b/headers/private/fs_shell/fssh_drivers.h
index aa63da35d7..aa67e69c23 100644
--- a/headers/private/fs_shell/fssh_drivers.h
+++ b/headers/private/fs_shell/fssh_drivers.h
@@ -215,9 +215,12 @@ typedef struct {
 
 /* B_TRIM_DEVICE data structure */
 typedef struct {
-	fssh_off_t	offset;			/* offset (in bytes) */
-	fssh_off_t	size;
-	fssh_off_t	trimmed_size;	/* filled on return */
+	uint32_t	range_count;
+	uint64_t	trimmed_size;	/* filled on return */
+	struct range {
+		uint64_t	offset;		/* offset (in bytes) */
+		uint64_t	size;
+	} ranges[1];
 } fssh_fs_trim_data;
 
 
diff --git a/headers/private/kernel/util/fs_trim_support.h b/headers/private/kernel/util/fs_trim_support.h
new file mode 100644
index 0000000000..a9f993c7b6
--- /dev/null
+++ b/headers/private/kernel/util/fs_trim_support.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2013, Axel Dörfler, axeld@pinc-software.de.
+ * Distributed under the terms of the MIT license.
+ */
+#ifndef _FS_TRIM_SUPPORT_H
+#define _FS_TRIM_SUPPORT_H
+
+
+#include
+
+#include
+
+
+/*!	Copies the variable-sized fs_trim_data structure found at the userland
+	address \a buffer into a kernel buffer allocated with malloc().
+	On success, \a _trimData points to that buffer, and the caller is
+	responsible to free() it. \a size is the size of the userland buffer.
+
+	NOTE(review): buffers passed in by kernel callers (like the ioctl() BFS
+	issues on its device) presumably fail the IS_USER_ADDRESS() check --
+	confirm how those are supposed to be handled.
+*/
+static inline status_t
+copy_trim_data_from_user(void* buffer, size_t size, fs_trim_data*& _trimData)
+{
+	if (!IS_USER_ADDRESS(buffer))
+		return B_BAD_ADDRESS;
+
+	uint32 count;
+	if (user_memcpy(&count, buffer, sizeof(count)) != B_OK)
+		return B_BAD_ADDRESS;
+
+	// fs_trim_data already contains the first range. Compare the count
+	// against what fits into the buffer instead of computing the size first,
+	// so that neither a count of 0 nor a huge one can wrap the computation.
+	const size_t kRangeSize = sizeof(uint64) * 2;
+	if (count == 0 || size < sizeof(fs_trim_data)
+		|| count - 1 > (size - sizeof(fs_trim_data)) / kRangeSize) {
+		return B_BAD_VALUE;
+	}
+
+	size_t bytes = (count - 1) * kRangeSize + sizeof(fs_trim_data);
+
+	fs_trim_data* trimData = (fs_trim_data*)malloc(bytes);
+	if (trimData == NULL)
+		return B_NO_MEMORY;
+
+	if (user_memcpy(trimData, buffer, bytes) != B_OK) {
+		free(trimData);
+		return B_BAD_ADDRESS;
+	}
+
+	// Userland could have changed the count between the two copies; the
+	// callers iterate over range_count, so it has to match the buffer.
+	if (trimData->range_count != count) {
+		free(trimData);
+		return B_BAD_VALUE;
+	}
+
+	_trimData = trimData;
+	return B_OK;
+}
+
+
+/*!	Copies the leading part of \a trimData (range_count and trimmed_size, but
+	none of the ranges) back to the userland address \a buffer.
+*/
+static inline status_t
+copy_trim_data_to_user(void* buffer, fs_trim_data* trimData)
+{
+	// Do not copy any ranges
+	return user_memcpy(buffer, trimData, sizeof(uint64) * 2);
+}
+
+
+#endif	// _FS_TRIM_SUPPORT_H
diff --git a/src/add-ons/kernel/drivers/disk/scsi/scsi_disk/scsi_disk.cpp b/src/add-ons/kernel/drivers/disk/scsi/scsi_disk/scsi_disk.cpp
index c56639617f..f445764dd1 100644
--- a/src/add-ons/kernel/drivers/disk/scsi/scsi_disk/scsi_disk.cpp
+++ b/src/add-ons/kernel/drivers/disk/scsi/scsi_disk/scsi_disk.cpp
@@ -21,7 +21,10 @@
 #include
 #include
 
+#include
+
 #include
+#include
 
 #include "dma_resources.h"
 #include "IORequest.h"
@@ -154,7 +157,7 @@ synchronize_cache(das_driver_info *device)
 
 
 static status_t
-trim_device(das_driver_info* device, off_t offset, off_t size)
+trim_device(das_driver_info* device, fs_trim_data* trimData)
 {
 	TRACE("trim_device()\n");
 
@@ -162,10 +165,17 @@ trim_device(das_driver_info* device, off_t offset, off_t size)
 	if (request == NULL)
 		return B_NO_MEMORY;
 
+	uint64 trimmedSize = 0;
+	for (uint32 i = 0; i < trimData->range_count; i++) {
+		trimmedSize += trimData->ranges[i].size;
+	}
 	status_t status = sSCSIPeripheral->trim_device(device->scsi_periph_device,
-		request, offset / device->block_size, size / device->block_size);
+		request, (scsi_block_range*)&trimData->ranges[0],
+		trimData->range_count);
 
 	device->scsi->free_ccb(request);
 
+	if (status == B_OK)
+		trimData->trimmed_size = trimmedSize;
 	return status;
 }
@@ -405,17 +415,19 @@ das_ioctl(void* cookie, uint32 op, void* buffer, size_t length)
 
 		case B_TRIM_DEVICE:
 		{
-			fs_trim_data trimData;
-			if (user_memcpy(&trimData, buffer, sizeof(fs_trim_data)) != B_OK)
-				return B_BAD_ADDRESS;
-
-			status_t status = trim_device(info, trimData.offset, trimData.size);
+			fs_trim_data* trimData;
+			status_t status = copy_trim_data_from_user(buffer, length,
+				trimData);
 			if (status != B_OK)
 				return status;
 
-			trimData.trimmed_size = trimData.size;
+			MemoryDeleter deleter(trimData);
 
-			return user_memcpy(buffer, &trimData, sizeof(fs_trim_data));
+			status = trim_device(info, trimData);
+			if (status != B_OK)
+				return status;
+
+			return copy_trim_data_to_user(buffer, trimData);
 		}
 
 		default:
diff --git a/src/add-ons/kernel/file_systems/bfs/BlockAllocator.cpp b/src/add-ons/kernel/file_systems/bfs/BlockAllocator.cpp
index 432df6c963..31dd75380e 100644
--- a/src/add-ons/kernel/file_systems/bfs/BlockAllocator.cpp
+++ b/src/add-ons/kernel/file_systems/bfs/BlockAllocator.cpp
@@ -1191,19 +1191,30 @@ BlockAllocator::_CheckGroup(int32 groupIndex) const
 
 
 status_t
-BlockAllocator::Trim(off_t offset, off_t size, off_t& trimmedSize)
+BlockAllocator::Trim(uint64 offset, uint64 size, uint64& trimmedSize)
 {
+	// fs_trim_data already contains the first range, every further range
+	// takes another two uint64s (offset and size).
+	const uint32 kTrimRanges = 128;
+	fs_trim_data* trimData = (fs_trim_data*)malloc(sizeof(fs_trim_data)
+		+ 2 * sizeof(uint64) * (kTrimRanges - 1));
+	if (trimData == NULL)
+		return B_NO_MEMORY;
+
+	MemoryDeleter deleter(trimData);
 	RecursiveLocker locker(fLock);
 
 	// TODO: take given offset and size into account!
 	int32 lastGroup = fNumGroups - 1;
 	uint32 firstBlock = 0;
 	uint32 firstBit = 0;
-	off_t currentBlock = 0;
+	uint64 currentBlock = 0;
 	uint32 blockShift = fVolume->BlockShift();
 
-	off_t firstFree = -1;
+	uint64 firstFree = 0;
 	size_t freeLength = 0;
+
+	trimData->range_count = 0;
 	trimmedSize = 0;
 
 	AllocationBlock cached(fVolume);
@@ -1217,8 +1228,9 @@ BlockAllocator::Trim(off_t offset, off_t size, off_t& trimmedSize)
 				if (cached.IsUsed(i)) {
 					// Block is in use
 					if (freeLength > 0) {
-						status_t status = _TrimNext(firstFree << blockShift,
-							freeLength << blockShift, trimmedSize);
+						status_t status = _TrimNext(*trimData, kTrimRanges,
+							firstFree << blockShift, freeLength << blockShift,
+							false, trimmedSize);
 						if (status != B_OK)
 							return status;
 
@@ -1237,12 +1249,8 @@ BlockAllocator::Trim(off_t offset, off_t size, off_t& trimmedSize)
 		firstBit = 0;
 	}
 
-	if (freeLength > 0) {
-		return _TrimNext(firstFree << blockShift, freeLength << blockShift,
-			trimmedSize);
-	}
-
-	return B_OK;
+	return _TrimNext(*trimData, kTrimRanges, firstFree << blockShift,
+		freeLength << blockShift, true, trimmedSize);
 }
 
 
@@ -2181,21 +2189,76 @@ BlockAllocator::_AddInodeToIndex(Inode* inode)
 
 
+/*!	Passes all ranges collected in \a trimData on to the device, adds what
+	the device reports as trimmed to \a trimmedSize, and empties the array.
+	Does nothing if no range has been collected.
+*/
+static status_t
+flush_trim_data(int device, fs_trim_data& trimData, uint64& trimmedSize)
+{
+	if (trimData.range_count == 0)
+		return B_OK;
+
+	// fs_trim_data already contains the first range, every further range
+	// takes another two uint64s. The size must cover all collected ranges,
+	// as the receiver checks it against range_count.
+	size_t bytes = sizeof(fs_trim_data)
+		+ 2 * sizeof(uint64) * (trimData.range_count - 1);
+
+	trimData.trimmed_size = 0;
+	if (ioctl(device, B_TRIM_DEVICE, &trimData, bytes) != 0)
+		return errno;
+
+	trimmedSize += trimData.trimmed_size;
+	trimData.range_count = 0;
+	return B_OK;
+}
+
+
+/*!	Appends the range to \a trimData, unless it is empty, or the array
+	already holds \a maxRanges ranges.
+	\return \c true if the range has been added, \c false if not.
+*/
+bool
+BlockAllocator::_AddTrim(fs_trim_data& trimData, uint32 maxRanges,
+	uint64 offset, uint64 size)
+{
+	if (trimData.range_count < maxRanges && size > 0) {
+		trimData.ranges[trimData.range_count].offset = offset;
+		trimData.ranges[trimData.range_count].size = size;
+		trimData.range_count++;
+		return true;
+	}
+
+	return false;
+}
+
+
+/*!	Collects the range in \a trimData, and passes the collected ranges on to
+	the device once the array is full, or if \a force is \c true (which the
+	caller sets for its last range).
+*/
 status_t
-BlockAllocator::_TrimNext(off_t offset, off_t size, off_t& trimmedSize)
+BlockAllocator::_TrimNext(fs_trim_data& trimData, uint32 maxRanges,
+	uint64 offset, uint64 size, bool force, uint64& trimmedSize)
 {
-	PRINT(("_TrimNext(offset %lld, size %lld)\n", offset, size));
-
-	fs_trim_data trimData;
-	trimData.offset = offset;
-	trimData.size = size;
-	trimData.trimmed_size = 0;
-
-	if (ioctl(fVolume->Device(), B_TRIM_DEVICE, &trimData,
-			sizeof(fs_trim_data)) != 0) {
-		return errno;
-	}
-
-	trimmedSize += trimData.trimmed_size;
+	PRINT(("_TrimNext(index %" B_PRIu32 ", offset %" B_PRIu64 ", size %"
+		B_PRIu64 ")\n", trimData.range_count, offset, size));
+
+	bool pushed = _AddTrim(trimData, maxRanges, offset, size);
+	if (pushed && !force)
+		return B_OK;
+
+	status_t status = flush_trim_data(fVolume->Device(), trimData,
+		trimmedSize);
+	if (status != B_OK)
+		return status;
+
+	if (!pushed && _AddTrim(trimData, maxRanges, offset, size) && force) {
+		// The range did not fit before, and it is the last one: do not
+		// leave it behind in the array.
+		return flush_trim_data(fVolume->Device(), trimData, trimmedSize);
+	}
+
 	return B_OK;
 }
 
diff --git a/src/add-ons/kernel/file_systems/bfs/BlockAllocator.h b/src/add-ons/kernel/file_systems/bfs/BlockAllocator.h
index 88b9d525d1..d3da8672f5 100644
--- a/src/add-ons/kernel/file_systems/bfs/BlockAllocator.h
+++ b/src/add-ons/kernel/file_systems/bfs/BlockAllocator.h
@@ -46,7 +46,8 @@ public:
 								int32 group, uint16 start, uint16 numBlocks,
 								uint16 minimum, block_run& run);
 
-			status_t		Trim(off_t offset, off_t size, off_t& trimmedSize);
+			status_t		Trim(uint64 offset, uint64 size,
+								uint64& trimmedSize);
 
 			status_t		StartChecking(const check_control* control);
 			status_t		StopChecking(check_control* control);
@@ -83,8 +84,11 @@ private:
 			void			_FreeIndices();
 			status_t		_AddInodeToIndex(Inode* inode);
 			status_t		_WriteBackCheckBitmap();
-			status_t		_TrimNext(off_t offset, off_t size,
-								off_t& trimmedSize);
+			bool			_AddTrim(fs_trim_data& trimData, uint32 maxRanges,
+								uint64 offset, uint64 size);
+			status_t		_TrimNext(fs_trim_data& trimData, uint32 maxRanges,
+								uint64 offset, uint64 size, bool force,
+								uint64& trimmedSize);
 
 	static	status_t		_Initialize(BlockAllocator* self);
 
diff --git a/src/add-ons/kernel/file_systems/bfs/kernel_interface.cpp b/src/add-ons/kernel/file_systems/bfs/kernel_interface.cpp
index 11400033ca..ab68d576c9 100644
--- a/src/add-ons/kernel/file_systems/bfs/kernel_interface.cpp
+++ b/src/add-ons/kernel/file_systems/bfs/kernel_interface.cpp
@@ -20,8 +20,10 @@
 // TODO: temporary solution as long
as there is no public I/O requests API
 #ifndef BFS_SHELL
 #	include
+#	include
 #endif
 
+
 #define BFS_IO_SIZE	65536
 
 
@@ -623,19 +625,34 @@ bfs_ioctl(fs_volume* _volume, fs_vnode* _node, void* _cookie, uint32 cmd,
 	Volume* volume = (Volume*)_volume->private_volume;
 
 	switch (cmd) {
+#ifndef BFS_SHELL
 		case B_TRIM_DEVICE:
 		{
-			fs_trim_data trimData;
-			if (user_memcpy(&trimData, buffer, sizeof(fs_trim_data)) != B_OK)
-				return B_BAD_ADDRESS;
-
-			status_t status = volume->Allocator().Trim(trimData.offset,
-				trimData.size, trimData.trimmed_size);
+			fs_trim_data* trimData;
+			status_t status = copy_trim_data_from_user(buffer, bufferLength,
+				trimData);
 			if (status != B_OK)
 				return status;
 
-			return user_memcpy(buffer, &trimData, sizeof(fs_trim_data));
+			MemoryDeleter deleter(trimData);
+			trimData->trimmed_size = 0;
+
+			// Trim each range in turn, and report the sum of what the
+			// allocator has trimmed back to the caller.
+			for (uint32 i = 0; i < trimData->range_count; i++) {
+				uint64 trimmedSize = 0;
+				status = volume->Allocator().Trim(
+					trimData->ranges[i].offset, trimData->ranges[i].size,
+					trimmedSize);
+				if (status != B_OK)
+					return status;
+
+				trimData->trimmed_size += trimmedSize;
+			}
+
+			return copy_trim_data_to_user(buffer, trimData);
 		}
+#endif
 
 		case BFS_IOCTL_VERSION:
 		{
diff --git a/src/add-ons/kernel/generic/scsi_periph/Jamfile b/src/add-ons/kernel/generic/scsi_periph/Jamfile
index 2aaa529b35..5fd764c5bf 100644
--- a/src/add-ons/kernel/generic/scsi_periph/Jamfile
+++ b/src/add-ons/kernel/generic/scsi_periph/Jamfile
@@ -1,6 +1,6 @@
 SubDir HAIKU_TOP src add-ons kernel generic scsi_periph ;
 
-UsePrivateHeaders drivers kernel ;
+UsePrivateHeaders drivers kernel shared ;
 SubDirHdrs $(HAIKU_TOP) src system kernel device_manager ;
 
 # disable debug output, if debugging is disabled
diff --git a/src/add-ons/kernel/generic/scsi_periph/block.cpp b/src/add-ons/kernel/generic/scsi_periph/block.cpp
index 0c6eb0d8ad..7f6cfcd77a 100644
--- a/src/add-ons/kernel/generic/scsi_periph/block.cpp
+++ b/src/add-ons/kernel/generic/scsi_periph/block.cpp
@@ -9,9 +9,12 @@
 //!
Handling of block device
 
 
-#include "scsi_periph_int.h"
 #include
 
+#include
+
+#include "scsi_periph_int.h"
+
 
 status_t
 periph_check_capacity(scsi_periph_device_info *device, scsi_ccb *request)
@@ -117,23 +120,73 @@ periph_check_capacity(scsi_periph_device_info *device, scsi_ccb *request)
 
+/*!	Unmaps the given \a ranges (byte offsets and sizes, which are converted
+	to blocks using the block size of the \a device) with a single SCSI UNMAP
+	command.
+*/
 status_t
 periph_trim_device(scsi_periph_device_info *device, scsi_ccb *request,
-	uint64 offset, uint64 numBlocks)
+	scsi_block_range* ranges, uint32 rangeCount)
 {
 	err_res res;
 	int retries = 0;
 
+	// The parameter list always contains at least one block descriptor
+	if (rangeCount == 0)
+		return B_BAD_VALUE;
+
+	size_t unmapBlockSize = (rangeCount - 1)
+			* sizeof(scsi_unmap_block_descriptor)
+		+ sizeof(scsi_unmap_parameter_list);
+
+	// The parameter list length is a 16 bit field in the command.
+	// TODO: check block limits VPD page
+	// TODO: instead of failing, we should try to complete the request in
+	// several passes.
+	if (unmapBlockSize > 65535)
+		return B_BAD_VALUE;
+
+	scsi_unmap_parameter_list* unmapBlocks
+		= (scsi_unmap_parameter_list*)malloc(unmapBlockSize);
+	if (unmapBlocks == NULL)
+		return B_NO_MEMORY;
+
+	MemoryDeleter deleter(unmapBlocks);
+
+	// Prepare request data
+	memset(unmapBlocks, 0, unmapBlockSize);
+
+	// data_length counts the bytes following that field, block_data_length
+	// only the block descriptors following the header.
+	unmapBlocks->data_length = B_HOST_TO_BENDIAN_INT16(unmapBlockSize
+		- sizeof(unmapBlocks->data_length));
+	unmapBlocks->block_data_length = B_HOST_TO_BENDIAN_INT16(
+		rangeCount * sizeof(scsi_unmap_block_descriptor));
+
+	for (uint32 i = 0; i < rangeCount; i++) {
+		uint64 blockCount = ranges[i].size / device->block_size;
+		if (blockCount > 0xffffffffULL) {
+			// Does not fit into the 32 bit block count of a descriptor
+			return B_BAD_VALUE;
+		}
+
+		unmapBlocks->blocks[i].lba = B_HOST_TO_BENDIAN_INT64(
+			ranges[i].offset / device->block_size);
+		unmapBlocks->blocks[i].block_count
+			= B_HOST_TO_BENDIAN_INT32(blockCount);
+	}
+
 	do {
 		request->flags = SCSI_DIR_OUT;
-		request->sort = offset;
+		request->sort = ranges[0].offset / device->block_size;
 		request->timeout = device->std_timeout;
 
-		scsi_cmd_wsame_16* cmd = (scsi_cmd_wsame_16*)request->cdb;
+		scsi_cmd_unmap* cmd = (scsi_cmd_unmap*)request->cdb;
 
 		memset(cmd, 0, sizeof(*cmd));
-		cmd->opcode = SCSI_OP_WRITE_SAME_16;
-		cmd->unmap = 1;
-		cmd->lba = B_HOST_TO_BENDIAN_INT64(offset);
-		cmd->length = B_HOST_TO_BENDIAN_INT32(numBlocks);
+		cmd->opcode = SCSI_OP_UNMAP;
+		cmd->length = B_HOST_TO_BENDIAN_INT16(unmapBlockSize);
+
+		request->data = (uint8*)unmapBlocks;
+		request->data_length = unmapBlockSize;
 
 		request->cdb_length = sizeof(*cmd);
 
diff --git a/src/add-ons/kernel/generic/scsi_periph/scsi_periph_int.h b/src/add-ons/kernel/generic/scsi_periph/scsi_periph_int.h
index 30df67ab4b..af032b45b9 100644
--- a/src/add-ons/kernel/generic/scsi_periph/scsi_periph_int.h
+++ b/src/add-ons/kernel/generic/scsi_periph/scsi_periph_int.h
@@ -78,7 +78,7 @@ status_t periph_handle_free(scsi_periph_handle_info *handle);
 status_t periph_check_capacity(scsi_periph_device_info *device,
 	scsi_ccb *ccb);
 status_t periph_trim_device(scsi_periph_device_info *device, scsi_ccb *request,
-	uint64 offset, uint64 numBlocks);
+	scsi_block_range* ranges, uint32 rangeCount);
 
 
 // device.c
diff --git a/src/bin/fstrim.cpp b/src/bin/fstrim.cpp
index d5dda26dfe..cc0b47f91a 100644
--- a/src/bin/fstrim.cpp
+++ b/src/bin/fstrim.cpp
@@ -7,7 +7,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include
 #include
@@ -66,8 +66,9 @@ main(int argc, char** argv)
 	FileDescriptorCloser closer(fd);
 
 	fs_trim_data trimData;
-	trimData.offset = 0;
-	trimData.size = OFF_MAX;
+	trimData.range_count = 1;
+	trimData.ranges[0].offset = 0;
+	trimData.ranges[0].size = UINT64_MAX;
 	trimData.trimmed_size = 0;
 
 	if (ioctl(fd, B_TRIM_DEVICE, &trimData, sizeof(fs_trim_data)) != 0) {