trim: Target SCSI UNMAP command instead of WRITE SAME.

* The UNMAP command is theoretically much faster, as it accepts many block
  ranges in a single command instead of just one range.
* Furthermore, it is much closer to the ATA TRIM command than WRITE SAME is.
* Therefore, fs_trim_data now gets an array of ranges, and we use SCSI UNMAP
  to trim.
* Updated the BFS code to collect free ranges into an array, so that it fully
  supports the new fs_trim_data capabilities.
This commit is contained in:
Axel Dörfler 2013-08-15 01:34:59 +02:00
parent 960c56aea5
commit 99086aa323
13 changed files with 249 additions and 64 deletions

View File

@ -172,9 +172,12 @@ typedef struct {
/* B_TRIM_DEVICE data structure */
typedef struct {
off_t offset; /* offset (in bytes) */
off_t size;
off_t trimmed_size; /* filled on return */
uint32 range_count;
uint64 trimmed_size; /* filled on return */
struct range {
uint64 offset; /* offset (in bytes) */
uint64 size;
} ranges[1];
} fs_trim_data;

View File

@ -1,5 +1,5 @@
/*
* Copyright 2004-2010, Haiku, Inc. All RightsReserved.
* Copyright 2004-2013, Haiku, Inc. All RightsReserved.
* Copyright 2002/03, Thomas Kurschel. All rights reserved.
*
* Distributed under the terms of the MIT License.
@ -185,6 +185,8 @@
#define SCSI_OP_WRITE_BUFFER 0x3b
#define SCSI_OP_READ_BUFFER 0x3c
#define SCSI_OP_CHANGE_DEFINITION 0x40
#define SCSI_OP_WRITE_SAME_10 0x41
// NOTE(review): SCSI_OP_UNMAP and SCSI_OP_READ_SUB_CHANNEL both expand to
// 0x42. Presumably the two commands belong to different device classes
// (block devices vs. CD/DVD devices) and never clash on one device -- confirm
// before using either constant in code that switches on the opcode.
#define SCSI_OP_UNMAP 0x42
#define SCSI_OP_READ_SUB_CHANNEL 0x42
#define SCSI_OP_READ_TOC 0x43
#define SCSI_OP_PLAY_MSF 0x47
@ -457,7 +459,38 @@ typedef struct scsi_cmd_wsame_16 {
);
uint8 control;
} _PACKED scsi_cmd_wsame_16;
// UNMAP
// Command descriptor block for SCSI_OP_UNMAP. The structure is packed; its
// members add up to 10 bytes (1 + 1 + 4 + 1 + 2 + 1).
typedef struct scsi_cmd_unmap {
// command opcode; set to SCSI_OP_UNMAP by periph_trim_device()
uint8 opcode;
// one byte of flags; only the 1-bit "anchor" field is named
// (presumably LBITFIELD8_2 lists the fields starting at the lowest bit --
// TODO confirm against the macro definition)
LBITFIELD8_2(
anchor : 1,
_reserved1_7 : 7
);
// four reserved bytes
uint32 _reserved1;
// one byte: 5-bit group number plus three reserved bits
LBITFIELD8_2(
group_number : 5,
_reserved5_7 : 3
);
// size in bytes of the parameter list sent with the command; stored
// big-endian (periph_trim_device() uses B_HOST_TO_BENDIAN_INT16())
uint16 length;
uint8 control;
} _PACKED scsi_cmd_unmap;
// One block range inside the UNMAP parameter list (16 bytes, packed).
// periph_trim_device() stores both values big-endian and in units of device
// blocks (byte offset/size divided by the device block size).
struct scsi_unmap_block_descriptor {
// first logical block of the range
uint64 lba;
// number of blocks in the range
uint32 block_count;
uint32 _reserved1;
} _PACKED;
// Data-out buffer of the UNMAP command: an 8 byte header followed by a
// variable number of block descriptors. The structure is packed.
struct scsi_unmap_parameter_list {
// length field of the header; written big-endian by periph_trim_device()
uint16 data_length;
// NOTE(review): presumably the byte length of the descriptors that follow
// the header. The visible periph_trim_device() code assigns data_length
// twice and never writes this field -- verify which value belongs here.
uint16 block_data_length;
uint32 _reserved1;
// Variable-length trailing array declared with one element; users allocate
// (count - 1) * sizeof(scsi_unmap_block_descriptor)
// + sizeof(scsi_unmap_parameter_list) bytes for "count" descriptors.
struct scsi_unmap_block_descriptor blocks[1];
} _PACKED;
// REQUEST SENSE

View File

@ -72,6 +72,10 @@ typedef struct scsi_periph_callbacks {
void (*media_changed)(periph_device_cookie cookie, scsi_ccb *request);
} scsi_periph_callbacks;
// A single range handed to scsi_periph_interface::trim_device().
// Both members are in bytes: periph_trim_device() divides them by the device
// block size before building the UNMAP descriptors.
// The SCSI disk driver casts fs_trim_data::ranges directly to this type, so
// the layout (two consecutive uint64) has to stay identical to that of the
// fs_trim_data range entries.
typedef struct scsi_block_range {
// start of the range (in bytes)
uint64 offset;
// length of the range (in bytes)
uint64 size;
} scsi_block_range;
// functions provided by this module
typedef struct scsi_periph_interface {
@ -119,7 +123,7 @@ typedef struct scsi_periph_interface {
err_res (*synchronize_cache)(scsi_periph_device device, scsi_ccb *request);
status_t (*trim_device)(scsi_periph_device_info *device, scsi_ccb *request,
uint64 offset, uint64 numBlocks);
scsi_block_range* ranges, uint32 rangeCount);
// *** removable media ***
// to be called when a medium change is detected to block subsequent commands

View File

@ -215,9 +215,12 @@ typedef struct {
/* B_TRIM_DEVICE data structure */
typedef struct {
fssh_off_t offset; /* offset (in bytes) */
fssh_off_t size;
fssh_off_t trimmed_size; /* filled on return */
uint32_t range_count;
uint64_t trimmed_size; /* filled on return */
struct range {
uint64_t offset; /* offset (in bytes) */
uint64_t size;
} ranges[1];
} fssh_fs_trim_data;

View File

@ -0,0 +1,48 @@
/*
* Copyright 2013, Axel Dörfler, axeld@pinc-software.de.
* Distributed under the terms of the MIT license.
*/
#ifndef _FS_TRIM_SUPPORT_H
#define _FS_TRIM_SUPPORT_H
#include <Drivers.h>
#include <kernel.h>
// Copies a variable-length fs_trim_data structure from userland into a newly
// malloc()ed kernel buffer.
//
// buffer:    userland address of the fs_trim_data structure
// size:      size of the userland buffer in bytes, as passed to the ioctl
// _trimData: set to the kernel copy on success only; the caller owns it and
//            has to free() it (the visible callers use a MemoryDeleter)
//
// Returns B_BAD_ADDRESS if the buffer is not a (readable) userland address,
// B_BAD_VALUE if range_count is zero or does not fit into "size" bytes,
// B_NO_MEMORY if the kernel copy could not be allocated, and B_OK otherwise.
static inline status_t
copy_trim_data_from_user(void* buffer, size_t size, fs_trim_data*& _trimData)
{
	if (!IS_USER_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	uint32 count;
	if (user_memcpy(&count, buffer, sizeof(count)) != B_OK)
		return B_BAD_ADDRESS;

	// sizeof(fs_trim_data) already contains the first range; every further
	// range adds an offset/size pair.
	const size_t kRangeSize = sizeof(uint64) * 2;

	// Validate by division rather than by computing the total first: the
	// multiplication could wrap around with a 32 bit size_t and let a huge
	// range_count slip through. A count of zero would underflow "count - 1".
	if (count == 0 || size < sizeof(fs_trim_data)
		|| count - 1 > (size - sizeof(fs_trim_data)) / kRangeSize) {
		return B_BAD_VALUE;
	}

	size_t bytes = (size_t)(count - 1) * kRangeSize + sizeof(fs_trim_data);

	void* trimBuffer = malloc(bytes);
	if (trimBuffer == NULL)
		return B_NO_MEMORY;

	if (user_memcpy(trimBuffer, buffer, bytes) != B_OK) {
		// Do not leak the kernel buffer if userland unmapped its memory
		free(trimBuffer);
		return B_BAD_ADDRESS;
	}

	fs_trim_data* trimData = (fs_trim_data*)trimBuffer;

	// Userland may have changed range_count between the two copies; only the
	// value that was validated above matches the size of our buffer.
	trimData->range_count = count;

	_trimData = trimData;
	return B_OK;
}
// Writes the leading part of \a trimData back to the userland \a buffer and
// returns the result of user_memcpy().
// Only the first two 64 bit slots are transferred -- intended to cover
// range_count and trimmed_size (assumes these occupy 16 bytes; confirm for
// ABIs that align uint64 to 4 bytes). The ranges are not copied back.
static inline status_t
copy_trim_data_to_user(void* buffer, fs_trim_data* trimData)
{
	const size_t kHeaderSize = 2 * sizeof(uint64);

	return user_memcpy(buffer, trimData, kHeaderSize);
}
#endif // _FS_TRIM_SUPPORT_H

View File

@ -21,7 +21,10 @@
#include <string.h>
#include <stdlib.h>
#include <AutoDeleter.h>
#include <fs/devfs.h>
#include <util/fs_trim_support.h>
#include "dma_resources.h"
#include "IORequest.h"
@ -154,7 +157,7 @@ synchronize_cache(das_driver_info *device)
static status_t
trim_device(das_driver_info* device, off_t offset, off_t size)
trim_device(das_driver_info* device, fs_trim_data* trimData)
{
TRACE("trim_device()\n");
@ -162,10 +165,17 @@ trim_device(das_driver_info* device, off_t offset, off_t size)
if (request == NULL)
return B_NO_MEMORY;
uint64 trimmedSize = 0;
for (uint32 i = 0; i < trimData->range_count; i++) {
trimmedSize += trimData->ranges[i].size;
}
status_t status = sSCSIPeripheral->trim_device(device->scsi_periph_device,
request, offset / device->block_size, size / device->block_size);
request, (scsi_block_range*)&trimData->ranges[0],
trimData->range_count);
device->scsi->free_ccb(request);
if (status == B_OK)
trimData->trimmed_size = trimmedSize;
return status;
}
@ -405,17 +415,19 @@ das_ioctl(void* cookie, uint32 op, void* buffer, size_t length)
case B_TRIM_DEVICE:
{
fs_trim_data trimData;
if (user_memcpy(&trimData, buffer, sizeof(fs_trim_data)) != B_OK)
return B_BAD_ADDRESS;
status_t status = trim_device(info, trimData.offset, trimData.size);
fs_trim_data* trimData;
status_t status = copy_trim_data_from_user(buffer, length,
trimData);
if (status != B_OK)
return status;
trimData.trimmed_size = trimData.size;
MemoryDeleter deleter(trimData);
return user_memcpy(buffer, &trimData, sizeof(fs_trim_data));
status = trim_device(info, trimData);
if (status != B_OK)
return status;
return copy_trim_data_to_user(buffer, trimData);
}
default:

View File

@ -1191,19 +1191,28 @@ BlockAllocator::_CheckGroup(int32 groupIndex) const
status_t
BlockAllocator::Trim(off_t offset, off_t size, off_t& trimmedSize)
BlockAllocator::Trim(uint64 offset, uint64 size, uint64& trimmedSize)
{
const uint32 kTrimRanges = 128;
fs_trim_data* trimData = (fs_trim_data*)malloc(sizeof(fs_trim_data)
+ sizeof(uint64) * kTrimRanges);
if (trimData == NULL)
return B_NO_MEMORY;
MemoryDeleter deleter(trimData);
RecursiveLocker locker(fLock);
// TODO: take given offset and size into account!
int32 lastGroup = fNumGroups - 1;
uint32 firstBlock = 0;
uint32 firstBit = 0;
off_t currentBlock = 0;
uint64 currentBlock = 0;
uint32 blockShift = fVolume->BlockShift();
off_t firstFree = -1;
uint64 firstFree = 0;
size_t freeLength = 0;
trimData->range_count = 0;
trimmedSize = 0;
AllocationBlock cached(fVolume);
@ -1217,8 +1226,9 @@ BlockAllocator::Trim(off_t offset, off_t size, off_t& trimmedSize)
if (cached.IsUsed(i)) {
// Block is in use
if (freeLength > 0) {
status_t status = _TrimNext(firstFree << blockShift,
freeLength << blockShift, trimmedSize);
status_t status = _TrimNext(*trimData, kTrimRanges,
firstFree << blockShift, freeLength << blockShift,
false, trimmedSize);
if (status != B_OK)
return status;
@ -1237,12 +1247,8 @@ BlockAllocator::Trim(off_t offset, off_t size, off_t& trimmedSize)
firstBit = 0;
}
if (freeLength > 0) {
return _TrimNext(firstFree << blockShift, freeLength << blockShift,
trimmedSize);
}
return B_OK;
return _TrimNext(*trimData, kTrimRanges, firstFree << blockShift,
freeLength << blockShift, true, trimmedSize);
}
@ -2181,21 +2187,44 @@ BlockAllocator::_AddInodeToIndex(Inode* inode)
status_t
BlockAllocator::_TrimNext(off_t offset, off_t size, off_t& trimmedSize)
BlockAllocator::_AddTrim(fs_trim_data& trimData, uint32 maxRanges,
uint64 offset, uint64 size)
{
PRINT(("_TrimNext(offset %lld, size %lld)\n", offset, size));
fs_trim_data trimData;
trimData.offset = offset;
trimData.size = size;
trimData.trimmed_size = 0;
if (ioctl(fVolume->Device(), B_TRIM_DEVICE, &trimData,
sizeof(fs_trim_data)) != 0) {
return errno;
if (trimData.range_count < maxRanges && size > 0) {
trimData.ranges[trimData.range_count].offset = offset;
trimData.ranges[trimData.range_count].size = size;
trimData.range_count++;
return true;
}
trimmedSize += trimData.trimmed_size;
return false;
}
// Submits all ranges collected in \a trimData to \a device via B_TRIM_DEVICE,
// adds the reported trimmed size to \a trimmedSize, and empties the array.
// Does nothing if no ranges are queued. Returns errno if the ioctl fails.
static status_t
flush_trim_ranges(int device, fs_trim_data& trimData, uint64& trimmedSize)
{
	if (trimData.range_count == 0)
		return B_OK;

	trimData.trimmed_size = 0;

	// fs_trim_data embeds the first range, the others follow it directly;
	// the length passed on has to cover all queued ranges.
	size_t dataSize = sizeof(fs_trim_data)
		+ (trimData.range_count - 1) * sizeof(trimData.ranges[0]);

	if (ioctl(device, B_TRIM_DEVICE, &trimData, dataSize) != 0)
		return errno;

	trimmedSize += trimData.trimmed_size;
	trimData.range_count = 0;
	return B_OK;
}


// Queues the range (\a offset, \a size) in \a trimData, and sends the queued
// ranges to the device when the range could not be queued (the array holds
// \a maxRanges entries, or \a size is 0), or when \a force is set.
//
// trimData:    array of collected ranges with room for maxRanges entries
// offset/size: the range to add; a size of 0 adds nothing
// force:       submit everything that is queued, including this range
// trimmedSize: incremented by the size the device reports as trimmed
//
// Returns B_OK, or errno if the B_TRIM_DEVICE ioctl failed.
status_t
BlockAllocator::_TrimNext(fs_trim_data& trimData, uint32 maxRanges,
	uint64 offset, uint64 size, bool force, uint64& trimmedSize)
{
	PRINT(("_TrimNext(index %" B_PRIu32 ", offset %" B_PRIu64 ", size %"
		B_PRIu64 ")\n", trimData.range_count, offset, size));

	bool pushed = _AddTrim(trimData, maxRanges, offset, size);

	if (!pushed || force) {
		// Trim now
		status_t status = flush_trim_ranges(fVolume->Device(), trimData,
			trimmedSize);
		if (status != B_OK)
			return status;
	}

	// The array was full; the flush made room for the pending range. When
	// forced, that range must be submitted as well -- no later call will.
	if (!pushed && _AddTrim(trimData, maxRanges, offset, size) && force)
		return flush_trim_ranges(fVolume->Device(), trimData, trimmedSize);

	return B_OK;
}

View File

@ -46,7 +46,8 @@ public:
int32 group, uint16 start, uint16 numBlocks,
uint16 minimum, block_run& run);
status_t Trim(off_t offset, off_t size, off_t& trimmedSize);
status_t Trim(uint64 offset, uint64 size,
uint64& trimmedSize);
status_t StartChecking(const check_control* control);
status_t StopChecking(check_control* control);
@ -83,8 +84,11 @@ private:
void _FreeIndices();
status_t _AddInodeToIndex(Inode* inode);
status_t _WriteBackCheckBitmap();
status_t _TrimNext(off_t offset, off_t size,
off_t& trimmedSize);
status_t _AddTrim(fs_trim_data& trimData, uint32 maxRanges,
uint64 offset, uint64 size);
status_t _TrimNext(fs_trim_data& trimData, uint32 maxRanges,
uint64 offset, uint64 size, bool force,
uint64& trimmedSize);
static status_t _Initialize(BlockAllocator* self);

View File

@ -20,8 +20,10 @@
// TODO: temporary solution as long as there is no public I/O requests API
#ifndef BFS_SHELL
# include <io_requests.h>
# include <util/fs_trim_support.h>
#endif
#define BFS_IO_SIZE 65536
@ -623,19 +625,32 @@ bfs_ioctl(fs_volume* _volume, fs_vnode* _node, void* _cookie, uint32 cmd,
Volume* volume = (Volume*)_volume->private_volume;
switch (cmd) {
#ifndef BFS_SHELL
case B_TRIM_DEVICE:
{
fs_trim_data trimData;
if (user_memcpy(&trimData, buffer, sizeof(fs_trim_data)) != B_OK)
return B_BAD_ADDRESS;
status_t status = volume->Allocator().Trim(trimData.offset,
trimData.size, trimData.trimmed_size);
fs_trim_data* trimData;
status_t status = copy_trim_data_from_user(buffer, bufferLength,
trimData);
if (status != B_OK)
return status;
return user_memcpy(buffer, &trimData, sizeof(fs_trim_data));
MemoryDeleter deleter(trimData);
trimData->trimmed_size = 0;
for (uint32 i = 0; i < trimData->range_count; i++) {
uint64 trimmedSize = 0;
status_t status = volume->Allocator().Trim(
trimData->ranges[i].offset, trimData->ranges[i].size,
trimmedSize);
if (status != B_OK)
return status;
trimData->trimmed_size += trimmedSize;
}
return copy_trim_data_to_user(buffer, trimData);
}
#endif
case BFS_IOCTL_VERSION:
{

View File

@ -1,6 +1,6 @@
SubDir HAIKU_TOP src add-ons kernel generic scsi_periph ;
UsePrivateHeaders drivers kernel ;
UsePrivateHeaders drivers kernel shared ;
SubDirHdrs $(HAIKU_TOP) src system kernel device_manager ;
# disable debug output, if debugging is disabled

View File

@ -9,9 +9,12 @@
//! Handling of block device
#include "scsi_periph_int.h"
#include <string.h>
#include <AutoDeleter.h>
#include "scsi_periph_int.h"
status_t
periph_check_capacity(scsi_periph_device_info *device, scsi_ccb *request)
@ -117,23 +120,53 @@ periph_check_capacity(scsi_periph_device_info *device, scsi_ccb *request)
status_t
periph_trim_device(scsi_periph_device_info *device, scsi_ccb *request,
uint64 offset, uint64 numBlocks)
scsi_block_range* ranges, uint32 rangeCount)
{
err_res res;
int retries = 0;
size_t unmapBlockSize = (rangeCount - 1)
* sizeof(scsi_unmap_block_descriptor)
+ sizeof(scsi_unmap_parameter_list);
// TODO: check block limits VPD page
// TODO: instead of failing, we should try to complete the request in
// several passes.
if (unmapBlockSize > 65536 || rangeCount == 0)
return B_BAD_VALUE;
scsi_unmap_parameter_list* unmapBlocks
= (scsi_unmap_parameter_list*)malloc(unmapBlockSize);
if (unmapBlocks == NULL)
return B_NO_MEMORY;
MemoryDeleter deleter(unmapBlocks);
// Prepare request data
memset(unmapBlocks, 0, unmapBlockSize);
unmapBlocks->data_length = B_HOST_TO_BENDIAN_INT16(unmapBlockSize - 1);
unmapBlocks->data_length = B_HOST_TO_BENDIAN_INT16(unmapBlockSize - 3);
for (uint32 i = 0; i < rangeCount; i++) {
unmapBlocks->blocks[i].lba = B_HOST_TO_BENDIAN_INT64(
ranges[i].offset / device->block_size);
unmapBlocks->blocks[i].block_count = B_HOST_TO_BENDIAN_INT32(
ranges[i].size / device->block_size);
}
do {
request->flags = SCSI_DIR_OUT;
request->sort = offset;
request->sort = ranges[0].offset / device->block_size;
request->timeout = device->std_timeout;
scsi_cmd_wsame_16* cmd = (scsi_cmd_wsame_16*)request->cdb;
scsi_cmd_unmap* cmd = (scsi_cmd_unmap*)request->cdb;
memset(cmd, 0, sizeof(*cmd));
cmd->opcode = SCSI_OP_WRITE_SAME_16;
cmd->unmap = 1;
cmd->lba = B_HOST_TO_BENDIAN_INT64(offset);
cmd->length = B_HOST_TO_BENDIAN_INT32(numBlocks);
cmd->opcode = SCSI_OP_UNMAP;
cmd->length = B_HOST_TO_BENDIAN_INT16(unmapBlockSize);
request->data = (uint8*)unmapBlocks;
request->data_length = unmapBlockSize;
request->cdb_length = sizeof(*cmd);

View File

@ -78,7 +78,7 @@ status_t periph_handle_free(scsi_periph_handle_info *handle);
status_t periph_check_capacity(scsi_periph_device_info *device, scsi_ccb *ccb);
status_t periph_trim_device(scsi_periph_device_info *device, scsi_ccb *request,
uint64 offset, uint64 numBlocks);
scsi_block_range* ranges, uint32 rangeCount);
// device.c

View File

@ -7,7 +7,7 @@
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -66,8 +66,9 @@ main(int argc, char** argv)
FileDescriptorCloser closer(fd);
fs_trim_data trimData;
trimData.offset = 0;
trimData.size = OFF_MAX;
trimData.range_count = 1;
trimData.ranges[0].offset = 0;
trimData.ranges[0].size = UINT64_MAX;
trimData.trimmed_size = 0;
if (ioctl(fd, B_TRIM_DEVICE, &trimData, sizeof(fs_trim_data)) != 0) {