qemu/hw/scsi-generic.c
Christoph Hellwig 428c149b0b block: add topology qdev properties
Add three new qdev properties to export block topology information to
the guest.  This is needed to get optimal I/O alignment for RAID arrays
or SSDs.

The options are:

 - physical_block_size to specify the physical block size of the device,
   this is going to increase from 512 bytes to 4096 kilobytes for many
   modern storage devices
 - min_io_size to specify the minimal I/O size without performance impact,
   this is typically set to the RAID chunk size for arrays.
 - opt_io_size to specify the optimal sustained I/O size, this is
   typically the RAID stripe width for arrays.

I decided to not auto-probe these values from blkid which might easily
be possible as I don't know how to deal with these issues on migration.

Note that we specificly only set the physical_block_size, and not the
logial one which is the unit all I/O is described in.  The reason for
that is that IDE does not support increasing the logical block size and
at last for now I want to stick to one meachnisms in queue and allow
for easy switching of transports for a given backing image which would
not be possible if scsi and virtio use real 4k sectors, while ide only
uses the physical block exponent.

To make this more common for the different block drivers introduce a
new BlockConf structure holding all common block properties and a
DEFINE_BLOCK_PROPERTIES macro to add them all together, mirroring
what is done for network drivers.  Also switch over all block drivers
to use it, except for the floppy driver which has weird driveA/driveB
properties and probably won't require any advanced block options ever.

Example usage for a virtio device with 4k physical block size and
8k optimal I/O size:

  -drive file=scratch.img,media=disk,cache=none,id=scratch \
  -device virtio-blk-pci,drive=scratch,physical_block_size=4096,opt_io_size=8192

aliguori: updated patch to take into account BLOCK events

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
2010-02-10 16:53:25 -06:00

539 lines
15 KiB
C

/*
* Generic SCSI Device support
*
* Copyright (c) 2007 Bull S.A.S.
* Based on code by Paul Brook
* Based on code by Fabrice Bellard
*
* Written by Laurent Vivier <Laurent.Vivier@bull.net>
*
* This code is licenced under the LGPL.
*
*/
#include "qemu-common.h"
#include "block.h"
#include "scsi.h"
#ifdef __linux__
//#define DEBUG_SCSI
#ifdef DEBUG_SCSI
#define DPRINTF(fmt, ...) \
do { printf("scsi-generic: " fmt , ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) do {} while(0)
#endif
#define BADF(fmt, ...) \
do { fprintf(stderr, "scsi-generic: " fmt , ## __VA_ARGS__); } while (0)
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <scsi/sg.h>
#include "scsi-defs.h"
#define SCSI_SENSE_BUF_SIZE 96
#define SG_ERR_DRIVER_TIMEOUT 0x06
#define SG_ERR_DRIVER_SENSE 0x08
#ifndef MAX_UINT
#define MAX_UINT ((unsigned int)-1)
#endif
typedef struct SCSIGenericState SCSIGenericState;
typedef struct SCSIGenericReq {
SCSIRequest req;
uint8_t *buf;
int buflen;
int len;
sg_io_hdr_t io_header;
} SCSIGenericReq;
struct SCSIGenericState
{
SCSIDevice qdev;
BlockDriverState *bs;
int lun;
int driver_status;
uint8_t sensebuf[SCSI_SENSE_BUF_SIZE];
uint8_t senselen;
};
static SCSIGenericReq *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun)
{
SCSIRequest *req;
req = scsi_req_alloc(sizeof(SCSIGenericReq), d, tag, lun);
return DO_UPCAST(SCSIGenericReq, req, req);
}
static void scsi_remove_request(SCSIGenericReq *r)
{
qemu_free(r->buf);
scsi_req_free(&r->req);
}
static SCSIGenericReq *scsi_find_request(SCSIGenericState *s, uint32_t tag)
{
return DO_UPCAST(SCSIGenericReq, req, scsi_req_find(&s->qdev, tag));
}
/* Helper function for command completion. */
static void scsi_command_complete(void *opaque, int ret)
{
SCSIGenericReq *r = (SCSIGenericReq *)opaque;
SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, r->req.dev);
s->driver_status = r->io_header.driver_status;
if (s->driver_status & SG_ERR_DRIVER_SENSE)
s->senselen = r->io_header.sb_len_wr;
if (ret != 0)
r->req.status = BUSY << 1;
else {
if (s->driver_status & SG_ERR_DRIVER_TIMEOUT) {
r->req.status = BUSY << 1;
BADF("Driver Timeout\n");
} else if (r->io_header.status)
r->req.status = r->io_header.status;
else if (s->driver_status & SG_ERR_DRIVER_SENSE)
r->req.status = CHECK_CONDITION << 1;
else
r->req.status = GOOD << 1;
}
DPRINTF("Command complete 0x%p tag=0x%x status=%d\n",
r, r->req.tag, r->req.status);
scsi_req_complete(&r->req);
scsi_remove_request(r);
}
/* Cancel a pending data transfer. */
static void scsi_cancel_io(SCSIDevice *d, uint32_t tag)
{
DPRINTF("scsi_cancel_io 0x%x\n", tag);
SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, d);
SCSIGenericReq *r;
DPRINTF("Cancel tag=0x%x\n", tag);
r = scsi_find_request(s, tag);
if (r) {
if (r->req.aiocb)
bdrv_aio_cancel(r->req.aiocb);
r->req.aiocb = NULL;
scsi_remove_request(r);
}
}
static int execute_command(BlockDriverState *bdrv,
SCSIGenericReq *r, int direction,
BlockDriverCompletionFunc *complete)
{
SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, r->req.dev);
r->io_header.interface_id = 'S';
r->io_header.dxfer_direction = direction;
r->io_header.dxferp = r->buf;
r->io_header.dxfer_len = r->buflen;
r->io_header.cmdp = r->req.cmd.buf;
r->io_header.cmd_len = r->req.cmd.len;
r->io_header.mx_sb_len = sizeof(s->sensebuf);
r->io_header.sbp = s->sensebuf;
r->io_header.timeout = MAX_UINT;
r->io_header.usr_ptr = r;
r->io_header.flags |= SG_FLAG_DIRECT_IO;
r->req.aiocb = bdrv_aio_ioctl(bdrv, SG_IO, &r->io_header, complete, r);
if (r->req.aiocb == NULL) {
BADF("execute_command: read failed !\n");
return -1;
}
return 0;
}
static void scsi_read_complete(void * opaque, int ret)
{
SCSIGenericReq *r = (SCSIGenericReq *)opaque;
int len;
if (ret) {
DPRINTF("IO error\n");
scsi_command_complete(r, ret);
return;
}
len = r->io_header.dxfer_len - r->io_header.resid;
DPRINTF("Data ready tag=0x%x len=%d\n", r->req.tag, len);
r->len = -1;
r->req.bus->complete(r->req.bus, SCSI_REASON_DATA, r->req.tag, len);
if (len == 0)
scsi_command_complete(r, 0);
}
/* Read more data from scsi device into buffer. */
static void scsi_read_data(SCSIDevice *d, uint32_t tag)
{
SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, d);
SCSIGenericReq *r;
int ret;
DPRINTF("scsi_read_data 0x%x\n", tag);
r = scsi_find_request(s, tag);
if (!r) {
BADF("Bad read tag 0x%x\n", tag);
/* ??? This is the wrong error. */
scsi_command_complete(r, -EINVAL);
return;
}
if (r->len == -1) {
scsi_command_complete(r, 0);
return;
}
if (r->req.cmd.buf[0] == REQUEST_SENSE && s->driver_status & SG_ERR_DRIVER_SENSE)
{
s->senselen = MIN(r->len, s->senselen);
memcpy(r->buf, s->sensebuf, s->senselen);
r->io_header.driver_status = 0;
r->io_header.status = 0;
r->io_header.dxfer_len = s->senselen;
r->len = -1;
DPRINTF("Data ready tag=0x%x len=%d\n", r->req.tag, s->senselen);
DPRINTF("Sense: %d %d %d %d %d %d %d %d\n",
r->buf[0], r->buf[1], r->buf[2], r->buf[3],
r->buf[4], r->buf[5], r->buf[6], r->buf[7]);
r->req.bus->complete(r->req.bus, SCSI_REASON_DATA, r->req.tag, s->senselen);
return;
}
ret = execute_command(s->bs, r, SG_DXFER_FROM_DEV, scsi_read_complete);
if (ret == -1) {
scsi_command_complete(r, -EINVAL);
return;
}
}
static void scsi_write_complete(void * opaque, int ret)
{
SCSIGenericReq *r = (SCSIGenericReq *)opaque;
SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, r->req.dev);
DPRINTF("scsi_write_complete() ret = %d\n", ret);
if (ret) {
DPRINTF("IO error\n");
scsi_command_complete(r, ret);
return;
}
if (r->req.cmd.buf[0] == MODE_SELECT && r->req.cmd.buf[4] == 12 &&
s->qdev.type == TYPE_TAPE) {
s->qdev.blocksize = (r->buf[9] << 16) | (r->buf[10] << 8) | r->buf[11];
DPRINTF("block size %d\n", s->blocksize);
}
scsi_command_complete(r, ret);
}
/* Write data to a scsi device. Returns nonzero on failure.
The transfer may complete asynchronously. */
static int scsi_write_data(SCSIDevice *d, uint32_t tag)
{
SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, d);
SCSIGenericReq *r;
int ret;
DPRINTF("scsi_write_data 0x%x\n", tag);
r = scsi_find_request(s, tag);
if (!r) {
BADF("Bad write tag 0x%x\n", tag);
/* ??? This is the wrong error. */
scsi_command_complete(r, -EINVAL);
return 0;
}
if (r->len == 0) {
r->len = r->buflen;
r->req.bus->complete(r->req.bus, SCSI_REASON_DATA, r->req.tag, r->len);
return 0;
}
ret = execute_command(s->bs, r, SG_DXFER_TO_DEV, scsi_write_complete);
if (ret == -1) {
scsi_command_complete(r, -EINVAL);
return 1;
}
return 0;
}
/* Return a pointer to the data buffer. */
static uint8_t *scsi_get_buf(SCSIDevice *d, uint32_t tag)
{
SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, d);
SCSIGenericReq *r;
r = scsi_find_request(s, tag);
if (!r) {
BADF("Bad buffer tag 0x%x\n", tag);
return NULL;
}
return r->buf;
}
static void scsi_req_fixup(SCSIRequest *req)
{
switch(req->cmd.buf[0]) {
case WRITE_10:
req->cmd.buf[1] &= ~0x08; /* disable FUA */
break;
case READ_10:
req->cmd.buf[1] &= ~0x08; /* disable FUA */
break;
case REWIND:
case START_STOP:
if (req->dev->type == TYPE_TAPE) {
/* force IMMED, otherwise qemu waits end of command */
req->cmd.buf[1] = 0x01;
}
break;
}
}
/* Execute a scsi command. Returns the length of the data expected by the
command. This will be Positive for data transfers from the device
(eg. disk reads), negative for transfers to the device (eg. disk writes),
and zero if the command does not transfer any data. */
static int32_t scsi_send_command(SCSIDevice *d, uint32_t tag,
uint8_t *cmd, int lun)
{
SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, d);
SCSIGenericReq *r;
SCSIBus *bus;
int ret;
if (cmd[0] != REQUEST_SENSE &&
(lun != s->lun || (cmd[1] >> 5) != s->lun)) {
DPRINTF("Unimplemented LUN %d\n", lun ? lun : cmd[1] >> 5);
s->sensebuf[0] = 0x70;
s->sensebuf[1] = 0x00;
s->sensebuf[2] = ILLEGAL_REQUEST;
s->sensebuf[3] = 0x00;
s->sensebuf[4] = 0x00;
s->sensebuf[5] = 0x00;
s->sensebuf[6] = 0x00;
s->senselen = 7;
s->driver_status = SG_ERR_DRIVER_SENSE;
bus = scsi_bus_from_device(d);
bus->complete(bus, SCSI_REASON_DONE, tag, CHECK_CONDITION << 1);
return 0;
}
r = scsi_find_request(s, tag);
if (r) {
BADF("Tag 0x%x already in use %p\n", tag, r);
scsi_cancel_io(d, tag);
}
r = scsi_new_request(d, tag, lun);
if (-1 == scsi_req_parse(&r->req, cmd)) {
BADF("Unsupported command length, command %x\n", cmd[0]);
scsi_remove_request(r);
return 0;
}
scsi_req_fixup(&r->req);
DPRINTF("Command: lun=%d tag=0x%x data=0x%02x len %d\n", lun, tag,
cmd[0], r->req.cmd.xfer);
if (r->req.cmd.xfer == 0) {
if (r->buf != NULL)
qemu_free(r->buf);
r->buflen = 0;
r->buf = NULL;
ret = execute_command(s->bs, r, SG_DXFER_NONE, scsi_command_complete);
if (ret == -1) {
scsi_command_complete(r, -EINVAL);
return 0;
}
return 0;
}
if (r->buflen != r->req.cmd.xfer) {
if (r->buf != NULL)
qemu_free(r->buf);
r->buf = qemu_malloc(r->req.cmd.xfer);
r->buflen = r->req.cmd.xfer;
}
memset(r->buf, 0, r->buflen);
r->len = r->req.cmd.xfer;
if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
r->len = 0;
return -r->req.cmd.xfer;
}
return r->req.cmd.xfer;
}
static int get_blocksize(BlockDriverState *bdrv)
{
uint8_t cmd[10];
uint8_t buf[8];
uint8_t sensebuf[8];
sg_io_hdr_t io_header;
int ret;
memset(cmd, 0, sizeof(cmd));
memset(buf, 0, sizeof(buf));
cmd[0] = READ_CAPACITY;
memset(&io_header, 0, sizeof(io_header));
io_header.interface_id = 'S';
io_header.dxfer_direction = SG_DXFER_FROM_DEV;
io_header.dxfer_len = sizeof(buf);
io_header.dxferp = buf;
io_header.cmdp = cmd;
io_header.cmd_len = sizeof(cmd);
io_header.mx_sb_len = sizeof(sensebuf);
io_header.sbp = sensebuf;
io_header.timeout = 6000; /* XXX */
ret = bdrv_ioctl(bdrv, SG_IO, &io_header);
if (ret < 0)
return -1;
return (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
}
static int get_stream_blocksize(BlockDriverState *bdrv)
{
uint8_t cmd[6];
uint8_t buf[12];
uint8_t sensebuf[8];
sg_io_hdr_t io_header;
int ret;
memset(cmd, 0, sizeof(cmd));
memset(buf, 0, sizeof(buf));
cmd[0] = MODE_SENSE;
cmd[4] = sizeof(buf);
memset(&io_header, 0, sizeof(io_header));
io_header.interface_id = 'S';
io_header.dxfer_direction = SG_DXFER_FROM_DEV;
io_header.dxfer_len = sizeof(buf);
io_header.dxferp = buf;
io_header.cmdp = cmd;
io_header.cmd_len = sizeof(cmd);
io_header.mx_sb_len = sizeof(sensebuf);
io_header.sbp = sensebuf;
io_header.timeout = 6000; /* XXX */
ret = bdrv_ioctl(bdrv, SG_IO, &io_header);
if (ret < 0)
return -1;
return (buf[9] << 16) | (buf[10] << 8) | buf[11];
}
static void scsi_destroy(SCSIDevice *d)
{
SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, d);
SCSIGenericReq *r;
while (!QTAILQ_EMPTY(&s->qdev.requests)) {
r = DO_UPCAST(SCSIGenericReq, req, QTAILQ_FIRST(&s->qdev.requests));
scsi_remove_request(r);
}
drive_uninit(s->qdev.conf.dinfo);
}
static int scsi_generic_initfn(SCSIDevice *dev)
{
SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, dev);
int sg_version;
struct sg_scsi_id scsiid;
if (!s->qdev.conf.dinfo || !s->qdev.conf.dinfo->bdrv) {
qemu_error("scsi-generic: drive property not set\n");
return -1;
}
s->bs = s->qdev.conf.dinfo->bdrv;
/* check we are really using a /dev/sg* file */
if (!bdrv_is_sg(s->bs)) {
qemu_error("scsi-generic: not /dev/sg*\n");
return -1;
}
/* check we are using a driver managing SG_IO (version 3 and after */
if (bdrv_ioctl(s->bs, SG_GET_VERSION_NUM, &sg_version) < 0 ||
sg_version < 30000) {
qemu_error("scsi-generic: scsi generic interface too old\n");
return -1;
}
/* get LUN of the /dev/sg? */
if (bdrv_ioctl(s->bs, SG_GET_SCSI_ID, &scsiid)) {
qemu_error("scsi-generic: SG_GET_SCSI_ID ioctl failed\n");
return -1;
}
/* define device state */
s->lun = scsiid.lun;
DPRINTF("LUN %d\n", s->lun);
s->qdev.type = scsiid.scsi_type;
DPRINTF("device type %d\n", s->qdev.type);
if (s->qdev.type == TYPE_TAPE) {
s->qdev.blocksize = get_stream_blocksize(s->bs);
if (s->qdev.blocksize == -1)
s->qdev.blocksize = 0;
} else {
s->qdev.blocksize = get_blocksize(s->bs);
/* removable media returns 0 if not present */
if (s->qdev.blocksize <= 0) {
if (s->qdev.type == TYPE_ROM || s->qdev.type == TYPE_WORM)
s->qdev.blocksize = 2048;
else
s->qdev.blocksize = 512;
}
}
DPRINTF("block size %d\n", s->qdev.blocksize);
s->driver_status = 0;
memset(s->sensebuf, 0, sizeof(s->sensebuf));
return 0;
}
static SCSIDeviceInfo scsi_generic_info = {
.qdev.name = "scsi-generic",
.qdev.desc = "pass through generic scsi device (/dev/sg*)",
.qdev.size = sizeof(SCSIGenericState),
.init = scsi_generic_initfn,
.destroy = scsi_destroy,
.send_command = scsi_send_command,
.read_data = scsi_read_data,
.write_data = scsi_write_data,
.cancel_io = scsi_cancel_io,
.get_buf = scsi_get_buf,
.qdev.props = (Property[]) {
DEFINE_BLOCK_PROPERTIES(SCSIGenericState, qdev.conf),
DEFINE_PROP_END_OF_LIST(),
},
};
static void scsi_generic_register_devices(void)
{
scsi_qdev_register(&scsi_generic_info);
}
device_init(scsi_generic_register_devices)
#endif /* __linux__ */