/*
 * Virtio Block Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <qemu-common.h>
#include "qemu-error.h"
#include "trace.h"
#include "blockdev.h"
#include "virtio-blk.h"
#ifdef __linux__
# include <scsi/sg.h>
#endif

typedef struct VirtIOBlock
{
    VirtIODevice vdev;
    BlockDriverState *bs;
    VirtQueue *vq;
    void *rq;
    QEMUBH *bh;
    BlockConf *conf;
    unsigned short sector_mask;
    char sn[BLOCK_SERIAL_STRLEN];
    DeviceState *qdev;
} VirtIOBlock;

static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
{
    return (VirtIOBlock *)vdev;
}

typedef struct VirtIOBlockReq
{
    VirtIOBlock *dev;
    VirtQueueElement elem;
    struct virtio_blk_inhdr *in;
    struct virtio_blk_outhdr *out;
    struct virtio_scsi_inhdr *scsi;
    QEMUIOVector qiov;
    struct VirtIOBlockReq *next;
} VirtIOBlockReq;

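/* Complete a request: fill in the status byte, push the descriptor chain
 * back onto the used ring, notify the guest and free the request. */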
static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
{
    VirtIOBlock *s = req->dev;

    trace_virtio_blk_req_complete(req, status);

    req->in->status = status;
    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
    virtio_notify(&s->vdev, s->vq);

    qemu_free(req);
}

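/* Apply the drive's rerror/werror policy to a failed request.  Returns 1 if
 * the error has been handled here (the request was either parked on s->rq
 * for a later retry or completed with VIRTIO_BLK_S_IOERR), or 0 if the
 * caller should ignore the error and complete the request normally. */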
static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
    int is_read)
{
    BlockErrorAction action = bdrv_get_on_error(req->dev->bs, is_read);
    VirtIOBlock *s = req->dev;

    if (action == BLOCK_ERR_IGNORE) {
        bdrv_mon_event(s->bs, BDRV_ACTION_IGNORE, is_read);
        return 0;
    }

    if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
            || action == BLOCK_ERR_STOP_ANY) {
        req->next = s->rq;
        s->rq = req;
        bdrv_mon_event(s->bs, BDRV_ACTION_STOP, is_read);
        vm_stop(0);
    } else {
        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
        bdrv_mon_event(s->bs, BDRV_ACTION_REPORT, is_read);
    }

    return 1;
}

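/* Common completion callback for reads and (batched) writes. */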
static void virtio_blk_rw_complete(void *opaque, int ret)
{
    VirtIOBlockReq *req = opaque;

    trace_virtio_blk_rw_complete(req, ret);

    if (ret) {
        int is_read = !(req->out->type & VIRTIO_BLK_T_OUT);
        if (virtio_blk_handle_rw_error(req, -ret, is_read))
            return;
    }

    virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
}

static void virtio_blk_flush_complete(void *opaque, int ret)
{
    VirtIOBlockReq *req = opaque;

    if (ret) {
        if (virtio_blk_handle_rw_error(req, -ret, 0)) {
            return;
        }
    }

    virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
}

static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
{
    VirtIOBlockReq *req = qemu_malloc(sizeof(*req));
    req->dev = s;
    req->qiov.size = 0;
    req->next = NULL;
    return req;
}

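/* Allocate a request and pop the next descriptor chain off the virtqueue;
 * returns NULL when the queue is empty. */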
static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
{
    VirtIOBlockReq *req = virtio_blk_alloc_request(s);

    if (req != NULL) {
        if (!virtqueue_pop(s->vq, &req->elem)) {
            qemu_free(req);
            return NULL;
        }
    }

    return req;
}

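/* Pass a guest SCSI command straight through to the host device with the
 * SG_IO ioctl (Linux only; other hosts answer with VIRTIO_BLK_S_UNSUPP).
 * The request layout is: out_sg[0] holds the virtio_blk_outhdr, out_sg[1]
 * the CDB, any further output segments carry write payload; the input
 * segments end with the sense buffer, the virtio_scsi_inhdr and the
 * regular virtio_blk_inhdr. */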
#ifdef __linux__
static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
{
    struct sg_io_hdr hdr;
    int ret;
    int status;
    int i;

    /*
     * We require at least one output segment each for the virtio_blk_outhdr
     * and the SCSI command block.
     *
     * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr
     * and the sense buffer pointer in the input segments.
     */
    if (req->elem.out_num < 2 || req->elem.in_num < 3) {
        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
        return;
    }

    /*
     * No support for bidirectional commands yet.
     */
    if (req->elem.out_num > 2 && req->elem.in_num > 3) {
        virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
        return;
    }

    /*
     * The scsi inhdr is placed in the second-to-last input segment, just
     * before the regular inhdr.
     */
    req->scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base;

    memset(&hdr, 0, sizeof(struct sg_io_hdr));
    hdr.interface_id = 'S';
    hdr.cmd_len = req->elem.out_sg[1].iov_len;
    hdr.cmdp = req->elem.out_sg[1].iov_base;
    hdr.dxfer_len = 0;

    if (req->elem.out_num > 2) {
        /*
         * If there are more than the minimally required 2 output segments
         * there is write payload starting from the third iovec.
         */
        hdr.dxfer_direction = SG_DXFER_TO_DEV;
        hdr.iovec_count = req->elem.out_num - 2;

        for (i = 0; i < hdr.iovec_count; i++)
            hdr.dxfer_len += req->elem.out_sg[i + 2].iov_len;

        hdr.dxferp = req->elem.out_sg + 2;

    } else if (req->elem.in_num > 3) {
        /*
         * If we have more than 3 input segments the guest wants to actually
         * read data.
         */
        hdr.dxfer_direction = SG_DXFER_FROM_DEV;
        hdr.iovec_count = req->elem.in_num - 3;
        for (i = 0; i < hdr.iovec_count; i++)
            hdr.dxfer_len += req->elem.in_sg[i].iov_len;

        hdr.dxferp = req->elem.in_sg;
    } else {
        /*
         * Some SCSI commands don't actually transfer any data.
         */
        hdr.dxfer_direction = SG_DXFER_NONE;
    }

    hdr.sbp = req->elem.in_sg[req->elem.in_num - 3].iov_base;
    hdr.mx_sb_len = req->elem.in_sg[req->elem.in_num - 3].iov_len;

    ret = bdrv_ioctl(req->dev->bs, SG_IO, &hdr);
    if (ret) {
        status = VIRTIO_BLK_S_UNSUPP;
        hdr.status = ret;
        hdr.resid = hdr.dxfer_len;
    } else if (hdr.status) {
        status = VIRTIO_BLK_S_IOERR;
    } else {
        status = VIRTIO_BLK_S_OK;
    }

    req->scsi->errors = hdr.status;
    req->scsi->residual = hdr.resid;
    req->scsi->sense_len = hdr.sb_len_wr;
    req->scsi->data_len = hdr.dxfer_len;

    virtio_blk_req_complete(req, status);
}
#else
static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
{
    virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
}
#endif /* __linux__ */

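/* Writes queued during one round of virtqueue processing are collected here
 * and merged into a single bdrv_aio_multiwrite() call. */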
typedef struct MultiReqBuffer {
    BlockRequest blkreq[32];
    unsigned int num_writes;
} MultiReqBuffer;

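/* Submit the batched writes in one bdrv_aio_multiwrite() call.  If the
 * submission fails, complete the requests that were flagged with an error. */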
static void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb)
{
    int i, ret;

    if (!mrb->num_writes) {
        return;
    }

    ret = bdrv_aio_multiwrite(bs, mrb->blkreq, mrb->num_writes);
    if (ret != 0) {
        for (i = 0; i < mrb->num_writes; i++) {
            if (mrb->blkreq[i].error) {
                virtio_blk_rw_complete(mrb->blkreq[i].opaque, -EIO);
            }
        }
    }

    mrb->num_writes = 0;
}

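/* Handle a VIRTIO_BLK_T_FLUSH request. */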
static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
    BlockDriverAIOCB *acb;

    /*
     * Make sure all outstanding writes are posted to the backing device.
     */
    virtio_submit_multiwrite(req->dev->bs, mrb);

    acb = bdrv_aio_flush(req->dev->bs, virtio_blk_flush_complete, req);
    if (!acb) {
        virtio_blk_flush_complete(req, -EIO);
    }
}

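/* Queue a write into the multiwrite batch.  The batch is flushed when it
 * reaches 32 entries or at the end of the current virtqueue run. */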
static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
    BlockRequest *blkreq;

    trace_virtio_blk_handle_write(req, req->out->sector, req->qiov.size / 512);

    if (req->out->sector & req->dev->sector_mask) {
        virtio_blk_rw_complete(req, -EIO);
        return;
    }

    if (mrb->num_writes == 32) {
        virtio_submit_multiwrite(req->dev->bs, mrb);
    }

    blkreq = &mrb->blkreq[mrb->num_writes];
    blkreq->sector = req->out->sector;
    blkreq->nb_sectors = req->qiov.size / BDRV_SECTOR_SIZE;
    blkreq->qiov = &req->qiov;
    blkreq->cb = virtio_blk_rw_complete;
    blkreq->opaque = req;
    blkreq->error = 0;

    mrb->num_writes++;
}

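/* Reads are not batched; submit them to the block layer immediately. */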
static void virtio_blk_handle_read(VirtIOBlockReq *req)
{
    BlockDriverAIOCB *acb;

    if (req->out->sector & req->dev->sector_mask) {
        virtio_blk_rw_complete(req, -EIO);
        return;
    }

    acb = bdrv_aio_readv(req->dev->bs, req->out->sector, &req->qiov,
                         req->qiov.size / BDRV_SECTOR_SIZE,
                         virtio_blk_rw_complete, req);
    if (!acb) {
        virtio_blk_rw_complete(req, -EIO);
    }
}

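/* Parse one request: the first output segment carries the virtio_blk_outhdr
 * and the last input segment the virtio_blk_inhdr.  Dispatch on the request
 * type encoded in the out-header. */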
static void virtio_blk_handle_request(VirtIOBlockReq *req,
    MultiReqBuffer *mrb)
{
    if (req->elem.out_num < 1 || req->elem.in_num < 1) {
        error_report("virtio-blk missing headers");
        exit(1);
    }

    if (req->elem.out_sg[0].iov_len < sizeof(*req->out) ||
        req->elem.in_sg[req->elem.in_num - 1].iov_len < sizeof(*req->in)) {
        error_report("virtio-blk header not in correct element");
        exit(1);
    }

    req->out = (void *)req->elem.out_sg[0].iov_base;
    req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base;

    if (req->out->type & VIRTIO_BLK_T_FLUSH) {
        virtio_blk_handle_flush(req, mrb);
    } else if (req->out->type & VIRTIO_BLK_T_SCSI_CMD) {
        virtio_blk_handle_scsi(req);
    } else if (req->out->type & VIRTIO_BLK_T_GET_ID) {
        VirtIOBlock *s = req->dev;

        memcpy(req->elem.in_sg[0].iov_base, s->sn,
               MIN(req->elem.in_sg[0].iov_len, sizeof(s->sn)));
        virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
    } else if (req->out->type & VIRTIO_BLK_T_OUT) {
        qemu_iovec_init_external(&req->qiov, &req->elem.out_sg[1],
                                 req->elem.out_num - 1);
        virtio_blk_handle_write(req, mrb);
    } else {
        qemu_iovec_init_external(&req->qiov, &req->elem.in_sg[0],
                                 req->elem.in_num - 1);
        virtio_blk_handle_read(req);
    }
}

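/* Virtqueue notification handler: drain all pending requests from the queue
 * and submit any writes that were batched along the way. */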
static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBlock *s = to_virtio_blk(vdev);
    VirtIOBlockReq *req;
    MultiReqBuffer mrb = {
        .num_writes = 0,
    };

    while ((req = virtio_blk_get_request(s))) {
        virtio_blk_handle_request(req, &mrb);
    }

    virtio_submit_multiwrite(s->bs, &mrb);

    /*
     * FIXME: Want to check for completions before returning to guest mode,
     * so cached reads and writes are reported as quickly as possible. But
     * that should be done in the generic block layer.
     */
}

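/* Bottom half that re-issues the requests left on s->rq, e.g. after a
 * werror=stop I/O error or after the request list was restored by
 * virtio_blk_load(). */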
static void virtio_blk_dma_restart_bh(void *opaque)
{
    VirtIOBlock *s = opaque;
    VirtIOBlockReq *req = s->rq;
    MultiReqBuffer mrb = {
        .num_writes = 0,
    };

    qemu_bh_delete(s->bh);
    s->bh = NULL;

    s->rq = NULL;

    while (req) {
        virtio_blk_handle_request(req, &mrb);
        req = req->next;
    }

    virtio_submit_multiwrite(s->bs, &mrb);
}

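/* VM state change handler: schedule the restart bottom half whenever the
 * machine transitions back to the running state. */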
static void virtio_blk_dma_restart_cb(void *opaque, int running, int reason)
{
    VirtIOBlock *s = opaque;

    if (!running)
        return;

    if (!s->bh) {
        s->bh = qemu_bh_new(virtio_blk_dma_restart_bh, s);
        qemu_bh_schedule(s->bh);
    }
}

static void virtio_blk_reset(VirtIODevice *vdev)
{
    /*
     * This should cancel pending requests, but can't do so cleanly until
     * there are per-device request lists.
     */
    qemu_aio_flush();
}

/* coalesce internal state, copy to pci i/o region 0 */
static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIOBlock *s = to_virtio_blk(vdev);
    struct virtio_blk_config blkcfg;
    uint64_t capacity;
    int cylinders, heads, secs;

    bdrv_get_geometry(s->bs, &capacity);
    bdrv_get_geometry_hint(s->bs, &cylinders, &heads, &secs);
    memset(&blkcfg, 0, sizeof(blkcfg));
    stq_raw(&blkcfg.capacity, capacity);
    stl_raw(&blkcfg.seg_max, 128 - 2);
    stw_raw(&blkcfg.cylinders, cylinders);
    blkcfg.heads = heads;
    blkcfg.sectors = secs & ~s->sector_mask;
    blkcfg.blk_size = s->conf->logical_block_size;
    blkcfg.size_max = 0;
    blkcfg.physical_block_exp = get_physical_block_exp(s->conf);
    blkcfg.alignment_offset = 0;
    blkcfg.min_io_size = s->conf->min_io_size / blkcfg.blk_size;
    blkcfg.opt_io_size = s->conf->opt_io_size / blkcfg.blk_size;
    memcpy(config, &blkcfg, sizeof(struct virtio_blk_config));
}

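/* Advertise the optional virtio-blk features this host implementation
 * supports for the backing device. */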
static uint32_t virtio_blk_get_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIOBlock *s = to_virtio_blk(vdev);

    features |= (1 << VIRTIO_BLK_F_SEG_MAX);
    features |= (1 << VIRTIO_BLK_F_GEOMETRY);
    features |= (1 << VIRTIO_BLK_F_TOPOLOGY);
    features |= (1 << VIRTIO_BLK_F_BLK_SIZE);

    if (bdrv_enable_write_cache(s->bs))
        features |= (1 << VIRTIO_BLK_F_WCACHE);

    if (bdrv_is_read_only(s->bs))
        features |= 1 << VIRTIO_BLK_F_RO;

    return features;
}

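/* Save the generic virtio state plus any requests that are still parked on
 * s->rq (for example after a werror=stop error). */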
static void virtio_blk_save(QEMUFile *f, void *opaque)
{
    VirtIOBlock *s = opaque;
    VirtIOBlockReq *req = s->rq;

    virtio_save(&s->vdev, f);

    while (req) {
        qemu_put_sbyte(f, 1);
        qemu_put_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem));
        req = req->next;
    }
    qemu_put_sbyte(f, 0);
}

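/* Restore the saved requests onto s->rq.  virtqueue_map_sg() rebuilds the
 * guest-memory iovec mappings, which are not valid across migration. */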
static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIOBlock *s = opaque;

    if (version_id != 2)
        return -EINVAL;

    virtio_load(&s->vdev, f);
    while (qemu_get_sbyte(f)) {
        VirtIOBlockReq *req = virtio_blk_alloc_request(s);
        qemu_get_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem));
        req->next = s->rq;
        s->rq = req;

        virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
            req->elem.in_num, 1);
        virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
            req->elem.out_num, 0);
    }

    return 0;
}

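/* Create the virtio-blk device on top of the backing image described by
 * conf, wire up the config/feature/reset callbacks and register the single
 * request virtqueue. */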
VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf)
{
    VirtIOBlock *s;
    int cylinders, heads, secs;
    static int virtio_blk_id;
    DriveInfo *dinfo;

    if (!conf->bs) {
        error_report("virtio-blk-pci: drive property not set");
        return NULL;
    }
    if (!bdrv_is_inserted(conf->bs)) {
        error_report("Device needs media, but drive is empty");
        return NULL;
    }

    s = (VirtIOBlock *)virtio_common_init("virtio-blk", VIRTIO_ID_BLOCK,
                                          sizeof(struct virtio_blk_config),
                                          sizeof(VirtIOBlock));

    s->vdev.get_config = virtio_blk_update_config;
    s->vdev.get_features = virtio_blk_get_features;
    s->vdev.reset = virtio_blk_reset;
    s->bs = conf->bs;
    s->conf = conf;
    s->rq = NULL;
    s->sector_mask = (s->conf->logical_block_size / BDRV_SECTOR_SIZE) - 1;
    bdrv_guess_geometry(s->bs, &cylinders, &heads, &secs);

    /* NB: per existing s/n string convention the string is terminated
     * by '\0' only when it is shorter than sizeof(s->sn)
     */
    dinfo = drive_get_by_blockdev(s->bs);
    strncpy(s->sn, dinfo->serial, sizeof (s->sn));

    s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output);

    qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
    s->qdev = dev;
    register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
                    virtio_blk_save, virtio_blk_load, s);
    bdrv_set_removable(s->bs, 0);
    s->bs->buffer_alignment = conf->logical_block_size;

    add_boot_device_path(conf->bootindex, dev, "/disk@0,0");

    return &s->vdev;
}

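/* Tear down the device: drop the savevm registration added in
 * virtio_blk_init(). */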
void virtio_blk_exit(VirtIODevice *vdev)
{
    VirtIOBlock *s = to_virtio_blk(vdev);
    unregister_savevm(s->qdev, "virtio-blk", s);
}