Add virtio-blk support

Virtio-blk is a paravirtual block device based on VirtIO.  It can be enabled by
passing if=virtio to the -drive option.

When using -enable-kvm, it can achieve very good performance compared to IDE or
SCSI.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>



git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5870 c046a42c-6fe2-441c-8c8c-71466251a162
This commit is contained in:
aliguori 2008-12-04 19:52:44 +00:00
parent 967f97fa00
commit 6e02c38dad
7 changed files with 346 additions and 5 deletions

View File

@ -665,7 +665,7 @@ OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o
# virtio support # virtio support
OBJS+= virtio.o OBJS+= virtio.o virtio-blk.o
CPPFLAGS += -DHAS_AUDIO -DHAS_AUDIO_CHOICE CPPFLAGS += -DHAS_AUDIO -DHAS_AUDIO_CHOICE
endif endif
ifeq ($(TARGET_BASE_ARCH), ppc) ifeq ($(TARGET_BASE_ARCH), ppc)
@ -684,7 +684,7 @@ OBJS+= unin_pci.o ppc_chrp.o
# PowerPC 4xx boards # PowerPC 4xx boards
OBJS+= pflash_cfi02.o ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o OBJS+= pflash_cfi02.o ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
# virtio support # virtio support
OBJS+= virtio.o OBJS+= virtio.o virtio-blk.o
endif endif
ifeq ($(TARGET_BASE_ARCH), mips) ifeq ($(TARGET_BASE_ARCH), mips)
OBJS+= mips_r4k.o mips_jazz.o mips_malta.o mips_mipssim.o OBJS+= mips_r4k.o mips_jazz.o mips_malta.o mips_mipssim.o

13
hw/pc.c
View File

@ -33,6 +33,7 @@
#include "boards.h" #include "boards.h"
#include "console.h" #include "console.h"
#include "fw_cfg.h" #include "fw_cfg.h"
#include "virtio-blk.h"
/* output Bochs bios info messages */ /* output Bochs bios info messages */
//#define DEBUG_BIOS //#define DEBUG_BIOS
@ -1092,6 +1093,18 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
} }
} }
} }
/* Add virtio block devices */
if (pci_enabled) {
int index;
int unit_id = 0;
while ((index = drive_get_index(IF_VIRTIO, 0, unit_id)) != -1) {
virtio_blk_init(pci_bus, 0x1AF4, 0x1001,
drives_table[index].bdrv);
unit_id++;
}
}
} }
static void pc_init_pci(ram_addr_t ram_size, int vga_ram_size, static void pc_init_pci(ram_addr_t ram_size, int vga_ram_size,

248
hw/virtio-blk.c Normal file
View File

@ -0,0 +1,248 @@
/*
* Virtio Block Device
*
* Copyright IBM, Corp. 2007
*
* Authors:
* Anthony Liguori <aliguori@us.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
*/
#include "virtio-blk.h"
#include "block_int.h"
/* Per-device state of one virtio block device. */
typedef struct VirtIOBlock
{
/* Generic virtio device state.  Must stay the first member:
 * to_virtio_blk() casts a VirtIODevice pointer directly to VirtIOBlock. */
VirtIODevice vdev;
/* Block driver state that guest reads and writes are issued against. */
BlockDriverState *bs;
/* The request queue created by virtio_add_queue() in virtio_blk_init(). */
VirtQueue *vq;
} VirtIOBlock;
/*
 * Convert a generic VirtIODevice pointer into the VirtIOBlock that embeds it.
 * This is a plain pointer cast, valid because `vdev` is the first member of
 * VirtIOBlock.
 */
static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
{
    VirtIOBlock *blk = (VirtIOBlock *)vdev;

    return blk;
}
/* State of one in-flight request popped from the virtqueue. */
typedef struct VirtIOBlockReq
{
/* Device the request belongs to. */
VirtIOBlock *dev;
/* Virtqueue element filled in by virtqueue_pop(). */
VirtQueueElement elem;
/* Status header; points at the last entry of elem.in_sg. */
struct virtio_blk_inhdr *in;
/* Request header; points at the first entry of elem.out_sg. */
struct virtio_blk_outhdr *out;
/* Total number of data bytes, summed over the data scatter-gather entries. */
size_t size;
/* Contiguous bounce buffer of `size` bytes handed to the block layer. */
uint8_t *buffer;
} VirtIOBlockReq;
/*
 * Completion callback for the asynchronous reads and writes started by
 * virtio_blk_handle_output().
 *
 * opaque: the VirtIOBlockReq that was passed to bdrv_aio_read()/write().
 * ret:    zero on success; any non-zero value is reported to the guest as
 *         VIRTIO_BLK_S_IOERR.
 *
 * After a successful read the bounce buffer is copied into the guest's data
 * scatter-gather entries.  The status byte is then stored, the element is
 * returned to the queue, the guest is notified, and the request and its
 * buffer are freed.
 */
static void virtio_blk_rw_complete(void *opaque, int ret)
{
    VirtIOBlockReq *req = opaque;
    VirtIOBlock *s = req->dev;
    int is_write = (req->out->type & VIRTIO_BLK_T_OUT) != 0;
    /*
     * Length reported to the guest: the number of bytes placed in its
     * device-writable buffers.  The status byte is always written; the data
     * area only counts for reads.  A write never touches the guest's in
     * buffers beyond the status byte, so it must not report req->size.
     */
    size_t used_len = sizeof(*req->in);

    if (!is_write) {
        used_len += req->size;

        /* Copy read data to the guest */
        if (!ret) {
            size_t offset = 0;
            int i;

            /* The last in_sg entry holds the status header, not data. */
            for (i = 0; i < req->elem.in_num - 1; i++) {
                size_t len;

                /* Be pretty defensive wrt malicious guests */
                len = MIN(req->elem.in_sg[i].iov_len,
                          req->size - offset);

                memcpy(req->elem.in_sg[i].iov_base,
                       req->buffer + offset,
                       len);
                offset += len;
            }
        }
    }

    req->in->status = ret ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK;
    virtqueue_push(s->vq, &req->elem, used_len);
    virtio_notify(&s->vdev, s->vq);

    qemu_free(req->buffer);
    qemu_free(req);
}
/*
 * Allocate a zeroed request and fill its element from the device's queue.
 *
 * Returns the new request, or NULL when the allocation fails or
 * virtqueue_pop() yields no element.  The caller owns the returned request.
 */
static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
{
    VirtIOBlockReq *request = qemu_mallocz(sizeof(*request));

    if (!request)
        return NULL;

    request->dev = s;
    if (virtqueue_pop(s->vq, &request->elem))
        return request;

    /* Nothing was popped, so the request is not needed. */
    qemu_free(request);
    return NULL;
}
static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
VirtIOBlock *s = to_virtio_blk(vdev);
VirtIOBlockReq *req;
while ((req = virtio_blk_get_request(s))) {
int i;
if (req->elem.out_num < 1 || req->elem.in_num < 1) {
fprintf(stderr, "virtio-blk missing headers\n");
exit(1);
}
if (req->elem.out_sg[0].iov_len < sizeof(*req->out) ||
req->elem.in_sg[req->elem.in_num - 1].iov_len < sizeof(*req->in)) {
fprintf(stderr, "virtio-blk header not in correct element\n");
exit(1);
}
req->out = (void *)req->elem.out_sg[0].iov_base;
req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base;
if (req->out->type & VIRTIO_BLK_T_SCSI_CMD) {
unsigned int len = sizeof(*req->in);
req->in->status = VIRTIO_BLK_S_UNSUPP;
virtqueue_push(vq, &req->elem, len);
virtio_notify(vdev, vq);
qemu_free(req);
} else if (req->out->type & VIRTIO_BLK_T_OUT) {
size_t offset;
for (i = 1; i < req->elem.out_num; i++)
req->size += req->elem.out_sg[i].iov_len;
req->buffer = qemu_memalign(512, req->size);
if (req->buffer == NULL) {
qemu_free(req);
break;
}
/* We copy the data from the SG list to avoid splitting up the request. This helps
performance a lot until we can pass full sg lists as AIO operations */
offset = 0;
for (i = 1; i < req->elem.out_num; i++) {
size_t len;
len = MIN(req->elem.out_sg[i].iov_len,
req->size - offset);
memcpy(req->buffer + offset,
req->elem.out_sg[i].iov_base,
len);
offset += len;
}
bdrv_aio_write(s->bs, req->out->sector,
req->buffer,
req->size / 512,
virtio_blk_rw_complete,
req);
} else {
for (i = 0; i < req->elem.in_num - 1; i++)
req->size += req->elem.in_sg[i].iov_len;
req->buffer = qemu_memalign(512, req->size);
if (req->buffer == NULL) {
qemu_free(req);
break;
}
bdrv_aio_read(s->bs, req->out->sector,
req->buffer,
req->size / 512,
virtio_blk_rw_complete,
req);
}
}
/*
* FIXME: Want to check for completions before returning to guest mode,
* so cached reads and writes are reported as quickly as possible. But
* that should be done in the generic block layer.
*/
}
/*
 * Reset hook installed as vdev->reset.
 *
 * Ideally this would cancel the requests still pending for this device, but
 * that cannot be done nicely until there are per-device request lists.
 * For now it calls qemu_aio_flush() instead.
 */
static void virtio_blk_reset(VirtIODevice *vdev)
{
    (void)vdev; /* unused: nothing here is tracked per device */

    qemu_aio_flush();
}
/*
 * get_config hook: build the device configuration block and copy it to
 * `config`.
 *
 * vdev:   the virtio block device being queried.
 * config: destination buffer; sizeof(struct virtio_blk_config) bytes are
 *         written to it.
 *
 * Capacity comes from bdrv_get_geometry() and the CHS values from
 * bdrv_get_geometry_hint().
 */
static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIOBlock *s = to_virtio_blk(vdev);
    struct virtio_blk_config blkcfg;
    uint64_t capacity;
    int cylinders, heads, secs;

    bdrv_get_geometry(s->bs, &capacity);
    bdrv_get_geometry_hint(s->bs, &cylinders, &heads, &secs);

    /*
     * Not every field is assigned below (size_max is left alone), and the
     * whole structure is copied out to the guest.  Zero it first so no
     * uninitialised stack bytes are exposed.
     */
    memset(&blkcfg, 0, sizeof(blkcfg));

    stq_raw(&blkcfg.capacity, capacity);
    /* NOTE(review): presumably the 128-entry queue minus the request header
     * and status descriptors - confirm. */
    stl_raw(&blkcfg.seg_max, 128 - 2);
    stw_raw(&blkcfg.cylinders, cylinders);
    blkcfg.heads = heads;
    blkcfg.sectors = secs;

    memcpy(config, &blkcfg, sizeof(blkcfg));
}
/*
 * get_features hook: return the feature bitmap offered by this device.
 * The SEG_MAX and GEOMETRY bits are set; `vdev` is not consulted.
 */
static uint32_t virtio_blk_get_features(VirtIODevice *vdev)
{
    uint32_t features = 0;

    features |= 1 << VIRTIO_BLK_F_SEG_MAX;
    features |= 1 << VIRTIO_BLK_F_GEOMETRY;

    return features;
}
/*
 * savevm callback: write the device state to `f`.
 * `opaque` is the VirtIOBlock registered in virtio_blk_init().  Only the
 * generic virtio state is saved, via virtio_save().
 */
static void virtio_blk_save(QEMUFile *f, void *opaque)
{
    VirtIOBlock *blk = opaque;
    VirtIODevice *vdev = &blk->vdev;

    virtio_save(vdev, f);
}
/*
 * loadvm callback: restore the device state from `f`.
 *
 * `opaque` is the VirtIOBlock registered in virtio_blk_init().  Only
 * version 1 is accepted; any other version_id returns -EINVAL without
 * reading anything.  Returns 0 after calling virtio_load().
 */
static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIOBlock *blk = opaque;

    if (version_id == 1) {
        virtio_load(&blk->vdev, f);
        return 0;
    }

    return -EINVAL;
}
/*
 * Create a virtio block device on `bus`, backed by the block driver `bs`.
 *
 * vendor, device: PCI IDs forwarded to virtio_init_pci().
 *
 * Installs the config, feature and reset hooks, sets the drive's geometry
 * hint from bdrv_guess_geometry(), adds a 128-entry request queue and
 * registers the savevm handlers.
 *
 * Returns the new device state, or NULL when virtio_init_pci() fails.
 */
void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device,
                      BlockDriverState *bs)
{
    /* Counts devices created so far; used as the savevm instance id. */
    static int virtio_blk_id;
    VirtIOBlock *blk;
    int cyls, hds, spt;

    blk = (VirtIOBlock *)virtio_init_pci(bus, "virtio-blk", vendor, device,
                                         0, VIRTIO_ID_BLOCK,
                                         0x01, 0x80, 0x00,
                                         sizeof(struct virtio_blk_config),
                                         sizeof(VirtIOBlock));
    if (blk == NULL)
        return NULL;

    blk->bs = bs;
    blk->vdev.reset = virtio_blk_reset;
    blk->vdev.get_features = virtio_blk_get_features;
    blk->vdev.get_config = virtio_blk_update_config;

    bdrv_guess_geometry(bs, &cyls, &hds, &spt);
    bdrv_set_geometry_hint(bs, cyls, hds, spt);

    blk->vq = virtio_add_queue(&blk->vdev, 128, virtio_blk_handle_output);

    register_savevm("virtio-blk", virtio_blk_id, 1,
                    virtio_blk_save, virtio_blk_load, blk);
    virtio_blk_id++;

    return blk;
}

76
hw/virtio-blk.h Normal file
View File

@ -0,0 +1,76 @@
/*
* Virtio Block Device
*
* Copyright IBM, Corp. 2007
*
* Authors:
* Anthony Liguori <aliguori@us.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
*/
#ifndef _QEMU_VIRTIO_BLK_H
#define _QEMU_VIRTIO_BLK_H
#include "virtio.h"
#include "block.h"
#include "pci.h"
/* from Linux's linux/virtio_blk.h */
/* The ID for virtio_block; passed to virtio_init_pci() by virtio_blk_init() */
#define VIRTIO_ID_BLOCK 2
/* Feature bits.  These are bit positions, not masks: virtio_blk_get_features()
 * builds the feature word with (1 << VIRTIO_BLK_F_xxx). */
#define VIRTIO_BLK_F_BARRIER 0 /* Does host support barriers? */
#define VIRTIO_BLK_F_SIZE_MAX 1 /* Indicates maximum segment size */
#define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */
#define VIRTIO_BLK_F_GEOMETRY 4 /* Indicates support of legacy geometry */
/* Device configuration block copied to the guest by
 * virtio_blk_update_config().  Packed, so the layout has no padding. */
struct virtio_blk_config
{
/* Device capacity as reported by bdrv_get_geometry()
 * (presumably in 512-byte sectors - confirm). */
uint64_t capacity;
/* Maximum segment size (see VIRTIO_BLK_F_SIZE_MAX); virtio-blk.c does not
 * assign a value to this field. */
uint32_t size_max;
/* Maximum number of segments in a request (see VIRTIO_BLK_F_SEG_MAX). */
uint32_t seg_max;
/* Legacy geometry (see VIRTIO_BLK_F_GEOMETRY), taken from the block
 * layer's geometry hint. */
uint16_t cylinders;
uint8_t heads;
uint8_t sectors;
} __attribute__((packed));
/* Request types, carried in virtio_blk_outhdr.type. */
/* These two define direction.  VIRTIO_BLK_T_IN is zero, so the device code
 * recognises a read by the absence of the OUT bit. */
#define VIRTIO_BLK_T_IN 0
#define VIRTIO_BLK_T_OUT 1
/* This bit says it's a scsi command, not an actual read or write. */
#define VIRTIO_BLK_T_SCSI_CMD 2
/* Barrier before this op. */
#define VIRTIO_BLK_T_BARRIER 0x80000000
/* Request header supplied by the guest.  The device reads it from the first
 * element of the element's out scatter-gather list (out_sg[0]). */
struct virtio_blk_outhdr
{
/* VIRTIO_BLK_T* */
uint32_t type;
/* io priority. */
uint32_t ioprio;
/* Sector (ie. 512 byte offset) */
uint64_t sector;
};
/* Completion status values stored in virtio_blk_inhdr.status. */
#define VIRTIO_BLK_S_OK 0
#define VIRTIO_BLK_S_IOERR 1
#define VIRTIO_BLK_S_UNSUPP 2
/* Status trailer written by the device.  virtio-blk.c locates it in the LAST
 * element of the element's in scatter-gather list (in_sg[in_num - 1]); any
 * earlier in_sg entries are data buffers for reads. */
struct virtio_blk_inhdr
{
/* One of VIRTIO_BLK_S_* */
unsigned char status;
};
/*
 * Create a virtio block device on `bus` backed by the block driver `bs`.
 * `vendor` and `device` are the PCI IDs to expose.
 * Returns the device state, or NULL if the underlying virtio PCI device
 * could not be created.
 */
void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device,
BlockDriverState *bs);
#endif

View File

@ -253,7 +253,7 @@ this drive. If the filename contains comma, you must double it
(for instance, "file=my,,file" to use file "my,file"). (for instance, "file=my,,file" to use file "my,file").
@item if=@var{interface} @item if=@var{interface}
This option defines on which type of interface the drive is connected. This option defines on which type of interface the drive is connected.
Available types are: ide, scsi, sd, mtd, floppy, pflash. Available types are: ide, scsi, sd, mtd, floppy, pflash, virtio.
@item bus=@var{bus},unit=@var{unit} @item bus=@var{bus},unit=@var{unit}
These options define where is connected the drive by defining the bus number and These options define where is connected the drive by defining the bus number and
the unit id. the unit id.

View File

@ -123,7 +123,7 @@ extern unsigned int nb_prom_envs;
#endif #endif
typedef enum { typedef enum {
IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO
} BlockInterfaceType; } BlockInterfaceType;
typedef struct DriveInfo { typedef struct DriveInfo {

6
vl.c
View File

@ -2267,7 +2267,10 @@ static int drive_init(struct drive_opt *arg, int snapshot,
} else if (!strcmp(buf, "sd")) { } else if (!strcmp(buf, "sd")) {
type = IF_SD; type = IF_SD;
max_devs = 0; max_devs = 0;
} else { } else if (!strcmp(buf, "virtio")) {
type = IF_VIRTIO;
max_devs = 0;
} else {
fprintf(stderr, "qemu: '%s' unsupported bus type '%s'\n", str, buf); fprintf(stderr, "qemu: '%s' unsupported bus type '%s'\n", str, buf);
return -1; return -1;
} }
@ -2474,6 +2477,7 @@ static int drive_init(struct drive_opt *arg, int snapshot,
break; break;
case IF_PFLASH: case IF_PFLASH:
case IF_MTD: case IF_MTD:
case IF_VIRTIO:
break; break;
} }
if (!file[0]) if (!file[0])