contrib/vhost-user-blk: introduce a vhost-user-blk sample application
This commit introduces a vhost-user-blk backend device, it uses UNIX domain socket to communicate with QEMU. The vhost-user-blk sample application should be used with QEMU vhost-user-blk-pci device. To use it, complie with: make vhost-user-blk and start like this: vhost-user-blk -b /dev/sdb -s /path/vhost.socket Signed-off-by: Changpeng Liu <changpeng.liu@intel.com> Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
This commit is contained in:
parent
0bc24d831e
commit
406d2aa2cc
1
.gitignore
vendored
1
.gitignore
vendored
@ -54,6 +54,7 @@
|
||||
/module_block.h
|
||||
/scsi/qemu-pr-helper
|
||||
/vhost-user-scsi
|
||||
/vhost-user-blk
|
||||
/fsdev/virtfs-proxy-helper
|
||||
*.tmp
|
||||
*.[1-9]
|
||||
|
3
Makefile
3
Makefile
@ -334,6 +334,7 @@ dummy := $(call unnest-vars,, \
|
||||
ivshmem-server-obj-y \
|
||||
libvhost-user-obj-y \
|
||||
vhost-user-scsi-obj-y \
|
||||
vhost-user-blk-obj-y \
|
||||
qga-vss-dll-obj-y \
|
||||
block-obj-y \
|
||||
block-obj-m \
|
||||
@ -565,6 +566,8 @@ ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) $(COMMON_LDADDS)
|
||||
endif
|
||||
vhost-user-scsi$(EXESUF): $(vhost-user-scsi-obj-y) libvhost-user.a
|
||||
$(call LINK, $^)
|
||||
vhost-user-blk$(EXESUF): $(vhost-user-blk-obj-y) libvhost-user.a
|
||||
$(call LINK, $^)
|
||||
|
||||
module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak
|
||||
$(call quiet-command,$(PYTHON) $< $@ \
|
||||
|
@ -115,6 +115,7 @@ libvhost-user-obj-y = contrib/libvhost-user/
|
||||
vhost-user-scsi.o-cflags := $(LIBISCSI_CFLAGS)
|
||||
vhost-user-scsi.o-libs := $(LIBISCSI_LIBS)
|
||||
vhost-user-scsi-obj-y = contrib/vhost-user-scsi/
|
||||
vhost-user-blk-obj-y = contrib/vhost-user-blk/
|
||||
|
||||
######################################################################
|
||||
trace-events-subdirs =
|
||||
|
1
contrib/vhost-user-blk/Makefile.objs
Normal file
1
contrib/vhost-user-blk/Makefile.objs
Normal file
@ -0,0 +1 @@
|
||||
vhost-user-blk-obj-y = vhost-user-blk.o
|
545
contrib/vhost-user-blk/vhost-user-blk.c
Normal file
545
contrib/vhost-user-blk/vhost-user-blk.c
Normal file
@ -0,0 +1,545 @@
|
||||
/*
|
||||
* vhost-user-blk sample application
|
||||
*
|
||||
* Copyright (c) 2017 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Author:
|
||||
* Changpeng Liu <changpeng.liu@intel.com>
|
||||
*
|
||||
* This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
|
||||
* implementation by:
|
||||
* Felipe Franciosi <felipe@nutanix.com>
|
||||
* Anthony Liguori <aliguori@us.ibm.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 only.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "standard-headers/linux/virtio_blk.h"
|
||||
#include "contrib/libvhost-user/libvhost-user-glib.h"
|
||||
#include "contrib/libvhost-user/libvhost-user.h"
|
||||
|
||||
#include <glib.h>
|
||||
|
||||
struct virtio_blk_inhdr {
|
||||
unsigned char status;
|
||||
};
|
||||
|
||||
/* vhost user block device */
|
||||
typedef struct VubDev {
|
||||
VugDev parent;
|
||||
int blk_fd;
|
||||
struct virtio_blk_config blkcfg;
|
||||
char *blk_name;
|
||||
GMainLoop *loop;
|
||||
} VubDev;
|
||||
|
||||
typedef struct VubReq {
|
||||
VuVirtqElement *elem;
|
||||
int64_t sector_num;
|
||||
size_t size;
|
||||
struct virtio_blk_inhdr *in;
|
||||
struct virtio_blk_outhdr *out;
|
||||
VubDev *vdev_blk;
|
||||
struct VuVirtq *vq;
|
||||
} VubReq;
|
||||
|
||||
/* refer util/iov.c */
|
||||
static size_t vub_iov_size(const struct iovec *iov,
|
||||
const unsigned int iov_cnt)
|
||||
{
|
||||
size_t len;
|
||||
unsigned int i;
|
||||
|
||||
len = 0;
|
||||
for (i = 0; i < iov_cnt; i++) {
|
||||
len += iov[i].iov_len;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
static void vub_panic_cb(VuDev *vu_dev, const char *buf)
|
||||
{
|
||||
VugDev *gdev;
|
||||
VubDev *vdev_blk;
|
||||
|
||||
assert(vu_dev);
|
||||
|
||||
gdev = container_of(vu_dev, VugDev, parent);
|
||||
vdev_blk = container_of(gdev, VubDev, parent);
|
||||
if (buf) {
|
||||
g_warning("vu_panic: %s", buf);
|
||||
}
|
||||
|
||||
g_main_loop_quit(vdev_blk->loop);
|
||||
}
|
||||
|
||||
static void vub_req_complete(VubReq *req)
|
||||
{
|
||||
VugDev *gdev = &req->vdev_blk->parent;
|
||||
VuDev *vu_dev = &gdev->parent;
|
||||
|
||||
/* IO size with 1 extra status byte */
|
||||
vu_queue_push(vu_dev, req->vq, req->elem,
|
||||
req->size + 1);
|
||||
vu_queue_notify(vu_dev, req->vq);
|
||||
|
||||
if (req->elem) {
|
||||
free(req->elem);
|
||||
}
|
||||
|
||||
g_free(req);
|
||||
}
|
||||
|
||||
static int vub_open(const char *file_name, bool wce)
|
||||
{
|
||||
int fd;
|
||||
int flags = O_RDWR;
|
||||
|
||||
if (!wce) {
|
||||
flags |= O_DIRECT;
|
||||
}
|
||||
|
||||
fd = open(file_name, flags);
|
||||
if (fd < 0) {
|
||||
fprintf(stderr, "Cannot open file %s, %s\n", file_name,
|
||||
strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
|
||||
{
|
||||
VubDev *vdev_blk = req->vdev_blk;
|
||||
ssize_t rc;
|
||||
|
||||
if (!iovcnt) {
|
||||
fprintf(stderr, "Invalid Read IOV count\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
req->size = vub_iov_size(iov, iovcnt);
|
||||
rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
|
||||
if (rc < 0) {
|
||||
fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
|
||||
vdev_blk->blk_name, req->sector_num, req->size,
|
||||
strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
|
||||
{
|
||||
VubDev *vdev_blk = req->vdev_blk;
|
||||
ssize_t rc;
|
||||
|
||||
if (!iovcnt) {
|
||||
fprintf(stderr, "Invalid Write IOV count\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
req->size = vub_iov_size(iov, iovcnt);
|
||||
rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
|
||||
if (rc < 0) {
|
||||
fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
|
||||
vdev_blk->blk_name, req->sector_num, req->size,
|
||||
strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void
|
||||
vub_flush(VubReq *req)
|
||||
{
|
||||
VubDev *vdev_blk = req->vdev_blk;
|
||||
|
||||
fdatasync(vdev_blk->blk_fd);
|
||||
}
|
||||
|
||||
static int vub_virtio_process_req(VubDev *vdev_blk,
|
||||
VuVirtq *vq)
|
||||
{
|
||||
VugDev *gdev = &vdev_blk->parent;
|
||||
VuDev *vu_dev = &gdev->parent;
|
||||
VuVirtqElement *elem;
|
||||
uint32_t type;
|
||||
unsigned in_num;
|
||||
unsigned out_num;
|
||||
VubReq *req;
|
||||
|
||||
elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
|
||||
if (!elem) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* refer to hw/block/virtio_blk.c */
|
||||
if (elem->out_num < 1 || elem->in_num < 1) {
|
||||
fprintf(stderr, "virtio-blk request missing headers\n");
|
||||
free(elem);
|
||||
return -1;
|
||||
}
|
||||
|
||||
req = g_new0(VubReq, 1);
|
||||
req->vdev_blk = vdev_blk;
|
||||
req->vq = vq;
|
||||
req->elem = elem;
|
||||
|
||||
in_num = elem->in_num;
|
||||
out_num = elem->out_num;
|
||||
|
||||
/* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
|
||||
if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
|
||||
fprintf(stderr, "Invalid outhdr size\n");
|
||||
goto err;
|
||||
}
|
||||
req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
|
||||
out_num--;
|
||||
|
||||
if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
|
||||
fprintf(stderr, "Invalid inhdr size\n");
|
||||
goto err;
|
||||
}
|
||||
req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
|
||||
in_num--;
|
||||
|
||||
type = le32toh(req->out->type);
|
||||
switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
|
||||
case VIRTIO_BLK_T_IN: {
|
||||
ssize_t ret = 0;
|
||||
bool is_write = type & VIRTIO_BLK_T_OUT;
|
||||
req->sector_num = le64toh(req->out->sector);
|
||||
if (is_write) {
|
||||
ret = vub_writev(req, &elem->out_sg[1], out_num);
|
||||
} else {
|
||||
ret = vub_readv(req, &elem->in_sg[0], in_num);
|
||||
}
|
||||
if (ret >= 0) {
|
||||
req->in->status = VIRTIO_BLK_S_OK;
|
||||
} else {
|
||||
req->in->status = VIRTIO_BLK_S_IOERR;
|
||||
}
|
||||
vub_req_complete(req);
|
||||
break;
|
||||
}
|
||||
case VIRTIO_BLK_T_FLUSH: {
|
||||
vub_flush(req);
|
||||
req->in->status = VIRTIO_BLK_S_OK;
|
||||
vub_req_complete(req);
|
||||
break;
|
||||
}
|
||||
case VIRTIO_BLK_T_GET_ID: {
|
||||
size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
|
||||
VIRTIO_BLK_ID_BYTES);
|
||||
snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
|
||||
req->in->status = VIRTIO_BLK_S_OK;
|
||||
req->size = elem->in_sg[0].iov_len;
|
||||
vub_req_complete(req);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
req->in->status = VIRTIO_BLK_S_UNSUPP;
|
||||
vub_req_complete(req);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
free(elem);
|
||||
g_free(req);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void vub_process_vq(VuDev *vu_dev, int idx)
|
||||
{
|
||||
VugDev *gdev;
|
||||
VubDev *vdev_blk;
|
||||
VuVirtq *vq;
|
||||
int ret;
|
||||
|
||||
if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
|
||||
fprintf(stderr, "VQ Index out of range: %d\n", idx);
|
||||
vub_panic_cb(vu_dev, NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
gdev = container_of(vu_dev, VugDev, parent);
|
||||
vdev_blk = container_of(gdev, VubDev, parent);
|
||||
assert(vdev_blk);
|
||||
|
||||
vq = vu_get_queue(vu_dev, idx);
|
||||
assert(vq);
|
||||
|
||||
while (1) {
|
||||
ret = vub_virtio_process_req(vdev_blk, vq);
|
||||
if (ret) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
|
||||
{
|
||||
VuVirtq *vq;
|
||||
|
||||
assert(vu_dev);
|
||||
|
||||
vq = vu_get_queue(vu_dev, idx);
|
||||
vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
vub_get_features(VuDev *dev)
|
||||
{
|
||||
return 1ull << VIRTIO_BLK_F_SIZE_MAX |
|
||||
1ull << VIRTIO_BLK_F_SEG_MAX |
|
||||
1ull << VIRTIO_BLK_F_TOPOLOGY |
|
||||
1ull << VIRTIO_BLK_F_BLK_SIZE |
|
||||
1ull << VIRTIO_BLK_F_FLUSH |
|
||||
1ull << VIRTIO_BLK_F_CONFIG_WCE |
|
||||
1ull << VIRTIO_F_VERSION_1 |
|
||||
1ull << VHOST_USER_F_PROTOCOL_FEATURES;
|
||||
}
|
||||
|
||||
static int
|
||||
vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
|
||||
{
|
||||
VugDev *gdev;
|
||||
VubDev *vdev_blk;
|
||||
|
||||
gdev = container_of(vu_dev, VugDev, parent);
|
||||
vdev_blk = container_of(gdev, VubDev, parent);
|
||||
memcpy(config, &vdev_blk->blkcfg, len);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
vub_set_config(VuDev *vu_dev, const uint8_t *data,
|
||||
uint32_t offset, uint32_t size, uint32_t flags)
|
||||
{
|
||||
VugDev *gdev;
|
||||
VubDev *vdev_blk;
|
||||
uint8_t wce;
|
||||
int fd;
|
||||
|
||||
/* don't support live migration */
|
||||
if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
gdev = container_of(vu_dev, VugDev, parent);
|
||||
vdev_blk = container_of(gdev, VubDev, parent);
|
||||
|
||||
if (offset != offsetof(struct virtio_blk_config, wce) ||
|
||||
size != 1) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
wce = *data;
|
||||
if (wce == vdev_blk->blkcfg.wce) {
|
||||
/* Do nothing as same with old configuration */
|
||||
return 0;
|
||||
}
|
||||
|
||||
vdev_blk->blkcfg.wce = wce;
|
||||
fprintf(stdout, "Write Cache Policy Changed\n");
|
||||
if (vdev_blk->blk_fd >= 0) {
|
||||
close(vdev_blk->blk_fd);
|
||||
vdev_blk->blk_fd = -1;
|
||||
}
|
||||
|
||||
fd = vub_open(vdev_blk->blk_name, wce);
|
||||
if (fd < 0) {
|
||||
fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
|
||||
vdev_blk->blk_fd = -1;
|
||||
return -1;
|
||||
}
|
||||
vdev_blk->blk_fd = fd;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const VuDevIface vub_iface = {
|
||||
.get_features = vub_get_features,
|
||||
.queue_set_started = vub_queue_set_started,
|
||||
.get_config = vub_get_config,
|
||||
.set_config = vub_set_config,
|
||||
};
|
||||
|
||||
static int unix_sock_new(char *unix_fn)
|
||||
{
|
||||
int sock;
|
||||
struct sockaddr_un un;
|
||||
size_t len;
|
||||
|
||||
assert(unix_fn);
|
||||
|
||||
sock = socket(AF_UNIX, SOCK_STREAM, 0);
|
||||
if (sock <= 0) {
|
||||
perror("socket");
|
||||
return -1;
|
||||
}
|
||||
|
||||
un.sun_family = AF_UNIX;
|
||||
(void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
|
||||
len = sizeof(un.sun_family) + strlen(un.sun_path);
|
||||
|
||||
(void)unlink(unix_fn);
|
||||
if (bind(sock, (struct sockaddr *)&un, len) < 0) {
|
||||
perror("bind");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (listen(sock, 1) < 0) {
|
||||
perror("listen");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
return sock;
|
||||
|
||||
fail:
|
||||
(void)close(sock);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void vub_free(struct VubDev *vdev_blk)
|
||||
{
|
||||
if (!vdev_blk) {
|
||||
return;
|
||||
}
|
||||
|
||||
g_main_loop_unref(vdev_blk->loop);
|
||||
if (vdev_blk->blk_fd >= 0) {
|
||||
close(vdev_blk->blk_fd);
|
||||
}
|
||||
g_free(vdev_blk);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
vub_get_blocksize(int fd)
|
||||
{
|
||||
uint32_t blocksize = 512;
|
||||
|
||||
#if defined(__linux__) && defined(BLKSSZGET)
|
||||
if (ioctl(fd, BLKSSZGET, &blocksize) == 0) {
|
||||
return blocklen;
|
||||
}
|
||||
#endif
|
||||
|
||||
return blocksize;
|
||||
}
|
||||
|
||||
static void
|
||||
vub_initialize_config(int fd, struct virtio_blk_config *config)
|
||||
{
|
||||
off64_t capacity;
|
||||
|
||||
capacity = lseek64(fd, 0, SEEK_END);
|
||||
config->capacity = capacity >> 9;
|
||||
config->blk_size = vub_get_blocksize(fd);
|
||||
config->size_max = 65536;
|
||||
config->seg_max = 128 - 2;
|
||||
config->min_io_size = 1;
|
||||
config->opt_io_size = 1;
|
||||
config->num_queues = 1;
|
||||
}
|
||||
|
||||
static VubDev *
|
||||
vub_new(char *blk_file)
|
||||
{
|
||||
VubDev *vdev_blk;
|
||||
|
||||
vdev_blk = g_new0(VubDev, 1);
|
||||
vdev_blk->loop = g_main_loop_new(NULL, FALSE);
|
||||
vdev_blk->blk_fd = vub_open(blk_file, 0);
|
||||
if (vdev_blk->blk_fd < 0) {
|
||||
fprintf(stderr, "Error to open block device %s\n", blk_file);
|
||||
vub_free(vdev_blk);
|
||||
return NULL;
|
||||
}
|
||||
vdev_blk->blkcfg.wce = 0;
|
||||
vdev_blk->blk_name = blk_file;
|
||||
|
||||
/* fill virtio_blk_config with block parameters */
|
||||
vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);
|
||||
|
||||
return vdev_blk;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int opt;
|
||||
char *unix_socket = NULL;
|
||||
char *blk_file = NULL;
|
||||
int lsock = -1, csock = -1;
|
||||
VubDev *vdev_blk = NULL;
|
||||
|
||||
while ((opt = getopt(argc, argv, "b:s:h")) != -1) {
|
||||
switch (opt) {
|
||||
case 'b':
|
||||
blk_file = g_strdup(optarg);
|
||||
break;
|
||||
case 's':
|
||||
unix_socket = g_strdup(optarg);
|
||||
break;
|
||||
case 'h':
|
||||
default:
|
||||
printf("Usage: %s [-b block device or file, -s UNIX domain socket]"
|
||||
" | [ -h ]\n", argv[0]);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (!unix_socket || !blk_file) {
|
||||
printf("Usage: %s [-b block device or file, -s UNIX domain socket] |"
|
||||
" [ -h ]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
lsock = unix_sock_new(unix_socket);
|
||||
if (lsock < 0) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
csock = accept(lsock, (void *)0, (void *)0);
|
||||
if (csock < 0) {
|
||||
fprintf(stderr, "Accept error %s\n", strerror(errno));
|
||||
goto err;
|
||||
}
|
||||
|
||||
vdev_blk = vub_new(blk_file);
|
||||
if (!vdev_blk) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface);
|
||||
|
||||
g_main_loop_run(vdev_blk->loop);
|
||||
|
||||
vug_deinit(&vdev_blk->parent);
|
||||
|
||||
err:
|
||||
vub_free(vdev_blk);
|
||||
if (csock >= 0) {
|
||||
close(csock);
|
||||
}
|
||||
if (lsock >= 0) {
|
||||
close(lsock);
|
||||
}
|
||||
g_free(unix_socket);
|
||||
g_free(blk_file);
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user