raw-posix: Switch to bdrv_co_* interfaces
In order to use the modern byte-based .bdrv_co_preadv/pwritev() interface, this patch switches raw-posix to coroutine-based interfaces as a first step. In terms of semantics and performance, it doesn't make a difference with the existing code whether we go from a coroutine to a callback-based interface already in block/io.c or only in linux-aio.c. As there have been concerns in the past that this change may be a step in the wrong direction with respect to a possible AIO fast path, the old callback-based interface for linux-aio is left around and can be reactivated when a fast path (e.g. directly from virtio-blk dataplane, bypassing the whole block layer) is implemented. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
parent
9896c8765f
commit
2174f12bde
@ -11,8 +11,10 @@
|
||||
#include "qemu-common.h"
|
||||
#include "block/aio.h"
|
||||
#include "qemu/queue.h"
|
||||
#include "block/block.h"
|
||||
#include "block/raw-aio.h"
|
||||
#include "qemu/event_notifier.h"
|
||||
#include "qemu/coroutine.h"
|
||||
|
||||
#include <libaio.h>
|
||||
|
||||
@ -30,6 +32,7 @@
|
||||
|
||||
struct qemu_laiocb {
|
||||
BlockAIOCB common;
|
||||
Coroutine *co;
|
||||
LinuxAioState *ctx;
|
||||
struct iocb iocb;
|
||||
ssize_t ret;
|
||||
@ -88,9 +91,14 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
|
||||
}
|
||||
}
|
||||
}
|
||||
laiocb->common.cb(laiocb->common.opaque, ret);
|
||||
|
||||
laiocb->ret = ret;
|
||||
if (laiocb->co) {
|
||||
qemu_coroutine_enter(laiocb->co, NULL);
|
||||
} else {
|
||||
laiocb->common.cb(laiocb->common.opaque, ret);
|
||||
qemu_aio_unref(laiocb);
|
||||
}
|
||||
}
|
||||
|
||||
/* The completion BH fetches completed I/O requests and invokes their
|
||||
@ -230,22 +238,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
|
||||
}
|
||||
}
|
||||
|
||||
BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockCompletionFunc *cb, void *opaque, int type)
|
||||
static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
|
||||
int type)
|
||||
{
|
||||
struct qemu_laiocb *laiocb;
|
||||
struct iocb *iocbs;
|
||||
off_t offset = sector_num * 512;
|
||||
|
||||
laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
|
||||
laiocb->nbytes = nb_sectors * 512;
|
||||
laiocb->ctx = s;
|
||||
laiocb->ret = -EINPROGRESS;
|
||||
laiocb->is_read = (type == QEMU_AIO_READ);
|
||||
laiocb->qiov = qiov;
|
||||
|
||||
iocbs = &laiocb->iocb;
|
||||
LinuxAioState *s = laiocb->ctx;
|
||||
struct iocb *iocbs = &laiocb->iocb;
|
||||
QEMUIOVector *qiov = laiocb->qiov;
|
||||
|
||||
switch (type) {
|
||||
case QEMU_AIO_WRITE:
|
||||
@ -258,7 +256,7 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
|
||||
default:
|
||||
fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
|
||||
__func__, type);
|
||||
goto out_free_aiocb;
|
||||
return -EIO;
|
||||
}
|
||||
io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
|
||||
|
||||
@ -268,11 +266,56 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
|
||||
(!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) {
|
||||
ioq_submit(s);
|
||||
}
|
||||
return &laiocb->common;
|
||||
|
||||
out_free_aiocb:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Submit one Linux AIO request from coroutine context and wait for it.
 *
 * The request descriptor lives on this coroutine's stack.  Once
 * laio_do_submit() has accepted the request the coroutine yields;
 * qemu_laio_process_completion() stores the result in laiocb.ret and
 * re-enters laiocb.co, after which that result is returned to the caller.
 *
 * @bs:         not used by this function
 * @s:          Linux AIO state recorded in the request as its context
 * @fd:         file descriptor passed on to laio_do_submit()
 * @sector_num: start of the request, in units of BDRV_SECTOR_SIZE
 * @qiov:       I/O vector recorded in the request
 * @nb_sectors: length of the request, in units of BDRV_SECTOR_SIZE
 * @type:       request type, compared against QEMU_AIO_READ to set is_read
 *              and passed on to laio_do_submit()
 *
 * Returns: the negative value from laio_do_submit() if submission failed,
 * otherwise the value found in laiocb.ret after the coroutine is resumed.
 */
int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
                                int64_t sector_num, QEMUIOVector *qiov,
                                int nb_sectors, int type)
{
    off_t offset = sector_num * BDRV_SECTOR_SIZE;
    int ret;

    struct qemu_laiocb laiocb = {
        .co         = qemu_coroutine_self(),
        .nbytes     = nb_sectors * BDRV_SECTOR_SIZE,
        .ctx        = s,
        /*
         * Mark the request as in flight, as laio_submit() does, so that a
         * not-yet-completed request is never mistaken for a successful one
         * (a zero ret).
         */
        .ret        = -EINPROGRESS,
        .is_read    = (type == QEMU_AIO_READ),
        .qiov       = qiov,
    };

    ret = laio_do_submit(fd, &laiocb, offset, type);
    if (ret < 0) {
        return ret;
    }

    /* Resumed by qemu_laio_process_completion() once laiocb.ret is set. */
    qemu_coroutine_yield();
    return laiocb.ret;
}
|
||||
|
||||
BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockCompletionFunc *cb, void *opaque, int type)
|
||||
{
|
||||
struct qemu_laiocb *laiocb;
|
||||
off_t offset = sector_num * BDRV_SECTOR_SIZE;
|
||||
int ret;
|
||||
|
||||
laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
|
||||
laiocb->nbytes = nb_sectors * BDRV_SECTOR_SIZE;
|
||||
laiocb->ctx = s;
|
||||
laiocb->ret = -EINPROGRESS;
|
||||
laiocb->is_read = (type == QEMU_AIO_READ);
|
||||
laiocb->qiov = qiov;
|
||||
|
||||
ret = laio_do_submit(fd, laiocb, offset, type);
|
||||
if (ret < 0) {
|
||||
qemu_aio_unref(laiocb);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return &laiocb->common;
|
||||
}
|
||||
|
||||
void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
|
||||
|
@ -15,6 +15,7 @@
|
||||
#ifndef QEMU_RAW_AIO_H
|
||||
#define QEMU_RAW_AIO_H
|
||||
|
||||
#include "qemu/coroutine.h"
|
||||
#include "qemu/iov.h"
|
||||
|
||||
/* AIO request types */
|
||||
@ -38,6 +39,9 @@
|
||||
typedef struct LinuxAioState LinuxAioState;
|
||||
LinuxAioState *laio_init(void);
|
||||
void laio_cleanup(LinuxAioState *s);
|
||||
int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
|
||||
int64_t sector_num, QEMUIOVector *qiov,
|
||||
int nb_sectors, int type);
|
||||
BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockCompletionFunc *cb, void *opaque, int type);
|
||||
|
@ -1325,14 +1325,13 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
|
||||
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
|
||||
}
|
||||
|
||||
static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockCompletionFunc *cb, void *opaque, int type)
|
||||
static int coroutine_fn raw_co_rw(BlockDriverState *bs, int64_t sector_num,
|
||||
int nb_sectors, QEMUIOVector *qiov, int type)
|
||||
{
|
||||
BDRVRawState *s = bs->opaque;
|
||||
|
||||
if (fd_open(bs) < 0)
|
||||
return NULL;
|
||||
return -EIO;
|
||||
|
||||
/*
|
||||
* Check if the underlying device requires requests to be aligned,
|
||||
@ -1345,14 +1344,26 @@ static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
|
||||
type |= QEMU_AIO_MISALIGNED;
|
||||
#ifdef CONFIG_LINUX_AIO
|
||||
} else if (s->use_aio) {
|
||||
return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
|
||||
nb_sectors, cb, opaque, type);
|
||||
return laio_co_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
|
||||
nb_sectors, type);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
|
||||
cb, opaque, type);
|
||||
return paio_submit_co(bs, s->fd, sector_num * BDRV_SECTOR_SIZE, qiov,
|
||||
nb_sectors * BDRV_SECTOR_SIZE, type);
|
||||
}
|
||||
|
||||
/*
 * Coroutine read entry point: forwards the request unchanged to raw_co_rw(),
 * tagged as QEMU_AIO_READ, and returns whatever raw_co_rw() returns.
 */
static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
                                     int nb_sectors, QEMUIOVector *qiov)
{
    const int aio_type = QEMU_AIO_READ;

    return raw_co_rw(bs, sector_num, nb_sectors, qiov, aio_type);
}
|
||||
|
||||
/*
 * Coroutine write entry point: forwards the request unchanged to raw_co_rw(),
 * tagged as QEMU_AIO_WRITE, and returns whatever raw_co_rw() returns.
 */
static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *qiov)
{
    const int aio_type = QEMU_AIO_WRITE;

    return raw_co_rw(bs, sector_num, nb_sectors, qiov, aio_type);
}
|
||||
|
||||
static void raw_aio_plug(BlockDriverState *bs)
|
||||
@ -1375,22 +1386,6 @@ static void raw_aio_unplug(BlockDriverState *bs)
|
||||
#endif
|
||||
}
|
||||
|
||||
static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockCompletionFunc *cb, void *opaque)
|
||||
{
|
||||
return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
|
||||
cb, opaque, QEMU_AIO_READ);
|
||||
}
|
||||
|
||||
static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockCompletionFunc *cb, void *opaque)
|
||||
{
|
||||
return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
|
||||
cb, opaque, QEMU_AIO_WRITE);
|
||||
}
|
||||
|
||||
static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
|
||||
BlockCompletionFunc *cb, void *opaque)
|
||||
{
|
||||
@ -1957,8 +1952,8 @@ BlockDriver bdrv_file = {
|
||||
.bdrv_co_get_block_status = raw_co_get_block_status,
|
||||
.bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
|
||||
|
||||
.bdrv_aio_readv = raw_aio_readv,
|
||||
.bdrv_aio_writev = raw_aio_writev,
|
||||
.bdrv_co_readv = raw_co_readv,
|
||||
.bdrv_co_writev = raw_co_writev,
|
||||
.bdrv_aio_flush = raw_aio_flush,
|
||||
.bdrv_aio_discard = raw_aio_discard,
|
||||
.bdrv_refresh_limits = raw_refresh_limits,
|
||||
@ -2405,8 +2400,8 @@ static BlockDriver bdrv_host_device = {
|
||||
.create_opts = &raw_create_opts,
|
||||
.bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
|
||||
|
||||
.bdrv_aio_readv = raw_aio_readv,
|
||||
.bdrv_aio_writev = raw_aio_writev,
|
||||
.bdrv_co_readv = raw_co_readv,
|
||||
.bdrv_co_writev = raw_co_writev,
|
||||
.bdrv_aio_flush = raw_aio_flush,
|
||||
.bdrv_aio_discard = hdev_aio_discard,
|
||||
.bdrv_refresh_limits = raw_refresh_limits,
|
||||
@ -2535,8 +2530,8 @@ static BlockDriver bdrv_host_cdrom = {
|
||||
.bdrv_create = hdev_create,
|
||||
.create_opts = &raw_create_opts,
|
||||
|
||||
.bdrv_aio_readv = raw_aio_readv,
|
||||
.bdrv_aio_writev = raw_aio_writev,
|
||||
.bdrv_co_readv = raw_co_readv,
|
||||
.bdrv_co_writev = raw_co_writev,
|
||||
.bdrv_aio_flush = raw_aio_flush,
|
||||
.bdrv_refresh_limits = raw_refresh_limits,
|
||||
.bdrv_io_plug = raw_aio_plug,
|
||||
@ -2670,8 +2665,8 @@ static BlockDriver bdrv_host_cdrom = {
|
||||
.bdrv_create = hdev_create,
|
||||
.create_opts = &raw_create_opts,
|
||||
|
||||
.bdrv_aio_readv = raw_aio_readv,
|
||||
.bdrv_aio_writev = raw_aio_writev,
|
||||
.bdrv_co_readv = raw_co_readv,
|
||||
.bdrv_co_writev = raw_co_writev,
|
||||
.bdrv_aio_flush = raw_aio_flush,
|
||||
.bdrv_refresh_limits = raw_refresh_limits,
|
||||
.bdrv_io_plug = raw_aio_plug,
|
||||
|
Loading…
Reference in New Issue
Block a user