Block layer patches
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.22 (GNU/Linux)

iQIcBAABAgAGBQJYc5OQAAoJEH8JsnLIjy/W5Z8P+wVaf86agtt5tAnrytkKc3SM
Ic+XiFQb7iN4xjL85O02FkXijB8AqMzGqgdeSts5r7AFu/VVPTa3+F8YinPuwxZY
lxikRtCIihcRvkx1zvZhXtErGFif6BsfQY9F+JyYLrxkg9lepM/kHHlmdAmDiBLx
qL+/CKOkkO5qKsRSqJ+nH33NGhVyJx8NubRbgEiuA0WOcwZ1gCvrAnBlN2dVSl+B
6NN3BDg3DkNSfD67ZVHoC6RNsd7HaZfL2I1ox/uCdsVj2xm4z+iGOmX9pE0gOeGk
arHBeJXbN3ybgJPD0X0bWeFAV3KJMC9Ndjh27ZZjtepAbHvdttDxr8ph0NoGTHV+
CRWYKyObMqQy+1+GpfMNnRHENcSZPlBDTCliKWW0t7JssGEiJZ7Z7kffsmV5r4rU
RnlvIvQ4PaPpPDzubtbyjcwPqsfQFGvRDiBBqSXEDdpy34ru4HLm9w87qHeQkTeG
HkLqkbrNo/0v0TJldOwtDOnKo98vgYds7oZ0TjUDHHf9COeJfU5BEC/7AJhU6U+q
x7hQLw0lWyRnuWKTLxic04T5EQrR5j1EO9PQHa3fk1AIxXf4e7gf5zW++C4DwITT
z+Ma4UcowLwddwSR0MkGcODYvlPWdakrmF/VlG08ul4l0K+0ReffHtANGdBgkBT9
3LefuCDPdc7ushYN2+6g
=wM6S
-----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer patches

# gpg: Signature made Mon 09 Jan 2017 13:43:44 GMT
# gpg: using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  block: Rename raw-{posix,win32} to file-*.c
  block: Rename raw_bsd to raw-format.c
  blkverify: Implement bdrv_co_preadv/pwritev/flush
  blkdebug: Implement bdrv_co_preadv/pwritev/flush
  quorum: Clean up quorum_aio_get()
  quorum: Inline quorum_fifo_aio_cb()
  quorum: Implement .bdrv_co_preadv/pwritev()
  quorum: Avoid bdrv_aio_writev() for rewrites
  quorum: Inline quorum_aio_cb()
  quorum: Do cleanup in caller coroutine
  quorum: Implement .bdrv_co_readv/writev
  quorum: Remove s from quorum_aio_get() arguments
  coroutine: Introduce qemu_coroutine_enter_if_inactive()
  qemu-img: fix in-flight count for qemu-img bench

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
dba5c337c8
@ -1718,9 +1718,9 @@ L: qemu-block@nongnu.org
|
||||
S: Supported
|
||||
F: block/linux-aio.c
|
||||
F: include/block/raw-aio.h
|
||||
F: block/raw-posix.c
|
||||
F: block/raw-win32.c
|
||||
F: block/raw_bsd.c
|
||||
F: block/raw-format.c
|
||||
F: block/file-posix.c
|
||||
F: block/file-win32.c
|
||||
F: block/win32-aio.c
|
||||
|
||||
qcow2
|
||||
|
@ -1,4 +1,4 @@
|
||||
block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
|
||||
block-obj-y += raw-format.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
|
||||
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
|
||||
block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
|
||||
block-obj-y += qed-check.o
|
||||
@ -6,8 +6,8 @@ block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
|
||||
block-obj-y += quorum.o
|
||||
block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
|
||||
block-obj-y += block-backend.o snapshot.o qapi.o
|
||||
block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
|
||||
block-obj-$(CONFIG_POSIX) += raw-posix.o
|
||||
block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o
|
||||
block-obj-$(CONFIG_POSIX) += file-posix.o
|
||||
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
|
||||
block-obj-y += null.o mirror.o commit.o io.o
|
||||
block-obj-y += throttle-groups.o
|
||||
|
@ -58,10 +58,6 @@ typedef struct BlkdebugSuspendedReq {
|
||||
QLIST_ENTRY(BlkdebugSuspendedReq) next;
|
||||
} BlkdebugSuspendedReq;
|
||||
|
||||
static const AIOCBInfo blkdebug_aiocb_info = {
|
||||
.aiocb_size = sizeof(BlkdebugAIOCB),
|
||||
};
|
||||
|
||||
enum {
|
||||
ACTION_INJECT_ERROR,
|
||||
ACTION_SET_STATE,
|
||||
@ -77,7 +73,7 @@ typedef struct BlkdebugRule {
|
||||
int error;
|
||||
int immediately;
|
||||
int once;
|
||||
int64_t sector;
|
||||
int64_t offset;
|
||||
} inject;
|
||||
struct {
|
||||
int new_state;
|
||||
@ -174,6 +170,7 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
|
||||
const char* event_name;
|
||||
BlkdebugEvent event;
|
||||
struct BlkdebugRule *rule;
|
||||
int64_t sector;
|
||||
|
||||
/* Find the right event for the rule */
|
||||
event_name = qemu_opt_get(opts, "event");
|
||||
@ -200,7 +197,9 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
|
||||
rule->options.inject.once = qemu_opt_get_bool(opts, "once", 0);
|
||||
rule->options.inject.immediately =
|
||||
qemu_opt_get_bool(opts, "immediately", 0);
|
||||
rule->options.inject.sector = qemu_opt_get_number(opts, "sector", -1);
|
||||
sector = qemu_opt_get_number(opts, "sector", -1);
|
||||
rule->options.inject.offset =
|
||||
sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE;
|
||||
break;
|
||||
|
||||
case ACTION_SET_STATE:
|
||||
@ -408,17 +407,14 @@ out:
|
||||
|
||||
static void error_callback_bh(void *opaque)
|
||||
{
|
||||
struct BlkdebugAIOCB *acb = opaque;
|
||||
acb->common.cb(acb->common.opaque, acb->ret);
|
||||
qemu_aio_unref(acb);
|
||||
Coroutine *co = opaque;
|
||||
qemu_coroutine_enter(co);
|
||||
}
|
||||
|
||||
static BlockAIOCB *inject_error(BlockDriverState *bs,
|
||||
BlockCompletionFunc *cb, void *opaque, BlkdebugRule *rule)
|
||||
static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
|
||||
{
|
||||
BDRVBlkdebugState *s = bs->opaque;
|
||||
int error = rule->options.inject.error;
|
||||
struct BlkdebugAIOCB *acb;
|
||||
bool immediately = rule->options.inject.immediately;
|
||||
|
||||
if (rule->options.inject.once) {
|
||||
@ -426,81 +422,79 @@ static BlockAIOCB *inject_error(BlockDriverState *bs,
|
||||
remove_rule(rule);
|
||||
}
|
||||
|
||||
if (immediately) {
|
||||
return NULL;
|
||||
if (!immediately) {
|
||||
aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh,
|
||||
qemu_coroutine_self());
|
||||
qemu_coroutine_yield();
|
||||
}
|
||||
|
||||
acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
|
||||
acb->ret = -error;
|
||||
|
||||
aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh, acb);
|
||||
|
||||
return &acb->common;
|
||||
return -error;
|
||||
}
|
||||
|
||||
static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockCompletionFunc *cb, void *opaque)
|
||||
static int coroutine_fn
|
||||
blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
|
||||
QEMUIOVector *qiov, int flags)
|
||||
{
|
||||
BDRVBlkdebugState *s = bs->opaque;
|
||||
BlkdebugRule *rule = NULL;
|
||||
|
||||
QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
|
||||
if (rule->options.inject.sector == -1 ||
|
||||
(rule->options.inject.sector >= sector_num &&
|
||||
rule->options.inject.sector < sector_num + nb_sectors)) {
|
||||
uint64_t inject_offset = rule->options.inject.offset;
|
||||
|
||||
if (inject_offset == -1 ||
|
||||
(inject_offset >= offset && inject_offset < offset + bytes))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (rule && rule->options.inject.error) {
|
||||
return inject_error(bs, cb, opaque, rule);
|
||||
return inject_error(bs, rule);
|
||||
}
|
||||
|
||||
return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors,
|
||||
cb, opaque);
|
||||
return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
|
||||
}
|
||||
|
||||
static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockCompletionFunc *cb, void *opaque)
|
||||
static int coroutine_fn
|
||||
blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
|
||||
QEMUIOVector *qiov, int flags)
|
||||
{
|
||||
BDRVBlkdebugState *s = bs->opaque;
|
||||
BlkdebugRule *rule = NULL;
|
||||
|
||||
QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
|
||||
if (rule->options.inject.sector == -1 ||
|
||||
(rule->options.inject.sector >= sector_num &&
|
||||
rule->options.inject.sector < sector_num + nb_sectors)) {
|
||||
uint64_t inject_offset = rule->options.inject.offset;
|
||||
|
||||
if (inject_offset == -1 ||
|
||||
(inject_offset >= offset && inject_offset < offset + bytes))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (rule && rule->options.inject.error) {
|
||||
return inject_error(bs, cb, opaque, rule);
|
||||
return inject_error(bs, rule);
|
||||
}
|
||||
|
||||
return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
|
||||
cb, opaque);
|
||||
return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
|
||||
}
|
||||
|
||||
static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
|
||||
BlockCompletionFunc *cb, void *opaque)
|
||||
static int blkdebug_co_flush(BlockDriverState *bs)
|
||||
{
|
||||
BDRVBlkdebugState *s = bs->opaque;
|
||||
BlkdebugRule *rule = NULL;
|
||||
|
||||
QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
|
||||
if (rule->options.inject.sector == -1) {
|
||||
if (rule->options.inject.offset == -1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (rule && rule->options.inject.error) {
|
||||
return inject_error(bs, cb, opaque, rule);
|
||||
return inject_error(bs, rule);
|
||||
}
|
||||
|
||||
return bdrv_aio_flush(bs->file->bs, cb, opaque);
|
||||
return bdrv_co_flush(bs->file->bs);
|
||||
}
|
||||
|
||||
|
||||
@ -752,9 +746,9 @@ static BlockDriver bdrv_blkdebug = {
|
||||
.bdrv_refresh_filename = blkdebug_refresh_filename,
|
||||
.bdrv_refresh_limits = blkdebug_refresh_limits,
|
||||
|
||||
.bdrv_aio_readv = blkdebug_aio_readv,
|
||||
.bdrv_aio_writev = blkdebug_aio_writev,
|
||||
.bdrv_aio_flush = blkdebug_aio_flush,
|
||||
.bdrv_co_preadv = blkdebug_co_preadv,
|
||||
.bdrv_co_pwritev = blkdebug_co_pwritev,
|
||||
.bdrv_co_flush_to_disk = blkdebug_co_flush,
|
||||
|
||||
.bdrv_debug_event = blkdebug_debug_event,
|
||||
.bdrv_debug_breakpoint = blkdebug_debug_breakpoint,
|
||||
|
@ -19,38 +19,36 @@ typedef struct {
|
||||
BdrvChild *test_file;
|
||||
} BDRVBlkverifyState;
|
||||
|
||||
typedef struct BlkverifyAIOCB BlkverifyAIOCB;
|
||||
struct BlkverifyAIOCB {
|
||||
BlockAIOCB common;
|
||||
typedef struct BlkverifyRequest {
|
||||
Coroutine *co;
|
||||
BlockDriverState *bs;
|
||||
|
||||
/* Request metadata */
|
||||
bool is_write;
|
||||
int64_t sector_num;
|
||||
int nb_sectors;
|
||||
uint64_t offset;
|
||||
uint64_t bytes;
|
||||
int flags;
|
||||
|
||||
int (*request_fn)(BdrvChild *, int64_t, unsigned int, QEMUIOVector *,
|
||||
BdrvRequestFlags);
|
||||
|
||||
int ret; /* test image result */
|
||||
int raw_ret; /* raw image result */
|
||||
|
||||
int ret; /* first completed request's result */
|
||||
unsigned int done; /* completion counter */
|
||||
|
||||
QEMUIOVector *qiov; /* user I/O vector */
|
||||
QEMUIOVector raw_qiov; /* cloned I/O vector for raw file */
|
||||
void *buf; /* buffer for raw file I/O */
|
||||
QEMUIOVector *raw_qiov; /* cloned I/O vector for raw file */
|
||||
} BlkverifyRequest;
|
||||
|
||||
void (*verify)(BlkverifyAIOCB *acb);
|
||||
};
|
||||
|
||||
static const AIOCBInfo blkverify_aiocb_info = {
|
||||
.aiocb_size = sizeof(BlkverifyAIOCB),
|
||||
};
|
||||
|
||||
static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb,
|
||||
static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyRequest *r,
|
||||
const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, fmt);
|
||||
fprintf(stderr, "blkverify: %s sector_num=%" PRId64 " nb_sectors=%d ",
|
||||
acb->is_write ? "write" : "read", acb->sector_num,
|
||||
acb->nb_sectors);
|
||||
fprintf(stderr, "blkverify: %s offset=%" PRId64 " bytes=%" PRId64 " ",
|
||||
r->is_write ? "write" : "read", r->offset, r->bytes);
|
||||
vfprintf(stderr, fmt, ap);
|
||||
fprintf(stderr, "\n");
|
||||
va_end(ap);
|
||||
@ -166,113 +164,106 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
|
||||
return bdrv_getlength(s->test_file->bs);
|
||||
}
|
||||
|
||||
static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
|
||||
int64_t sector_num, QEMUIOVector *qiov,
|
||||
int nb_sectors,
|
||||
BlockCompletionFunc *cb,
|
||||
void *opaque)
|
||||
static void coroutine_fn blkverify_do_test_req(void *opaque)
|
||||
{
|
||||
BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aiocb_info, bs, cb, opaque);
|
||||
BlkverifyRequest *r = opaque;
|
||||
BDRVBlkverifyState *s = r->bs->opaque;
|
||||
|
||||
acb->is_write = is_write;
|
||||
acb->sector_num = sector_num;
|
||||
acb->nb_sectors = nb_sectors;
|
||||
acb->ret = -EINPROGRESS;
|
||||
acb->done = 0;
|
||||
acb->qiov = qiov;
|
||||
acb->buf = NULL;
|
||||
acb->verify = NULL;
|
||||
return acb;
|
||||
r->ret = r->request_fn(s->test_file, r->offset, r->bytes, r->qiov,
|
||||
r->flags);
|
||||
r->done++;
|
||||
qemu_coroutine_enter_if_inactive(r->co);
|
||||
}
|
||||
|
||||
static void blkverify_aio_bh(void *opaque)
|
||||
static void coroutine_fn blkverify_do_raw_req(void *opaque)
|
||||
{
|
||||
BlkverifyAIOCB *acb = opaque;
|
||||
BlkverifyRequest *r = opaque;
|
||||
|
||||
if (acb->buf) {
|
||||
qemu_iovec_destroy(&acb->raw_qiov);
|
||||
qemu_vfree(acb->buf);
|
||||
}
|
||||
acb->common.cb(acb->common.opaque, acb->ret);
|
||||
qemu_aio_unref(acb);
|
||||
r->raw_ret = r->request_fn(r->bs->file, r->offset, r->bytes, r->raw_qiov,
|
||||
r->flags);
|
||||
r->done++;
|
||||
qemu_coroutine_enter_if_inactive(r->co);
|
||||
}
|
||||
|
||||
static void blkverify_aio_cb(void *opaque, int ret)
|
||||
static int coroutine_fn
|
||||
blkverify_co_prwv(BlockDriverState *bs, BlkverifyRequest *r, uint64_t offset,
|
||||
uint64_t bytes, QEMUIOVector *qiov, QEMUIOVector *raw_qiov,
|
||||
int flags, bool is_write)
|
||||
{
|
||||
BlkverifyAIOCB *acb = opaque;
|
||||
Coroutine *co_a, *co_b;
|
||||
|
||||
switch (++acb->done) {
|
||||
case 1:
|
||||
acb->ret = ret;
|
||||
break;
|
||||
*r = (BlkverifyRequest) {
|
||||
.co = qemu_coroutine_self(),
|
||||
.bs = bs,
|
||||
.offset = offset,
|
||||
.bytes = bytes,
|
||||
.qiov = qiov,
|
||||
.raw_qiov = raw_qiov,
|
||||
.flags = flags,
|
||||
.is_write = is_write,
|
||||
.request_fn = is_write ? bdrv_co_pwritev : bdrv_co_preadv,
|
||||
};
|
||||
|
||||
case 2:
|
||||
if (acb->ret != ret) {
|
||||
blkverify_err(acb, "return value mismatch %d != %d", acb->ret, ret);
|
||||
co_a = qemu_coroutine_create(blkverify_do_test_req, r);
|
||||
co_b = qemu_coroutine_create(blkverify_do_raw_req, r);
|
||||
|
||||
qemu_coroutine_enter(co_a);
|
||||
qemu_coroutine_enter(co_b);
|
||||
|
||||
while (r->done < 2) {
|
||||
qemu_coroutine_yield();
|
||||
}
|
||||
|
||||
if (acb->verify) {
|
||||
acb->verify(acb);
|
||||
if (r->ret != r->raw_ret) {
|
||||
blkverify_err(r, "return value mismatch %d != %d", r->ret, r->raw_ret);
|
||||
}
|
||||
|
||||
aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
|
||||
blkverify_aio_bh, acb);
|
||||
break;
|
||||
return r->ret;
|
||||
}
|
||||
|
||||
static int coroutine_fn
|
||||
blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
|
||||
QEMUIOVector *qiov, int flags)
|
||||
{
|
||||
BlkverifyRequest r;
|
||||
QEMUIOVector raw_qiov;
|
||||
void *buf;
|
||||
ssize_t cmp_offset;
|
||||
int ret;
|
||||
|
||||
buf = qemu_blockalign(bs->file->bs, qiov->size);
|
||||
qemu_iovec_init(&raw_qiov, qiov->niov);
|
||||
qemu_iovec_clone(&raw_qiov, qiov, buf);
|
||||
|
||||
ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov, flags,
|
||||
false);
|
||||
|
||||
cmp_offset = qemu_iovec_compare(qiov, &raw_qiov);
|
||||
if (cmp_offset != -1) {
|
||||
blkverify_err(&r, "contents mismatch at offset %" PRId64,
|
||||
offset + cmp_offset);
|
||||
}
|
||||
|
||||
qemu_iovec_destroy(&raw_qiov);
|
||||
qemu_vfree(buf);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void blkverify_verify_readv(BlkverifyAIOCB *acb)
|
||||
static int coroutine_fn
|
||||
blkverify_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
|
||||
QEMUIOVector *qiov, int flags)
|
||||
{
|
||||
ssize_t offset = qemu_iovec_compare(acb->qiov, &acb->raw_qiov);
|
||||
if (offset != -1) {
|
||||
blkverify_err(acb, "contents mismatch in sector %" PRId64,
|
||||
acb->sector_num + (int64_t)(offset / BDRV_SECTOR_SIZE));
|
||||
}
|
||||
BlkverifyRequest r;
|
||||
return blkverify_co_prwv(bs, &r, offset, bytes, qiov, qiov, flags, true);
|
||||
}
|
||||
|
||||
static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockCompletionFunc *cb, void *opaque)
|
||||
{
|
||||
BDRVBlkverifyState *s = bs->opaque;
|
||||
BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
|
||||
nb_sectors, cb, opaque);
|
||||
|
||||
acb->verify = blkverify_verify_readv;
|
||||
acb->buf = qemu_blockalign(bs->file->bs, qiov->size);
|
||||
qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
|
||||
qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
|
||||
|
||||
bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
|
||||
blkverify_aio_cb, acb);
|
||||
bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
|
||||
blkverify_aio_cb, acb);
|
||||
return &acb->common;
|
||||
}
|
||||
|
||||
static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockCompletionFunc *cb, void *opaque)
|
||||
{
|
||||
BDRVBlkverifyState *s = bs->opaque;
|
||||
BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
|
||||
nb_sectors, cb, opaque);
|
||||
|
||||
bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
|
||||
blkverify_aio_cb, acb);
|
||||
bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
|
||||
blkverify_aio_cb, acb);
|
||||
return &acb->common;
|
||||
}
|
||||
|
||||
static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs,
|
||||
BlockCompletionFunc *cb,
|
||||
void *opaque)
|
||||
static int blkverify_co_flush(BlockDriverState *bs)
|
||||
{
|
||||
BDRVBlkverifyState *s = bs->opaque;
|
||||
|
||||
/* Only flush test file, the raw file is not important */
|
||||
return bdrv_aio_flush(s->test_file->bs, cb, opaque);
|
||||
return bdrv_co_flush(s->test_file->bs);
|
||||
}
|
||||
|
||||
static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
|
||||
@ -332,9 +323,9 @@ static BlockDriver bdrv_blkverify = {
|
||||
.bdrv_getlength = blkverify_getlength,
|
||||
.bdrv_refresh_filename = blkverify_refresh_filename,
|
||||
|
||||
.bdrv_aio_readv = blkverify_aio_readv,
|
||||
.bdrv_aio_writev = blkverify_aio_writev,
|
||||
.bdrv_aio_flush = blkverify_aio_flush,
|
||||
.bdrv_co_preadv = blkverify_co_preadv,
|
||||
.bdrv_co_pwritev = blkverify_co_pwritev,
|
||||
.bdrv_co_flush = blkverify_co_flush,
|
||||
|
||||
.is_filter = true,
|
||||
.bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
|
||||
|
@ -1253,7 +1253,7 @@ static int qemu_gluster_has_zero_init(BlockDriverState *bs)
|
||||
* If @start is in a trailing hole or beyond EOF, return -ENXIO.
|
||||
* If we can't find out, return a negative errno other than -ENXIO.
|
||||
*
|
||||
* (Shamefully copied from raw-posix.c, only miniscule adaptions.)
|
||||
* (Shamefully copied from file-posix.c, only miniscule adaptions.)
|
||||
*/
|
||||
static int find_allocation(BlockDriverState *bs, off_t start,
|
||||
off_t *data, off_t *hole)
|
||||
@ -1349,7 +1349,7 @@ exit:
|
||||
* 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
|
||||
* beyond the end of the disk image it will be clamped.
|
||||
*
|
||||
* (Based on raw_co_get_block_status() from raw-posix.c.)
|
||||
* (Based on raw_co_get_block_status() from file-posix.c.)
|
||||
*/
|
||||
static int64_t coroutine_fn qemu_gluster_co_get_block_status(
|
||||
BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
|
||||
|
410
block/quorum.c
410
block/quorum.c
@ -97,7 +97,7 @@ typedef struct QuorumAIOCB QuorumAIOCB;
|
||||
* $children_count QuorumChildRequest.
|
||||
*/
|
||||
typedef struct QuorumChildRequest {
|
||||
BlockAIOCB *aiocb;
|
||||
BlockDriverState *bs;
|
||||
QEMUIOVector qiov;
|
||||
uint8_t *buf;
|
||||
int ret;
|
||||
@ -110,11 +110,12 @@ typedef struct QuorumChildRequest {
|
||||
* used to do operations on each children and track overall progress.
|
||||
*/
|
||||
struct QuorumAIOCB {
|
||||
BlockAIOCB common;
|
||||
BlockDriverState *bs;
|
||||
Coroutine *co;
|
||||
|
||||
/* Request metadata */
|
||||
uint64_t sector_num;
|
||||
int nb_sectors;
|
||||
uint64_t offset;
|
||||
uint64_t bytes;
|
||||
|
||||
QEMUIOVector *qiov; /* calling IOV */
|
||||
|
||||
@ -133,32 +134,15 @@ struct QuorumAIOCB {
|
||||
int children_read; /* how many children have been read from */
|
||||
};
|
||||
|
||||
static bool quorum_vote(QuorumAIOCB *acb);
|
||||
|
||||
static void quorum_aio_cancel(BlockAIOCB *blockacb)
|
||||
{
|
||||
QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
|
||||
BDRVQuorumState *s = acb->common.bs->opaque;
|
||||
int i;
|
||||
|
||||
/* cancel all callbacks */
|
||||
for (i = 0; i < s->num_children; i++) {
|
||||
if (acb->qcrs[i].aiocb) {
|
||||
bdrv_aio_cancel_async(acb->qcrs[i].aiocb);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static AIOCBInfo quorum_aiocb_info = {
|
||||
.aiocb_size = sizeof(QuorumAIOCB),
|
||||
.cancel_async = quorum_aio_cancel,
|
||||
};
|
||||
typedef struct QuorumCo {
|
||||
QuorumAIOCB *acb;
|
||||
int idx;
|
||||
} QuorumCo;
|
||||
|
||||
static void quorum_aio_finalize(QuorumAIOCB *acb)
|
||||
{
|
||||
acb->common.cb(acb->common.opaque, acb->vote_ret);
|
||||
g_free(acb->qcrs);
|
||||
qemu_aio_unref(acb);
|
||||
g_free(acb);
|
||||
}
|
||||
|
||||
static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
|
||||
@ -171,30 +155,26 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
|
||||
return a->l == b->l;
|
||||
}
|
||||
|
||||
static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
|
||||
BlockDriverState *bs,
|
||||
static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
|
||||
QEMUIOVector *qiov,
|
||||
uint64_t sector_num,
|
||||
int nb_sectors,
|
||||
BlockCompletionFunc *cb,
|
||||
void *opaque)
|
||||
uint64_t offset,
|
||||
uint64_t bytes)
|
||||
{
|
||||
QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque);
|
||||
BDRVQuorumState *s = bs->opaque;
|
||||
QuorumAIOCB *acb = g_new(QuorumAIOCB, 1);
|
||||
int i;
|
||||
|
||||
acb->common.bs->opaque = s;
|
||||
acb->sector_num = sector_num;
|
||||
acb->nb_sectors = nb_sectors;
|
||||
acb->qiov = qiov;
|
||||
acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
|
||||
acb->count = 0;
|
||||
acb->success_count = 0;
|
||||
acb->rewrite_count = 0;
|
||||
acb->votes.compare = quorum_sha256_compare;
|
||||
QLIST_INIT(&acb->votes.vote_list);
|
||||
acb->is_read = false;
|
||||
acb->vote_ret = 0;
|
||||
*acb = (QuorumAIOCB) {
|
||||
.co = qemu_coroutine_self(),
|
||||
.bs = bs,
|
||||
.offset = offset,
|
||||
.bytes = bytes,
|
||||
.qiov = qiov,
|
||||
.votes.compare = quorum_sha256_compare,
|
||||
.votes.vote_list = QLIST_HEAD_INITIALIZER(acb.votes.vote_list),
|
||||
};
|
||||
|
||||
acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
|
||||
for (i = 0; i < s->num_children; i++) {
|
||||
acb->qcrs[i].buf = NULL;
|
||||
acb->qcrs[i].ret = 0;
|
||||
@ -204,30 +184,37 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
|
||||
return acb;
|
||||
}
|
||||
|
||||
static void quorum_report_bad(QuorumOpType type, uint64_t sector_num,
|
||||
int nb_sectors, char *node_name, int ret)
|
||||
static void quorum_report_bad(QuorumOpType type, uint64_t offset,
|
||||
uint64_t bytes, char *node_name, int ret)
|
||||
{
|
||||
const char *msg = NULL;
|
||||
int64_t start_sector = offset / BDRV_SECTOR_SIZE;
|
||||
int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
|
||||
|
||||
if (ret < 0) {
|
||||
msg = strerror(-ret);
|
||||
}
|
||||
|
||||
qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name,
|
||||
sector_num, nb_sectors, &error_abort);
|
||||
qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, start_sector,
|
||||
end_sector - start_sector, &error_abort);
|
||||
}
|
||||
|
||||
static void quorum_report_failure(QuorumAIOCB *acb)
|
||||
{
|
||||
const char *reference = bdrv_get_device_or_node_name(acb->common.bs);
|
||||
qapi_event_send_quorum_failure(reference, acb->sector_num,
|
||||
acb->nb_sectors, &error_abort);
|
||||
const char *reference = bdrv_get_device_or_node_name(acb->bs);
|
||||
int64_t start_sector = acb->offset / BDRV_SECTOR_SIZE;
|
||||
int64_t end_sector = DIV_ROUND_UP(acb->offset + acb->bytes,
|
||||
BDRV_SECTOR_SIZE);
|
||||
|
||||
qapi_event_send_quorum_failure(reference, start_sector,
|
||||
end_sector - start_sector, &error_abort);
|
||||
}
|
||||
|
||||
static int quorum_vote_error(QuorumAIOCB *acb);
|
||||
|
||||
static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
|
||||
{
|
||||
BDRVQuorumState *s = acb->common.bs->opaque;
|
||||
BDRVQuorumState *s = acb->bs->opaque;
|
||||
|
||||
if (acb->success_count < s->threshold) {
|
||||
acb->vote_ret = quorum_vote_error(acb);
|
||||
@ -238,22 +225,7 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
|
||||
return false;
|
||||
}
|
||||
|
||||
static void quorum_rewrite_aio_cb(void *opaque, int ret)
|
||||
{
|
||||
QuorumAIOCB *acb = opaque;
|
||||
|
||||
/* one less rewrite to do */
|
||||
acb->rewrite_count--;
|
||||
|
||||
/* wait until all rewrite callbacks have completed */
|
||||
if (acb->rewrite_count) {
|
||||
return;
|
||||
}
|
||||
|
||||
quorum_aio_finalize(acb);
|
||||
}
|
||||
|
||||
static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb);
|
||||
static int read_fifo_child(QuorumAIOCB *acb);
|
||||
|
||||
static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
|
||||
{
|
||||
@ -272,70 +244,7 @@ static void quorum_report_bad_acb(QuorumChildRequest *sacb, int ret)
|
||||
{
|
||||
QuorumAIOCB *acb = sacb->parent;
|
||||
QuorumOpType type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
|
||||
quorum_report_bad(type, acb->sector_num, acb->nb_sectors,
|
||||
sacb->aiocb->bs->node_name, ret);
|
||||
}
|
||||
|
||||
static void quorum_fifo_aio_cb(void *opaque, int ret)
|
||||
{
|
||||
QuorumChildRequest *sacb = opaque;
|
||||
QuorumAIOCB *acb = sacb->parent;
|
||||
BDRVQuorumState *s = acb->common.bs->opaque;
|
||||
|
||||
assert(acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO);
|
||||
|
||||
if (ret < 0) {
|
||||
quorum_report_bad_acb(sacb, ret);
|
||||
|
||||
/* We try to read next child in FIFO order if we fail to read */
|
||||
if (acb->children_read < s->num_children) {
|
||||
read_fifo_child(acb);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
acb->vote_ret = ret;
|
||||
|
||||
/* FIXME: rewrite failed children if acb->children_read > 1? */
|
||||
quorum_aio_finalize(acb);
|
||||
}
|
||||
|
||||
static void quorum_aio_cb(void *opaque, int ret)
|
||||
{
|
||||
QuorumChildRequest *sacb = opaque;
|
||||
QuorumAIOCB *acb = sacb->parent;
|
||||
BDRVQuorumState *s = acb->common.bs->opaque;
|
||||
bool rewrite = false;
|
||||
int i;
|
||||
|
||||
sacb->ret = ret;
|
||||
if (ret == 0) {
|
||||
acb->success_count++;
|
||||
} else {
|
||||
quorum_report_bad_acb(sacb, ret);
|
||||
}
|
||||
acb->count++;
|
||||
assert(acb->count <= s->num_children);
|
||||
assert(acb->success_count <= s->num_children);
|
||||
if (acb->count < s->num_children) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Do the vote on read */
|
||||
if (acb->is_read) {
|
||||
rewrite = quorum_vote(acb);
|
||||
for (i = 0; i < s->num_children; i++) {
|
||||
qemu_vfree(acb->qcrs[i].buf);
|
||||
qemu_iovec_destroy(&acb->qcrs[i].qiov);
|
||||
}
|
||||
} else {
|
||||
quorum_has_too_much_io_failed(acb);
|
||||
}
|
||||
|
||||
/* if no rewrite is done the code will finish right away */
|
||||
if (!rewrite) {
|
||||
quorum_aio_finalize(acb);
|
||||
}
|
||||
quorum_report_bad(type, acb->offset, acb->bytes, sacb->bs->node_name, ret);
|
||||
}
|
||||
|
||||
static void quorum_report_bad_versions(BDRVQuorumState *s,
|
||||
@ -350,14 +259,31 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
|
||||
continue;
|
||||
}
|
||||
QLIST_FOREACH(item, &version->items, next) {
|
||||
quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num,
|
||||
acb->nb_sectors,
|
||||
quorum_report_bad(QUORUM_OP_TYPE_READ, acb->offset, acb->bytes,
|
||||
s->children[item->index]->bs->node_name, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
|
||||
static void quorum_rewrite_entry(void *opaque)
|
||||
{
|
||||
QuorumCo *co = opaque;
|
||||
QuorumAIOCB *acb = co->acb;
|
||||
BDRVQuorumState *s = acb->bs->opaque;
|
||||
|
||||
/* Ignore any errors, it's just a correction attempt for already
|
||||
* corrupted data. */
|
||||
bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes,
|
||||
acb->qiov, 0);
|
||||
|
||||
/* Wake up the caller after the last rewrite */
|
||||
acb->rewrite_count--;
|
||||
if (!acb->rewrite_count) {
|
||||
qemu_coroutine_enter_if_inactive(acb->co);
|
||||
}
|
||||
}
|
||||
|
||||
static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb,
|
||||
QuorumVoteValue *value)
|
||||
{
|
||||
QuorumVoteVersion *version;
|
||||
@ -376,7 +302,7 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
|
||||
}
|
||||
}
|
||||
|
||||
/* quorum_rewrite_aio_cb will count down this to zero */
|
||||
/* quorum_rewrite_entry will count down this to zero */
|
||||
acb->rewrite_count = count;
|
||||
|
||||
/* now fire the correcting rewrites */
|
||||
@ -385,9 +311,14 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
|
||||
continue;
|
||||
}
|
||||
QLIST_FOREACH(item, &version->items, next) {
|
||||
bdrv_aio_writev(s->children[item->index], acb->sector_num,
|
||||
acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb,
|
||||
acb);
|
||||
Coroutine *co;
|
||||
QuorumCo data = {
|
||||
.acb = acb,
|
||||
.idx = item->index,
|
||||
};
|
||||
|
||||
co = qemu_coroutine_create(quorum_rewrite_entry, &data);
|
||||
qemu_coroutine_enter(co);
|
||||
}
|
||||
}
|
||||
|
||||
@ -507,8 +438,8 @@ static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, fmt);
|
||||
fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
|
||||
acb->sector_num, acb->nb_sectors);
|
||||
fprintf(stderr, "quorum: offset=%" PRIu64 " bytes=%" PRIu64 " ",
|
||||
acb->offset, acb->bytes);
|
||||
vfprintf(stderr, fmt, ap);
|
||||
fprintf(stderr, "\n");
|
||||
va_end(ap);
|
||||
@ -519,16 +450,15 @@ static bool quorum_compare(QuorumAIOCB *acb,
|
||||
QEMUIOVector *a,
|
||||
QEMUIOVector *b)
|
||||
{
|
||||
BDRVQuorumState *s = acb->common.bs->opaque;
|
||||
BDRVQuorumState *s = acb->bs->opaque;
|
||||
ssize_t offset;
|
||||
|
||||
/* This driver will replace blkverify in this particular case */
|
||||
if (s->is_blkverify) {
|
||||
offset = qemu_iovec_compare(a, b);
|
||||
if (offset != -1) {
|
||||
quorum_err(acb, "contents mismatch in sector %" PRId64,
|
||||
acb->sector_num +
|
||||
(uint64_t)(offset / BDRV_SECTOR_SIZE));
|
||||
quorum_err(acb, "contents mismatch at offset %" PRIu64,
|
||||
acb->offset + offset);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -539,7 +469,7 @@ static bool quorum_compare(QuorumAIOCB *acb,
|
||||
/* Do a vote to get the error code */
|
||||
static int quorum_vote_error(QuorumAIOCB *acb)
|
||||
{
|
||||
BDRVQuorumState *s = acb->common.bs->opaque;
|
||||
BDRVQuorumState *s = acb->bs->opaque;
|
||||
QuorumVoteVersion *winner = NULL;
|
||||
QuorumVotes error_votes;
|
||||
QuorumVoteValue result_value;
|
||||
@ -568,17 +498,16 @@ static int quorum_vote_error(QuorumAIOCB *acb)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool quorum_vote(QuorumAIOCB *acb)
|
||||
static void quorum_vote(QuorumAIOCB *acb)
|
||||
{
|
||||
bool quorum = true;
|
||||
bool rewrite = false;
|
||||
int i, j, ret;
|
||||
QuorumVoteValue hash;
|
||||
BDRVQuorumState *s = acb->common.bs->opaque;
|
||||
BDRVQuorumState *s = acb->bs->opaque;
|
||||
QuorumVoteVersion *winner;
|
||||
|
||||
if (quorum_has_too_much_io_failed(acb)) {
|
||||
return false;
|
||||
return;
|
||||
}
|
||||
|
||||
/* get the index of the first successful read */
|
||||
@ -606,7 +535,7 @@ static bool quorum_vote(QuorumAIOCB *acb)
|
||||
/* Every successful read agrees */
|
||||
if (quorum) {
|
||||
quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
|
||||
return false;
|
||||
return;
|
||||
}
|
||||
|
||||
/* compute hashes for each successful read, also store indexes */
|
||||
@ -641,19 +570,46 @@ static bool quorum_vote(QuorumAIOCB *acb)
|
||||
|
||||
/* corruption correction is enabled */
|
||||
if (s->rewrite_corrupted) {
|
||||
rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
|
||||
quorum_rewrite_bad_versions(acb, &winner->value);
|
||||
}
|
||||
|
||||
free_exit:
|
||||
/* free lists */
|
||||
quorum_free_vote_list(&acb->votes);
|
||||
return rewrite;
|
||||
}
|
||||
|
||||
static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
|
||||
static void read_quorum_children_entry(void *opaque)
|
||||
{
|
||||
BDRVQuorumState *s = acb->common.bs->opaque;
|
||||
int i;
|
||||
QuorumCo *co = opaque;
|
||||
QuorumAIOCB *acb = co->acb;
|
||||
BDRVQuorumState *s = acb->bs->opaque;
|
||||
int i = co->idx;
|
||||
QuorumChildRequest *sacb = &acb->qcrs[i];
|
||||
|
||||
sacb->bs = s->children[i]->bs;
|
||||
sacb->ret = bdrv_co_preadv(s->children[i], acb->offset, acb->bytes,
|
||||
&acb->qcrs[i].qiov, 0);
|
||||
|
||||
if (sacb->ret == 0) {
|
||||
acb->success_count++;
|
||||
} else {
|
||||
quorum_report_bad_acb(sacb, sacb->ret);
|
||||
}
|
||||
|
||||
acb->count++;
|
||||
assert(acb->count <= s->num_children);
|
||||
assert(acb->success_count <= s->num_children);
|
||||
|
||||
/* Wake up the caller after the last read */
|
||||
if (acb->count == s->num_children) {
|
||||
qemu_coroutine_enter_if_inactive(acb->co);
|
||||
}
|
||||
}
|
||||
|
||||
static int read_quorum_children(QuorumAIOCB *acb)
|
||||
{
|
||||
BDRVQuorumState *s = acb->bs->opaque;
|
||||
int i, ret;
|
||||
|
||||
acb->children_read = s->num_children;
|
||||
for (i = 0; i < s->num_children; i++) {
|
||||
@ -663,65 +619,131 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
|
||||
}
|
||||
|
||||
for (i = 0; i < s->num_children; i++) {
|
||||
acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i], acb->sector_num,
|
||||
&acb->qcrs[i].qiov, acb->nb_sectors,
|
||||
quorum_aio_cb, &acb->qcrs[i]);
|
||||
Coroutine *co;
|
||||
QuorumCo data = {
|
||||
.acb = acb,
|
||||
.idx = i,
|
||||
};
|
||||
|
||||
co = qemu_coroutine_create(read_quorum_children_entry, &data);
|
||||
qemu_coroutine_enter(co);
|
||||
}
|
||||
|
||||
return &acb->common;
|
||||
while (acb->count < s->num_children) {
|
||||
qemu_coroutine_yield();
|
||||
}
|
||||
|
||||
/* Do the vote on read */
|
||||
quorum_vote(acb);
|
||||
for (i = 0; i < s->num_children; i++) {
|
||||
qemu_vfree(acb->qcrs[i].buf);
|
||||
qemu_iovec_destroy(&acb->qcrs[i].qiov);
|
||||
}
|
||||
|
||||
while (acb->rewrite_count) {
|
||||
qemu_coroutine_yield();
|
||||
}
|
||||
|
||||
ret = acb->vote_ret;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
|
||||
static int read_fifo_child(QuorumAIOCB *acb)
|
||||
{
|
||||
BDRVQuorumState *s = acb->common.bs->opaque;
|
||||
int n = acb->children_read++;
|
||||
BDRVQuorumState *s = acb->bs->opaque;
|
||||
int n, ret;
|
||||
|
||||
acb->qcrs[n].aiocb = bdrv_aio_readv(s->children[n], acb->sector_num,
|
||||
acb->qiov, acb->nb_sectors,
|
||||
quorum_fifo_aio_cb, &acb->qcrs[n]);
|
||||
/* We try to read the next child in FIFO order if we failed to read */
|
||||
do {
|
||||
n = acb->children_read++;
|
||||
acb->qcrs[n].bs = s->children[n]->bs;
|
||||
ret = bdrv_co_preadv(s->children[n], acb->offset, acb->bytes,
|
||||
acb->qiov, 0);
|
||||
if (ret < 0) {
|
||||
quorum_report_bad_acb(&acb->qcrs[n], ret);
|
||||
}
|
||||
} while (ret < 0 && acb->children_read < s->num_children);
|
||||
|
||||
return &acb->common;
|
||||
/* FIXME: rewrite failed children if acb->children_read > 1? */
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static BlockAIOCB *quorum_aio_readv(BlockDriverState *bs,
|
||||
int64_t sector_num,
|
||||
QEMUIOVector *qiov,
|
||||
int nb_sectors,
|
||||
BlockCompletionFunc *cb,
|
||||
void *opaque)
|
||||
static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset,
|
||||
uint64_t bytes, QEMUIOVector *qiov, int flags)
|
||||
{
|
||||
BDRVQuorumState *s = bs->opaque;
|
||||
QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
|
||||
nb_sectors, cb, opaque);
|
||||
QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
|
||||
int ret;
|
||||
|
||||
acb->is_read = true;
|
||||
acb->children_read = 0;
|
||||
|
||||
if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
|
||||
return read_quorum_children(acb);
|
||||
ret = read_quorum_children(acb);
|
||||
} else {
|
||||
ret = read_fifo_child(acb);
|
||||
}
|
||||
quorum_aio_finalize(acb);
|
||||
|
||||
return read_fifo_child(acb);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs,
|
||||
int64_t sector_num,
|
||||
QEMUIOVector *qiov,
|
||||
int nb_sectors,
|
||||
BlockCompletionFunc *cb,
|
||||
void *opaque)
|
||||
static void write_quorum_entry(void *opaque)
|
||||
{
|
||||
QuorumCo *co = opaque;
|
||||
QuorumAIOCB *acb = co->acb;
|
||||
BDRVQuorumState *s = acb->bs->opaque;
|
||||
int i = co->idx;
|
||||
QuorumChildRequest *sacb = &acb->qcrs[i];
|
||||
|
||||
sacb->bs = s->children[i]->bs;
|
||||
sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
|
||||
acb->qiov, 0);
|
||||
if (sacb->ret == 0) {
|
||||
acb->success_count++;
|
||||
} else {
|
||||
quorum_report_bad_acb(sacb, sacb->ret);
|
||||
}
|
||||
acb->count++;
|
||||
assert(acb->count <= s->num_children);
|
||||
assert(acb->success_count <= s->num_children);
|
||||
|
||||
/* Wake up the caller after the last write */
|
||||
if (acb->count == s->num_children) {
|
||||
qemu_coroutine_enter_if_inactive(acb->co);
|
||||
}
|
||||
}
|
||||
|
||||
static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset,
|
||||
uint64_t bytes, QEMUIOVector *qiov, int flags)
|
||||
{
|
||||
BDRVQuorumState *s = bs->opaque;
|
||||
QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors,
|
||||
cb, opaque);
|
||||
int i;
|
||||
QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
|
||||
int i, ret;
|
||||
|
||||
for (i = 0; i < s->num_children; i++) {
|
||||
acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i], sector_num,
|
||||
qiov, nb_sectors, &quorum_aio_cb,
|
||||
&acb->qcrs[i]);
|
||||
Coroutine *co;
|
||||
QuorumCo data = {
|
||||
.acb = acb,
|
||||
.idx = i,
|
||||
};
|
||||
|
||||
co = qemu_coroutine_create(write_quorum_entry, &data);
|
||||
qemu_coroutine_enter(co);
|
||||
}
|
||||
|
||||
return &acb->common;
|
||||
while (acb->count < s->num_children) {
|
||||
qemu_coroutine_yield();
|
||||
}
|
||||
|
||||
quorum_has_too_much_io_failed(acb);
|
||||
|
||||
ret = acb->vote_ret;
|
||||
quorum_aio_finalize(acb);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int64_t quorum_getlength(BlockDriverState *bs)
|
||||
@ -765,7 +787,7 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
|
||||
result = bdrv_co_flush(s->children[i]->bs);
|
||||
if (result) {
|
||||
quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0,
|
||||
bdrv_nb_sectors(s->children[i]->bs),
|
||||
bdrv_getlength(s->children[i]->bs),
|
||||
s->children[i]->bs->node_name, result);
|
||||
result_value.l = result;
|
||||
quorum_count_vote(&error_votes, &result_value, i);
|
||||
@ -1098,8 +1120,8 @@ static BlockDriver bdrv_quorum = {
|
||||
|
||||
.bdrv_getlength = quorum_getlength,
|
||||
|
||||
.bdrv_aio_readv = quorum_aio_readv,
|
||||
.bdrv_aio_writev = quorum_aio_writev,
|
||||
.bdrv_co_preadv = quorum_co_preadv,
|
||||
.bdrv_co_pwritev = quorum_co_pwritev,
|
||||
|
||||
.bdrv_add_child = quorum_add_child,
|
||||
.bdrv_del_child = quorum_del_child,
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* BlockDriver implementation for "raw"
|
||||
/* BlockDriver implementation for "raw" format driver
|
||||
*
|
||||
* Copyright (C) 2010-2016 Red Hat, Inc.
|
||||
* Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
|
@ -53,8 +53,8 @@ qmp_block_job_resume(void *job) "job %p"
|
||||
qmp_block_job_complete(void *job) "job %p"
|
||||
qmp_block_stream(void *bs, void *job) "bs %p job %p"
|
||||
|
||||
# block/raw-win32.c
|
||||
# block/raw-posix.c
|
||||
# block/file-win32.c
|
||||
# block/file-posix.c
|
||||
paio_submit_co(int64_t offset, int count, int type) "offset %"PRId64" count %d type %d"
|
||||
paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d"
|
||||
|
||||
|
2
configure
vendored
2
configure
vendored
@ -2750,7 +2750,7 @@ if compile_prog "" "" ; then
|
||||
fi
|
||||
|
||||
##########################################
|
||||
# xfsctl() probe, used for raw-posix
|
||||
# xfsctl() probe, used for file-posix.c
|
||||
if test "$xfs" != "no" ; then
|
||||
cat > $TMPC << EOF
|
||||
#include <stddef.h> /* NULL */
|
||||
|
@ -184,7 +184,7 @@ struct BlockDriver {
|
||||
|
||||
/*
|
||||
* Flushes all data that was already written to the OS all the way down to
|
||||
* the disk (for example raw-posix calls fsync()).
|
||||
* the disk (for example file-posix.c calls fsync()).
|
||||
*/
|
||||
int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
|
||||
|
||||
|
@ -70,6 +70,12 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque);
|
||||
*/
|
||||
void qemu_coroutine_enter(Coroutine *coroutine);
|
||||
|
||||
/**
|
||||
* Transfer control to a coroutine if it's not active (i.e. part of the call
|
||||
* stack of the running coroutine). Otherwise, do nothing.
|
||||
*/
|
||||
void qemu_coroutine_enter_if_inactive(Coroutine *co);
|
||||
|
||||
/**
|
||||
* Transfer control back to a coroutine's caller
|
||||
*
|
||||
|
17
qemu-img.c
17
qemu-img.c
@ -3559,20 +3559,23 @@ static void bench_cb(void *opaque, int ret)
|
||||
}
|
||||
|
||||
while (b->n > b->in_flight && b->in_flight < b->nrreq) {
|
||||
int64_t offset = b->offset;
|
||||
/* blk_aio_* might look for completed I/Os and kick bench_cb
|
||||
* again, so make sure this operation is counted by in_flight
|
||||
* and b->offset is ready for the next submission.
|
||||
*/
|
||||
b->in_flight++;
|
||||
b->offset += b->step;
|
||||
b->offset %= b->image_size;
|
||||
if (b->write) {
|
||||
acb = blk_aio_pwritev(b->blk, b->offset, b->qiov, 0,
|
||||
bench_cb, b);
|
||||
acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
|
||||
} else {
|
||||
acb = blk_aio_preadv(b->blk, b->offset, b->qiov, 0,
|
||||
bench_cb, b);
|
||||
acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
|
||||
}
|
||||
if (!acb) {
|
||||
error_report("Failed to issue request");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
b->in_flight++;
|
||||
b->offset += b->step;
|
||||
b->offset %= b->image_size;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -12,7 +12,7 @@ read 512/512 bytes at offset 229376
|
||||
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
wrote 512/512 bytes at offset 0
|
||||
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
|
||||
blkverify: read offset=0 bytes=512 contents mismatch at offset 0
|
||||
|
||||
=== Testing blkverify through file blockref ===
|
||||
|
||||
@ -26,7 +26,7 @@ read 512/512 bytes at offset 229376
|
||||
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
wrote 512/512 bytes at offset 0
|
||||
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
|
||||
blkverify: read offset=0 bytes=512 contents mismatch at offset 0
|
||||
|
||||
=== Testing blkdebug through filename ===
|
||||
|
||||
@ -56,7 +56,7 @@ QMP_VERSION
|
||||
{"return": {}}
|
||||
{"return": {}}
|
||||
{"return": {}}
|
||||
blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
|
||||
blkverify: read offset=0 bytes=512 contents mismatch at offset 0
|
||||
|
||||
|
||||
=== Testing blkverify on existing raw block device ===
|
||||
@ -66,7 +66,7 @@ QMP_VERSION
|
||||
{"return": {}}
|
||||
{"return": {}}
|
||||
{"return": {}}
|
||||
blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
|
||||
blkverify: read offset=0 bytes=512 contents mismatch at offset 0
|
||||
|
||||
|
||||
=== Testing blkdebug's set-state through QMP ===
|
||||
|
@ -131,6 +131,13 @@ void qemu_coroutine_enter(Coroutine *co)
|
||||
}
|
||||
}
|
||||
|
||||
void qemu_coroutine_enter_if_inactive(Coroutine *co)
|
||||
{
|
||||
if (!qemu_coroutine_entered(co)) {
|
||||
qemu_coroutine_enter(co);
|
||||
}
|
||||
}
|
||||
|
||||
void coroutine_fn qemu_coroutine_yield(void)
|
||||
{
|
||||
Coroutine *self = qemu_coroutine_self();
|
||||
|
Loading…
Reference in New Issue
Block a user