From d8716b41b9bd2c7ce6fe36ab388de534ce81a69a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 5 Oct 2011 09:17:31 +0200 Subject: [PATCH 01/19] sheepdog: add coroutine_fn markers This makes the following patch easier to review. Cc: MORITA Kazutaka Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/sheepdog.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index ae857e294c..9f8060960f 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -396,7 +396,7 @@ static inline int free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req) return !QLIST_EMPTY(&acb->aioreq_head); } -static void sd_finish_aiocb(SheepdogAIOCB *acb) +static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb) { if (!acb->canceled) { qemu_coroutine_enter(acb->coroutine, NULL); @@ -735,7 +735,7 @@ out: return ret; } -static int add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, +static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, struct iovec *iov, int niov, int create, enum AIOCBState aiocb_type); @@ -743,7 +743,7 @@ static int add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, * This function searchs pending requests to the object `oid', and * sends them. */ -static void send_pending_req(BDRVSheepdogState *s, uint64_t oid, uint32_t id) +static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid, uint32_t id) { AIOReq *aio_req, *next; SheepdogAIOCB *acb; @@ -777,7 +777,7 @@ static void send_pending_req(BDRVSheepdogState *s, uint64_t oid, uint32_t id) * This function is registered as a fd handler, and called from the * main loop when s->fd is ready for reading responses. */ -static void aio_read_response(void *opaque) +static void coroutine_fn aio_read_response(void *opaque) { SheepdogObjRsp rsp; BDRVSheepdogState *s = opaque; @@ -1064,7 +1064,7 @@ out: return ret; } -static int add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, +static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, struct iovec *iov, int niov, int create, enum AIOCBState aiocb_type) { @@ -1517,7 +1517,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset) * update metadata, this sends a write request to the vdi object. * Otherwise, this switches back to sd_co_readv/writev. */ -static void sd_write_done(SheepdogAIOCB *acb) +static void coroutine_fn sd_write_done(SheepdogAIOCB *acb) { int ret; BDRVSheepdogState *s = acb->common.bs->opaque; @@ -1615,7 +1615,7 @@ out: * Returns 1 when we need to wait a response, 0 when there is no sent * request and -errno in error cases. */ -static int sd_co_rw_vector(void *p) +static int coroutine_fn sd_co_rw_vector(void *p) { SheepdogAIOCB *acb = p; int ret = 0; From 154b9a0cbd67be53afcb45ae84a240add8c6cb67 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 5 Oct 2011 09:17:32 +0200 Subject: [PATCH 02/19] add socket_set_block Cc: MORITA Kazutaka Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- oslib-posix.c | 7 +++++++ oslib-win32.c | 6 ++++++ qemu_socket.h | 1 + 3 files changed, 14 insertions(+) diff --git a/oslib-posix.c b/oslib-posix.c index a304fb0f53..dbc8ee8960 100644 --- a/oslib-posix.c +++ b/oslib-posix.c @@ -103,6 +103,13 @@ void qemu_vfree(void *ptr) free(ptr); } +void socket_set_block(int fd) +{ + int f; + f = fcntl(fd, F_GETFL); + fcntl(fd, F_SETFL, f & ~O_NONBLOCK); +} + void socket_set_nonblock(int fd) { int f; diff --git a/oslib-win32.c b/oslib-win32.c index 5f0759ffc4..5e3de7dc8a 100644 --- a/oslib-win32.c +++ b/oslib-win32.c @@ -73,6 +73,12 @@ void qemu_vfree(void *ptr) VirtualFree(ptr, 0, MEM_RELEASE); } +void socket_set_block(int fd) +{ + unsigned long opt = 0; + ioctlsocket(fd, FIONBIO, &opt); +} + void socket_set_nonblock(int fd) { unsigned long opt = 1; diff --git a/qemu_socket.h b/qemu_socket.h index 180e4dbd9b..9e32fac651 100644 --- a/qemu_socket.h +++ b/qemu_socket.h @@ -35,6 +35,7 @@ int inet_aton(const char *cp, struct in_addr *ia); /* misc helpers */ int qemu_socket(int domain, int type, int protocol); int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen); +void socket_set_block(int fd); void socket_set_nonblock(int fd); int send_all(int fd, const void *buf, int len1); From 35246a6825121f3881b8540beb731d582aa6d697 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 14 Oct 2011 10:41:29 +0200 Subject: [PATCH 03/19] block: rename bdrv_co_rw_bh Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 9873b57edd..7184a0f991 100644 --- a/block.c +++ b/block.c @@ -2735,7 +2735,7 @@ static AIOPool bdrv_em_co_aio_pool = { .cancel = bdrv_aio_co_cancel_em, }; -static void bdrv_co_rw_bh(void *opaque) +static void bdrv_co_em_bh(void *opaque) { BlockDriverAIOCBCoroutine *acb = opaque; @@ -2758,7 +2758,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque) acb->req.nb_sectors, acb->req.qiov); } - acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb); + acb->bh = qemu_bh_new(bdrv_co_em_bh, acb); qemu_bh_schedule(acb->bh); } From b1b1dad31f3a092e046b09795f4476705c4e564e Mon Sep 17 00:00:00 2001 From: Alex Jia Date: Wed, 28 Sep 2011 14:57:01 +0800 Subject: [PATCH 04/19] fix memory leak in aio_write_f Haven't released memory of 'ctx' before return. Signed-off-by: Alex Jia Signed-off-by: Kevin Wolf --- qemu-io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/qemu-io.c b/qemu-io.c index e91af3747b..c45a4138b2 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -1248,6 +1248,7 @@ static int aio_write_f(int argc, char **argv) case 'P': pattern = parse_pattern(optarg); if (pattern < 0) { + free(ctx); return 0; } break; From 5cce43bb288ea22ef1c3faa77f2c5e8aa04b6236 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 21 Oct 2011 12:16:44 +0200 Subject: [PATCH 05/19] xen_disk: Always set feature-barrier = 1 The synchronous .bdrv_flush callback doesn't exist any more and a device really shouldn't poke into the block layer internals anyway. All drivers are supposed to have a correctly working bdrv_flush, so let's just hard-code this. Signed-off-by: Kevin Wolf --- hw/xen_disk.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hw/xen_disk.c b/hw/xen_disk.c index 8a9fac499b..286bbac54a 100644 --- a/hw/xen_disk.c +++ b/hw/xen_disk.c @@ -620,7 +620,7 @@ static void blk_alloc(struct XenDevice *xendev) static int blk_init(struct XenDevice *xendev) { struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); - int index, qflags, have_barriers, info = 0; + int index, qflags, info = 0; /* read xenstore entries */ if (blkdev->params == NULL) { @@ -706,7 +706,6 @@ static int blk_init(struct XenDevice *xendev) blkdev->bs->drv ? blkdev->bs->drv->format_name : "-"); blkdev->file_size = 0; } - have_barriers = blkdev->bs->drv && blkdev->bs->drv->bdrv_flush ? 1 : 0; xen_be_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\"," " size %" PRId64 " (%" PRId64 " MB)\n", @@ -714,7 +713,7 @@ static int blk_init(struct XenDevice *xendev) blkdev->file_size, blkdev->file_size >> 20); /* fill info */ - xenstore_write_be_int(&blkdev->xendev, "feature-barrier", have_barriers); + xenstore_write_be_int(&blkdev->xendev, "feature-barrier", 1); xenstore_write_be_int(&blkdev->xendev, "info", info); xenstore_write_be_int(&blkdev->xendev, "sector-size", blkdev->file_blk); xenstore_write_be_int(&blkdev->xendev, "sectors", From 07f076157475e1a8ea454e304e0ea5496d05b539 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 17 Oct 2011 12:32:12 +0200 Subject: [PATCH 06/19] block: unify flush implementations Add coroutine support for flush and apply the same emulation that we already do for read/write. bdrv_aio_flush is simplified to always go through a coroutine. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block.c | 166 ++++++++++++++++++++++++---------------------------- block.h | 1 + block_int.h | 1 + 3 files changed, 77 insertions(+), 91 deletions(-) diff --git a/block.c b/block.c index 7184a0f991..7b8b14d971 100644 --- a/block.c +++ b/block.c @@ -53,17 +53,12 @@ static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque); -static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs, - BlockDriverCompletionFunc *cb, void *opaque); -static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs, - BlockDriverCompletionFunc *cb, void *opaque); static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov); static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov); -static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs); static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, @@ -203,9 +198,6 @@ void bdrv_register(BlockDriver *bdrv) } } - if (!bdrv->bdrv_aio_flush) - bdrv->bdrv_aio_flush = bdrv_aio_flush_em; - QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); } @@ -1027,11 +1019,6 @@ static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num, nb_sectors * BDRV_SECTOR_SIZE); } -static inline bool bdrv_has_async_flush(BlockDriver *drv) -{ - return drv->bdrv_aio_flush != bdrv_aio_flush_em; -} - typedef struct RwCo { BlockDriverState *bs; int64_t sector_num; @@ -1759,33 +1746,6 @@ const char *bdrv_get_device_name(BlockDriverState *bs) return bs->device_name; } -int bdrv_flush(BlockDriverState *bs) -{ - if (bs->open_flags & BDRV_O_NO_FLUSH) { - return 0; - } - - if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) { - return bdrv_co_flush_em(bs); - } - - if (bs->drv && bs->drv->bdrv_flush) { - return bs->drv->bdrv_flush(bs); - } - - /* - * Some block drivers always operate in either writethrough or unsafe mode - * and don't support bdrv_flush therefore. Usually qemu doesn't know how - * the server works (because the behaviour is hardcoded or depends on - * server-side configuration), so we can't ensure that everything is safe - * on disk. Returning an error doesn't work because that would break guests - * even if the server operates in writethrough mode. - * - * Let's hope the user knows what he's doing. - */ - return 0; -} - void bdrv_flush_all(void) { BlockDriverState *bs; @@ -2610,22 +2570,6 @@ fail: return -1; } -BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, - BlockDriverCompletionFunc *cb, void *opaque) -{ - BlockDriver *drv = bs->drv; - - trace_bdrv_aio_flush(bs, opaque); - - if (bs->open_flags & BDRV_O_NO_FLUSH) { - return bdrv_aio_noop_em(bs, cb, opaque); - } - - if (!drv) - return NULL; - return drv->bdrv_aio_flush(bs, cb, opaque); -} - void bdrv_aio_cancel(BlockDriverAIOCB *acb) { acb->pool->cancel(acb); @@ -2785,41 +2729,28 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, return &acb->common; } -static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs, - BlockDriverCompletionFunc *cb, void *opaque) +static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque) { - BlockDriverAIOCBSync *acb; + BlockDriverAIOCBCoroutine *acb = opaque; + BlockDriverState *bs = acb->common.bs; - acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque); - acb->is_write = 1; /* don't bounce in the completion hadler */ - acb->qiov = NULL; - acb->bounce = NULL; - acb->ret = 0; - - if (!acb->bh) - acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); - - bdrv_flush(bs); + acb->req.error = bdrv_co_flush(bs); + acb->bh = qemu_bh_new(bdrv_co_em_bh, acb); qemu_bh_schedule(acb->bh); - return &acb->common; } -static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs, +BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { - BlockDriverAIOCBSync *acb; + trace_bdrv_aio_flush(bs, opaque); - acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque); - acb->is_write = 1; /* don't bounce in the completion handler */ - acb->qiov = NULL; - acb->bounce = NULL; - acb->ret = 0; + Coroutine *co; + BlockDriverAIOCBCoroutine *acb; - if (!acb->bh) { - acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); - } + acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque); + co = qemu_coroutine_create(bdrv_aio_flush_co_entry); + qemu_coroutine_enter(co, acb); - qemu_bh_schedule(acb->bh); return &acb->common; } @@ -2916,19 +2847,72 @@ static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true); } -static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs) +static void coroutine_fn bdrv_flush_co_entry(void *opaque) { - CoroutineIOCompletion co = { - .coroutine = qemu_coroutine_self(), - }; - BlockDriverAIOCB *acb; + RwCo *rwco = opaque; - acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co); - if (!acb) { - return -EIO; + rwco->ret = bdrv_co_flush(rwco->bs); +} + +int coroutine_fn bdrv_co_flush(BlockDriverState *bs) +{ + if (bs->open_flags & BDRV_O_NO_FLUSH) { + return 0; + } else if (!bs->drv) { + return 0; + } else if (bs->drv->bdrv_co_flush) { + return bs->drv->bdrv_co_flush(bs); + } else if (bs->drv->bdrv_aio_flush) { + BlockDriverAIOCB *acb; + CoroutineIOCompletion co = { + .coroutine = qemu_coroutine_self(), + }; + + acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co); + if (acb == NULL) { + return -EIO; + } else { + qemu_coroutine_yield(); + return co.ret; + } + } else if (bs->drv->bdrv_flush) { + return bs->drv->bdrv_flush(bs); + } else { + /* + * Some block drivers always operate in either writethrough or unsafe + * mode and don't support bdrv_flush therefore. Usually qemu doesn't + * know how the server works (because the behaviour is hardcoded or + * depends on server-side configuration), so we can't ensure that + * everything is safe on disk. Returning an error doesn't work because + * that would break guests even if the server operates in writethrough + * mode. + * + * Let's hope the user knows what he's doing. + */ + return 0; } - qemu_coroutine_yield(); - return co.ret; +} + +int bdrv_flush(BlockDriverState *bs) +{ + Coroutine *co; + RwCo rwco = { + .bs = bs, + .ret = NOT_DONE, + }; + + if (qemu_in_coroutine()) { + /* Fast-path if already in coroutine context */ + bdrv_flush_co_entry(&rwco); + } else { + co = qemu_coroutine_create(bdrv_flush_co_entry); + qemu_coroutine_enter(co, &rwco); + while (rwco.ret == NOT_DONE) { + qemu_aio_wait(); + } + } + + return rwco.ret; } /**************************************************************/ diff --git a/block.h b/block.h index e77988e49c..65c51663fa 100644 --- a/block.h +++ b/block.h @@ -191,6 +191,7 @@ BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, /* Ensure contents are flushed to disk. */ int bdrv_flush(BlockDriverState *bs); +int coroutine_fn bdrv_co_flush(BlockDriverState *bs); void bdrv_flush_all(void); void bdrv_close_all(void); diff --git a/block_int.h b/block_int.h index f2f4f2db38..9cb536d392 100644 --- a/block_int.h +++ b/block_int.h @@ -83,6 +83,7 @@ struct BlockDriver { int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); + int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs); int (*bdrv_aio_multiwrite)(BlockDriverState *bs, BlockRequest *reqs, int num_reqs); From 6f6dc6565e2b65ec8c0cf47622b4f5f1468d194e Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 17 Oct 2011 12:32:13 +0200 Subject: [PATCH 07/19] block: drop redundant bdrv_flush implementation Block drivers now only need to provide either of .bdrv_co_flush, .bdrv_aio_flush() or for legacy drivers .bdrv_flush(). Remove the redundant .bdrv_flush() implementations. [Paolo Bonzini: change raw driver to bdrv_co_flush] Signed-off-by: Stefan Hajnoczi Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/blkdebug.c | 6 ------ block/blkverify.c | 9 --------- block/qcow.c | 6 ------ block/qcow2.c | 19 ------------------- block/qed.c | 6 ------ block/raw-posix.c | 18 ------------------ block/raw.c | 13 +++---------- 7 files changed, 3 insertions(+), 74 deletions(-) diff --git a/block/blkdebug.c b/block/blkdebug.c index b3c5d42cef..9b885359e4 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -397,11 +397,6 @@ static void blkdebug_close(BlockDriverState *bs) } } -static int blkdebug_flush(BlockDriverState *bs) -{ - return bdrv_flush(bs->file); -} - static BlockDriverAIOCB *blkdebug_aio_flush(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { @@ -454,7 +449,6 @@ static BlockDriver bdrv_blkdebug = { .bdrv_file_open = blkdebug_open, .bdrv_close = blkdebug_close, - .bdrv_flush = blkdebug_flush, .bdrv_aio_readv = blkdebug_aio_readv, .bdrv_aio_writev = blkdebug_aio_writev, diff --git a/block/blkverify.c b/block/blkverify.c index c7522b4093..483f3b3cfe 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -116,14 +116,6 @@ static void blkverify_close(BlockDriverState *bs) s->test_file = NULL; } -static int blkverify_flush(BlockDriverState *bs) -{ - BDRVBlkverifyState *s = bs->opaque; - - /* Only flush test file, the raw file is not important */ - return bdrv_flush(s->test_file); -} - static int64_t blkverify_getlength(BlockDriverState *bs) { BDRVBlkverifyState *s = bs->opaque; @@ -368,7 +360,6 @@ static BlockDriver bdrv_blkverify = { .bdrv_file_open = blkverify_open, .bdrv_close = blkverify_close, - .bdrv_flush = blkverify_flush, .bdrv_aio_readv = blkverify_aio_readv, .bdrv_aio_writev = blkverify_aio_writev, diff --git a/block/qcow.c b/block/qcow.c index eba5a04c44..f93e3eb0d9 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -781,11 +781,6 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num, return 0; } -static int qcow_flush(BlockDriverState *bs) -{ - return bdrv_flush(bs->file); -} - static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { @@ -826,7 +821,6 @@ static BlockDriver bdrv_qcow = { .bdrv_open = qcow_open, .bdrv_close = qcow_close, .bdrv_create = qcow_create, - .bdrv_flush = qcow_flush, .bdrv_is_allocated = qcow_is_allocated, .bdrv_set_key = qcow_set_key, .bdrv_make_empty = qcow_make_empty, diff --git a/block/qcow2.c b/block/qcow2.c index 510ff6897f..4dc980c2c5 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1092,24 +1092,6 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, return 0; } -static int qcow2_flush(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - int ret; - - ret = qcow2_cache_flush(bs, s->l2_table_cache); - if (ret < 0) { - return ret; - } - - ret = qcow2_cache_flush(bs, s->refcount_block_cache); - if (ret < 0) { - return ret; - } - - return bdrv_flush(bs->file); -} - static BlockDriverAIOCB *qcow2_aio_flush(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) @@ -1242,7 +1224,6 @@ static BlockDriver bdrv_qcow2 = { .bdrv_open = qcow2_open, .bdrv_close = qcow2_close, .bdrv_create = qcow2_create, - .bdrv_flush = qcow2_flush, .bdrv_is_allocated = qcow2_is_allocated, .bdrv_set_key = qcow2_set_key, .bdrv_make_empty = qcow2_make_empty, diff --git a/block/qed.c b/block/qed.c index e87dc4decf..2e06992784 100644 --- a/block/qed.c +++ b/block/qed.c @@ -533,11 +533,6 @@ static void bdrv_qed_close(BlockDriverState *bs) qemu_vfree(s->l1_table); } -static int bdrv_qed_flush(BlockDriverState *bs) -{ - return bdrv_flush(bs->file); -} - static int qed_create(const char *filename, uint32_t cluster_size, uint64_t image_size, uint32_t table_size, const char *backing_file, const char *backing_fmt) @@ -1479,7 +1474,6 @@ static BlockDriver bdrv_qed = { .bdrv_open = bdrv_qed_open, .bdrv_close = bdrv_qed_close, .bdrv_create = bdrv_qed_create, - .bdrv_flush = bdrv_qed_flush, .bdrv_is_allocated = bdrv_qed_is_allocated, .bdrv_make_empty = bdrv_qed_make_empty, .bdrv_aio_readv = bdrv_qed_aio_readv, diff --git a/block/raw-posix.c b/block/raw-posix.c index c7f5544edd..afcb4c192c 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -583,19 +583,6 @@ static int raw_create(const char *filename, QEMUOptionParameter *options) return result; } -static int raw_flush(BlockDriverState *bs) -{ - BDRVRawState *s = bs->opaque; - int ret; - - ret = qemu_fdatasync(s->fd); - if (ret < 0) { - return -errno; - } - - return 0; -} - #ifdef CONFIG_XFS static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors) { @@ -645,7 +632,6 @@ static BlockDriver bdrv_file = { .bdrv_file_open = raw_open, .bdrv_close = raw_close, .bdrv_create = raw_create, - .bdrv_flush = raw_flush, .bdrv_discard = raw_discard, .bdrv_aio_readv = raw_aio_readv, @@ -915,7 +901,6 @@ static BlockDriver bdrv_host_device = { .bdrv_create = hdev_create, .create_options = raw_create_options, .bdrv_has_zero_init = hdev_has_zero_init, - .bdrv_flush = raw_flush, .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, @@ -1035,7 +1020,6 @@ static BlockDriver bdrv_host_floppy = { .bdrv_create = hdev_create, .create_options = raw_create_options, .bdrv_has_zero_init = hdev_has_zero_init, - .bdrv_flush = raw_flush, .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, @@ -1135,7 +1119,6 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_create = hdev_create, .create_options = raw_create_options, .bdrv_has_zero_init = hdev_has_zero_init, - .bdrv_flush = raw_flush, .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, @@ -1255,7 +1238,6 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_create = hdev_create, .create_options = raw_create_options, .bdrv_has_zero_init = hdev_has_zero_init, - .bdrv_flush = raw_flush, .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, diff --git a/block/raw.c b/block/raw.c index 5ca606b68a..39a3a9afae 100644 --- a/block/raw.c +++ b/block/raw.c @@ -25,15 +25,9 @@ static void raw_close(BlockDriverState *bs) { } -static int raw_flush(BlockDriverState *bs) +static int coroutine_fn raw_co_flush(BlockDriverState *bs) { - return bdrv_flush(bs->file); -} - -static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs, - BlockDriverCompletionFunc *cb, void *opaque) -{ - return bdrv_aio_flush(bs->file, cb, opaque); + return bdrv_co_flush(bs->file); } static int64_t raw_getlength(BlockDriverState *bs) @@ -117,12 +111,11 @@ static BlockDriver bdrv_raw = { .bdrv_close = raw_close, .bdrv_co_readv = raw_co_readv, .bdrv_co_writev = raw_co_writev, - .bdrv_flush = raw_flush, + .bdrv_co_flush = raw_co_flush, .bdrv_probe = raw_probe, .bdrv_getlength = raw_getlength, .bdrv_truncate = raw_truncate, - .bdrv_aio_flush = raw_aio_flush, .bdrv_discard = raw_discard, .bdrv_is_inserted = raw_is_inserted, From 4265d620c5a91b58c757ce76fa2b0fff86fa1df1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 17 Oct 2011 12:32:14 +0200 Subject: [PATCH 08/19] block: add bdrv_co_discard and bdrv_aio_discard support This similarly adds support for coroutine and asynchronous discard. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block.c | 102 +++++++++++++++++++++++++++++++++++++++++++++------ block.h | 4 ++ block/raw.c | 10 +++-- block_int.h | 9 ++++- trace-events | 1 + 5 files changed, 109 insertions(+), 17 deletions(-) diff --git a/block.c b/block.c index 7b8b14d971..28508f21d1 100644 --- a/block.c +++ b/block.c @@ -1768,17 +1768,6 @@ int bdrv_has_zero_init(BlockDriverState *bs) return 1; } -int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) -{ - if (!bs->drv) { - return -ENOMEDIUM; - } - if (!bs->drv->bdrv_discard) { - return 0; - } - return bs->drv->bdrv_discard(bs, sector_num, nb_sectors); -} - /* * Returns true iff the specified sector is present in the disk image. Drivers * not implementing the functionality are assumed to not support backing files, @@ -2754,6 +2743,34 @@ BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, return &acb->common; } +static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque) +{ + BlockDriverAIOCBCoroutine *acb = opaque; + BlockDriverState *bs = acb->common.bs; + + acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors); + acb->bh = qemu_bh_new(bdrv_co_em_bh, acb); + qemu_bh_schedule(acb->bh); +} + +BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + Coroutine *co; + BlockDriverAIOCBCoroutine *acb; + + trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque); + + acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque); + acb->req.sector = sector_num; + acb->req.nb_sectors = nb_sectors; + co = qemu_coroutine_create(bdrv_aio_discard_co_entry); + qemu_coroutine_enter(co, acb); + + return &acb->common; +} + void bdrv_init(void) { module_call_init(MODULE_INIT_BLOCK); @@ -2915,6 +2932,69 @@ int bdrv_flush(BlockDriverState *bs) return rwco.ret; } +static void coroutine_fn bdrv_discard_co_entry(void *opaque) +{ + RwCo *rwco = opaque; + + rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors); +} + +int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, + int nb_sectors) +{ + if (!bs->drv) { + return -ENOMEDIUM; + } else if (bdrv_check_request(bs, sector_num, nb_sectors)) { + return -EIO; + } else if (bs->read_only) { + return -EROFS; + } else if (bs->drv->bdrv_co_discard) { + return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors); + } else if (bs->drv->bdrv_aio_discard) { + BlockDriverAIOCB *acb; + CoroutineIOCompletion co = { + .coroutine = qemu_coroutine_self(), + }; + + acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors, + bdrv_co_io_em_complete, &co); + if (acb == NULL) { + return -EIO; + } else { + qemu_coroutine_yield(); + return co.ret; + } + } else if (bs->drv->bdrv_discard) { + return bs->drv->bdrv_discard(bs, sector_num, nb_sectors); + } else { + return 0; + } +} + +int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) +{ + Coroutine *co; + RwCo rwco = { + .bs = bs, + .sector_num = sector_num, + .nb_sectors = nb_sectors, + .ret = NOT_DONE, + }; + + if (qemu_in_coroutine()) { + /* Fast-path if already in coroutine context */ + bdrv_discard_co_entry(&rwco); + } else { + co = qemu_coroutine_create(bdrv_discard_co_entry); + qemu_coroutine_enter(co, &rwco); + while (rwco.ret == NOT_DONE) { + qemu_aio_wait(); + } + } + + return rwco.ret; +} + /**************************************************************/ /* removable device support */ diff --git a/block.h b/block.h index 65c51663fa..5a042c96e7 100644 --- a/block.h +++ b/block.h @@ -166,6 +166,9 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, BlockDriverCompletionFunc *cb, void *opaque); BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque); +BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque); void bdrv_aio_cancel(BlockDriverAIOCB *acb); typedef struct BlockRequest { @@ -196,6 +199,7 @@ void bdrv_flush_all(void); void bdrv_close_all(void); int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); +int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); int bdrv_has_zero_init(BlockDriverState *bs); int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); diff --git a/block/raw.c b/block/raw.c index 39a3a9afae..33cc4716d3 100644 --- a/block/raw.c +++ b/block/raw.c @@ -45,9 +45,10 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename) return 1; /* everything can be opened as raw image */ } -static int raw_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) +static int coroutine_fn raw_co_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) { - return bdrv_discard(bs->file, sector_num, nb_sectors); + return bdrv_co_discard(bs->file, sector_num, nb_sectors); } static int raw_is_inserted(BlockDriverState *bs) @@ -109,15 +110,16 @@ static BlockDriver bdrv_raw = { .bdrv_open = raw_open, .bdrv_close = raw_close, + .bdrv_co_readv = raw_co_readv, .bdrv_co_writev = raw_co_writev, .bdrv_co_flush = raw_co_flush, + .bdrv_co_discard = raw_co_discard, + .bdrv_probe = raw_probe, .bdrv_getlength = raw_getlength, .bdrv_truncate = raw_truncate, - .bdrv_discard = raw_discard, - .bdrv_is_inserted = raw_is_inserted, .bdrv_media_changed = raw_media_changed, .bdrv_eject = raw_eject, diff --git a/block_int.h b/block_int.h index 9cb536d392..384598ff02 100644 --- a/block_int.h +++ b/block_int.h @@ -63,6 +63,8 @@ struct BlockDriver { void (*bdrv_close)(BlockDriverState *bs); int (*bdrv_create)(const char *filename, QEMUOptionParameter *options); int (*bdrv_flush)(BlockDriverState *bs); + int (*bdrv_discard)(BlockDriverState *bs, int64_t sector_num, + int nb_sectors); int (*bdrv_is_allocated)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); int (*bdrv_set_key)(BlockDriverState *bs, const char *key); @@ -76,14 +78,17 @@ struct BlockDriver { BlockDriverCompletionFunc *cb, void *opaque); BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque); - int (*bdrv_discard)(BlockDriverState *bs, int64_t sector_num, - int nb_sectors); + BlockDriverAIOCB *(*bdrv_aio_discard)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque); int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs); + int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors); int (*bdrv_aio_multiwrite)(BlockDriverState *bs, BlockRequest *reqs, int num_reqs); diff --git a/trace-events b/trace-events index 7f9cec4071..49ac1c170b 100644 --- a/trace-events +++ b/trace-events @@ -61,6 +61,7 @@ multiwrite_cb(void *mcb, int ret) "mcb %p ret %d" bdrv_aio_multiwrite(void *mcb, int num_callbacks, int num_reqs) "mcb %p num_callbacks %d num_reqs %d" bdrv_aio_multiwrite_earlyfail(void *mcb) "mcb %p" bdrv_aio_multiwrite_latefail(void *mcb, int i) "mcb %p i %d" +bdrv_aio_discard(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p" bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p" bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p" bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p" From a18e67f5f9dc1e5e9a293a946666e2e74daa1c91 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 18 Oct 2011 16:41:45 +0200 Subject: [PATCH 09/19] fdc: Fix floppy port I/O The floppy device was broken by commit 212ec7ba (fdc: Convert to isa_register_portio_list). While the old interface provided the port number relative to the floppy drive's io_base, the new one provides the real port number, so we need to apply a bitmask now to get the register number. Signed-off-by: Kevin Wolf --- hw/fdc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/fdc.c b/hw/fdc.c index 4b06e049e8..f8af2de2ce 100644 --- a/hw/fdc.c +++ b/hw/fdc.c @@ -434,6 +434,7 @@ static uint32_t fdctrl_read (void *opaque, uint32_t reg) FDCtrl *fdctrl = opaque; uint32_t retval; + reg &= 7; switch (reg) { case FD_REG_SRA: retval = fdctrl_read_statusA(fdctrl); @@ -471,6 +472,7 @@ static void fdctrl_write (void *opaque, uint32_t reg, uint32_t value) FLOPPY_DPRINTF("write reg%d: 0x%02x\n", reg & 7, value); + reg &= 7; switch (reg) { case FD_REG_DOR: fdctrl_write_dor(fdctrl, value); From 41521fa4a60344eaad433db1ab4597ba6bf78f69 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 18 Oct 2011 16:19:42 +0200 Subject: [PATCH 10/19] qemu-img: Don't allow preallocation and compression at the same time Only qcow and qcow2 can do compression at all, and they require unallocated clusters when writing the compressed data. Signed-off-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi --- qemu-img.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/qemu-img.c b/qemu-img.c index 6a3973163f..86127f0b11 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -824,6 +824,8 @@ static int img_convert(int argc, char **argv) if (compress) { QEMUOptionParameter *encryption = get_option_parameter(param, BLOCK_OPT_ENCRYPT); + QEMUOptionParameter *preallocation = + get_option_parameter(param, BLOCK_OPT_PREALLOC); if (!drv->bdrv_write_compressed) { error_report("Compression not supported for this file format"); @@ -837,6 +839,15 @@ static int img_convert(int argc, char **argv) ret = -1; goto out; } + + if (preallocation && preallocation->value.s + && strcmp(preallocation->value.s, "off")) + { + error_report("Compression and preallocation not supported at " + "the same time"); + ret = -1; + goto out; + } } /* Create the new image */ From 8f1efd00c4b2aa2b75fd20b5ee592ed47d33d5a7 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 18 Oct 2011 17:12:44 +0200 Subject: [PATCH 11/19] qcow2: Fix bdrv_write_compressed error handling If during allocation of compressed clusters the cluster was already allocated uncompressed, fail and properly release the l2_table (the latter avoids a failed assertion). While at it, make it return some real error numbers instead of -1. Signed-off-by: Kevin Wolf Reviewed-by: Dong Xu Wang --- block/qcow2-cluster.c | 6 ++++-- block/qcow2.c | 29 ++++++++++++++++++----------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 2f76311354..f4e049fa90 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -568,8 +568,10 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, } cluster_offset = be64_to_cpu(l2_table[l2_index]); - if (cluster_offset & QCOW_OFLAG_COPIED) - return cluster_offset & ~QCOW_OFLAG_COPIED; + if (cluster_offset & QCOW_OFLAG_COPIED) { + qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); + return 0; + } if (cluster_offset) qcow2_free_any_clusters(bs, cluster_offset, 1); diff --git a/block/qcow2.c b/block/qcow2.c index 4dc980c2c5..91f4f04dd7 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1053,8 +1053,8 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, Z_DEFLATED, -12, 9, Z_DEFAULT_STRATEGY); if (ret != 0) { - g_free(out_buf); - return -1; + ret = -EINVAL; + goto fail; } strm.avail_in = s->cluster_size; @@ -1064,9 +1064,9 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, ret = deflate(&strm, Z_FINISH); if (ret != Z_STREAM_END && ret != Z_OK) { - g_free(out_buf); deflateEnd(&strm); - return -1; + ret = -EINVAL; + goto fail; } out_len = strm.next_out - out_buf; @@ -1074,22 +1074,29 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, if (ret != Z_STREAM_END || out_len >= s->cluster_size) { /* could not compress: write normal cluster */ - bdrv_write(bs, sector_num, buf, s->cluster_sectors); + ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors); + if (ret < 0) { + goto fail; + } } else { cluster_offset = qcow2_alloc_compressed_cluster_offset(bs, sector_num << 9, out_len); - if (!cluster_offset) - return -1; + if (!cluster_offset) { + ret = -EIO; + goto fail; + } cluster_offset &= s->cluster_offset_mask; BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); - if (bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len) != out_len) { - g_free(out_buf); - return -1; + ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len); + if (ret < 0) { + goto fail; } } + ret = 0; +fail: g_free(out_buf); - return 0; + return ret; } static BlockDriverAIOCB *qcow2_aio_flush(BlockDriverState *bs, From 34d4260e1846d69d7241f690534e3dd4b3e6fd5b Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 20 Oct 2011 16:37:26 +0200 Subject: [PATCH 12/19] pc: Fix floppy drives with if=none Commit 63ffb564 broke floppy devices specified on the command line like -drive file=...,if=none,id=floppy -global isa-fdc.driveA=floppy because it relies on drive_get() which works only with -fda/-drive if=floppy. This patch resembles what we're already doing for IDE, i.e. remember the floppy device that was created and use that to extract the BlockDriverStates where needed. Signed-off-by: Kevin Wolf Reviewed-by: Markus Armbruster --- hw/fdc.c | 12 ++++++++++++ hw/fdc.h | 9 +++++++-- hw/pc.c | 25 ++++++++++++++----------- hw/pc.h | 3 ++- hw/pc_piix.c | 5 +++-- 5 files changed, 38 insertions(+), 16 deletions(-) diff --git a/hw/fdc.c b/hw/fdc.c index f8af2de2ce..ecaad0965c 100644 --- a/hw/fdc.c +++ b/hw/fdc.c @@ -1947,6 +1947,18 @@ static int sun4m_fdc_init1(SysBusDevice *dev) return fdctrl_init_common(fdctrl); } +void fdc_get_bs(BlockDriverState *bs[], ISADevice *dev) +{ + FDCtrlISABus *isa = DO_UPCAST(FDCtrlISABus, busdev, dev); + FDCtrl *fdctrl = &isa->state; + int i; + + for (i = 0; i < MAX_FD; i++) { + bs[i] = fdctrl->drives[i].bs; + } +} + + static const VMStateDescription vmstate_isa_fdc ={ .name = "fdc", .version_id = 2, diff --git a/hw/fdc.h b/hw/fdc.h index 09f73c6a9f..506feb6557 100644 --- a/hw/fdc.h +++ b/hw/fdc.h @@ -7,14 +7,15 @@ /* fdc.c */ #define MAX_FD 2 -static inline void fdctrl_init_isa(DriveInfo **fds) +static inline ISADevice *fdctrl_init_isa(DriveInfo **fds) { ISADevice *dev; dev = isa_try_create("isa-fdc"); if (!dev) { - return; + return NULL; } + if (fds[0]) { qdev_prop_set_drive_nofail(&dev->qdev, "driveA", fds[0]->bdrv); } @@ -22,10 +23,14 @@ static inline void fdctrl_init_isa(DriveInfo **fds) qdev_prop_set_drive_nofail(&dev->qdev, "driveB", fds[1]->bdrv); } qdev_init_nofail(&dev->qdev); + + return dev; } void fdctrl_init_sysbus(qemu_irq irq, int dma_chann, target_phys_addr_t mmio_base, DriveInfo **fds); void sun4m_fdctrl_init(qemu_irq irq, target_phys_addr_t io_base, DriveInfo **fds, qemu_irq *fdc_tc); +void fdc_get_bs(BlockDriverState *bs[], ISADevice *dev); + #endif diff --git a/hw/pc.c b/hw/pc.c index f0802b7097..eb4c2d8770 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -331,12 +331,12 @@ static void pc_cmos_init_late(void *opaque) void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size, const char *boot_device, - BusState *idebus0, BusState *idebus1, + ISADevice *floppy, BusState *idebus0, BusState *idebus1, ISADevice *s) { int val, nb, nb_heads, max_track, last_sect, i; FDriveType fd_type[2]; - DriveInfo *fd[2]; + BlockDriverState *fd[MAX_FD]; static pc_cmos_init_late_arg arg; /* various important CMOS locations needed by PC/Bochs bios */ @@ -378,14 +378,16 @@ void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size, } /* floppy type */ - for (i = 0; i < 2; i++) { - fd[i] = drive_get(IF_FLOPPY, 0, i); - if (fd[i] && bdrv_is_inserted(fd[i]->bdrv)) { - bdrv_get_floppy_geometry_hint(fd[i]->bdrv, &nb_heads, &max_track, - &last_sect, FDRIVE_DRV_NONE, - &fd_type[i]); - } else { - fd_type[i] = FDRIVE_DRV_NONE; + if (floppy) { + fdc_get_bs(fd, floppy); + for (i = 0; i < 2; i++) { + if (fd[i] && bdrv_is_inserted(fd[i])) { + bdrv_get_floppy_geometry_hint(fd[i], &nb_heads, &max_track, + &last_sect, FDRIVE_DRV_NONE, + &fd_type[i]); + } else { + fd_type[i] = FDRIVE_DRV_NONE; + } } } val = (cmos_get_fd_drive_type(fd_type[0]) << 4) | @@ -1124,6 +1126,7 @@ static void cpu_request_exit(void *opaque, int irq, int level) void pc_basic_device_init(qemu_irq *gsi, ISADevice **rtc_state, + ISADevice **floppy, bool no_vmport) { int i; @@ -1188,7 +1191,7 @@ void pc_basic_device_init(qemu_irq *gsi, for(i = 0; i < MAX_FD; i++) { fd[i] = drive_get(IF_FLOPPY, 0, i); } - fdctrl_init_isa(fd); + *floppy = fdctrl_init_isa(fd); } void pc_pci_device_init(PCIBus *pci_bus) diff --git a/hw/pc.h b/hw/pc.h index b8ad9a3c88..4515006381 100644 --- a/hw/pc.h +++ b/hw/pc.h @@ -142,11 +142,12 @@ qemu_irq *pc_allocate_cpu_irq(void); void pc_vga_init(PCIBus *pci_bus); void pc_basic_device_init(qemu_irq *gsi, ISADevice **rtc_state, + ISADevice **floppy, bool no_vmport); void pc_init_ne2k_isa(NICInfo *nd); void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size, const char *boot_device, - BusState *ide0, BusState *ide1, + ISADevice *floppy, BusState *ide0, BusState *ide1, ISADevice *s); void pc_pci_device_init(PCIBus *pci_bus); diff --git a/hw/pc_piix.c b/hw/pc_piix.c index c89042f1ce..8c7f2b7337 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -95,6 +95,7 @@ static void pc_init1(MemoryRegion *system_memory, DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS]; BusState *idebus[MAX_IDE_BUS]; ISADevice *rtc_state; + ISADevice *floppy; MemoryRegion *ram_memory; MemoryRegion *pci_memory; MemoryRegion *rom_memory; @@ -174,7 +175,7 @@ static void pc_init1(MemoryRegion *system_memory, } /* init basic PC hardware */ - pc_basic_device_init(gsi, &rtc_state, xen_enabled()); + pc_basic_device_init(gsi, &rtc_state, &floppy, xen_enabled()); for(i = 0; i < nb_nics; i++) { NICInfo *nd = &nd_table[i]; @@ -207,7 +208,7 @@ static void pc_init1(MemoryRegion *system_memory, audio_init(gsi, pci_enabled ? pci_bus : NULL); pc_cmos_init(below_4g_mem_size, above_4g_mem_size, boot_device, - idebus[0], idebus[1], rtc_state); + floppy, idebus[0], idebus[1], rtc_state); if (pci_enabled && usb_enabled) { usb_uhci_piix3_init(pci_bus, piix3_devfn + 2); From 588b65a37abf7bbe47c3a7243a871d83fa1aa66b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 20 Oct 2011 13:16:19 +0200 Subject: [PATCH 13/19] vmdk: fix return values of vmdk_parent_open While vmdk_open_desc_file (touched by the patch) correctly changed -1 to -EINVAL, vmdk_open did not. Fix it directly in vmdk_parent_open. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/vmdk.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/block/vmdk.c b/block/vmdk.c index 5d16ec49bc..ea00938755 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -283,10 +283,12 @@ static int vmdk_parent_open(BlockDriverState *bs) char *p_name; char desc[DESC_SIZE + 1]; BDRVVmdkState *s = bs->opaque; + int ret; desc[DESC_SIZE] = '\0'; - if (bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE) != DESC_SIZE) { - return -1; + ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); + if (ret < 0) { + return ret; } p_name = strstr(desc, "parentFileNameHint"); @@ -296,10 +298,10 @@ static int vmdk_parent_open(BlockDriverState *bs) p_name += sizeof("parentFileNameHint") + 1; end_name = strchr(p_name, '\"'); if (end_name == NULL) { - return -1; + return -EINVAL; } if ((end_name - p_name) > sizeof(bs->backing_file) - 1) { - return -1; + return -EINVAL; } pstrcpy(bs->backing_file, end_name - p_name + 1, p_name); @@ -629,9 +631,10 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, } /* try to open parent images, if exist */ - if (vmdk_parent_open(bs)) { + ret = vmdk_parent_open(bs); + if (ret) { vmdk_free_extents(bs); - return -EINVAL; + return ret; } s->parent_cid = vmdk_read_cid(bs, 1); return 0; From bae0a0cc38d324c83ba737b92215f3447981d73b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 20 Oct 2011 13:16:20 +0200 Subject: [PATCH 14/19] vmdk: clean up open Move vmdk_parent_open to vmdk_open. There's another path how vmdk_parent_open can be reached: vmdk_parse_extents() -> vmdk_open_sparse() -> vmdk_open_vmdk4() -> vmdk_open_desc_file(). If that can happen, however, the code is bogus. vmdk_parent_open reads from bs->file: if (bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE) != DESC_SIZE) { but it is always called with s->desc_offset == 0 and with the same bs->file. So the data that vmdk_parent_open reads comes always from the same place, and anyway there is only one place where it can write it, namely bs->backing_file. So, if it cannot happen, the patched code is okay. It is also possible that the recursive call can happen, but only once. In that case there would still be a bug in vmdk_open_desc_file setting s->desc_offset = 0, but the patched code is okay. Finally, in the case where multiple recursive calls can happen the code would need to be rewritten anyway. It is likely that this would anyway involve adding several parameters to vmdk_parent_open, and calling it from vmdk_open_vmdk4. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/vmdk.c | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/block/vmdk.c b/block/vmdk.c index ea00938755..ace29774f4 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -624,20 +624,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, return -ENOTSUP; } s->desc_offset = 0; - ret = vmdk_parse_extents(buf, bs, bs->file->filename); - if (ret) { - vmdk_free_extents(bs); - return ret; - } - - /* try to open parent images, if exist */ - ret = vmdk_parent_open(bs); - if (ret) { - vmdk_free_extents(bs); - return ret; - } - s->parent_cid = vmdk_read_cid(bs, 1); - return 0; + return vmdk_parse_extents(buf, bs, bs->file->filename); } static int vmdk_open(BlockDriverState *bs, int flags) @@ -647,17 +634,23 @@ static int vmdk_open(BlockDriverState *bs, int flags) if (vmdk_open_sparse(bs, bs->file, flags) == 0) { s->desc_offset = 0x200; - /* try to open parent images, if exist */ - ret = vmdk_parent_open(bs); - if (ret) { - vmdk_free_extents(bs); - return ret; - } - s->parent_cid = vmdk_read_cid(bs, 1); - return 0; } else { - return vmdk_open_desc_file(bs, flags, 0); + ret = vmdk_open_desc_file(bs, flags, 0); + if (ret) { + goto fail; + } } + /* try to open parent images, if exist */ + ret = vmdk_parent_open(bs); + if (ret) { + goto fail; + } + s->parent_cid = vmdk_read_cid(bs, 1); + return ret; + +fail: + vmdk_free_extents(bs); + return ret; } static int get_whole_cluster(BlockDriverState *bs, From 848c66e8f5b631961580f7f010a5831430dc84c2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 20 Oct 2011 13:16:21 +0200 Subject: [PATCH 15/19] block: add a CoMutex to synchronous read drivers The big conversion of bdrv_read/write to coroutines caused the two homonymous callbacks in BlockDriver to become reentrant. It goes like this: 1) bdrv_read is now called in a coroutine, and calls bdrv_read or bdrv_pread. 2) the nested bdrv_read goes through the fast path in bdrv_rw_co_entry; 3) in the common case when the protocol is file, bdrv_co_do_readv calls bdrv_co_readv_em (and from here goes to bdrv_co_io_em), which yields until the AIO operation is complete; 4) if bdrv_read had been called from a bottom half, the main loop is free to iterate again: a device model or another bottom half can then come and call bdrv_read again. This applies to all four of read/write/flush/discard. It would also apply to is_allocated, but it is not used from within coroutines: besides qemu-img.c and qemu-io.c, which operate synchronously, the only user is the monitor. Copy-on-read will introduce a use in the block layer, and will require converting it. The solution is "simply" to convert all drivers to coroutines! We just need to add a CoMutex that is taken around affected operations. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/bochs.c | 2 ++ block/cloop.c | 2 ++ block/cow.c | 2 ++ block/dmg.c | 2 ++ block/nbd.c | 2 ++ block/parallels.c | 2 ++ block/vmdk.c | 2 ++ block/vpc.c | 2 ++ block/vvfat.c | 2 ++ 9 files changed, 18 insertions(+) diff --git a/block/bochs.c b/block/bochs.c index 3c2f8d1b12..b0f80729fe 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -80,6 +80,7 @@ struct bochs_header { }; typedef struct BDRVBochsState { + CoMutex lock; uint32_t *catalog_bitmap; int catalog_size; @@ -150,6 +151,7 @@ static int bochs_open(BlockDriverState *bs, int flags) s->extent_size = le32_to_cpu(bochs.extra.redolog.extent); + qemu_co_mutex_init(&s->lock); return 0; fail: return -1; diff --git a/block/cloop.c b/block/cloop.c index 8cff9f2cac..a91f372aec 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -27,6 +27,7 @@ #include typedef struct BDRVCloopState { + CoMutex lock; uint32_t block_size; uint32_t n_blocks; uint64_t* offsets; @@ -93,6 +94,7 @@ static int cloop_open(BlockDriverState *bs, int flags) s->sectors_per_block = s->block_size/512; bs->total_sectors = s->n_blocks*s->sectors_per_block; + qemu_co_mutex_init(&s->lock); return 0; cloop_close: diff --git a/block/cow.c b/block/cow.c index 4cf543c832..2f426e7427 100644 --- a/block/cow.c +++ b/block/cow.c @@ -42,6 +42,7 @@ struct cow_header_v2 { }; typedef struct BDRVCowState { + CoMutex lock; int64_t cow_sectors_offset; } BDRVCowState; @@ -84,6 +85,7 @@ static int cow_open(BlockDriverState *bs, int flags) bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header); s->cow_sectors_offset = (bitmap_size + 511) & ~511; + qemu_co_mutex_init(&s->lock); return 0; fail: return -1; diff --git a/block/dmg.c b/block/dmg.c index 64c3cce46a..111aeaecd9 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -28,6 +28,7 @@ #include typedef struct BDRVDMGState { + CoMutex lock; /* each chunk contains a certain number of sectors, * offsets[i] is the offset in the .dmg file, * lengths[i] is the length of the compressed chunk, @@ -177,6 +178,7 @@ static int dmg_open(BlockDriverState *bs, int flags) s->current_chunk = s->n_chunks; + qemu_co_mutex_init(&s->lock); return 0; fail: return -1; diff --git a/block/nbd.c b/block/nbd.c index 76f04d863c..14ab2259c2 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -47,6 +47,7 @@ #endif typedef struct BDRVNBDState { + CoMutex lock; int sock; uint32_t nbdflags; off_t size; @@ -175,6 +176,7 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags) */ result = nbd_establish_connection(bs); + qemu_co_mutex_init(&s->lock); return result; } diff --git a/block/parallels.c b/block/parallels.c index c64103ddbb..b86e87e48f 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -46,6 +46,7 @@ struct parallels_header { } QEMU_PACKED; typedef struct BDRVParallelsState { + CoMutex lock; uint32_t *catalog_bitmap; int catalog_size; @@ -95,6 +96,7 @@ static int parallels_open(BlockDriverState *bs, int flags) for (i = 0; i < s->catalog_size; i++) le32_to_cpus(&s->catalog_bitmap[i]); + qemu_co_mutex_init(&s->lock); return 0; fail: if (s->catalog_bitmap) diff --git a/block/vmdk.c b/block/vmdk.c index ace29774f4..1ce220d535 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -90,6 +90,7 @@ typedef struct VmdkExtent { } VmdkExtent; typedef struct BDRVVmdkState { + CoMutex lock; int desc_offset; bool cid_updated; uint32_t parent_cid; @@ -646,6 +647,7 @@ static int vmdk_open(BlockDriverState *bs, int flags) goto fail; } s->parent_cid = vmdk_read_cid(bs, 1); + qemu_co_mutex_init(&s->lock); return ret; fail: diff --git a/block/vpc.c b/block/vpc.c index cb6c570f44..9155ee15e3 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -110,6 +110,7 @@ struct vhd_dyndisk_header { }; typedef struct BDRVVPCState { + CoMutex lock; uint8_t footer_buf[HEADER_SIZE]; uint64_t free_data_block_offset; int max_table_entries; @@ -226,6 +227,7 @@ static int vpc_open(BlockDriverState *bs, int flags) s->last_pagetable = -1; #endif + qemu_co_mutex_init(&s->lock); return 0; fail: return err; diff --git a/block/vvfat.c b/block/vvfat.c index 7e9e35a3a3..864bfcf245 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -317,6 +317,7 @@ static void print_mapping(const struct mapping_t* mapping); /* here begins the real VVFAT driver */ typedef struct BDRVVVFATState { + CoMutex lock; BlockDriverState* bs; /* pointer to parent */ unsigned int first_sectors_number; /* 1 for a single partition, 0x40 for a disk with partition table */ unsigned char first_sectors[0x40*0x200]; @@ -1065,6 +1066,7 @@ DLOG(if (stderr == NULL) { bs->heads = bs->cyls = bs->secs = 0; // assert(is_consistent(s)); + qemu_co_mutex_init(&s->lock); return 0; } From 2914caa088e3fbbdbfd73106af0cae49af1d472e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 20 Oct 2011 13:16:22 +0200 Subject: [PATCH 16/19] block: take lock around bdrv_read implementations This does the first part of the conversion to coroutines, by wrapping bdrv_read implementations to take the mutex. Drivers that implement bdrv_read rather than bdrv_co_readv can then benefit from asynchronous operation (at least if the underlying protocol supports it, which is not the case for raw-win32), even though they still operate with a bounce buffer. raw-win32 does not need the lock, because it cannot yield. nbd also doesn't probably, but better be safe. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/bochs.c | 13 ++++++++++++- block/cloop.c | 13 ++++++++++++- block/cow.c | 13 ++++++++++++- block/dmg.c | 13 ++++++++++++- block/nbd.c | 13 ++++++++++++- block/parallels.c | 13 ++++++++++++- block/vmdk.c | 13 ++++++++++++- block/vpc.c | 13 ++++++++++++- block/vvfat.c | 13 ++++++++++++- 9 files changed, 108 insertions(+), 9 deletions(-) diff --git a/block/bochs.c b/block/bochs.c index b0f80729fe..ab7944dc43 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -209,6 +209,17 @@ static int bochs_read(BlockDriverState *bs, int64_t sector_num, return 0; } +static coroutine_fn int bochs_co_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + int ret; + BDRVBochsState *s = bs->opaque; + qemu_co_mutex_lock(&s->lock); + ret = bochs_read(bs, sector_num, buf, nb_sectors); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + static void bochs_close(BlockDriverState *bs) { BDRVBochsState *s = bs->opaque; @@ -220,7 +231,7 @@ static BlockDriver bdrv_bochs = { .instance_size = sizeof(BDRVBochsState), .bdrv_probe = bochs_probe, .bdrv_open = bochs_open, - .bdrv_read = bochs_read, + .bdrv_read = bochs_co_read, .bdrv_close = bochs_close, }; diff --git a/block/cloop.c b/block/cloop.c index a91f372aec..775f8a98e1 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -146,6 +146,17 @@ static int cloop_read(BlockDriverState *bs, int64_t sector_num, return 0; } +static coroutine_fn int cloop_co_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + int ret; + BDRVCloopState *s = bs->opaque; + qemu_co_mutex_lock(&s->lock); + ret = cloop_read(bs, sector_num, buf, nb_sectors); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + static void cloop_close(BlockDriverState *bs) { BDRVCloopState *s = bs->opaque; @@ -161,7 +172,7 @@ static BlockDriver bdrv_cloop = { .instance_size = sizeof(BDRVCloopState), .bdrv_probe = cloop_probe, .bdrv_open = cloop_open, - .bdrv_read = cloop_read, + .bdrv_read = cloop_co_read, .bdrv_close = cloop_close, }; diff --git a/block/cow.c b/block/cow.c index 2f426e7427..a5fcd20817 100644 --- a/block/cow.c +++ b/block/cow.c @@ -201,6 +201,17 @@ static int cow_read(BlockDriverState *bs, int64_t sector_num, return 0; } +static coroutine_fn int cow_co_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + int ret; + BDRVCowState *s = bs->opaque; + qemu_co_mutex_lock(&s->lock); + ret = cow_read(bs, sector_num, buf, nb_sectors); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + static int cow_write(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors) { @@ -308,7 +319,7 @@ static BlockDriver bdrv_cow = { .instance_size = sizeof(BDRVCowState), .bdrv_probe = cow_probe, .bdrv_open = cow_open, - .bdrv_read = cow_read, + .bdrv_read = cow_co_read, .bdrv_write = cow_write, .bdrv_close = cow_close, .bdrv_create = cow_create, diff --git a/block/dmg.c b/block/dmg.c index 111aeaecd9..37902a4347 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -282,6 +282,17 @@ static int dmg_read(BlockDriverState *bs, int64_t sector_num, return 0; } +static coroutine_fn int dmg_co_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + int ret; + BDRVDMGState *s = bs->opaque; + qemu_co_mutex_lock(&s->lock); + ret = dmg_read(bs, sector_num, buf, nb_sectors); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + static void dmg_close(BlockDriverState *bs) { BDRVDMGState *s = bs->opaque; @@ -302,7 +313,7 @@ static BlockDriver bdrv_dmg = { .instance_size = sizeof(BDRVDMGState), .bdrv_probe = dmg_probe, .bdrv_open = dmg_open, - .bdrv_read = dmg_read, + .bdrv_read = dmg_co_read, .bdrv_close = dmg_close, }; diff --git a/block/nbd.c b/block/nbd.c index 14ab2259c2..6b22ae1d15 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -240,6 +240,17 @@ static int nbd_write(BlockDriverState *bs, int64_t sector_num, return 0; } +static coroutine_fn int nbd_co_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + int ret; + BDRVNBDState *s = bs->opaque; + qemu_co_mutex_lock(&s->lock); + ret = nbd_read(bs, sector_num, buf, nb_sectors); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + static void nbd_close(BlockDriverState *bs) { BDRVNBDState *s = bs->opaque; @@ -260,7 +271,7 @@ static BlockDriver bdrv_nbd = { .format_name = "nbd", .instance_size = sizeof(BDRVNBDState), .bdrv_file_open = nbd_open, - .bdrv_read = nbd_read, + .bdrv_read = nbd_co_read, .bdrv_write = nbd_write, .bdrv_close = nbd_close, .bdrv_getlength = nbd_getlength, diff --git a/block/parallels.c b/block/parallels.c index b86e87e48f..d30f0ecf77 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -136,6 +136,17 @@ static int parallels_read(BlockDriverState *bs, int64_t sector_num, return 0; } +static coroutine_fn int parallels_co_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + int ret; + BDRVParallelsState *s = bs->opaque; + qemu_co_mutex_lock(&s->lock); + ret = parallels_read(bs, sector_num, buf, nb_sectors); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + static void parallels_close(BlockDriverState *bs) { BDRVParallelsState *s = bs->opaque; @@ -147,7 +158,7 @@ static BlockDriver bdrv_parallels = { .instance_size = sizeof(BDRVParallelsState), .bdrv_probe = parallels_probe, .bdrv_open = parallels_open, - .bdrv_read = parallels_read, + .bdrv_read = parallels_co_read, .bdrv_close = parallels_close, }; diff --git a/block/vmdk.c b/block/vmdk.c index 1ce220d535..0e791f2d85 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1024,6 +1024,17 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, return 0; } +static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + int ret; + BDRVVmdkState *s = bs->opaque; + qemu_co_mutex_lock(&s->lock); + ret = vmdk_read(bs, sector_num, buf, nb_sectors); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + static int vmdk_write(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors) { @@ -1542,7 +1553,7 @@ static BlockDriver bdrv_vmdk = { .instance_size = sizeof(BDRVVmdkState), .bdrv_probe = vmdk_probe, .bdrv_open = vmdk_open, - .bdrv_read = vmdk_read, + .bdrv_read = vmdk_co_read, .bdrv_write = vmdk_write, .bdrv_close = vmdk_close, .bdrv_create = vmdk_create, diff --git a/block/vpc.c b/block/vpc.c index 9155ee15e3..0941533f43 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -409,6 +409,17 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num, return 0; } +static coroutine_fn int vpc_co_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + int ret; + BDRVVPCState *s = bs->opaque; + qemu_co_mutex_lock(&s->lock); + ret = vpc_read(bs, sector_num, buf, nb_sectors); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + static int vpc_write(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors) { @@ -641,7 +652,7 @@ static BlockDriver bdrv_vpc = { .instance_size = sizeof(BDRVVPCState), .bdrv_probe = vpc_probe, .bdrv_open = vpc_open, - .bdrv_read = vpc_read, + .bdrv_read = vpc_co_read, .bdrv_write = vpc_write, .bdrv_flush = vpc_flush, .bdrv_close = vpc_close, diff --git a/block/vvfat.c b/block/vvfat.c index 864bfcf245..970cccf1a4 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1281,6 +1281,17 @@ DLOG(fprintf(stderr, "sector %d not allocated\n", (int)sector_num)); return 0; } +static coroutine_fn int vvfat_co_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + int ret; + BDRVVVFATState *s = bs->opaque; + qemu_co_mutex_lock(&s->lock); + ret = vvfat_read(bs, sector_num, buf, nb_sectors); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + /* LATER TODO: statify all functions */ /* @@ -2805,7 +2816,7 @@ static BlockDriver bdrv_vvfat = { .format_name = "vvfat", .instance_size = sizeof(BDRVVVFATState), .bdrv_file_open = vvfat_open, - .bdrv_read = vvfat_read, + .bdrv_read = vvfat_co_read, .bdrv_write = vvfat_write, .bdrv_close = vvfat_close, .bdrv_is_allocated = vvfat_is_allocated, From e183ef75cc28d31addbb937a4680090495786944 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 20 Oct 2011 13:16:23 +0200 Subject: [PATCH 17/19] block: take lock around bdrv_write implementations This does the first part of the conversion to coroutines, by wrapping bdrv_write implementations to take the mutex. Drivers that implement bdrv_write rather than bdrv_co_writev can then benefit from asynchronous operation (at least if the underlying protocol supports it, which is not the case for raw-win32), even though they still operate with a bounce buffer. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/cow.c | 13 ++++++++++++- block/nbd.c | 13 ++++++++++++- block/vmdk.c | 13 ++++++++++++- block/vpc.c | 13 ++++++++++++- block/vvfat.c | 13 ++++++++++++- 5 files changed, 60 insertions(+), 5 deletions(-) diff --git a/block/cow.c b/block/cow.c index a5fcd20817..29fa8444a3 100644 --- a/block/cow.c +++ b/block/cow.c @@ -226,6 +226,17 @@ static int cow_write(BlockDriverState *bs, int64_t sector_num, return cow_update_bitmap(bs, sector_num, nb_sectors); } +static coroutine_fn int cow_co_write(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors) +{ + int ret; + BDRVCowState *s = bs->opaque; + qemu_co_mutex_lock(&s->lock); + ret = cow_write(bs, sector_num, buf, nb_sectors); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + static void cow_close(BlockDriverState *bs) { } @@ -320,7 +331,7 @@ static BlockDriver bdrv_cow = { .bdrv_probe = cow_probe, .bdrv_open = cow_open, .bdrv_read = cow_co_read, - .bdrv_write = cow_write, + .bdrv_write = cow_co_write, .bdrv_close = cow_close, .bdrv_create = cow_create, .bdrv_flush = cow_flush, diff --git a/block/nbd.c b/block/nbd.c index 6b22ae1d15..882b2dc84a 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -251,6 +251,17 @@ static coroutine_fn int nbd_co_read(BlockDriverState *bs, int64_t sector_num, return ret; } +static coroutine_fn int nbd_co_write(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors) +{ + int ret; + BDRVNBDState *s = bs->opaque; + qemu_co_mutex_lock(&s->lock); + ret = nbd_write(bs, sector_num, buf, nb_sectors); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + static void nbd_close(BlockDriverState *bs) { BDRVNBDState *s = bs->opaque; @@ -272,7 +283,7 @@ static BlockDriver bdrv_nbd = { .instance_size = sizeof(BDRVNBDState), .bdrv_file_open = nbd_open, .bdrv_read = nbd_co_read, - .bdrv_write = nbd_write, + .bdrv_write = nbd_co_write, .bdrv_close = nbd_close, .bdrv_getlength = nbd_getlength, .protocol_name = "nbd", diff --git a/block/vmdk.c b/block/vmdk.c index 0e791f2d85..3b376ed648 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1116,6 +1116,17 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, return 0; } +static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors) +{ + int ret; + BDRVVmdkState *s = bs->opaque; + qemu_co_mutex_lock(&s->lock); + ret = vmdk_write(bs, sector_num, buf, nb_sectors); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + static int vmdk_create_extent(const char *filename, int64_t filesize, bool flat, bool compress) @@ -1554,7 +1565,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_probe = vmdk_probe, .bdrv_open = vmdk_open, .bdrv_read = vmdk_co_read, - .bdrv_write = vmdk_write, + .bdrv_write = vmdk_co_write, .bdrv_close = vmdk_close, .bdrv_create = vmdk_create, .bdrv_flush = vmdk_flush, diff --git a/block/vpc.c b/block/vpc.c index 0941533f43..74ca642605 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -456,6 +456,17 @@ static int vpc_write(BlockDriverState *bs, int64_t sector_num, return 0; } +static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors) +{ + int ret; + BDRVVPCState *s = bs->opaque; + qemu_co_mutex_lock(&s->lock); + ret = vpc_write(bs, sector_num, buf, nb_sectors); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + static int vpc_flush(BlockDriverState *bs) { return bdrv_flush(bs->file); @@ -653,7 +664,7 @@ static BlockDriver bdrv_vpc = { .bdrv_probe = vpc_probe, .bdrv_open = vpc_open, .bdrv_read = vpc_co_read, - .bdrv_write = vpc_write, + .bdrv_write = vpc_co_write, .bdrv_flush = vpc_flush, .bdrv_close = vpc_close, .bdrv_create = vpc_create, diff --git a/block/vvfat.c b/block/vvfat.c index 970cccf1a4..e1fcdbc45b 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -2727,6 +2727,17 @@ DLOG(checkpoint()); return 0; } +static coroutine_fn int vvfat_co_write(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors) +{ + int ret; + BDRVVVFATState *s = bs->opaque; + qemu_co_mutex_lock(&s->lock); + ret = vvfat_write(bs, sector_num, buf, nb_sectors); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + static int vvfat_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int* n) { @@ -2817,7 +2828,7 @@ static BlockDriver bdrv_vvfat = { .instance_size = sizeof(BDRVVVFATState), .bdrv_file_open = vvfat_open, .bdrv_read = vvfat_co_read, - .bdrv_write = vvfat_write, + .bdrv_write = vvfat_co_write, .bdrv_close = vvfat_close, .bdrv_is_allocated = vvfat_is_allocated, .protocol_name = "fat", From 8b94ff85737062876c03e7506abb500521c749b9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 20 Oct 2011 13:16:24 +0200 Subject: [PATCH 18/19] block: change flush to co_flush Since coroutine operation is now mandatory, convert all bdrv_flush implementations to coroutines. For qcow2, this means taking the lock. Other implementations are simpler and just forward bdrv_flush to the underlying protocol, so they can avoid the lock. The bdrv_flush callback is then unused and can be eliminated. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block.c | 2 -- block/cow.c | 6 +++--- block/qcow.c | 11 +++++------ block/qcow2.c | 14 +++++++------- block/raw-win32.c | 4 ++-- block/rbd.c | 4 ++-- block/vdi.c | 6 +++--- block/vmdk.c | 8 ++++---- block/vpc.c | 6 +++--- block_int.h | 1 - 10 files changed, 29 insertions(+), 33 deletions(-) diff --git a/block.c b/block.c index 28508f21d1..81fb709011 100644 --- a/block.c +++ b/block.c @@ -2892,8 +2892,6 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) qemu_coroutine_yield(); return co.ret; } - } else if (bs->drv->bdrv_flush) { - return bs->drv->bdrv_flush(bs); } else { /* * Some block drivers always operate in either writethrough or unsafe diff --git a/block/cow.c b/block/cow.c index 29fa8444a3..707c0aad88 100644 --- a/block/cow.c +++ b/block/cow.c @@ -306,9 +306,9 @@ exit: return ret; } -static int cow_flush(BlockDriverState *bs) +static coroutine_fn int cow_co_flush(BlockDriverState *bs) { - return bdrv_flush(bs->file); + return bdrv_co_flush(bs->file); } static QEMUOptionParameter cow_create_options[] = { @@ -334,7 +334,7 @@ static BlockDriver bdrv_cow = { .bdrv_write = cow_co_write, .bdrv_close = cow_close, .bdrv_create = cow_create, - .bdrv_flush = cow_flush, + .bdrv_co_flush = cow_co_flush, .bdrv_is_allocated = cow_is_allocated, .create_options = cow_create_options, diff --git a/block/qcow.c b/block/qcow.c index f93e3eb0d9..ab36b2995c 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -781,10 +781,9 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num, return 0; } -static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs, - BlockDriverCompletionFunc *cb, void *opaque) +static coroutine_fn int qcow_co_flush(BlockDriverState *bs) { - return bdrv_aio_flush(bs->file, cb, opaque); + return bdrv_co_flush(bs->file); } static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) @@ -824,9 +823,9 @@ static BlockDriver bdrv_qcow = { .bdrv_is_allocated = qcow_is_allocated, .bdrv_set_key = qcow_set_key, .bdrv_make_empty = qcow_make_empty, - .bdrv_co_readv = qcow_co_readv, - .bdrv_co_writev = qcow_co_writev, - .bdrv_aio_flush = qcow_aio_flush, + .bdrv_co_readv = qcow_co_readv, + .bdrv_co_writev = qcow_co_writev, + .bdrv_co_flush = qcow_co_flush, .bdrv_write_compressed = qcow_write_compressed, .bdrv_get_info = qcow_get_info, diff --git a/block/qcow2.c b/block/qcow2.c index 91f4f04dd7..6ef38bf1b1 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1099,24 +1099,24 @@ fail: return ret; } -static BlockDriverAIOCB *qcow2_aio_flush(BlockDriverState *bs, - BlockDriverCompletionFunc *cb, - void *opaque) +static int qcow2_co_flush(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; int ret; + qemu_co_mutex_lock(&s->lock); ret = qcow2_cache_flush(bs, s->l2_table_cache); if (ret < 0) { - return NULL; + return ret; } ret = qcow2_cache_flush(bs, s->refcount_block_cache); if (ret < 0) { - return NULL; + return ret; } + qemu_co_mutex_unlock(&s->lock); - return bdrv_aio_flush(bs->file, cb, opaque); + return bdrv_co_flush(bs->file); } static int64_t qcow2_vm_state_offset(BDRVQcowState *s) @@ -1237,7 +1237,7 @@ static BlockDriver bdrv_qcow2 = { .bdrv_co_readv = qcow2_co_readv, .bdrv_co_writev = qcow2_co_writev, - .bdrv_aio_flush = qcow2_aio_flush, + .bdrv_co_flush = qcow2_co_flush, .bdrv_discard = qcow2_discard, .bdrv_truncate = qcow2_truncate, diff --git a/block/raw-win32.c b/block/raw-win32.c index b7dd357c6d..f5f73bcd64 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -281,7 +281,7 @@ static BlockDriver bdrv_file = { .bdrv_file_open = raw_open, .bdrv_close = raw_close, .bdrv_create = raw_create, - .bdrv_flush = raw_flush, + .bdrv_co_flush = raw_flush, .bdrv_read = raw_read, .bdrv_write = raw_write, .bdrv_truncate = raw_truncate, @@ -409,7 +409,7 @@ static BlockDriver bdrv_host_device = { .bdrv_probe_device = hdev_probe_device, .bdrv_file_open = hdev_open, .bdrv_close = raw_close, - .bdrv_flush = raw_flush, + .bdrv_co_flush = raw_flush, .bdrv_has_zero_init = hdev_has_zero_init, .bdrv_read = raw_read, diff --git a/block/rbd.c b/block/rbd.c index 3068c829fe..c684e0cb0b 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -705,7 +705,7 @@ static BlockDriverAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs, return rbd_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); } -static int qemu_rbd_flush(BlockDriverState *bs) +static int qemu_rbd_co_flush(BlockDriverState *bs) { #if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1) /* rbd_flush added in 0.1.1 */ @@ -851,7 +851,7 @@ static BlockDriver bdrv_rbd = { .bdrv_file_open = qemu_rbd_open, .bdrv_close = qemu_rbd_close, .bdrv_create = qemu_rbd_create, - .bdrv_flush = qemu_rbd_flush, + .bdrv_co_flush = qemu_rbd_co_flush, .bdrv_get_info = qemu_rbd_getinfo, .create_options = qemu_rbd_create_options, .bdrv_getlength = qemu_rbd_getlength, diff --git a/block/vdi.c b/block/vdi.c index 1d5ad2bf49..883046d5a2 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -936,10 +936,10 @@ static void vdi_close(BlockDriverState *bs) { } -static int vdi_flush(BlockDriverState *bs) +static coroutine_fn int vdi_co_flush(BlockDriverState *bs) { logout("\n"); - return bdrv_flush(bs->file); + return bdrv_co_flush(bs->file); } @@ -975,7 +975,7 @@ static BlockDriver bdrv_vdi = { .bdrv_open = vdi_open, .bdrv_close = vdi_close, .bdrv_create = vdi_create, - .bdrv_flush = vdi_flush, + .bdrv_co_flush = vdi_co_flush, .bdrv_is_allocated = vdi_is_allocated, .bdrv_make_empty = vdi_make_empty, diff --git a/block/vmdk.c b/block/vmdk.c index 3b376ed648..6be592ffd6 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1494,14 +1494,14 @@ static void vmdk_close(BlockDriverState *bs) vmdk_free_extents(bs); } -static int vmdk_flush(BlockDriverState *bs) +static coroutine_fn int vmdk_co_flush(BlockDriverState *bs) { int i, ret, err; BDRVVmdkState *s = bs->opaque; - ret = bdrv_flush(bs->file); + ret = bdrv_co_flush(bs->file); for (i = 0; i < s->num_extents; i++) { - err = bdrv_flush(s->extents[i].file); + err = bdrv_co_flush(s->extents[i].file); if (err < 0) { ret = err; } @@ -1568,7 +1568,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_write = vmdk_co_write, .bdrv_close = vmdk_close, .bdrv_create = vmdk_create, - .bdrv_flush = vmdk_flush, + .bdrv_co_flush = vmdk_co_flush, .bdrv_is_allocated = vmdk_is_allocated, .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, diff --git a/block/vpc.c b/block/vpc.c index 74ca642605..79be7d051b 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -467,9 +467,9 @@ static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num, return ret; } -static int vpc_flush(BlockDriverState *bs) +static coroutine_fn int vpc_co_flush(BlockDriverState *bs) { - return bdrv_flush(bs->file); + return bdrv_co_flush(bs->file); } /* @@ -665,7 +665,7 @@ static BlockDriver bdrv_vpc = { .bdrv_open = vpc_open, .bdrv_read = vpc_co_read, .bdrv_write = vpc_co_write, - .bdrv_flush = vpc_flush, + .bdrv_co_flush = vpc_co_flush, .bdrv_close = vpc_close, .bdrv_create = vpc_create, diff --git a/block_int.h b/block_int.h index 384598ff02..bc3b07e2f8 100644 --- a/block_int.h +++ b/block_int.h @@ -62,7 +62,6 @@ struct BlockDriver { const uint8_t *buf, int nb_sectors); void (*bdrv_close)(BlockDriverState *bs); int (*bdrv_create)(const char *filename, QEMUOptionParameter *options); - int (*bdrv_flush)(BlockDriverState *bs); int (*bdrv_discard)(BlockDriverState *bs, int64_t sector_num, int nb_sectors); int (*bdrv_is_allocated)(BlockDriverState *bs, int64_t sector_num, From 6db39ae2e250f15da17462444ea18a9fcef41e05 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 20 Oct 2011 13:16:25 +0200 Subject: [PATCH 19/19] block: change discard to co_discard Since coroutine operation is now mandatory, convert both bdrv_discard implementations to coroutines. For qcow2, this means taking the lock around the operation. raw-posix remains synchronous. The bdrv_discard callback is then unused and can be eliminated. Reviewed-by: Kevin Wolf Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block.c | 2 -- block/qcow2.c | 14 ++++++++++---- block/raw-posix.c | 5 +++-- block_int.h | 2 -- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/block.c b/block.c index 81fb709011..70aab63e4b 100644 --- a/block.c +++ b/block.c @@ -2962,8 +2962,6 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, qemu_coroutine_yield(); return co.ret; } - } else if (bs->drv->bdrv_discard) { - return bs->drv->bdrv_discard(bs, sector_num, nb_sectors); } else { return 0; } diff --git a/block/qcow2.c b/block/qcow2.c index 6ef38bf1b1..a181932b67 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -978,11 +978,17 @@ static int qcow2_make_empty(BlockDriverState *bs) return 0; } -static int qcow2_discard(BlockDriverState *bs, int64_t sector_num, - int nb_sectors) +static coroutine_fn int qcow2_co_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) { - return qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS, + int ret; + BDRVQcowState *s = bs->opaque; + + qemu_co_mutex_lock(&s->lock); + ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS, nb_sectors); + qemu_co_mutex_unlock(&s->lock); + return ret; } static int qcow2_truncate(BlockDriverState *bs, int64_t offset) @@ -1239,7 +1245,7 @@ static BlockDriver bdrv_qcow2 = { .bdrv_co_writev = qcow2_co_writev, .bdrv_co_flush = qcow2_co_flush, - .bdrv_discard = qcow2_discard, + .bdrv_co_discard = qcow2_co_discard, .bdrv_truncate = qcow2_truncate, .bdrv_write_compressed = qcow2_write_compressed, diff --git a/block/raw-posix.c b/block/raw-posix.c index afcb4c192c..a3de373586 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -602,7 +602,8 @@ static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors) } #endif -static int raw_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) +static coroutine_fn int raw_co_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) { #ifdef CONFIG_XFS BDRVRawState *s = bs->opaque; @@ -632,7 +633,7 @@ static BlockDriver bdrv_file = { .bdrv_file_open = raw_open, .bdrv_close = raw_close, .bdrv_create = raw_create, - .bdrv_discard = raw_discard, + .bdrv_co_discard = raw_co_discard, .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, diff --git a/block_int.h b/block_int.h index bc3b07e2f8..dac00f504f 100644 --- a/block_int.h +++ b/block_int.h @@ -62,8 +62,6 @@ struct BlockDriver { const uint8_t *buf, int nb_sectors); void (*bdrv_close)(BlockDriverState *bs); int (*bdrv_create)(const char *filename, QEMUOptionParameter *options); - int (*bdrv_discard)(BlockDriverState *bs, int64_t sector_num, - int nb_sectors); int (*bdrv_is_allocated)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); int (*bdrv_set_key)(BlockDriverState *bs, const char *key);