diff --git a/block.c b/block.c index 430edf79bb..ee5b28a979 100644 --- a/block.c +++ b/block.c @@ -4458,6 +4458,15 @@ static void bdrv_close(BlockDriverState *bs) } QLIST_INIT(&bs->aio_notifiers); bdrv_drained_end(bs); + + /* + * If we're still inside some bdrv_drain_all_begin()/end() sections, end + * them now since this BDS won't exist anymore when bdrv_drain_all_end() + * gets called. + */ + if (bs->quiesce_counter) { + bdrv_drain_all_end_quiesce(bs); + } } void bdrv_close_all(void) diff --git a/block/io.c b/block/io.c index 02528b3823..9918f2499c 100644 --- a/block/io.c +++ b/block/io.c @@ -633,6 +633,19 @@ void bdrv_drain_all_begin(void) } } +void bdrv_drain_all_end_quiesce(BlockDriverState *bs) +{ + int drained_end_counter = 0; + + g_assert(bs->quiesce_counter > 0); + g_assert(!bs->refcnt); + + while (bs->quiesce_counter) { + bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); + } + BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); +} + void bdrv_drain_all_end(void) { BlockDriverState *bs = NULL; @@ -2282,17 +2295,17 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) { ret |= BDRV_BLOCK_ALLOCATED; - } else if (want_zero && bs->drv->supports_backing) { + } else if (bs->drv->supports_backing) { BlockDriverState *cow_bs = bdrv_cow_bs(bs); - if (cow_bs) { + if (!cow_bs) { + ret |= BDRV_BLOCK_ZERO; + } else if (want_zero) { int64_t size2 = bdrv_getlength(cow_bs); if (size2 >= 0 && offset >= size2) { ret |= BDRV_BLOCK_ZERO; } - } else { - ret |= BDRV_BLOCK_ZERO; } } @@ -2447,6 +2460,33 @@ int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, offset, bytes, pnum, map, file); } +/* + * Check @bs (and its backing chain) to see if the range defined + * by @offset and @bytes is known to read as zeroes. + * Return 1 if that is the case, 0 otherwise and -errno on error. 
+ * This test is meant to be fast rather than accurate so returning 0 + * does not guarantee non-zero data. + */ +int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, + int64_t bytes) +{ + int ret; + int64_t pnum = bytes; + + if (!bytes) { + return 1; + } + + ret = bdrv_common_block_status_above(bs, NULL, false, false, offset, + bytes, &pnum, NULL, NULL); + + if (ret < 0) { + return ret; + } + + return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO); +} + int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum) { diff --git a/block/qcow2.c b/block/qcow2.c index b6cb4db8bb..4274806a2a 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2387,26 +2387,26 @@ static bool merge_cow(uint64_t offset, unsigned bytes, return false; } -static bool is_unallocated(BlockDriverState *bs, int64_t offset, int64_t bytes) -{ - int64_t nr; - return !bytes || - (!bdrv_is_allocated_above(bs, NULL, false, offset, bytes, &nr) && - nr == bytes); -} - -static bool is_zero_cow(BlockDriverState *bs, QCowL2Meta *m) +/* + * Return 1 if the COW regions read as zeroes, 0 if not, < 0 on error. + * Note that returning 0 does not guarantee non-zero data. + */ +static int is_zero_cow(BlockDriverState *bs, QCowL2Meta *m) { /* * This check is designed for optimization shortcut so it must be * efficient. - * Instead of is_zero(), use is_unallocated() as it is faster (but not - * as accurate and can result in false negatives). + * Instead of is_zero(), use bdrv_co_is_zero_fast() as it is + * faster (but not as accurate and can result in false negatives). 
*/ - return is_unallocated(bs, m->offset + m->cow_start.offset, - m->cow_start.nb_bytes) && - is_unallocated(bs, m->offset + m->cow_end.offset, - m->cow_end.nb_bytes); + int ret = bdrv_co_is_zero_fast(bs, m->offset + m->cow_start.offset, + m->cow_start.nb_bytes); + if (ret <= 0) { + return ret; + } + + return bdrv_co_is_zero_fast(bs, m->offset + m->cow_end.offset, + m->cow_end.nb_bytes); } static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta) @@ -2432,7 +2432,10 @@ static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta) continue; } - if (!is_zero_cow(bs, m)) { + ret = is_zero_cow(bs, m); + if (ret < 0) { + return ret; + } else if (ret == 0) { continue; } diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst index c35bd64822..b615aa8419 100644 --- a/docs/tools/qemu-img.rst +++ b/docs/tools/qemu-img.rst @@ -188,6 +188,10 @@ Parameters to convert subcommand: allocated target image depending on the host support for getting allocation information. +.. option:: -r + + Rate limit for the convert process, in bytes per second + .. option:: --salvage Try to ignore I/O errors when reading. Unless in quiet mode (``-q``), errors @@ -349,7 +353,7 @@ Command description: state after (the attempt at) repairing it. That is, a successful ``-r all`` will yield the exit code 0, independently of the image state before. -.. option:: commit [--object OBJECTDEF] [--image-opts] [-q] [-f FMT] [-t CACHE] [-b BASE] [-d] [-p] FILENAME +.. option:: commit [--object OBJECTDEF] [--image-opts] [-q] [-f FMT] [-t CACHE] [-b BASE] [-r RATE_LIMIT] [-d] [-p] FILENAME Commit the changes recorded in *FILENAME* in its base image or backing file. If the backing file is smaller than the snapshot, then the backing file will be @@ -371,6 +375,8 @@ Command description: garbage data when read. For this reason, ``-b`` implies ``-d`` (so that the top image stays valid). + The rate limit for the commit process (in bytes per second) is specified by ``-r``. + .. 
option:: compare [--object OBJECTDEF] [--image-opts] [-f FMT] [-F FMT] [-T SRC_CACHE] [-p] [-q] [-s] [-U] FILENAME1 FILENAME2 Check if two images have the same content. You can compare images with @@ -408,7 +414,7 @@ Command description: 4 Error on reading data -.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-m NUM_COROUTINES] [-W] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME +.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME Convert the disk image *FILENAME* or a snapshot *SNAPSHOT_PARAM* to disk image *OUTPUT_FILENAME* using format *OUTPUT_FMT*. It can diff --git a/include/block/block.h b/include/block/block.h index d16c401cb4..4bfe3b546b 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -508,6 +508,8 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, bool include_base, int64_t offset, int64_t bytes, int64_t *pnum); +int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, + int64_t bytes); bool bdrv_is_read_only(BlockDriverState *bs); int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, @@ -779,6 +781,12 @@ void bdrv_drained_end(BlockDriverState *bs); */ void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter); +/** + * End all quiescent sections started by bdrv_drain_all_begin(). This is + * only needed when deleting a BDS before bdrv_drain_all_end() is called. 
+ */ +void bdrv_drain_all_end_quiesce(BlockDriverState *bs); + /** * End a quiescent section started by bdrv_subtree_drained_begin(). */ diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index cab8234235..b3620f29e5 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -34,9 +34,9 @@ SRST ERST DEF("commit", img_commit, - "commit [--object objectdef] [--image-opts] [-q] [-f fmt] [-t cache] [-b base] [-d] [-p] filename") + "commit [--object objectdef] [--image-opts] [-q] [-f fmt] [-t cache] [-b base] [-r rate_limit] [-d] [-p] filename") SRST -.. option:: commit [--object OBJECTDEF] [--image-opts] [-q] [-f FMT] [-t CACHE] [-b BASE] [-d] [-p] FILENAME +.. option:: commit [--object OBJECTDEF] [--image-opts] [-q] [-f FMT] [-t CACHE] [-b BASE] [-r RATE_LIMIT] [-d] [-p] FILENAME ERST DEF("compare", img_compare, @@ -46,9 +46,9 @@ SRST ERST DEF("convert", img_convert, - "convert [--object objectdef] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file] [-o options] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] [--salvage] filename [filename2 [...]] output_filename") + "convert [--object objectdef] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file] [-o options] [-l snapshot_param] [-S sparse_size] [-r rate_limit] [-m num_coroutines] [-W] [--salvage] filename [filename2 [...]] output_filename") SRST -.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-m NUM_COROUTINES] [-W] [--salvage] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME +.. 
option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] [--salvage] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME ERST DEF("create", img_create, diff --git a/qemu-img.c b/qemu-img.c index 2103507936..a968c74cba 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -50,6 +50,8 @@ #include "block/qapi.h" #include "crypto/init.h" #include "trace/control.h" +#include "qemu/throttle.h" +#include "block/throttle-groups.h" #define QEMU_IMG_VERSION "qemu-img version " QEMU_FULL_VERSION \ "\n" QEMU_COPYRIGHT "\n" @@ -980,6 +982,7 @@ static int img_commit(int argc, char **argv) CommonBlockJobCBInfo cbi; bool image_opts = false; AioContext *aio_context; + int64_t rate_limit = 0; fmt = NULL; cache = BDRV_DEFAULT_CACHE; @@ -991,7 +994,7 @@ static int img_commit(int argc, char **argv) {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":f:ht:b:dpq", + c = getopt_long(argc, argv, ":f:ht:b:dpqr:", long_options, NULL); if (c == -1) { break; @@ -1026,6 +1029,12 @@ static int img_commit(int argc, char **argv) case 'q': quiet = true; break; + case 'r': + rate_limit = cvtnum("rate limit", optarg); + if (rate_limit < 0) { + return 1; + } + break; case OPTION_OBJECT: { QemuOpts *opts; opts = qemu_opts_parse_noisily(&qemu_object_opts, @@ -1099,7 +1108,7 @@ static int img_commit(int argc, char **argv) aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - commit_active_start("commit", bs, base_bs, JOB_DEFAULT, 0, + commit_active_start("commit", bs, base_bs, JOB_DEFAULT, rate_limit, BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb, &cbi, false, &local_err); aio_context_release(aio_context); @@ -1662,6 +1671,7 @@ enum ImgConvertBlockStatus { }; #define MAX_COROUTINES 16 +#define 
CONVERT_THROTTLE_GROUP "img_convert" typedef struct ImgConvertState { BlockBackend **src; @@ -2177,6 +2187,17 @@ static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst) #define MAX_BUF_SECTORS 32768 +static void set_rate_limit(BlockBackend *blk, int64_t rate_limit) +{ + ThrottleConfig cfg; + + throttle_config_init(&cfg); + cfg.buckets[THROTTLE_BPS_WRITE].avg = rate_limit; + + blk_io_limits_enable(blk, CONVERT_THROTTLE_GROUP); + blk_set_io_limits(blk, &cfg); +} + static int img_convert(int argc, char **argv) { int c, bs_i, flags, src_flags = 0; @@ -2197,6 +2218,7 @@ static int img_convert(int argc, char **argv) bool force_share = false; bool explict_min_sparse = false; bool bitmaps = false; + int64_t rate_limit = 0; ImgConvertState s = (ImgConvertState) { /* Need at least 4k of zeros for sparse detection */ @@ -2219,7 +2241,7 @@ static int img_convert(int argc, char **argv) {"bitmaps", no_argument, 0, OPTION_BITMAPS}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WU", + c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WUr:", long_options, NULL); if (c == -1) { break; @@ -2316,6 +2338,12 @@ static int img_convert(int argc, char **argv) case 'U': force_share = true; break; + case 'r': + rate_limit = cvtnum("rate limit", optarg); + if (rate_limit < 0) { + goto fail_getopt; + } + break; case OPTION_OBJECT: { QemuOpts *object_opts; object_opts = qemu_opts_parse_noisily(&qemu_object_opts, @@ -2705,6 +2733,10 @@ static int img_convert(int argc, char **argv) s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE; } + if (rate_limit) { + set_rate_limit(s.target, rate_limit); + } + ret = convert_do_copy(&s); /* Now copy the bitmaps */ diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c index 1595bbc92e..8a29e33e00 100644 --- a/tests/test-bdrv-drain.c +++ b/tests/test-bdrv-drain.c @@ -594,6 +594,7 @@ static void test_graph_change_drain_all(void) g_assert_cmpint(bs_b->quiesce_counter, ==, 0); 
g_assert_cmpint(b_s->drain_count, ==, 0); + g_assert_cmpint(qemu_get_aio_context()->external_disable_cnt, ==, 0); bdrv_unref(bs_b); blk_unref(blk_b);