From 2d9187bc65727d9dd63e2c410b5500add3db0b0d Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 28 Feb 2017 13:40:07 +0100 Subject: [PATCH 01/46] qemu-img: make convert async The convert process is currently completely implemented with sync operations. That means it reads one buffer and then writes it. There is no parallelism, and each sync request blocks until it is completed. This can be a big performance hit when the convert process reads and writes to devices which do not benefit from kernel readahead or pagecache. In our environment we heavily rely on the following two use cases when using qemu-img convert. a) reading from NFS and writing to iSCSI for deploying templates b) reading from iSCSI and writing to NFS for backups In both processes we use libiscsi and libnfs so we have no kernel cache. This patch changes the convert process to work with parallel running coroutines which can significantly improve performance for network storage devices: qemu-img (master) nfs -> iscsi 22.8 secs nfs -> ram 11.7 secs ram -> iscsi 12.3 secs qemu-img-async (8 coroutines, in-order write disabled) nfs -> iscsi 11.0 secs nfs -> ram 10.4 secs ram -> iscsi 9.0 secs This patch introduces 2 new cmdline parameters. The -m parameter specifies the number of coroutines running in parallel (defaults to 8), and the -W parameter allows qemu-img to write to the target out of order rather than sequentially. This improves performance as the writes do not have to wait for each other to complete. 
Signed-off-by: Peter Lieven Signed-off-by: Kevin Wolf --- qemu-img-cmds.hx | 4 +- qemu-img.c | 322 +++++++++++++++++++++++++++++++++-------------- qemu-img.texi | 16 ++- 3 files changed, 243 insertions(+), 99 deletions(-) diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index f054599a91..9c9702cc62 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -40,9 +40,9 @@ STEXI ETEXI DEF("convert", img_convert, - "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] filename [filename2 [...]] output_filename") + "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename") STEXI -@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename} ETEXI DEF("dd", img_dd, diff --git a/qemu-img.c b/qemu-img.c index df3aefd35a..caa76a7400 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -156,6 +156,11 @@ static void QEMU_NORETURN help(void) " kinds of errors, with a higher risk of choosing the wrong fix or\n" " hiding corruption that has already occurred.\n" "\n" + "Parameters to convert subcommand:\n" + " '-m' specifies how many coroutines work in parallel during the 
convert\n" + " process (defaults to 8)\n" + " '-W' allow to write to the target out of order rather than sequential\n" + "\n" "Parameters to snapshot subcommand:\n" " 'snapshot' is the name of the snapshot to create, apply or delete\n" " '-a' applies a snapshot (revert disk to saved state)\n" @@ -1462,48 +1467,61 @@ enum ImgConvertBlockStatus { BLK_BACKING_FILE, }; +#define MAX_COROUTINES 16 + typedef struct ImgConvertState { BlockBackend **src; int64_t *src_sectors; - int src_cur, src_num; - int64_t src_cur_offset; + int src_num; int64_t total_sectors; int64_t allocated_sectors; + int64_t allocated_done; + int64_t sector_num; + int64_t wr_offs; enum ImgConvertBlockStatus status; int64_t sector_next_status; BlockBackend *target; bool has_zero_init; bool compressed; bool target_has_backing; + bool wr_in_order; int min_sparse; size_t cluster_sectors; size_t buf_sectors; + int num_coroutines; + int running_coroutines; + Coroutine *co[MAX_COROUTINES]; + int64_t wait_sector_num[MAX_COROUTINES]; + CoMutex lock; + int ret; } ImgConvertState; -static void convert_select_part(ImgConvertState *s, int64_t sector_num) +static void convert_select_part(ImgConvertState *s, int64_t sector_num, + int *src_cur, int64_t *src_cur_offset) { - assert(sector_num >= s->src_cur_offset); - while (sector_num - s->src_cur_offset >= s->src_sectors[s->src_cur]) { - s->src_cur_offset += s->src_sectors[s->src_cur]; - s->src_cur++; - assert(s->src_cur < s->src_num); + *src_cur = 0; + *src_cur_offset = 0; + while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) { + *src_cur_offset += s->src_sectors[*src_cur]; + (*src_cur)++; + assert(*src_cur < s->src_num); } } static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num) { - int64_t ret; - int n; + int64_t ret, src_cur_offset; + int n, src_cur; - convert_select_part(s, sector_num); + convert_select_part(s, sector_num, &src_cur, &src_cur_offset); assert(s->total_sectors > sector_num); n = MIN(s->total_sectors - sector_num, 
BDRV_REQUEST_MAX_SECTORS); if (s->sector_next_status <= sector_num) { BlockDriverState *file; - ret = bdrv_get_block_status(blk_bs(s->src[s->src_cur]), - sector_num - s->src_cur_offset, + ret = bdrv_get_block_status(blk_bs(s->src[src_cur]), + sector_num - src_cur_offset, n, &n, &file); if (ret < 0) { return ret; @@ -1519,8 +1537,8 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num) /* Check block status of the backing file chain to avoid * needlessly reading zeroes and limiting the iteration to the * buffer size */ - ret = bdrv_get_block_status_above(blk_bs(s->src[s->src_cur]), NULL, - sector_num - s->src_cur_offset, + ret = bdrv_get_block_status_above(blk_bs(s->src[src_cur]), NULL, + sector_num - src_cur_offset, n, &n, &file); if (ret < 0) { return ret; @@ -1558,28 +1576,34 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num) return n; } -static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors, - uint8_t *buf) +static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num, + int nb_sectors, uint8_t *buf) { - int n; - int ret; + int n, ret; + QEMUIOVector qiov; + struct iovec iov; assert(nb_sectors <= s->buf_sectors); while (nb_sectors > 0) { BlockBackend *blk; - int64_t bs_sectors; + int src_cur; + int64_t bs_sectors, src_cur_offset; /* In the case of compression with multiple source files, we can get a * nb_sectors that spreads into the next part. So we must be able to * read across multiple BDSes for one convert_read() call. 
*/ - convert_select_part(s, sector_num); - blk = s->src[s->src_cur]; - bs_sectors = s->src_sectors[s->src_cur]; + convert_select_part(s, sector_num, &src_cur, &src_cur_offset); + blk = s->src[src_cur]; + bs_sectors = s->src_sectors[src_cur]; - n = MIN(nb_sectors, bs_sectors - (sector_num - s->src_cur_offset)); - ret = blk_pread(blk, - (sector_num - s->src_cur_offset) << BDRV_SECTOR_BITS, - buf, n << BDRV_SECTOR_BITS); + n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset)); + iov.iov_base = buf; + iov.iov_len = n << BDRV_SECTOR_BITS; + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = blk_co_preadv( + blk, (sector_num - src_cur_offset) << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, &qiov, 0); if (ret < 0) { return ret; } @@ -1592,15 +1616,18 @@ static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors, return 0; } -static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, - const uint8_t *buf) + +static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num, + int nb_sectors, uint8_t *buf, + enum ImgConvertBlockStatus status) { int ret; + QEMUIOVector qiov; + struct iovec iov; while (nb_sectors > 0) { int n = nb_sectors; - - switch (s->status) { + switch (status) { case BLK_BACKING_FILE: /* If we have a backing file, leave clusters unallocated that are * unallocated in the source image, so that the backing file is @@ -1621,9 +1648,13 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, break; } - ret = blk_pwrite_compressed(s->target, - sector_num << BDRV_SECTOR_BITS, - buf, n << BDRV_SECTOR_BITS); + iov.iov_base = buf; + iov.iov_len = n << BDRV_SECTOR_BITS; + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, &qiov, + BDRV_REQ_WRITE_COMPRESSED); if (ret < 0) { return ret; } @@ -1636,8 +1667,12 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, if 
(!s->min_sparse || is_allocated_sectors_min(buf, n, &n, s->min_sparse)) { - ret = blk_pwrite(s->target, sector_num << BDRV_SECTOR_BITS, - buf, n << BDRV_SECTOR_BITS, 0); + iov.iov_base = buf; + iov.iov_len = n << BDRV_SECTOR_BITS; + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, &qiov, 0); if (ret < 0) { return ret; } @@ -1649,8 +1684,9 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, if (s->has_zero_init) { break; } - ret = blk_pwrite_zeroes(s->target, sector_num << BDRV_SECTOR_BITS, - n << BDRV_SECTOR_BITS, 0); + ret = blk_co_pwrite_zeroes(s->target, + sector_num << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, 0); if (ret < 0) { return ret; } @@ -1665,12 +1701,122 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, return 0; } +static void coroutine_fn convert_co_do_copy(void *opaque) +{ + ImgConvertState *s = opaque; + uint8_t *buf = NULL; + int ret, i; + int index = -1; + + for (i = 0; i < s->num_coroutines; i++) { + if (s->co[i] == qemu_coroutine_self()) { + index = i; + break; + } + } + assert(index >= 0); + + s->running_coroutines++; + buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE); + + while (1) { + int n; + int64_t sector_num; + enum ImgConvertBlockStatus status; + + qemu_co_mutex_lock(&s->lock); + if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) { + qemu_co_mutex_unlock(&s->lock); + goto out; + } + n = convert_iteration_sectors(s, s->sector_num); + if (n < 0) { + qemu_co_mutex_unlock(&s->lock); + s->ret = n; + goto out; + } + /* save current sector and allocation status to local variables */ + sector_num = s->sector_num; + status = s->status; + if (!s->min_sparse && s->status == BLK_ZERO) { + n = MIN(n, s->buf_sectors); + } + /* increment global sector counter so that other coroutines can + * already continue reading beyond this request */ + s->sector_num += n; + 
qemu_co_mutex_unlock(&s->lock); + + if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) { + s->allocated_done += n; + qemu_progress_print(100.0 * s->allocated_done / + s->allocated_sectors, 0); + } + + if (status == BLK_DATA) { + ret = convert_co_read(s, sector_num, n, buf); + if (ret < 0) { + error_report("error while reading sector %" PRId64 + ": %s", sector_num, strerror(-ret)); + s->ret = ret; + goto out; + } + } else if (!s->min_sparse && status == BLK_ZERO) { + status = BLK_DATA; + memset(buf, 0x00, n * BDRV_SECTOR_SIZE); + } + + if (s->wr_in_order) { + /* keep writes in order */ + while (s->wr_offs != sector_num) { + if (s->ret != -EINPROGRESS) { + goto out; + } + s->wait_sector_num[index] = sector_num; + qemu_coroutine_yield(); + } + s->wait_sector_num[index] = -1; + } + + ret = convert_co_write(s, sector_num, n, buf, status); + if (ret < 0) { + error_report("error while writing sector %" PRId64 + ": %s", sector_num, strerror(-ret)); + s->ret = ret; + goto out; + } + + if (s->wr_in_order) { + /* reenter the coroutine that might have waited + * for this write to complete */ + s->wr_offs = sector_num + n; + for (i = 0; i < s->num_coroutines; i++) { + if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) { + /* + * A -> B -> A cannot occur because A has + * s->wait_sector_num[i] == -1 during A -> B. Therefore + * B will never enter A during this time window. 
+ */ + qemu_coroutine_enter(s->co[i]); + break; + } + } + } + } + +out: + qemu_vfree(buf); + s->co[index] = NULL; + s->running_coroutines--; + if (!s->running_coroutines && s->ret == -EINPROGRESS) { + /* the convert job finished successfully */ + s->ret = 0; + } +} + static int convert_do_copy(ImgConvertState *s) { - uint8_t *buf = NULL; - int64_t sector_num, allocated_done; - int ret; - int n; + int ret, i, n; + int64_t sector_num = 0; /* Check whether we have zero initialisation or can get it efficiently */ s->has_zero_init = s->min_sparse && !s->target_has_backing @@ -1691,21 +1837,15 @@ static int convert_do_copy(ImgConvertState *s) if (s->compressed) { if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) { error_report("invalid cluster size"); - ret = -EINVAL; - goto fail; + return -EINVAL; } s->buf_sectors = s->cluster_sectors; } - buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE); - /* Calculate allocated sectors for progress */ - s->allocated_sectors = 0; - sector_num = 0; while (sector_num < s->total_sectors) { n = convert_iteration_sectors(s, sector_num); if (n < 0) { - ret = n; - goto fail; + return n; } if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO)) { @@ -1715,61 +1855,29 @@ static int convert_do_copy(ImgConvertState *s) } /* Do the copy */ - s->src_cur = 0; - s->src_cur_offset = 0; s->sector_next_status = 0; + s->ret = -EINPROGRESS; - sector_num = 0; - allocated_done = 0; - - while (sector_num < s->total_sectors) { - n = convert_iteration_sectors(s, sector_num); - if (n < 0) { - ret = n; - goto fail; - } - if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO)) - { - allocated_done += n; - qemu_progress_print(100.0 * allocated_done / s->allocated_sectors, - 0); - } - - if (s->status == BLK_DATA) { - ret = convert_read(s, sector_num, n, buf); - if (ret < 0) { - error_report("error while reading sector %" PRId64 - ": %s", sector_num, strerror(-ret)); - goto fail; - } - } else if 
(!s->min_sparse && s->status == BLK_ZERO) { - n = MIN(n, s->buf_sectors); - memset(buf, 0, n * BDRV_SECTOR_SIZE); - s->status = BLK_DATA; - } - - ret = convert_write(s, sector_num, n, buf); - if (ret < 0) { - error_report("error while writing sector %" PRId64 - ": %s", sector_num, strerror(-ret)); - goto fail; - } - - sector_num += n; + qemu_co_mutex_init(&s->lock); + for (i = 0; i < s->num_coroutines; i++) { + s->co[i] = qemu_coroutine_create(convert_co_do_copy, s); + s->wait_sector_num[i] = -1; + qemu_coroutine_enter(s->co[i]); } - if (s->compressed) { + while (s->ret == -EINPROGRESS) { + main_loop_wait(false); + } + + if (s->compressed && !s->ret) { /* signal EOF to align */ ret = blk_pwrite_compressed(s->target, 0, NULL, 0); if (ret < 0) { - goto fail; + return ret; } } - ret = 0; -fail: - qemu_vfree(buf); - return ret; + return s->ret; } static int img_convert(int argc, char **argv) @@ -1797,6 +1905,8 @@ static int img_convert(int argc, char **argv) QemuOpts *sn_opts = NULL; ImgConvertState state; bool image_opts = false; + bool wr_in_order = true; + long num_coroutines = 8; fmt = NULL; out_fmt = "raw"; @@ -1812,7 +1922,7 @@ static int img_convert(int argc, char **argv) {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qn", + c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qnm:W", long_options, NULL); if (c == -1) { break; @@ -1904,6 +2014,18 @@ static int img_convert(int argc, char **argv) case 'n': skip_create = 1; break; + case 'm': + if (qemu_strtol(optarg, NULL, 0, &num_coroutines) || + num_coroutines < 1 || num_coroutines > MAX_COROUTINES) { + error_report("Invalid number of coroutines. 
Allowed number of" + " coroutines is between 1 and %d", MAX_COROUTINES); + ret = -1; + goto fail_getopt; + } + break; + case 'W': + wr_in_order = false; + break; case OPTION_OBJECT: opts = qemu_opts_parse_noisily(&qemu_object_opts, optarg, true); @@ -1923,6 +2045,12 @@ static int img_convert(int argc, char **argv) goto fail_getopt; } + if (!wr_in_order && compress) { + error_report("Out of order write and compress are mutually exclusive"); + ret = -1; + goto fail_getopt; + } + /* Initialize before goto out */ if (quiet) { progress = 0; @@ -2163,6 +2291,8 @@ static int img_convert(int argc, char **argv) .min_sparse = min_sparse, .cluster_sectors = cluster_sectors, .buf_sectors = bufsectors, + .wr_in_order = wr_in_order, + .num_coroutines = num_coroutines, }; ret = convert_do_copy(&state); diff --git a/qemu-img.texi b/qemu-img.texi index 174aae38b7..c81db3e81c 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -137,6 +137,12 @@ Parameters to convert subcommand: @item -n Skip the creation of the target volume +@item -m +Number of parallel coroutines for the convert process +@item -W +Allow out-of-order writes to the destination. This option improves performance, +but is only recommended for preallocated devices like host devices or other +raw block devices. 
@end table Parameters to dd subcommand: @@ -296,7 +302,7 @@ Error on reading data @end table -@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-m @var{num_coroutines}] [-W] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} Convert the disk image @var{filename} or a snapshot @var{snapshot_param}(@var{snapshot_id_or_name} is deprecated) to disk image @var{output_filename} using format @var{output_fmt}. It can be optionally compressed (@code{-c} @@ -326,6 +332,14 @@ skipped. This is useful for formats such as @code{rbd} if the target volume has already been created with site specific options that cannot be supplied through qemu-img. +Out of order writes can be enabled with @code{-W} to improve performance. +This is only recommended for preallocated devices like host devices or other +raw block devices. Out of order write does not work in combination with +creating compressed images. + +@var{num_coroutines} specifies how many coroutines work in parallel during +the convert process (defaults to 8). + @item dd [-f @var{fmt}] [-O @var{output_fmt}] [bs=@var{block_size}] [count=@var{blocks}] [skip=@var{blocks}] if=@var{input} of=@var{output} Dd copies from @var{input} file to @var{output} file converting it from From 9e19ad4e49c8dc7f776bf770f52ad6ea1ec28edc Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Mon, 27 Feb 2017 13:55:43 +0100 Subject: [PATCH 02/46] option: Tweak invalid size error message and unbreak iotest 049 Commit 75cdcd1 neglected to update tests/qemu-iotests/049.out, and made the error message for negative size worse. Fix that. 
Reported-by: Thomas Huth Signed-off-by: Markus Armbruster Reviewed-by: Eric Blake Reviewed-by: Thomas Huth Tested-by: Christian Borntraeger Signed-off-by: Kevin Wolf --- tests/qemu-iotests/049.out | 14 +++++++++----- util/qemu-option.c | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out index 4673b67f37..34e66db691 100644 --- a/tests/qemu-iotests/049.out +++ b/tests/qemu-iotests/049.out @@ -95,14 +95,14 @@ qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1024 qemu-img: Image size must be less than 8 EiB! qemu-img create -f qcow2 -o size=-1024 TEST_DIR/t.qcow2 -qemu-img: Parameter 'size' expects a non-negative number below 2^64 +qemu-img: Value '-1024' is out of range for parameter 'size' qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1k qemu-img: Image size must be less than 8 EiB! qemu-img create -f qcow2 -o size=-1k TEST_DIR/t.qcow2 -qemu-img: Parameter 'size' expects a non-negative number below 2^64 +qemu-img: Value '-1k' is out of range for parameter 'size' qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- 1kilobyte @@ -110,15 +110,19 @@ qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes. qemu-img create -f qcow2 -o size=1kilobyte TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +qemu-img: Parameter 'size' expects a non-negative number below 2^64 +Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta- +and exabytes, respectively. +qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- foobar qemu-img: Invalid image size specified! 
You may use k, M, G, T, P or E suffixes for qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes. qemu-img create -f qcow2 -o size=foobar TEST_DIR/t.qcow2 -qemu-img: Parameter 'size' expects a size -You may use k, M, G or T suffixes for kilobytes, megabytes, gigabytes and terabytes. +qemu-img: Parameter 'size' expects a non-negative number below 2^64 +Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta- +and exabytes, respectively. qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' == Check correct interpretation of suffixes for cluster size == diff --git a/util/qemu-option.c b/util/qemu-option.c index 419f2528b8..5ce1b5c246 100644 --- a/util/qemu-option.c +++ b/util/qemu-option.c @@ -179,7 +179,7 @@ void parse_option_size(const char *name, const char *value, err = qemu_strtosz(value, NULL, &size); if (err == -ERANGE) { - error_setg(errp, "Value '%s' is too large for parameter '%s'", + error_setg(errp, "Value '%s' is out of range for parameter '%s'", value, name); return; } From 7006c9a76119a863e5812de572841618b9540ac2 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 20 Dec 2016 16:52:41 +0100 Subject: [PATCH 03/46] block: Add op blocker permission constants This patch defines the permission categories that will be used by the new op blocker system. 
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- include/block/block.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/include/block/block.h b/include/block/block.h index bde5ebda18..f62f38e6a4 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -187,6 +187,42 @@ typedef enum BlockOpType { BLOCK_OP_TYPE_MAX, } BlockOpType; +/* Block node permission constants */ +enum { + /** + * A user that has the "permission" of consistent reads is guaranteed that + * their view of the contents of the block device is complete and + * self-consistent, representing the contents of a disk at a specific + * point. + * + * For most block devices (including their backing files) this is true, but + * the property cannot be maintained in a few situations like for + * intermediate nodes of a commit block job. + */ + BLK_PERM_CONSISTENT_READ = 0x01, + + /** This permission is required to change the visible disk contents. */ + BLK_PERM_WRITE = 0x02, + + /** + * This permission (which is weaker than BLK_PERM_WRITE) is both enough and + * required for writes to the block node when the caller promises that + * the visible disk content doesn't change. + */ + BLK_PERM_WRITE_UNCHANGED = 0x04, + + /** This permission is required to change the size of a block node. */ + BLK_PERM_RESIZE = 0x08, + + /** + * This permission is required to change the node that this BdrvChild + * points to. + */ + BLK_PERM_GRAPH_MOD = 0x10, + + BLK_PERM_ALL = 0x1f, +}; + /* disk I/O throttling */ void bdrv_init(void); void bdrv_init_with_whitelist(void); From 8b2ff5291f9e39fb1c0c6c0c4321daac60aab4db Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 20 Dec 2016 22:21:17 +0100 Subject: [PATCH 04/46] block: Add Error argument to bdrv_attach_child() It will have to return an error soon, so prepare the callers for it. 
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block.c | 16 +++++++++++++--- block/quorum.c | 9 ++++++++- include/block/block.h | 3 ++- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/block.c b/block.c index b663204f3f..65240facf6 100644 --- a/block.c +++ b/block.c @@ -1368,7 +1368,8 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, const char *child_name, - const BdrvChildRole *child_role) + const BdrvChildRole *child_role, + Error **errp) { BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role, parent_bs); @@ -1469,7 +1470,9 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) bs->backing = NULL; goto out; } - bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing); + /* FIXME Error handling */ + bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing, + &error_abort); bs->open_flags &= ~BDRV_O_NO_BACKING; pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); pstrcpy(bs->backing_format, sizeof(bs->backing_format), @@ -1648,6 +1651,7 @@ BdrvChild *bdrv_open_child(const char *filename, const BdrvChildRole *child_role, bool allow_none, Error **errp) { + BdrvChild *c; BlockDriverState *bs; bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role, @@ -1656,7 +1660,13 @@ BdrvChild *bdrv_open_child(const char *filename, return NULL; } - return bdrv_attach_child(parent, bs, bdref_key, child_role); + c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp); + if (!c) { + bdrv_unref(bs); + return NULL; + } + + return c; } static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, diff --git a/block/quorum.c b/block/quorum.c index 86e2072dce..bdbcec6e00 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -1032,10 +1032,17 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState 
*child_bs, /* We can safely add the child now */ bdrv_ref(child_bs); - child = bdrv_attach_child(bs, child_bs, indexstr, &child_format); + + child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp); + if (child == NULL) { + s->next_child_index--; + bdrv_unref(child_bs); + goto out; + } s->children = g_renew(BdrvChild *, s->children, s->num_children + 1); s->children[s->num_children++] = child; +out: bdrv_drained_end(bs); } diff --git a/include/block/block.h b/include/block/block.h index f62f38e6a4..ff951ea772 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -520,7 +520,8 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, const char *child_name, - const BdrvChildRole *child_role); + const BdrvChildRole *child_role, + Error **errp); bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); From d5e6f437c5508614803d11e59ee16a758dde09ef Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 14 Dec 2016 17:24:36 +0100 Subject: [PATCH 05/46] block: Let callers request permissions when attaching a child node When attaching a node as a child to a new parent, the required and shared permissions for this parent are checked against all other parents of the node now, and an error is returned if there is a conflict. This allows error returns to a function that previously always succeeded, and the same is true for quite a few callers and their callers. Converting all of them within the same patch would be too much, so for now everyone tells that they don't need any permissions and allow everyone else to do anything. This way we can use &error_abort initially and convert caller by caller to pass actual permission requirements and implement error handling. 
All these places are marked with FIXME comments and it will be the job of the next patches to clean them up again. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block.c | 66 ++++++++++++++++++++++++++++++++++----- block/block-backend.c | 8 +++-- include/block/block_int.h | 15 ++++++++- 3 files changed, 78 insertions(+), 11 deletions(-) diff --git a/block.c b/block.c index 65240facf6..9628c7a3a9 100644 --- a/block.c +++ b/block.c @@ -1326,6 +1326,38 @@ static int bdrv_fill_options(QDict **options, const char *filename, return 0; } +static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm, + uint64_t new_shared_perm, + BdrvChild *ignore_child, Error **errp) +{ + BdrvChild *c; + + /* There is no reason why anyone couldn't tolerate write_unchanged */ + assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED); + + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (c == ignore_child) { + continue; + } + + if ((new_used_perm & c->shared_perm) != new_used_perm || + (c->perm & new_shared_perm) != c->perm) + { + const char *user = NULL; + if (c->role->get_name) { + user = c->role->get_name(c); + if (user && !*user) { + user = NULL; + } + } + error_setg(errp, "Conflicts with %s", user ?: "another operation"); + return -EPERM; + } + } + + return 0; +} + static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) { BlockDriverState *old_bs = child->bs; @@ -1350,14 +1382,25 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildRole *child_role, - void *opaque) + uint64_t perm, uint64_t shared_perm, + void *opaque, Error **errp) { - BdrvChild *child = g_new(BdrvChild, 1); + BdrvChild *child; + int ret; + + ret = bdrv_check_update_perm(child_bs, perm, shared_perm, NULL, errp); + if (ret < 0) { + return NULL; + } + + child = g_new(BdrvChild, 1); *child = (BdrvChild) { - .bs = NULL, - .name = 
g_strdup(child_name), - .role = child_role, - .opaque = opaque, + .bs = NULL, + .name = g_strdup(child_name), + .role = child_role, + .perm = perm, + .shared_perm = shared_perm, + .opaque = opaque, }; bdrv_replace_child(child, child_bs); @@ -1371,8 +1414,15 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, const BdrvChildRole *child_role, Error **errp) { - BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role, - parent_bs); + BdrvChild *child; + + /* FIXME Use real permissions */ + child = bdrv_root_attach_child(child_bs, child_name, child_role, + 0, BLK_PERM_ALL, parent_bs, errp); + if (child == NULL) { + return NULL; + } + QLIST_INSERT_HEAD(&parent_bs->children, child, next); return child; } diff --git a/block/block-backend.c b/block/block-backend.c index 492e71e41f..9bb45285ef 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -163,7 +163,9 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, return NULL; } - blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk); + /* FIXME Use real permissions */ + blk->root = bdrv_root_attach_child(bs, "root", &child_root, + 0, BLK_PERM_ALL, blk, &error_abort); return blk; } @@ -498,7 +500,9 @@ void blk_remove_bs(BlockBackend *blk) void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs) { bdrv_ref(bs); - blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk); + /* FIXME Use real permissions */ + blk->root = bdrv_root_attach_child(bs, "root", &child_root, + 0, BLK_PERM_ALL, blk, &error_abort); notifier_list_notify(&blk->insert_bs_notifiers, blk); if (blk->public.throttle_state) { diff --git a/include/block/block_int.h b/include/block/block_int.h index 1670941da9..ed63badcfb 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -419,6 +419,18 @@ struct BdrvChild { char *name; const BdrvChildRole *role; void *opaque; + + /** + * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask) + */ + uint64_t 
perm; + + /** + * Permissions that can still be granted to other users of @bs while this + * BdrvChild is still attached to it. (BLK_PERM_* bitmask) + */ + uint64_t shared_perm; + QLIST_ENTRY(BdrvChild) next; QLIST_ENTRY(BdrvChild) next_parent; }; @@ -796,7 +808,8 @@ void hmp_drive_add_node(Monitor *mon, const char *optstr); BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildRole *child_role, - void *opaque); + uint64_t perm, uint64_t shared_perm, + void *opaque, Error **errp); void bdrv_root_unref_child(BdrvChild *child); const char *bdrv_get_parent_name(const BlockDriverState *bs); From 33a610c398603efafd954c706ba07850835a5098 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 15 Dec 2016 13:04:20 +0100 Subject: [PATCH 06/46] block: Involve block drivers in permission granting In many cases, the required permissions of one node on its children depend on what its parents require from it. For example, the raw format or most filter drivers only need to request consistent reads if that's something that one of their parents wants. In order to achieve this, this patch introduces two new BlockDriver callbacks. The first one lets drivers first check (recursively) whether the requested permissions can be set; the second one actually sets the new permission bitmask. Also add helper functions that drivers can use in their implementation of the callbacks to update their permissions on a specific child. 
Signed-off-by: Kevin Wolf Acked-by: Fam Zheng Reviewed-by: Max Reitz --- block.c | 206 +++++++++++++++++++++++++++++++++++++- include/block/block_int.h | 61 +++++++++++ 2 files changed, 263 insertions(+), 4 deletions(-) diff --git a/block.c b/block.c index 9628c7a3a9..cf3534fd5f 100644 --- a/block.c +++ b/block.c @@ -1326,11 +1326,146 @@ static int bdrv_fill_options(QDict **options, const char *filename, return 0; } +/* + * Check whether permissions on this node can be changed in a way that + * @cumulative_perms and @cumulative_shared_perms are the new cumulative + * permissions of all its parents. This involves checking whether all necessary + * permission changes to child nodes can be performed. + * + * A call to this function must always be followed by a call to bdrv_set_perm() + * or bdrv_abort_perm_update(). + */ +static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms, + uint64_t cumulative_shared_perms, Error **errp) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + int ret; + + /* Write permissions never work with read-only images */ + if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && + bdrv_is_read_only(bs)) + { + error_setg(errp, "Block node is read-only"); + return -EPERM; + } + + /* Check this node */ + if (!drv) { + return 0; + } + + if (drv->bdrv_check_perm) { + return drv->bdrv_check_perm(bs, cumulative_perms, + cumulative_shared_perms, errp); + } + + /* Drivers may not have .bdrv_child_perm() */ + if (!drv->bdrv_child_perm) { + return 0; + } + + /* Check all children */ + QLIST_FOREACH(c, &bs->children, next) { + uint64_t cur_perm, cur_shared; + drv->bdrv_child_perm(bs, c, c->role, + cumulative_perms, cumulative_shared_perms, + &cur_perm, &cur_shared); + ret = bdrv_child_check_perm(c, cur_perm, cur_shared, errp); + if (ret < 0) { + return ret; + } + } + + return 0; +} + +/* + * Notifies drivers that after a previous bdrv_check_perm() call, the + * permission update is not performed and any preparations made 
for it (e.g. + * taken file locks) need to be undone. + * + * This function recursively notifies all child nodes. + */ +static void bdrv_abort_perm_update(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + + if (!drv) { + return; + } + + if (drv->bdrv_abort_perm_update) { + drv->bdrv_abort_perm_update(bs); + } + + QLIST_FOREACH(c, &bs->children, next) { + bdrv_child_abort_perm_update(c); + } +} + +static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, + uint64_t cumulative_shared_perms) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + + if (!drv) { + return; + } + + /* Update this node */ + if (drv->bdrv_set_perm) { + drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms); + } + + /* Drivers may not have .bdrv_child_perm() */ + if (!drv->bdrv_child_perm) { + return; + } + + /* Update all children */ + QLIST_FOREACH(c, &bs->children, next) { + uint64_t cur_perm, cur_shared; + drv->bdrv_child_perm(bs, c, c->role, + cumulative_perms, cumulative_shared_perms, + &cur_perm, &cur_shared); + bdrv_child_set_perm(c, cur_perm, cur_shared); + } +} + +static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, + uint64_t *shared_perm) +{ + BdrvChild *c; + uint64_t cumulative_perms = 0; + uint64_t cumulative_shared_perms = BLK_PERM_ALL; + + QLIST_FOREACH(c, &bs->parents, next_parent) { + cumulative_perms |= c->perm; + cumulative_shared_perms &= c->shared_perm; + } + + *perm = cumulative_perms; + *shared_perm = cumulative_shared_perms; +} + +/* + * Checks whether a new reference to @bs can be added if the new user requires + * @new_used_perm/@new_shared_perm as its permissions. If @ignore_child is set, + * this old reference is ignored in the calculations; this allows checking + * permission updates for an existing reference. + * + * Needs to be followed by a call to either bdrv_set_perm() or + * bdrv_abort_perm_update(). 
*/ static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm, uint64_t new_shared_perm, BdrvChild *ignore_child, Error **errp) { BdrvChild *c; + uint64_t cumulative_perms = new_used_perm; + uint64_t cumulative_shared_perms = new_shared_perm; /* There is no reason why anyone couldn't tolerate write_unchanged */ assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED); @@ -1353,20 +1488,73 @@ static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm, error_setg(errp, "Conflicts with %s", user ?: "another operation"); return -EPERM; } + + cumulative_perms |= c->perm; + cumulative_shared_perms &= c->shared_perm; } + return bdrv_check_perm(bs, cumulative_perms, cumulative_shared_perms, errp); +} + +/* Needs to be followed by a call to either bdrv_child_set_perm() or + * bdrv_child_abort_perm_update(). */ +int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp) +{ + return bdrv_check_update_perm(c->bs, perm, shared, c, errp); +} + +void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared) +{ + uint64_t cumulative_perms, cumulative_shared_perms; + + c->perm = perm; + c->shared_perm = shared; + + bdrv_get_cumulative_perm(c->bs, &cumulative_perms, + &cumulative_shared_perms); + bdrv_set_perm(c->bs, cumulative_perms, cumulative_shared_perms); +} + +void bdrv_child_abort_perm_update(BdrvChild *c) +{ + bdrv_abort_perm_update(c->bs); +} + +int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp) +{ + int ret; + + ret = bdrv_child_check_perm(c, perm, shared, errp); + if (ret < 0) { + bdrv_child_abort_perm_update(c); + return ret; + } + + bdrv_child_set_perm(c, perm, shared); + return 0; } -static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) +static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs, + bool check_new_perm) { BlockDriverState *old_bs = child->bs; + uint64_t perm, shared_perm; if (old_bs) { if 
(old_bs->quiesce_counter && child->role->drained_end) { child->role->drained_end(child); } QLIST_REMOVE(child, next_parent); + + /* Update permissions for old node. This is guaranteed to succeed + * because we're just taking a parent away, so we're loosening + * restrictions. */ + bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm); + bdrv_check_perm(old_bs, perm, shared_perm, &error_abort); + bdrv_set_perm(old_bs, perm, shared_perm); } child->bs = new_bs; @@ -1376,6 +1564,12 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) if (new_bs->quiesce_counter && child->role->drained_begin) { child->role->drained_begin(child); } + + bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm); + if (check_new_perm) { + bdrv_check_perm(new_bs, perm, shared_perm, &error_abort); + } + bdrv_set_perm(new_bs, perm, shared_perm); } } @@ -1390,6 +1584,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, ret = bdrv_check_update_perm(child_bs, perm, shared_perm, NULL, errp); if (ret < 0) { + bdrv_abort_perm_update(child_bs); return NULL; } @@ -1403,7 +1598,8 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, .opaque = opaque, }; - bdrv_replace_child(child, child_bs); + /* This performs the matching bdrv_set_perm() for the above check. */ + bdrv_replace_child(child, child_bs, false); return child; } @@ -1434,7 +1630,7 @@ static void bdrv_detach_child(BdrvChild *child) child->next.le_prev = NULL; } - bdrv_replace_child(child, NULL); + bdrv_replace_child(child, NULL, false); g_free(child->name); g_free(child); @@ -2541,7 +2737,9 @@ static void change_parent_backing_link(BlockDriverState *from, assert(c->role != &child_backing); bdrv_ref(to); - bdrv_replace_child(c, to); + /* FIXME Are we sure that bdrv_replace_child() can't run into + * &error_abort because of permissions? 
*/ + bdrv_replace_child(c, to, true); bdrv_unref(from); } } diff --git a/include/block/block_int.h b/include/block/block_int.h index ed63badcfb..cef2b6e0bc 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -320,6 +320,59 @@ struct BlockDriver { void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child, Error **errp); + /** + * Informs the block driver that a permission change is intended. The + * driver checks whether the change is permissible and may take other + * preparations for the change (e.g. get file system locks). This operation + * is always followed either by a call to either .bdrv_set_perm or + * .bdrv_abort_perm_update. + * + * Checks whether the requested set of cumulative permissions in @perm + * can be granted for accessing @bs and whether no other users are using + * permissions other than those given in @shared (both arguments take + * BLK_PERM_* bitmasks). + * + * If both conditions are met, 0 is returned. Otherwise, -errno is returned + * and errp is set to an error describing the conflict. + */ + int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm, + uint64_t shared, Error **errp); + + /** + * Called to inform the driver that the set of cumulative set of used + * permissions for @bs has changed to @perm, and the set of sharable + * permission to @shared. The driver can use this to propagate changes to + * its children (i.e. request permissions only if a parent actually needs + * them). + * + * This function is only invoked after bdrv_check_perm(), so block drivers + * may rely on preparations made in their .bdrv_check_perm implementation. + */ + void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared); + + /* + * Called to inform the driver that after a previous bdrv_check_perm() + * call, the permission update is not performed and any preparations made + * for it (e.g. taken file locks) need to be undone. 
+ * + * This function can be called even for nodes that never saw a + * bdrv_check_perm() call. It is a no-op then. + */ + void (*bdrv_abort_perm_update)(BlockDriverState *bs); + + /** + * Returns in @nperm and @nshared the permissions that the driver for @bs + * needs on its child @c, based on the cumulative permissions requested by + * the parents in @parent_perm and @parent_shared. + * + * If @c is NULL, return the permissions for attaching a new child for the + * given @role. + */ + void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t parent_perm, uint64_t parent_shared, + uint64_t *nperm, uint64_t *nshared); + QLIST_ENTRY(BlockDriver) list; }; @@ -812,6 +865,14 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, void *opaque, Error **errp); void bdrv_root_unref_child(BdrvChild *child); +int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp); +void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); +void bdrv_child_abort_perm_update(BdrvChild *c); +int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp); + + const char *bdrv_get_parent_name(const BlockDriverState *bs); void blk_dev_change_media_cb(BlockBackend *blk, bool load); bool blk_dev_has_removable_media(BlockBackend *blk); From 6a1b9ee152101868082dbf24ccb683af0431c85b Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 15 Dec 2016 11:27:32 +0100 Subject: [PATCH 07/46] block: Default .bdrv_child_perm() for filter drivers Most filters need permissions related to read and write for their children, but only if the node has a parent that wants to use the same operation on the filter. The same is true for resize. This adds a default implementation that simply forwards all necessary permissions to all children of the node and leaves the other permissions unchanged. 
Signed-off-by: Kevin Wolf Acked-by: Fam Zheng Reviewed-by: Max Reitz --- block.c | 23 +++++++++++++++++++++++ include/block/block_int.h | 8 ++++++++ 2 files changed, 31 insertions(+) diff --git a/block.c b/block.c index cf3534fd5f..064e9d77fa 100644 --- a/block.c +++ b/block.c @@ -1537,6 +1537,29 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, return 0; } +#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \ + | BLK_PERM_WRITE \ + | BLK_PERM_WRITE_UNCHANGED \ + | BLK_PERM_RESIZE) +#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH) + +void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + if (c == NULL) { + *nperm = perm & DEFAULT_PERM_PASSTHROUGH; + *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; + return; + } + + *nperm = (perm & DEFAULT_PERM_PASSTHROUGH) | + (c->perm & DEFAULT_PERM_UNCHANGED); + *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | + (c->shared_perm & DEFAULT_PERM_UNCHANGED); +} + static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs, bool check_new_perm) { diff --git a/include/block/block_int.h b/include/block/block_int.h index cef2b6e0bc..17f4c2d6cd 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -872,6 +872,14 @@ void bdrv_child_abort_perm_update(BdrvChild *c); int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, Error **errp); +/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by + * block filters: Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED and RESIZE to + * all children */ +void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared); + const char *bdrv_get_parent_name(const BlockDriverState *bs); void blk_dev_change_media_cb(BlockBackend *blk, 
bool load); From d7010dfb685506cc4addd102627205f85fcd0fe7 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 15 Dec 2016 12:28:58 +0100 Subject: [PATCH 08/46] block: Request child permissions in filter drivers All callers will have to request permissions for all of their child nodes. Block drivers that act simply as filters can use the default implementation of .bdrv_child_perm(). Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block/blkdebug.c | 2 ++ block/blkreplay.c | 1 + block/blkverify.c | 1 + block/quorum.c | 2 ++ block/raw-format.c | 1 + block/replication.c | 1 + 6 files changed, 8 insertions(+) diff --git a/block/blkdebug.c b/block/blkdebug.c index 6117ce5fca..67e8024e36 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -734,6 +734,8 @@ static BlockDriver bdrv_blkdebug = { .bdrv_file_open = blkdebug_open, .bdrv_close = blkdebug_close, .bdrv_reopen_prepare = blkdebug_reopen_prepare, + .bdrv_child_perm = bdrv_filter_default_perms, + .bdrv_getlength = blkdebug_getlength, .bdrv_truncate = blkdebug_truncate, .bdrv_refresh_filename = blkdebug_refresh_filename, diff --git a/block/blkreplay.c b/block/blkreplay.c index cfc8c5be02..e1102119fb 100755 --- a/block/blkreplay.c +++ b/block/blkreplay.c @@ -137,6 +137,7 @@ static BlockDriver bdrv_blkreplay = { .bdrv_file_open = blkreplay_open, .bdrv_close = blkreplay_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_getlength = blkreplay_getlength, .bdrv_co_preadv = blkreplay_co_preadv, diff --git a/block/blkverify.c b/block/blkverify.c index 43a940c2f5..9a1e21c6ad 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -320,6 +320,7 @@ static BlockDriver bdrv_blkverify = { .bdrv_parse_filename = blkverify_parse_filename, .bdrv_file_open = blkverify_open, .bdrv_close = blkverify_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_getlength = blkverify_getlength, .bdrv_refresh_filename = blkverify_refresh_filename, diff --git a/block/quorum.c b/block/quorum.c index 
bdbcec6e00..40205fb1b3 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -1133,6 +1133,8 @@ static BlockDriver bdrv_quorum = { .bdrv_add_child = quorum_add_child, .bdrv_del_child = quorum_del_child, + .bdrv_child_perm = bdrv_filter_default_perms, + .is_filter = true, .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter, }; diff --git a/block/raw-format.c b/block/raw-format.c index ce34d1b1cd..86fbc657eb 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -467,6 +467,7 @@ BlockDriver bdrv_raw = { .bdrv_reopen_abort = &raw_reopen_abort, .bdrv_open = &raw_open, .bdrv_close = &raw_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_create = &raw_create, .bdrv_co_preadv = &raw_co_preadv, .bdrv_co_pwritev = &raw_co_pwritev, diff --git a/block/replication.c b/block/replication.c index eff85c77ba..91465cbae9 100644 --- a/block/replication.c +++ b/block/replication.c @@ -660,6 +660,7 @@ BlockDriver bdrv_replication = { .bdrv_open = replication_open, .bdrv_close = replication_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_getlength = replication_getlength, .bdrv_co_readv = replication_co_readv, From 6b1a044afb363f03b51c9d0218bef44a34ea98c3 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 19 Dec 2016 15:21:48 +0100 Subject: [PATCH 09/46] block: Default .bdrv_child_perm() for format drivers Almost all format drivers have the same characteristics as far as permissions are concerned: They have one or more children for storing their own data and, more importantly, metadata (can be written to and grow even without external write requests, must be protected against other writers and present consistent data) and optionally a backing file (this is just data, so like for a filter, it only depends on what the parent nodes need). This provides a default implementation that can be shared by most of our format drivers. 
Signed-off-by: Kevin Wolf Acked-by: Fam Zheng Reviewed-by: Max Reitz --- block.c | 44 +++++++++++++++++++++++++++++++++++++++ include/block/block_int.h | 8 +++++++ 2 files changed, 52 insertions(+) diff --git a/block.c b/block.c index 064e9d77fa..d67819f2a2 100644 --- a/block.c +++ b/block.c @@ -1560,6 +1560,50 @@ void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, (c->shared_perm & DEFAULT_PERM_UNCHANGED); } +void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + bool backing = (role == &child_backing); + assert(role == &child_backing || role == &child_file); + + if (!backing) { + /* Apart from the modifications below, the same permissions are + * forwarded and left alone as for filters */ + bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared); + + /* Format drivers may touch metadata even if the guest doesn't write */ + if (!bdrv_is_read_only(bs)) { + perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE; + } + + /* bs->file always needs to be consistent because of the metadata. We + * can never allow other users to resize or write to it. */ + perm |= BLK_PERM_CONSISTENT_READ; + shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); + } else { + /* We want consistent read from backing files if the parent needs it. + * No other operations are performed on backing files. */ + perm &= BLK_PERM_CONSISTENT_READ; + + /* If the parent can deal with changing data, we're okay with a + * writable and resizable backing file. */ + /* TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? 
*/ + if (shared & BLK_PERM_WRITE) { + shared = BLK_PERM_WRITE | BLK_PERM_RESIZE; + } else { + shared = 0; + } + + shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD | + BLK_PERM_WRITE_UNCHANGED; + } + + *nperm = perm; + *nshared = shared; +} + static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs, bool check_new_perm) { diff --git a/include/block/block_int.h b/include/block/block_int.h index 17f4c2d6cd..eb0598e580 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -880,6 +880,14 @@ void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared); +/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by + * (non-raw) image formats: Like above for bs->backing, but for bs->file it + * requires WRITE | RESIZE for read-write images, always requires + * CONSISTENT_READ and doesn't share WRITE. */ +void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared); const char *bdrv_get_parent_name(const BlockDriverState *bs); void blk_dev_change_media_cb(BlockBackend *blk, bool load); From 862f215fabf31c80c953155fcb223fea5320bbdf Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 19 Dec 2016 16:36:02 +0100 Subject: [PATCH 10/46] block: Request child permissions in format drivers This makes use of the .bdrv_child_perm() implementation for formats that we just added. All format drivers expose the permissions they actually need now, so that they can be set accordingly and updated when parents are attached or detached. The only format not included here is raw, which was already converted with the other filter drivers. 
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block/bochs.c | 1 + block/cloop.c | 1 + block/crypto.c | 1 + block/dmg.c | 1 + block/parallels.c | 1 + block/qcow.c | 1 + block/qcow2.c | 1 + block/qed.c | 1 + block/vdi.c | 1 + block/vhdx.c | 1 + block/vmdk.c | 1 + block/vpc.c | 1 + 12 files changed, 12 insertions(+) diff --git a/block/bochs.c b/block/bochs.c index 7dd2ac4f51..516da56c3b 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -293,6 +293,7 @@ static BlockDriver bdrv_bochs = { .instance_size = sizeof(BDRVBochsState), .bdrv_probe = bochs_probe, .bdrv_open = bochs_open, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_refresh_limits = bochs_refresh_limits, .bdrv_co_preadv = bochs_co_preadv, .bdrv_close = bochs_close, diff --git a/block/cloop.c b/block/cloop.c index 877c9b0d1b..a6c7b9dbe6 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -290,6 +290,7 @@ static BlockDriver bdrv_cloop = { .instance_size = sizeof(BDRVCloopState), .bdrv_probe = cloop_probe, .bdrv_open = cloop_open, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_refresh_limits = cloop_refresh_limits, .bdrv_co_preadv = cloop_co_preadv, .bdrv_close = cloop_close, diff --git a/block/crypto.c b/block/crypto.c index 7cb2ff2946..4a2038888d 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -628,6 +628,7 @@ BlockDriver bdrv_crypto_luks = { .bdrv_probe = block_crypto_probe_luks, .bdrv_open = block_crypto_open_luks, .bdrv_close = block_crypto_close, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = block_crypto_create_luks, .bdrv_truncate = block_crypto_truncate, .create_opts = &block_crypto_create_opts_luks, diff --git a/block/dmg.c b/block/dmg.c index 8e387cdfe5..a7d25fc47b 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -697,6 +697,7 @@ static BlockDriver bdrv_dmg = { .bdrv_probe = dmg_probe, .bdrv_open = dmg_open, .bdrv_refresh_limits = dmg_refresh_limits, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_preadv = dmg_co_preadv, .bdrv_close = 
dmg_close, }; diff --git a/block/parallels.c b/block/parallels.c index b2ec09f7e6..6b0c0a917c 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -762,6 +762,7 @@ static BlockDriver bdrv_parallels = { .bdrv_probe = parallels_probe, .bdrv_open = parallels_open, .bdrv_close = parallels_close, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_get_block_status = parallels_co_get_block_status, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_flush_to_os = parallels_co_flush_to_os, diff --git a/block/qcow.c b/block/qcow.c index 038b05ab1b..eb5d54c65a 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -1052,6 +1052,7 @@ static BlockDriver bdrv_qcow = { .bdrv_probe = qcow_probe, .bdrv_open = qcow_open, .bdrv_close = qcow_close, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_reopen_prepare = qcow_reopen_prepare, .bdrv_create = qcow_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, diff --git a/block/qcow2.c b/block/qcow2.c index 21e61427eb..ef028f64fb 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3403,6 +3403,7 @@ BlockDriver bdrv_qcow2 = { .bdrv_reopen_commit = qcow2_reopen_commit, .bdrv_reopen_abort = qcow2_reopen_abort, .bdrv_join_options = qcow2_join_options, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = qcow2_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = qcow2_co_get_block_status, diff --git a/block/qed.c b/block/qed.c index 62a0a09326..d8f947aaa3 100644 --- a/block/qed.c +++ b/block/qed.c @@ -1704,6 +1704,7 @@ static BlockDriver bdrv_qed = { .bdrv_open = bdrv_qed_open, .bdrv_close = bdrv_qed_close, .bdrv_reopen_prepare = bdrv_qed_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = bdrv_qed_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = bdrv_qed_co_get_block_status, diff --git a/block/vdi.c b/block/vdi.c index 18b4773aac..fd6e26dfed 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -891,6 +891,7 @@ static BlockDriver bdrv_vdi = { 
.bdrv_open = vdi_open, .bdrv_close = vdi_close, .bdrv_reopen_prepare = vdi_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = vdi_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = vdi_co_get_block_status, diff --git a/block/vhdx.c b/block/vhdx.c index 9918ee98ff..ab747f6393 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1983,6 +1983,7 @@ static BlockDriver bdrv_vhdx = { .bdrv_open = vhdx_open, .bdrv_close = vhdx_close, .bdrv_reopen_prepare = vhdx_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_readv = vhdx_co_readv, .bdrv_co_writev = vhdx_co_writev, .bdrv_create = vhdx_create, diff --git a/block/vmdk.c b/block/vmdk.c index 9d68ec5a4e..f5e2fb5cf0 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -2359,6 +2359,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_open = vmdk_open, .bdrv_check = vmdk_check, .bdrv_reopen_prepare = vmdk_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_preadv = vmdk_co_preadv, .bdrv_co_pwritev = vmdk_co_pwritev, .bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed, diff --git a/block/vpc.c b/block/vpc.c index d0df2a1c54..b9c9832637 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -1067,6 +1067,7 @@ static BlockDriver bdrv_vpc = { .bdrv_open = vpc_open, .bdrv_close = vpc_close, .bdrv_reopen_prepare = vpc_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = vpc_create, .bdrv_co_preadv = vpc_co_preadv, From 91ef38257ad225f7fa17a6583fb792c0be9e8acf Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 20 Dec 2016 16:23:46 +0100 Subject: [PATCH 11/46] vvfat: Implement .bdrv_child_perm() vvfat is the last remaining driver that can have children, but doesn't implement .bdrv_child_perm() yet. The default handlers aren't suitable here, so let's implement a very simple driver-specific one that protects the internal child from being used by other users as well as our permissions permit. 
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block.c | 2 +- block/vvfat.c | 22 ++++++++++++++++++++++ include/block/block_int.h | 1 + 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index d67819f2a2..281ce7bec3 100644 --- a/block.c +++ b/block.c @@ -823,7 +823,7 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options, *child_flags = flags; } -static const BdrvChildRole child_backing = { +const BdrvChildRole child_backing = { .inherit_options = bdrv_backing_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, diff --git a/block/vvfat.c b/block/vvfat.c index 7f230be006..72b482cb1f 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -3052,6 +3052,27 @@ err: return ret; } +static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + BDRVVVFATState *s = bs->opaque; + + assert(c == s->qcow || role == &child_backing); + + if (c == s->qcow) { + /* This is a private node, nobody should try to attach to it */ + *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; + *nshared = BLK_PERM_WRITE_UNCHANGED; + } else { + /* The backing file is there so 'commit' can use it. vvfat doesn't + * access it in any way. 
*/ + *nperm = 0; + *nshared = BLK_PERM_ALL; + } +} + static void vvfat_close(BlockDriverState *bs) { BDRVVVFATState *s = bs->opaque; @@ -3077,6 +3098,7 @@ static BlockDriver bdrv_vvfat = { .bdrv_file_open = vvfat_open, .bdrv_refresh_limits = vvfat_refresh_limits, .bdrv_close = vvfat_close, + .bdrv_child_perm = vvfat_child_perm, .bdrv_co_preadv = vvfat_co_preadv, .bdrv_co_pwritev = vvfat_co_pwritev, diff --git a/include/block/block_int.h b/include/block/block_int.h index eb0598e580..63d54460e5 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -466,6 +466,7 @@ struct BdrvChildRole { extern const BdrvChildRole child_file; extern const BdrvChildRole child_format; +extern const BdrvChildRole child_backing; struct BdrvChild { BlockDriverState *bs; From 78e421c9fbed9d501d7ada84ddc786a92178a71d Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 20 Dec 2016 23:25:12 +0100 Subject: [PATCH 12/46] block: Require .bdrv_child_perm() with child nodes All block drivers that can have child nodes implement .bdrv_child_perm() now. Make this officially a requirement by asserting that only drivers without children can omit .bdrv_child_perm(). 
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 281ce7bec3..52c1f280dc 100644 --- a/block.c +++ b/block.c @@ -1360,8 +1360,9 @@ static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms, cumulative_shared_perms, errp); } - /* Drivers may not have .bdrv_child_perm() */ + /* Drivers that never have children can omit .bdrv_child_perm() */ if (!drv->bdrv_child_perm) { + assert(QLIST_EMPTY(&bs->children)); return 0; } @@ -1420,8 +1421,9 @@ static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms); } - /* Drivers may not have .bdrv_child_perm() */ + /* Drivers that never have children can omit .bdrv_child_perm() */ if (!drv->bdrv_child_perm) { + assert(QLIST_EMPTY(&bs->children)); return; } From f68c598be6a48995ca4c7cc42fc1f6e1195ec7aa Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 20 Dec 2016 15:51:12 +0100 Subject: [PATCH 13/46] block: Request real permissions in bdrv_attach_child() Now that all block drivers with children tell us what permissions they need from each of their children, bdrv_attach_child() can use this information and make the right requirements while trying to attach new children. 
Signed-off-by: Kevin Wolf Acked-by: Fam Zheng Reviewed-by: Max Reitz --- block.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 52c1f280dc..bed236747a 100644 --- a/block.c +++ b/block.c @@ -1680,10 +1680,16 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, Error **errp) { BdrvChild *child; + uint64_t perm, shared_perm; + + bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); + + assert(parent_bs->drv); + parent_bs->drv->bdrv_child_perm(parent_bs, NULL, child_role, + perm, shared_perm, &perm, &shared_perm); - /* FIXME Use real permissions */ child = bdrv_root_attach_child(child_bs, child_name, child_role, - 0, BLK_PERM_ALL, parent_bs, errp); + perm, shared_perm, parent_bs, errp); if (child == NULL) { return NULL; } From 981776b34875ef75b218a338e1831b8fc65ea6bd Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 20 Jan 2017 15:42:39 +0100 Subject: [PATCH 14/46] block: Add permissions to BlockBackend The BlockBackend can now store the permissions that its user requires. This is necessary because nodes can be ejected from or inserted into a BlockBackend and all of these operations must make sure that the user still gets what it requested initially. 
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block/block-backend.c | 27 +++++++++++++++++++++++++++ include/sysemu/block-backend.h | 2 ++ 2 files changed, 29 insertions(+) diff --git a/block/block-backend.c b/block/block-backend.c index 9bb45285ef..1ed75c6c15 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -59,6 +59,9 @@ struct BlockBackend { bool iostatus_enabled; BlockDeviceIoStatus iostatus; + uint64_t perm; + uint64_t shared_perm; + bool allow_write_beyond_eof; NotifierList remove_bs_notifiers, insert_bs_notifiers; @@ -126,6 +129,8 @@ BlockBackend *blk_new(void) blk = g_new0(BlockBackend, 1); blk->refcnt = 1; + blk->perm = 0; + blk->shared_perm = BLK_PERM_ALL; blk_set_enable_write_cache(blk, true); qemu_co_queue_init(&blk->public.throttled_reqs[0]); @@ -511,6 +516,27 @@ void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs) } } +/* + * Sets the permission bitmasks that the user of the BlockBackend needs. + */ +int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp) +{ + int ret; + + if (blk->root) { + ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp); + if (ret < 0) { + return ret; + } + } + + blk->perm = perm; + blk->shared_perm = shared_perm; + + return 0; +} + static int blk_do_attach_dev(BlockBackend *blk, void *dev) { if (blk->dev) { @@ -557,6 +583,7 @@ void blk_detach_dev(BlockBackend *blk, void *dev) blk->dev_ops = NULL; blk->dev_opaque = NULL; blk->guest_block_size = 512; + blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort); blk_unref(blk); } diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index f365a51acf..4a18e86b85 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -105,6 +105,8 @@ void blk_remove_bs(BlockBackend *blk); void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs); bool bdrv_has_blk(BlockDriverState *bs); bool bdrv_is_root_node(BlockDriverState *bs); +int 
blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp); void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); void blk_iostatus_enable(BlockBackend *blk); From 6d0eb64d5c6d57017c52a4f36ccae1db79215ee1 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 20 Jan 2017 17:07:26 +0100 Subject: [PATCH 15/46] block: Add permissions to blk_new() We want every user to be specific about the permissions it needs, so we'll pass the initial permissions as parameters to blk_new(). A user only needs to call blk_set_perm() if it wants to change the permissions after the fact. The permissions are stored in the BlockBackend and applied whenever a BlockDriverState should be attached in blk_insert_bs(). This does not include actually choosing the right set of permissions everywhere yet. Instead, the usual FIXME comment is added to each place and will be addressed in individual patches. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block.c | 2 +- block/backup.c | 3 ++- block/block-backend.c | 21 ++++++++++++++------- block/commit.c | 12 ++++++++---- block/mirror.c | 3 ++- block/qcow2.c | 2 +- blockdev.c | 4 ++-- blockjob.c | 3 ++- hmp.c | 3 ++- hw/block/fdc.c | 3 ++- hw/core/qdev-properties-system.c | 3 ++- hw/ide/qdev.c | 3 ++- hw/scsi/scsi-disk.c | 3 ++- include/sysemu/block-backend.h | 2 +- migration/block.c | 3 ++- nbd/server.c | 3 ++- tests/test-blockjob.c | 3 ++- tests/test-throttle.c | 7 ++++--- 18 files changed, 53 insertions(+), 30 deletions(-) diff --git a/block.c b/block.c index bed236747a..41b8b11424 100644 --- a/block.c +++ b/block.c @@ -2193,7 +2193,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, goto fail; } if (file_bs != NULL) { - file = blk_new(); + file = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); blk_insert_bs(file, file_bs); bdrv_unref(file_bs); diff --git a/block/backup.c b/block/backup.c index fe010e78e3..4b3c94c812 100644 --- a/block/backup.c +++ b/block/backup.c 
@@ -624,7 +624,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, goto error; } - job->target = blk_new(); + /* FIXME Use real permissions */ + job->target = blk_new(0, BLK_PERM_ALL); blk_insert_bs(job->target, target); job->on_source_error = on_source_error; diff --git a/block/block-backend.c b/block/block-backend.c index 1ed75c6c15..0319220a78 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -120,17 +120,23 @@ static const BdrvChildRole child_root = { /* * Create a new BlockBackend with a reference count of one. - * Store an error through @errp on failure, unless it's null. + * + * @perm is a bitmasks of BLK_PERM_* constants which describes the permissions + * to request for a block driver node that is attached to this BlockBackend. + * @shared_perm is a bitmask which describes which permissions may be granted + * to other users of the attached node. + * Both sets of permissions can be changed later using blk_set_perm(). + * * Return the new BlockBackend on success, null on failure. 
*/ -BlockBackend *blk_new(void) +BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm) { BlockBackend *blk; blk = g_new0(BlockBackend, 1); blk->refcnt = 1; - blk->perm = 0; - blk->shared_perm = BLK_PERM_ALL; + blk->perm = perm; + blk->shared_perm = shared_perm; blk_set_enable_write_cache(blk, true); qemu_co_queue_init(&blk->public.throttled_reqs[0]); @@ -161,7 +167,7 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, BlockBackend *blk; BlockDriverState *bs; - blk = blk_new(); + blk = blk_new(0, BLK_PERM_ALL); bs = bdrv_open(filename, reference, options, flags, errp); if (!bs) { blk_unref(blk); @@ -505,9 +511,10 @@ void blk_remove_bs(BlockBackend *blk) void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs) { bdrv_ref(bs); - /* FIXME Use real permissions */ + /* FIXME Error handling */ blk->root = bdrv_root_attach_child(bs, "root", &child_root, - 0, BLK_PERM_ALL, blk, &error_abort); + blk->perm, blk->shared_perm, blk, + &error_abort); notifier_list_notify(&blk->insert_bs_notifiers, blk); if (blk->public.throttle_state) { diff --git a/block/commit.c b/block/commit.c index c284e8535d..1897e982c5 100644 --- a/block/commit.c +++ b/block/commit.c @@ -275,10 +275,12 @@ void commit_start(const char *job_id, BlockDriverState *bs, block_job_add_bdrv(&s->common, overlay_bs); } - s->base = blk_new(); + /* FIXME Use real permissions */ + s->base = blk_new(0, BLK_PERM_ALL); blk_insert_bs(s->base, base); - s->top = blk_new(); + /* FIXME Use real permissions */ + s->top = blk_new(0, BLK_PERM_ALL); blk_insert_bs(s->top, top); s->active = bs; @@ -328,10 +330,12 @@ int bdrv_commit(BlockDriverState *bs) } } - src = blk_new(); + /* FIXME Use real permissions */ + src = blk_new(0, BLK_PERM_ALL); blk_insert_bs(src, bs); - backing = blk_new(); + /* FIXME Use real permissions */ + backing = blk_new(0, BLK_PERM_ALL); blk_insert_bs(backing, bs->backing->bs); length = blk_getlength(src); diff --git a/block/mirror.c b/block/mirror.c index 1b34b366d0..30398fb857 
100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1017,7 +1017,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, return; } - s->target = blk_new(); + /* FIXME Use real permissions */ + s->target = blk_new(0, BLK_PERM_ALL); blk_insert_bs(s->target, target); s->replaces = g_strdup(replaces); diff --git a/block/qcow2.c b/block/qcow2.c index ef028f64fb..0356e69e4e 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3262,7 +3262,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, } if (new_size) { - BlockBackend *blk = blk_new(); + BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); blk_insert_bs(blk, bs); ret = blk_truncate(blk, new_size); blk_unref(blk); diff --git a/blockdev.c b/blockdev.c index 8682bd81d8..cd5642dd2e 100644 --- a/blockdev.c +++ b/blockdev.c @@ -558,7 +558,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, if ((!file || !*file) && !qdict_size(bs_opts)) { BlockBackendRootState *blk_rs; - blk = blk_new(); + blk = blk_new(0, BLK_PERM_ALL); blk_rs = blk_get_root_state(blk); blk_rs->open_flags = bdrv_flags; blk_rs->read_only = read_only; @@ -2890,7 +2890,7 @@ void qmp_block_resize(bool has_device, const char *device, goto out; } - blk = blk_new(); + blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); blk_insert_bs(blk, bs); /* complete all in-flight operations before resizing the device */ diff --git a/blockjob.c b/blockjob.c index abee11bb08..508e0e5069 100644 --- a/blockjob.c +++ b/blockjob.c @@ -159,7 +159,8 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, } } - blk = blk_new(); + /* FIXME Use real permissions */ + blk = blk_new(0, BLK_PERM_ALL); blk_insert_bs(blk, bs); job = g_malloc0(driver->instance_size); diff --git a/hmp.c b/hmp.c index 83e287e0a4..020141b344 100644 --- a/hmp.c +++ b/hmp.c @@ -2050,7 +2050,8 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) if (!blk) { BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); if (bs) { - blk 
= local_blk = blk_new(); + /* FIXME Use real permissions */ + blk = local_blk = blk_new(0, BLK_PERM_ALL); blk_insert_bs(blk, bs); } else { goto fail; diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 17d29e7bc5..74f36344f6 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -533,7 +533,8 @@ static int floppy_drive_init(DeviceState *qdev) if (!dev->conf.blk) { /* Anonymous BlockBackend for an empty drive */ - dev->conf.blk = blk_new(); + /* FIXME Use real permissions */ + dev->conf.blk = blk_new(0, BLK_PERM_ALL); ret = blk_attach_dev(dev->conf.blk, qdev); assert(ret == 0); } diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 94f4d8bde4..cca4775fc7 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -78,7 +78,8 @@ static void parse_drive(DeviceState *dev, const char *str, void **ptr, if (!blk) { BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL); if (bs) { - blk = blk_new(); + /* FIXME Use real permissions */ + blk = blk_new(0, BLK_PERM_ALL); blk_insert_bs(blk, bs); blk_created = true; } diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c index dbaa75cf59..bb3c377800 100644 --- a/hw/ide/qdev.c +++ b/hw/ide/qdev.c @@ -170,7 +170,8 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind) return -1; } else { /* Anonymous BlockBackend for an empty drive */ - dev->conf.blk = blk_new(); + /* FIXME Use real permissions */ + dev->conf.blk = blk_new(0, BLK_PERM_ALL); } } diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index bbfb5dc289..546acc7b62 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2380,7 +2380,8 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); if (!dev->conf.blk) { - dev->conf.blk = blk_new(); + /* FIXME Use real permissions */ + dev->conf.blk = blk_new(0, BLK_PERM_ALL); } s->qdev.blocksize = 2048; diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 
4a18e86b85..6651f437db 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -84,7 +84,7 @@ typedef struct BlockBackendPublic { QLIST_ENTRY(BlockBackendPublic) round_robin; } BlockBackendPublic; -BlockBackend *blk_new(void); +BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm); BlockBackend *blk_new_open(const char *filename, const char *reference, QDict *options, int flags, Error **errp); int blk_get_refcnt(BlockBackend *blk); diff --git a/migration/block.c b/migration/block.c index ebc10e628d..6b7ffd4eb6 100644 --- a/migration/block.c +++ b/migration/block.c @@ -415,7 +415,8 @@ static void init_blk_migration(QEMUFile *f) } bmds = g_new0(BlkMigDevState, 1); - bmds->blk = blk_new(); + /* FIXME Use real permissions */ + bmds->blk = blk_new(0, BLK_PERM_ALL); bmds->blk_name = g_strdup(bdrv_get_device_name(bs)); bmds->bulk_completed = 0; bmds->total_sectors = sectors; diff --git a/nbd/server.c b/nbd/server.c index ac92fa0727..936d5aa465 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -892,7 +892,8 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size, BlockBackend *blk; NBDExport *exp = g_malloc0(sizeof(NBDExport)); - blk = blk_new(); + /* FIXME Use real permissions */ + blk = blk_new(0, BLK_PERM_ALL); blk_insert_bs(blk, bs); blk_set_enable_write_cache(blk, !writethrough); diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c index 068c9e419b..1dd1cfa45a 100644 --- a/tests/test-blockjob.c +++ b/tests/test-blockjob.c @@ -53,7 +53,8 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id, * BlockDriverState inserted. 
*/ static BlockBackend *create_blk(const char *name) { - BlockBackend *blk = blk_new(); + /* FIXME Use real permissions */ + BlockBackend *blk = blk_new(0, BLK_PERM_ALL); BlockDriverState *bs; bs = bdrv_open("null-co://", NULL, NULL, 0, &error_abort); diff --git a/tests/test-throttle.c b/tests/test-throttle.c index 363b59a38f..5846433c9f 100644 --- a/tests/test-throttle.c +++ b/tests/test-throttle.c @@ -593,9 +593,10 @@ static void test_groups(void) BlockBackend *blk1, *blk2, *blk3; BlockBackendPublic *blkp1, *blkp2, *blkp3; - blk1 = blk_new(); - blk2 = blk_new(); - blk3 = blk_new(); + /* FIXME Use real permissions */ + blk1 = blk_new(0, BLK_PERM_ALL); + blk2 = blk_new(0, BLK_PERM_ALL); + blk3 = blk_new(0, BLK_PERM_ALL); blkp1 = blk_get_public(blk1); blkp2 = blk_get_public(blk2); From d7086422b1c1e75e320519cfe26176db6ec97a37 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 13 Jan 2017 19:02:32 +0100 Subject: [PATCH 16/46] block: Add error parameter to blk_insert_bs() Now that blk_insert_bs() requests the BlockBackend permissions for the node it attaches to, it can fail. Instead of aborting, pass the errors to the callers. 
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block.c | 5 ++++- block/backup.c | 5 ++++- block/block-backend.c | 13 ++++++----- block/commit.c | 38 +++++++++++++++++++++++++------- block/mirror.c | 15 ++++++++++--- block/qcow2.c | 10 +++++++-- blockdev.c | 11 +++++++-- blockjob.c | 7 +++++- hmp.c | 6 ++++- hw/core/qdev-properties-system.c | 7 +++++- include/sysemu/block-backend.h | 2 +- migration/block.c | 2 +- nbd/server.c | 6 ++++- tests/test-blockjob.c | 2 +- 14 files changed, 100 insertions(+), 29 deletions(-) diff --git a/block.c b/block.c index 41b8b11424..5f2dd6fa47 100644 --- a/block.c +++ b/block.c @@ -2194,8 +2194,11 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, } if (file_bs != NULL) { file = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); - blk_insert_bs(file, file_bs); + blk_insert_bs(file, file_bs, &local_err); bdrv_unref(file_bs); + if (local_err) { + goto fail; + } qdict_put(options, "file", qstring_from_str(bdrv_get_node_name(file_bs))); diff --git a/block/backup.c b/block/backup.c index 4b3c94c812..f38d1d030e 100644 --- a/block/backup.c +++ b/block/backup.c @@ -626,7 +626,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, /* FIXME Use real permissions */ job->target = blk_new(0, BLK_PERM_ALL); - blk_insert_bs(job->target, target); + ret = blk_insert_bs(job->target, target, errp); + if (ret < 0) { + goto error; + } job->on_source_error = on_source_error; job->on_target_error = on_target_error; diff --git a/block/block-backend.c b/block/block-backend.c index 0319220a78..299948f96b 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -508,19 +508,22 @@ void blk_remove_bs(BlockBackend *blk) /* * Associates a new BlockDriverState with @blk. 
*/ -void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs) +int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) { - bdrv_ref(bs); - /* FIXME Error handling */ blk->root = bdrv_root_attach_child(bs, "root", &child_root, - blk->perm, blk->shared_perm, blk, - &error_abort); + blk->perm, blk->shared_perm, blk, errp); + if (blk->root == NULL) { + return -EPERM; + } + bdrv_ref(bs); notifier_list_notify(&blk->insert_bs_notifiers, blk); if (blk->public.throttle_state) { throttle_timers_attach_aio_context( &blk->public.throttle_timers, bdrv_get_aio_context(bs)); } + + return 0; } /* diff --git a/block/commit.c b/block/commit.c index 1897e982c5..2ad8138aac 100644 --- a/block/commit.c +++ b/block/commit.c @@ -220,6 +220,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, BlockDriverState *iter; BlockDriverState *overlay_bs; Error *local_err = NULL; + int ret; assert(top != bs); if (top == base) { @@ -256,8 +257,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); - block_job_unref(&s->common); - return; + goto fail; } } @@ -277,11 +277,17 @@ void commit_start(const char *job_id, BlockDriverState *bs, /* FIXME Use real permissions */ s->base = blk_new(0, BLK_PERM_ALL); - blk_insert_bs(s->base, base); + ret = blk_insert_bs(s->base, base, errp); + if (ret < 0) { + goto fail; + } /* FIXME Use real permissions */ s->top = blk_new(0, BLK_PERM_ALL); - blk_insert_bs(s->top, top); + ret = blk_insert_bs(s->top, top, errp); + if (ret < 0) { + goto fail; + } s->active = bs; @@ -294,6 +300,16 @@ void commit_start(const char *job_id, BlockDriverState *bs, trace_commit_start(bs, base, top, s); block_job_start(&s->common); + return; + +fail: + if (s->base) { + blk_unref(s->base); + } + if (s->top) { + blk_unref(s->top); + } + block_job_unref(&s->common); } @@ -332,11 +348,17 @@ int bdrv_commit(BlockDriverState 
*bs) /* FIXME Use real permissions */ src = blk_new(0, BLK_PERM_ALL); - blk_insert_bs(src, bs); - - /* FIXME Use real permissions */ backing = blk_new(0, BLK_PERM_ALL); - blk_insert_bs(backing, bs->backing->bs); + + ret = blk_insert_bs(src, bs, NULL); + if (ret < 0) { + goto ro_cleanup; + } + + ret = blk_insert_bs(backing, bs->backing->bs, NULL); + if (ret < 0) { + goto ro_cleanup; + } length = blk_getlength(src); if (length < 0) { diff --git a/block/mirror.c b/block/mirror.c index 30398fb857..063925a1f0 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -525,9 +525,12 @@ static void mirror_exit(BlockJob *job, void *opaque) bdrv_replace_in_backing_chain(to_replace, target_bs); bdrv_drained_end(target_bs); - /* We just changed the BDS the job BB refers to */ + /* We just changed the BDS the job BB refers to, so switch the BB back + * so the cleanup does the right thing. We don't need any permissions + * any more now. */ blk_remove_bs(job->blk); - blk_insert_bs(job->blk, src); + blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort); + blk_insert_bs(job->blk, src, &error_abort); } if (s->to_replace) { bdrv_op_unblock_all(s->to_replace, s->replace_blocker); @@ -995,6 +998,7 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, bool auto_complete) { MirrorBlockJob *s; + int ret; if (granularity == 0) { granularity = bdrv_get_default_bitmap_granularity(target); @@ -1019,7 +1023,12 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, /* FIXME Use real permissions */ s->target = blk_new(0, BLK_PERM_ALL); - blk_insert_bs(s->target, target); + ret = blk_insert_bs(s->target, target, errp); + if (ret < 0) { + blk_unref(s->target); + block_job_unref(&s->common); + return; + } s->replaces = g_strdup(replaces); s->on_source_error = on_source_error; diff --git a/block/qcow2.c b/block/qcow2.c index 0356e69e4e..6f79df8517 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3113,6 +3113,7 @@ static int qcow2_amend_options(BlockDriverState *bs, 
QemuOpts *opts, uint64_t cluster_size = s->cluster_size; bool encrypt; int refcount_bits = s->refcount_bits; + Error *local_err = NULL; int ret; QemuOptDesc *desc = opts->list->desc; Qcow2AmendHelperCBInfo helper_cb_info; @@ -3263,10 +3264,15 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, if (new_size) { BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); - blk_insert_bs(blk, bs); + ret = blk_insert_bs(blk, bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + blk_unref(blk); + return ret; + } + ret = blk_truncate(blk, new_size); blk_unref(blk); - if (ret < 0) { return ret; } diff --git a/blockdev.c b/blockdev.c index cd5642dd2e..84a64b77fb 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2436,6 +2436,7 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, BlockDriverState *bs, Error **errp) { bool has_device; + int ret; /* For BBs without a device, we can exchange the BDS tree at will */ has_device = blk_get_attached_dev(blk); @@ -2455,7 +2456,10 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, return; } - blk_insert_bs(blk, bs); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + return; + } if (!blk_dev_has_tray(blk)) { /* For tray-less devices, blockdev-close-tray is a no-op (or may not be @@ -2891,7 +2895,10 @@ void qmp_block_resize(bool has_device, const char *device, } blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); - blk_insert_bs(blk, bs); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + goto out; + } /* complete all in-flight operations before resizing the device */ bdrv_drain_all(); diff --git a/blockjob.c b/blockjob.c index 508e0e5069..72b7d4c3f2 100644 --- a/blockjob.c +++ b/blockjob.c @@ -128,6 +128,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, { BlockBackend *blk; BlockJob *job; + int ret; if (bs->job) { error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); @@ -161,7 +162,11 @@ void *block_job_create(const char *job_id, const 
BlockJobDriver *driver, /* FIXME Use real permissions */ blk = blk_new(0, BLK_PERM_ALL); - blk_insert_bs(blk, bs); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + blk_unref(blk); + return NULL; + } job = g_malloc0(driver->instance_size); error_setg(&job->blocker, "block device is in use by block job: %s", diff --git a/hmp.c b/hmp.c index 020141b344..e219f97239 100644 --- a/hmp.c +++ b/hmp.c @@ -2045,6 +2045,7 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) const char* device = qdict_get_str(qdict, "device"); const char* command = qdict_get_str(qdict, "command"); Error *err = NULL; + int ret; blk = blk_by_name(device); if (!blk) { @@ -2052,7 +2053,10 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) if (bs) { /* FIXME Use real permissions */ blk = local_blk = blk_new(0, BLK_PERM_ALL); - blk_insert_bs(blk, bs); + ret = blk_insert_bs(blk, bs, &err); + if (ret < 0) { + goto fail; + } } else { goto fail; } diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index cca4775fc7..66ba367a1d 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -73,6 +73,7 @@ static void parse_drive(DeviceState *dev, const char *str, void **ptr, { BlockBackend *blk; bool blk_created = false; + int ret; blk = blk_by_name(str); if (!blk) { @@ -80,8 +81,12 @@ static void parse_drive(DeviceState *dev, const char *str, void **ptr, if (bs) { /* FIXME Use real permissions */ blk = blk_new(0, BLK_PERM_ALL); - blk_insert_bs(blk, bs); blk_created = true; + + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + goto fail; + } } } if (!blk) { diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 6651f437db..08611136b7 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -102,7 +102,7 @@ BlockBackend *blk_by_public(BlockBackendPublic *public); BlockDriverState *blk_bs(BlockBackend *blk); void blk_remove_bs(BlockBackend *blk); -void blk_insert_bs(BlockBackend *blk, 
BlockDriverState *bs); +int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp); bool bdrv_has_blk(BlockDriverState *bs); bool bdrv_is_root_node(BlockDriverState *bs); int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, diff --git a/migration/block.c b/migration/block.c index 6b7ffd4eb6..d2599360a0 100644 --- a/migration/block.c +++ b/migration/block.c @@ -446,7 +446,7 @@ static void init_blk_migration(QEMUFile *f) BlockDriverState *bs = bmds_bs[i].bs; if (bmds) { - blk_insert_bs(bmds->blk, bs); + blk_insert_bs(bmds->blk, bs, &error_abort); alloc_aio_bitmap(bmds); error_setg(&bmds->blocker, "block device is in use by migration"); diff --git a/nbd/server.c b/nbd/server.c index 936d5aa465..89362ba760 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -891,10 +891,14 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size, { BlockBackend *blk; NBDExport *exp = g_malloc0(sizeof(NBDExport)); + int ret; /* FIXME Use real permissions */ blk = blk_new(0, BLK_PERM_ALL); - blk_insert_bs(blk, bs); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + goto fail; + } blk_set_enable_write_cache(blk, !writethrough); exp->refcount = 1; diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c index 1dd1cfa45a..143ce96fa1 100644 --- a/tests/test-blockjob.c +++ b/tests/test-blockjob.c @@ -60,7 +60,7 @@ static BlockBackend *create_blk(const char *name) bs = bdrv_open("null-co://", NULL, NULL, 0, &error_abort); g_assert_nonnull(bs); - blk_insert_bs(blk, bs); + blk_insert_bs(blk, bs, &error_abort); bdrv_unref(bs); if (name) { From 55880601d82d55cbfa3b5bd9757496b6ebbc527c Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 17 Feb 2017 15:07:38 +0100 Subject: [PATCH 17/46] block: Add BDRV_O_RESIZE for blk_new_open() blk_new_open() is a convenience function that processes flags rather than QDict options as a simple way to just open an image file. 
In order to keep it convenient in the future, it must automatically request the necessary permissions. This can easily be inferred from the flags for read and write, but we need another flag that tells us whether to get the resize permission. We can't just always request it because that means that no block jobs can run on the resulting BlockBackend (which is something that e.g. qemu-img commit wants to do), but we also can't simply never request it because most of the .bdrv_create() implementations call blk_truncate(). The solution is to introduce another flag that is passed by all users that want to resize the image. Signed-off-by: Kevin Wolf Acked-by: Fam Zheng Reviewed-by: Max Reitz --- block/parallels.c | 3 ++- block/qcow.c | 3 ++- block/qcow2.c | 6 ++++-- block/qed.c | 3 ++- block/sheepdog.c | 2 +- block/vdi.c | 3 ++- block/vhdx.c | 3 ++- block/vmdk.c | 6 ++++-- block/vpc.c | 3 ++- include/block/block.h | 1 + qemu-img.c | 2 +- 11 files changed, 23 insertions(+), 12 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 6b0c0a917c..19935e29a9 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -488,7 +488,8 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) } file = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (file == NULL) { error_propagate(errp, local_err); return -EIO; diff --git a/block/qcow.c b/block/qcow.c index eb5d54c65a..9d6ac83959 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -823,7 +823,8 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) } qcow_blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (qcow_blk == NULL) { error_propagate(errp, local_err); ret = -EIO; diff --git a/block/qcow2.c b/block/qcow2.c index 6f79df8517..6a92d2ef3f 100644 --- a/block/qcow2.c +++
b/block/qcow2.c @@ -2202,7 +2202,8 @@ static int qcow2_create2(const char *filename, int64_t total_size, } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); return -EIO; @@ -2266,7 +2267,8 @@ static int qcow2_create2(const char *filename, int64_t total_size, options = qdict_new(); qdict_put(options, "driver", qstring_from_str("qcow2")); blk = blk_new_open(filename, NULL, options, - BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; diff --git a/block/qed.c b/block/qed.c index d8f947aaa3..5ec7fd83f2 100644 --- a/block/qed.c +++ b/block/qed.c @@ -625,7 +625,8 @@ static int qed_create(const char *filename, uint32_t cluster_size, } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); return -EIO; diff --git a/block/sheepdog.c b/block/sheepdog.c index 860ba61502..743471043e 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1609,7 +1609,7 @@ static int sd_prealloc(const char *filename, Error **errp) int ret; blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, errp); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); if (blk == NULL) { ret = -EIO; goto out_with_err_set; diff --git a/block/vdi.c b/block/vdi.c index fd6e26dfed..9b4f70e977 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -763,7 +763,8 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; diff --git a/block/vhdx.c 
b/block/vhdx.c index ab747f6393..052a753159 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1859,7 +1859,8 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; diff --git a/block/vmdk.c b/block/vmdk.c index f5e2fb5cf0..a9bd22bf93 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1703,7 +1703,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -2071,7 +2072,8 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) } new_blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (new_blk == NULL) { error_propagate(errp, local_err); ret = -EIO; diff --git a/block/vpc.c b/block/vpc.c index b9c9832637..f591d4be38 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -915,7 +915,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; diff --git a/include/block/block.h b/include/block/block.h index ff951ea772..07f7561886 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -82,6 +82,7 @@ typedef struct HDGeometry { } HDGeometry; #define BDRV_O_RDWR 0x0002 +#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */ #define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */ #define 
BDRV_O_TEMPORARY 0x0010 /* delete the file after use */ #define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ diff --git a/qemu-img.c b/qemu-img.c index caa76a7400..a48a471042 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -3419,7 +3419,7 @@ static int img_resize(int argc, char **argv) qemu_opts_del(param); blk = img_open(image_opts, filename, fmt, - BDRV_O_RDWR, false, quiet); + BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet); if (!blk) { ret = -1; goto out; From c62d32f503b37322a3960bad4cd4cdb69947d81e Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 13 Jan 2017 19:13:00 +0100 Subject: [PATCH 18/46] block: Request real permissions in blk_new_open() We can figure out the necessary permissions from the flags that the caller passed. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block/block-backend.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 299948f96b..03d5495a87 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -166,17 +166,33 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, { BlockBackend *blk; BlockDriverState *bs; + uint64_t perm; - blk = blk_new(0, BLK_PERM_ALL); + /* blk_new_open() is mainly used in .bdrv_create implementations and the + * tools where sharing isn't a concern because the BDS stays private, so we + * just request permission according to the flags. + * + * The exceptions are xen_disk and blockdev_init(); in these cases, the + * caller of blk_new_open() doesn't make use of the permissions, but they + * shouldn't hurt either. We can still share everything here because the + * guest devices will add their own blockers if they can't share. 
*/ + perm = BLK_PERM_CONSISTENT_READ; + if (flags & BDRV_O_RDWR) { + perm |= BLK_PERM_WRITE; + } + if (flags & BDRV_O_RESIZE) { + perm |= BLK_PERM_RESIZE; + } + + blk = blk_new(perm, BLK_PERM_ALL); bs = bdrv_open(filename, reference, options, flags, errp); if (!bs) { blk_unref(blk); return NULL; } - /* FIXME Use real permissions */ blk->root = bdrv_root_attach_child(bs, "root", &child_root, - 0, BLK_PERM_ALL, blk, &error_abort); + perm, BLK_PERM_ALL, blk, &error_abort); return blk; } From 39829a01ae524788c68dc0794e6912faa898eb75 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 24 Jan 2017 14:21:41 +0100 Subject: [PATCH 19/46] block: Allow error return in BlockDevOps.change_media_cb() Some devices allow a media change between read-only and read-write media. They need to adapt the permissions in their .change_media_cb() implementation, which can fail. So add an Error parameter to the function. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block/block-backend.c | 20 +++++++++++++++----- blockdev.c | 19 +++++++++++++++---- hw/block/fdc.c | 2 +- hw/ide/core.c | 2 +- hw/scsi/scsi-disk.c | 2 +- hw/sd/sd.c | 2 +- include/block/block_int.h | 2 +- include/sysemu/block-backend.h | 2 +- 8 files changed, 36 insertions(+), 15 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 03d5495a87..fcc42b591e 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -677,19 +677,29 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, /* * Notify @blk's attached device model of media change. - * If @load is true, notify of media load. - * Else, notify of media eject. + * + * If @load is true, notify of media load. This action can fail, meaning that + * the medium cannot be loaded. @errp is set then. + * + * If @load is false, notify of media eject. This can never fail. + * * Also send DEVICE_TRAY_MOVED events as appropriate. 
*/ -void blk_dev_change_media_cb(BlockBackend *blk, bool load) +void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp) { if (blk->dev_ops && blk->dev_ops->change_media_cb) { bool tray_was_open, tray_is_open; + Error *local_err = NULL; assert(!blk->legacy_dev); tray_was_open = blk_dev_is_tray_open(blk); - blk->dev_ops->change_media_cb(blk->dev_opaque, load); + blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err); + if (local_err) { + assert(load == true); + error_propagate(errp, local_err); + return; + } tray_is_open = blk_dev_is_tray_open(blk); if (tray_was_open != tray_is_open) { @@ -703,7 +713,7 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load) static void blk_root_change_media(BdrvChild *child, bool load) { - blk_dev_change_media_cb(child->opaque, load); + blk_dev_change_media_cb(child->opaque, load, NULL); } /* diff --git a/blockdev.c b/blockdev.c index 84a64b77fb..0a0226bca1 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2311,7 +2311,7 @@ static int do_open_tray(const char *blk_name, const char *qdev_id, } if (!locked || force) { - blk_dev_change_media_cb(blk, false); + blk_dev_change_media_cb(blk, false, &error_abort); } if (locked && !force) { @@ -2349,6 +2349,7 @@ void qmp_blockdev_close_tray(bool has_device, const char *device, Error **errp) { BlockBackend *blk; + Error *local_err = NULL; device = has_device ? device : NULL; id = has_id ? id : NULL; @@ -2372,7 +2373,11 @@ void qmp_blockdev_close_tray(bool has_device, const char *device, return; } - blk_dev_change_media_cb(blk, true); + blk_dev_change_media_cb(blk, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } } void qmp_x_blockdev_remove_medium(bool has_device, const char *device, @@ -2425,7 +2430,7 @@ void qmp_x_blockdev_remove_medium(bool has_device, const char *device, * called at all); therefore, the medium needs to be ejected here. 
* Do it after blk_remove_bs() so blk_is_inserted(blk) returns the @load * value passed here (i.e. false). */ - blk_dev_change_media_cb(blk, false); + blk_dev_change_media_cb(blk, false, &error_abort); } out: @@ -2435,6 +2440,7 @@ out: static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, BlockDriverState *bs, Error **errp) { + Error *local_err = NULL; bool has_device; int ret; @@ -2467,7 +2473,12 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, * slot here. * Do it after blk_insert_bs() so blk_is_inserted(blk) returns the @load * value passed here (i.e. true). */ - blk_dev_change_media_cb(blk, true); + blk_dev_change_media_cb(blk, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + blk_remove_bs(blk); + return; + } } } diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 74f36344f6..5f6c496f7a 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -469,7 +469,7 @@ static void fd_revalidate(FDrive *drv) } } -static void fd_change_cb(void *opaque, bool load) +static void fd_change_cb(void *opaque, bool load, Error **errp) { FDrive *drive = opaque; diff --git a/hw/ide/core.c b/hw/ide/core.c index cfa5de6ebf..db509b3e15 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -1120,7 +1120,7 @@ static void ide_cfata_metadata_write(IDEState *s) } /* called when the inserted state of the media has changed */ -static void ide_cd_change_cb(void *opaque, bool load) +static void ide_cd_change_cb(void *opaque, bool load, Error **errp) { IDEState *s = opaque; uint64_t nb_sectors; diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 546acc7b62..c1ccfad1ee 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2240,7 +2240,7 @@ static void scsi_disk_resize_cb(void *opaque) } } -static void scsi_cd_change_media_cb(void *opaque, bool load) +static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp) { SCSIDiskState *s = opaque; diff --git a/hw/sd/sd.c b/hw/sd/sd.c index 8e88e8311a..8e31491089 100644 --- 
a/hw/sd/sd.c +++ b/hw/sd/sd.c @@ -458,7 +458,7 @@ static bool sd_get_readonly(SDState *sd) return sd->wp_switch; } -static void sd_cardchange(void *opaque, bool load) +static void sd_cardchange(void *opaque, bool load, Error **errp) { SDState *sd = opaque; DeviceState *dev = DEVICE(sd); diff --git a/include/block/block_int.h b/include/block/block_int.h index 63d54460e5..e00d0f4c8c 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -891,7 +891,7 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, uint64_t *nperm, uint64_t *nshared); const char *bdrv_get_parent_name(const BlockDriverState *bs); -void blk_dev_change_media_cb(BlockBackend *blk, bool load); +void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp); bool blk_dev_has_removable_media(BlockBackend *blk); bool blk_dev_has_tray(BlockBackend *blk); void blk_dev_eject_request(BlockBackend *blk, bool force); diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 08611136b7..b23f6830db 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -34,7 +34,7 @@ typedef struct BlockDevOps { * changes. Sure would be useful if it did. * Device models with removable media must implement this callback. */ - void (*change_media_cb)(void *opaque, bool load); + void (*change_media_cb)(void *opaque, bool load, Error **errp); /* * Runs when an eject request is issued from the monitor, the tray * is closed, and the medium is locked. From a17c17a274f24f0c0259f89d288f29b8ce0511aa Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 24 Jan 2017 13:43:31 +0100 Subject: [PATCH 20/46] hw/block: Request permissions This makes all device emulations with a qdev drive property request permissions on their BlockBackend. The only thing we block at this point is resizing images for some devices that can't support it. 
Signed-off-by: Kevin Wolf Acked-by: Fam Zheng Reviewed-by: Max Reitz --- hw/block/block.c | 22 +++++++++++++++++++++- hw/block/fdc.c | 25 +++++++++++++++++++++++-- hw/block/m25p80.c | 8 ++++++++ hw/block/nand.c | 7 +++++++ hw/block/nvme.c | 8 +++++++- hw/block/onenand.c | 7 +++++++ hw/block/pflash_cfi01.c | 18 ++++++++++++------ hw/block/pflash_cfi02.c | 19 +++++++++++++------ hw/block/virtio-blk.c | 8 +++++++- hw/core/qdev-properties-system.c | 1 - hw/ide/qdev.c | 8 ++++++-- hw/nvram/spapr_nvram.c | 8 ++++++++ hw/scsi/scsi-disk.c | 9 +++++++-- hw/sd/sd.c | 6 ++++++ hw/usb/dev-storage.c | 6 +++++- include/hw/block/block.h | 3 ++- tests/qemu-iotests/051.pc.out | 6 +++--- 17 files changed, 142 insertions(+), 27 deletions(-) diff --git a/hw/block/block.c b/hw/block/block.c index 8dc9d84a39..7059ba1420 100644 --- a/hw/block/block.c +++ b/hw/block/block.c @@ -51,11 +51,31 @@ void blkconf_blocksizes(BlockConf *conf) } } -void blkconf_apply_backend_options(BlockConf *conf) +void blkconf_apply_backend_options(BlockConf *conf, bool readonly, + bool resizable, Error **errp) { BlockBackend *blk = conf->blk; BlockdevOnError rerror, werror; + uint64_t perm, shared_perm; bool wce; + int ret; + + perm = BLK_PERM_CONSISTENT_READ; + if (!readonly) { + perm |= BLK_PERM_WRITE; + } + + /* TODO Remove BLK_PERM_WRITE unless explicitly configured so */ + shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_GRAPH_MOD | BLK_PERM_WRITE; + if (resizable) { + shared_perm |= BLK_PERM_RESIZE; + } + + ret = blk_set_perm(blk, perm, shared_perm, errp); + if (ret < 0) { + return; + } switch (conf->wce) { case ON_OFF_AUTO_ON: wce = true; break; diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 5f6c496f7a..a328693d15 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -186,6 +186,7 @@ typedef enum FDiskFlags { struct FDrive { FDCtrl *fdctrl; BlockBackend *blk; + BlockConf *conf; /* Drive status */ FloppyDriveType drive; /* CMOS drive type */ uint8_t perpendicular; /* 2.88 
MB access mode */ @@ -472,6 +473,19 @@ static void fd_revalidate(FDrive *drv) static void fd_change_cb(void *opaque, bool load, Error **errp) { FDrive *drive = opaque; + Error *local_err = NULL; + + if (!load) { + blk_set_perm(drive->blk, 0, BLK_PERM_ALL, &error_abort); + } else { + blkconf_apply_backend_options(drive->conf, + blk_is_read_only(drive->blk), false, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } drive->media_changed = 1; drive->media_validated = false; @@ -508,6 +522,7 @@ static int floppy_drive_init(DeviceState *qdev) FloppyDrive *dev = FLOPPY_DRIVE(qdev); FloppyBus *bus = FLOPPY_BUS(qdev->parent_bus); FDrive *drive; + Error *local_err = NULL; int ret; if (dev->unit == -1) { @@ -533,7 +548,6 @@ static int floppy_drive_init(DeviceState *qdev) if (!dev->conf.blk) { /* Anonymous BlockBackend for an empty drive */ - /* FIXME Use real permissions */ dev->conf.blk = blk_new(0, BLK_PERM_ALL); ret = blk_attach_dev(dev->conf.blk, qdev); assert(ret == 0); @@ -552,7 +566,13 @@ static int floppy_drive_init(DeviceState *qdev) * blkconf_apply_backend_options(). */ dev->conf.rerror = BLOCKDEV_ON_ERROR_AUTO; dev->conf.werror = BLOCKDEV_ON_ERROR_AUTO; - blkconf_apply_backend_options(&dev->conf); + + blkconf_apply_backend_options(&dev->conf, blk_is_read_only(dev->conf.blk), + false, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } /* 'enospc' is the default for -drive, 'report' is what blk_new() gives us * for empty drives. 
*/ @@ -566,6 +586,7 @@ static int floppy_drive_init(DeviceState *qdev) return -1; } + drive->conf = &dev->conf; drive->blk = dev->conf.blk; drive->fdctrl = bus->fdc; diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c index 2d6eb46a04..190573cefa 100644 --- a/hw/block/m25p80.c +++ b/hw/block/m25p80.c @@ -1215,6 +1215,7 @@ static void m25p80_realize(SSISlave *ss, Error **errp) { Flash *s = M25P80(ss); M25P80Class *mc = M25P80_GET_CLASS(s); + int ret; s->pi = mc->pi; @@ -1222,6 +1223,13 @@ static void m25p80_realize(SSISlave *ss, Error **errp) s->dirty_page = -1; if (s->blk) { + uint64_t perm = BLK_PERM_CONSISTENT_READ | + (blk_is_read_only(s->blk) ? 0 : BLK_PERM_WRITE); + ret = blk_set_perm(s->blk, perm, BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } + DB_PRINT_L(0, "Binding to IF_MTD drive\n"); s->storage = blk_blockalign(s->blk, s->size); diff --git a/hw/block/nand.c b/hw/block/nand.c index c69e6755d9..0d33ac281f 100644 --- a/hw/block/nand.c +++ b/hw/block/nand.c @@ -373,6 +373,8 @@ static void nand_realize(DeviceState *dev, Error **errp) { int pagesize; NANDFlashState *s = NAND(dev); + int ret; + s->buswidth = nand_flash_ids[s->chip_id].width >> 3; s->size = nand_flash_ids[s->chip_id].size << 20; @@ -407,6 +409,11 @@ static void nand_realize(DeviceState *dev, Error **errp) error_setg(errp, "Can't use a read-only drive"); return; } + ret = blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } if (blk_getlength(s->blk) >= (s->pages << s->page_shift) + (s->pages << s->oob_shift)) { pagesize = 0; diff --git a/hw/block/nvme.c b/hw/block/nvme.c index ae91a18f17..ae303d44e5 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -835,6 +835,7 @@ static int nvme_init(PCIDevice *pci_dev) int i; int64_t bs_size; uint8_t *pci_conf; + Error *local_err = NULL; if (!n->conf.blk) { return -1; @@ -850,7 +851,12 @@ static int nvme_init(PCIDevice *pci_dev) return -1; } blkconf_blocksizes(&n->conf); - 
blkconf_apply_backend_options(&n->conf); + blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), + false, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } pci_conf = pci_dev->config; pci_conf[PCI_INTERRUPT_PIN] = 1; diff --git a/hw/block/onenand.c b/hw/block/onenand.c index 8d8422739e..ddf5492426 100644 --- a/hw/block/onenand.c +++ b/hw/block/onenand.c @@ -778,6 +778,7 @@ static int onenand_initfn(SysBusDevice *sbd) OneNANDState *s = ONE_NAND(dev); uint32_t size = 1 << (24 + ((s->id.dev >> 4) & 7)); void *ram; + Error *local_err = NULL; s->base = (hwaddr)-1; s->rdy = NULL; @@ -796,6 +797,12 @@ static int onenand_initfn(SysBusDevice *sbd) error_report("Can't use a read-only drive"); return -1; } + blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } s->blk_cur = s->blk; } s->otp = memset(g_malloc((64 + 2) << PAGE_SHIFT), diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c index 71b98a3eef..594d4cf6fe 100644 --- a/hw/block/pflash_cfi01.c +++ b/hw/block/pflash_cfi01.c @@ -757,6 +757,18 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) pfl->storage = memory_region_get_ram_ptr(&pfl->mem); sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem); + if (pfl->blk) { + uint64_t perm; + pfl->ro = blk_is_read_only(pfl->blk); + perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE); + ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } + } else { + pfl->ro = 0; + } + if (pfl->blk) { /* read the initial flash content */ ret = blk_pread(pfl->blk, 0, pfl->storage, total_len); @@ -768,12 +780,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) } } - if (pfl->blk) { - pfl->ro = blk_is_read_only(pfl->blk); - } else { - pfl->ro = 0; - } - /* Default to devices being used at their maximum device width. 
This was * assumed before the device_width support was added. */ diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c index ef71322759..e6c5c6c25d 100644 --- a/hw/block/pflash_cfi02.c +++ b/hw/block/pflash_cfi02.c @@ -632,6 +632,19 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp) vmstate_register_ram(&pfl->orig_mem, DEVICE(pfl)); pfl->storage = memory_region_get_ram_ptr(&pfl->orig_mem); pfl->chip_len = chip_len; + + if (pfl->blk) { + uint64_t perm; + pfl->ro = blk_is_read_only(pfl->blk); + perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE); + ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } + } else { + pfl->ro = 0; + } + if (pfl->blk) { /* read the initial flash content */ ret = blk_pread(pfl->blk, 0, pfl->storage, chip_len); @@ -646,12 +659,6 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp) pfl->rom_mode = 1; sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem); - if (pfl->blk) { - pfl->ro = blk_is_read_only(pfl->blk); - } else { - pfl->ro = 0; - } - pfl->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pflash_timer, pfl); pfl->wcycle = 0; pfl->cmd = 0; diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 843bd2fa73..98c16a7a9a 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -928,7 +928,13 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) } blkconf_serial(&conf->conf, &conf->serial); - blkconf_apply_backend_options(&conf->conf); + blkconf_apply_backend_options(&conf->conf, + blk_is_read_only(conf->conf.blk), true, + &err); + if (err) { + error_propagate(errp, err); + return; + } s->original_wce = blk_enable_write_cache(conf->conf.blk); blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, &err); if (err) { diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 66ba367a1d..c34be1c1ba 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -79,7 +79,6 @@ static 
void parse_drive(DeviceState *dev, const char *str, void **ptr, if (!blk) { BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL); if (bs) { - /* FIXME Use real permissions */ blk = blk_new(0, BLK_PERM_ALL); blk_created = true; diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c index bb3c377800..4383cd111d 100644 --- a/hw/ide/qdev.c +++ b/hw/ide/qdev.c @@ -170,7 +170,6 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind) return -1; } else { /* Anonymous BlockBackend for an empty drive */ - /* FIXME Use real permissions */ dev->conf.blk = blk_new(0, BLK_PERM_ALL); } } @@ -197,7 +196,12 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind) return -1; } } - blkconf_apply_backend_options(&dev->conf); + blkconf_apply_backend_options(&dev->conf, kind == IDE_CD, kind != IDE_CD, + &err); + if (err) { + error_report_err(err); + return -1; + } if (ide_init_drive(s, dev->conf.blk, kind, dev->version, dev->serial, dev->model, dev->wwn, diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index 65ba188555..aa5d2c1f5f 100644 --- a/hw/nvram/spapr_nvram.c +++ b/hw/nvram/spapr_nvram.c @@ -141,9 +141,17 @@ static void rtas_nvram_store(PowerPCCPU *cpu, sPAPRMachineState *spapr, static void spapr_nvram_realize(VIOsPAPRDevice *dev, Error **errp) { sPAPRNVRAM *nvram = VIO_SPAPR_NVRAM(dev); + int ret; if (nvram->blk) { nvram->size = blk_getlength(nvram->blk); + + ret = blk_set_perm(nvram->blk, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } } else { nvram->size = DEFAULT_NVRAM_SIZE; } diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index c1ccfad1ee..a53f058621 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2328,7 +2328,13 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) return; } } - blkconf_apply_backend_options(&dev->conf); + blkconf_apply_backend_options(&dev->conf, + blk_is_read_only(s->qdev.conf.blk), + dev->type == TYPE_DISK, &err); + if (err) { + error_propagate(errp, err); + 
return; + } if (s->qdev.conf.discard_granularity == -1) { s->qdev.conf.discard_granularity = @@ -2380,7 +2386,6 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); if (!dev->conf.blk) { - /* FIXME Use real permissions */ dev->conf.blk = blk_new(0, BLK_PERM_ALL); } diff --git a/hw/sd/sd.c b/hw/sd/sd.c index 8e31491089..ba47bff4db 100644 --- a/hw/sd/sd.c +++ b/hw/sd/sd.c @@ -1887,6 +1887,7 @@ static void sd_instance_finalize(Object *obj) static void sd_realize(DeviceState *dev, Error **errp) { SDState *sd = SD_CARD(dev); + int ret; if (sd->blk && blk_is_read_only(sd->blk)) { error_setg(errp, "Cannot use read-only drive as SD card"); @@ -1894,6 +1895,11 @@ static void sd_realize(DeviceState *dev, Error **errp) } if (sd->blk) { + ret = blk_set_perm(sd->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } blk_set_dev_ops(sd->blk, &sd_block_ops, sd); } } diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c index c607f7606d..a71b354fa6 100644 --- a/hw/usb/dev-storage.c +++ b/hw/usb/dev-storage.c @@ -603,7 +603,11 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp) blkconf_serial(&s->conf, &dev->serial); blkconf_blocksizes(&s->conf); - blkconf_apply_backend_options(&s->conf); + blkconf_apply_backend_options(&s->conf, blk_is_read_only(blk), true, &err); + if (err) { + error_propagate(errp, err); + return; + } /* * Hack alert: this pretends to be a block device, but it's really diff --git a/include/hw/block/block.h b/include/hw/block/block.h index df9d207d81..5d462eb6e4 100644 --- a/include/hw/block/block.h +++ b/include/hw/block/block.h @@ -73,7 +73,8 @@ void blkconf_geometry(BlockConf *conf, int *trans, unsigned cyls_max, unsigned heads_max, unsigned secs_max, Error **errp); void blkconf_blocksizes(BlockConf *conf); -void blkconf_apply_backend_options(BlockConf *conf); +void blkconf_apply_backend_options(BlockConf *conf, bool readonly, + 
bool resizable, Error **errp); /* Hard disk geometry */ diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out index e206ad6c29..c6f4eef215 100644 --- a/tests/qemu-iotests/051.pc.out +++ b/tests/qemu-iotests/051.pc.out @@ -179,7 +179,7 @@ qququiquit Testing: -drive file=TEST_DIR/t.qcow2,if=ide,readonly=on QEMU X.Y.Z monitor - type 'help' for more information -(qemu) QEMU_PROG: Can't use a read-only drive +(qemu) QEMU_PROG: Block node is read-only QEMU_PROG: Initialization of device ide-hd failed: Device initialization failed. Testing: -drive file=TEST_DIR/t.qcow2,if=scsi,readonly=on @@ -201,12 +201,12 @@ QEMU X.Y.Z monitor - type 'help' for more information Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device ide-drive,drive=disk QEMU X.Y.Z monitor - type 'help' for more information -(qemu) QEMU_PROG: -device ide-drive,drive=disk: Can't use a read-only drive +(qemu) QEMU_PROG: -device ide-drive,drive=disk: Block node is read-only QEMU_PROG: -device ide-drive,drive=disk: Device initialization failed. Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device ide-hd,drive=disk QEMU X.Y.Z monitor - type 'help' for more information -(qemu) QEMU_PROG: -device ide-hd,drive=disk: Can't use a read-only drive +(qemu) QEMU_PROG: -device ide-hd,drive=disk: Block node is read-only QEMU_PROG: -device ide-hd,drive=disk: Device initialization failed. Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk From dabd18f64c8800d441fd9fb232c2102e8409aa2e Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 24 Jan 2017 13:58:00 +0100 Subject: [PATCH 21/46] hw/block: Introduce share-rw qdev property By default, don't allow another writer for block devices that are attached to a guest device. For the cases where this setup is intended (e.g. using a cluster filesystem on the disk), the new option can be used to allow it. 
This change affects only devices using DEFINE_BLOCK_PROPERTIES(). Devices directly using DEFINE_PROP_DRIVE() still accept writers unconditionally. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- hw/block/block.c | 6 +++-- include/hw/block/block.h | 5 +++- tests/qemu-iotests/172.out | 53 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/hw/block/block.c b/hw/block/block.c index 7059ba1420..27878d0087 100644 --- a/hw/block/block.c +++ b/hw/block/block.c @@ -65,12 +65,14 @@ void blkconf_apply_backend_options(BlockConf *conf, bool readonly, perm |= BLK_PERM_WRITE; } - /* TODO Remove BLK_PERM_WRITE unless explicitly configured so */ shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | - BLK_PERM_GRAPH_MOD | BLK_PERM_WRITE; + BLK_PERM_GRAPH_MOD; if (resizable) { shared_perm |= BLK_PERM_RESIZE; } + if (conf->share_rw) { + shared_perm |= BLK_PERM_WRITE; + } ret = blk_set_perm(blk, perm, shared_perm, errp); if (ret < 0) { diff --git a/include/hw/block/block.h b/include/hw/block/block.h index 5d462eb6e4..f3f6e8ef02 100644 --- a/include/hw/block/block.h +++ b/include/hw/block/block.h @@ -26,6 +26,7 @@ typedef struct BlockConf { /* geometry, not all devices use this */ uint32_t cyls, heads, secs; OnOffAuto wce; + bool share_rw; BlockdevOnError rerror; BlockdevOnError werror; } BlockConf; @@ -53,7 +54,9 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \ DEFINE_PROP_UINT32("discard_granularity", _state, \ _conf.discard_granularity, -1), \ - DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, ON_OFF_AUTO_AUTO) + DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, \ + ON_OFF_AUTO_AUTO), \ + DEFINE_PROP_BOOL("share-rw", _state, _conf.share_rw, false) #define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf) \ DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0), \ diff --git a/tests/qemu-iotests/172.out 
b/tests/qemu-iotests/172.out index 6b7edaf28f..54b53293d7 100644 --- a/tests/qemu-iotests/172.out +++ b/tests/qemu-iotests/172.out @@ -28,6 +28,7 @@ Testing: opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" @@ -57,6 +58,7 @@ Testing: -fda TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 @@ -83,6 +85,7 @@ Testing: -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -93,6 +96,7 @@ Testing: -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2 @@ -119,6 +123,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -129,6 +134,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -158,6 +164,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 @@ -184,6 +191,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -194,6 +202,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) 
write-cache = "auto" + share-rw = false drive-type = "288" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 @@ -220,6 +229,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -230,6 +240,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -259,6 +270,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 @@ -285,6 +297,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -global isa-fdc.driveB=none1 @@ -311,6 +324,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -321,6 +335,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -350,6 +365,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false 
drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 @@ -376,6 +392,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 -device floppy,drive=none1,unit=1 @@ -402,6 +419,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -412,6 +430,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -441,6 +460,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -451,6 +471,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 @@ -477,6 +498,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -487,6 +509,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" 
+ share-rw = false drive-type = "144" Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 @@ -513,6 +536,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 @@ -539,6 +563,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -568,6 +593,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -578,6 +604,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 @@ -604,6 +631,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -614,6 +642,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 @@ -640,6 +669,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) 
discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -650,6 +680,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 @@ -676,6 +707,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -686,6 +718,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 @@ -723,6 +756,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -733,6 +767,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 @@ -759,6 +794,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -769,6 +805,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 
-drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 @@ -802,6 +839,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -812,6 +850,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1 @@ -838,6 +877,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -848,6 +888,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1 @@ -874,6 +915,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -884,6 +926,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + 
share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0 @@ -910,6 +953,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -920,6 +964,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0 @@ -964,6 +1009,7 @@ Testing: -device floppy opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" Testing: -device floppy,drive-type=120 @@ -990,6 +1036,7 @@ Testing: -device floppy,drive-type=120 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "120" Testing: -device floppy,drive-type=144 @@ -1016,6 +1063,7 @@ Testing: -device floppy,drive-type=144 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -device floppy,drive-type=288 @@ -1042,6 +1090,7 @@ Testing: -device floppy,drive-type=288 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" @@ -1071,6 +1120,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "120" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-type=288 @@ -1097,6 
+1147,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" @@ -1126,6 +1177,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physical_block_size=512 @@ -1152,6 +1204,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physica opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical_block_size=4096 From c6cc12bfa7bb9c61f4fa20491258b9bebc5b4771 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 16 Jan 2017 17:18:09 +0100 Subject: [PATCH 22/46] blockjob: Add permissions to block_job_create() This function creates a BlockBackend internally, so the block jobs need to tell it what they want to do with the BB. 
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block/backup.c | 5 +++-- block/commit.c | 5 +++-- block/mirror.c | 5 +++-- block/stream.c | 5 +++-- blockjob.c | 6 +++--- include/block/blockjob_int.h | 4 +++- tests/test-blockjob-txn.c | 6 +++--- tests/test-blockjob.c | 5 +++-- 8 files changed, 24 insertions(+), 17 deletions(-) diff --git a/block/backup.c b/block/backup.c index f38d1d030e..c7596840e4 100644 --- a/block/backup.c +++ b/block/backup.c @@ -618,8 +618,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, goto error; } - job = block_job_create(job_id, &backup_job_driver, bs, speed, - creation_flags, cb, opaque, errp); + /* FIXME Use real permissions */ + job = block_job_create(job_id, &backup_job_driver, bs, 0, BLK_PERM_ALL, + speed, creation_flags, cb, opaque, errp); if (!job) { goto error; } diff --git a/block/commit.c b/block/commit.c index 2ad8138aac..60d29a9c0f 100644 --- a/block/commit.c +++ b/block/commit.c @@ -235,8 +235,9 @@ void commit_start(const char *job_id, BlockDriverState *bs, return; } - s = block_job_create(job_id, &commit_job_driver, bs, speed, - BLOCK_JOB_DEFAULT, NULL, NULL, errp); + /* FIXME Use real permissions */ + s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL, + speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp); if (!s) { return; } diff --git a/block/mirror.c b/block/mirror.c index 063925a1f0..18128e6163 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1015,8 +1015,9 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, buf_size = DEFAULT_MIRROR_BUF_SIZE; } - s = block_job_create(job_id, driver, bs, speed, creation_flags, - cb, opaque, errp); + /* FIXME Use real permissions */ + s = block_job_create(job_id, driver, bs, 0, BLK_PERM_ALL, speed, + creation_flags, cb, opaque, errp); if (!s) { return; } diff --git a/block/stream.c b/block/stream.c index 1523ba7dfb..7f49279b38 100644 --- a/block/stream.c +++ b/block/stream.c @@ -229,8 +229,9 @@ void 
stream_start(const char *job_id, BlockDriverState *bs, BlockDriverState *iter; int orig_bs_flags; - s = block_job_create(job_id, &stream_job_driver, bs, speed, - BLOCK_JOB_DEFAULT, NULL, NULL, errp); + /* FIXME Use real permissions */ + s = block_job_create(job_id, &stream_job_driver, bs, 0, BLK_PERM_ALL, + speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp); if (!s) { return; } diff --git a/blockjob.c b/blockjob.c index 72b7d4c3f2..27833c7c0d 100644 --- a/blockjob.c +++ b/blockjob.c @@ -123,7 +123,8 @@ void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs) } void *block_job_create(const char *job_id, const BlockJobDriver *driver, - BlockDriverState *bs, int64_t speed, int flags, + BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, int64_t speed, int flags, BlockCompletionFunc *cb, void *opaque, Error **errp) { BlockBackend *blk; @@ -160,8 +161,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, } } - /* FIXME Use real permissions */ - blk = blk_new(0, BLK_PERM_ALL); + blk = blk_new(perm, shared_perm); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { blk_unref(blk); diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index 82238229c6..3f86cc5acc 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -119,6 +119,7 @@ struct BlockJobDriver { * generated automatically. * @job_type: The class object for the newly-created job. * @bs: The block + * @perm, @shared_perm: Permissions to request for @bs * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. @@ -134,7 +135,8 @@ struct BlockJobDriver { * called from a wrapper that is specific to the job type. 
*/ void *block_job_create(const char *job_id, const BlockJobDriver *driver, - BlockDriverState *bs, int64_t speed, int flags, + BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, int64_t speed, int flags, BlockCompletionFunc *cb, void *opaque, Error **errp); /** diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c index f6dfd08746..4ccbda14af 100644 --- a/tests/test-blockjob-txn.c +++ b/tests/test-blockjob-txn.c @@ -101,9 +101,9 @@ static BlockJob *test_block_job_start(unsigned int iterations, g_assert_nonnull(bs); snprintf(job_id, sizeof(job_id), "job%u", counter++); - s = block_job_create(job_id, &test_block_job_driver, bs, 0, - BLOCK_JOB_DEFAULT, test_block_job_cb, - data, &error_abort); + s = block_job_create(job_id, &test_block_job_driver, bs, + 0, BLK_PERM_ALL, 0, BLOCK_JOB_DEFAULT, + test_block_job_cb, data, &error_abort); s->iterations = iterations; s->use_timer = use_timer; s->rc = rc; diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c index 143ce96fa1..1afe17b449 100644 --- a/tests/test-blockjob.c +++ b/tests/test-blockjob.c @@ -30,8 +30,9 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id, BlockJob *job; Error *errp = NULL; - job = block_job_create(id, &test_block_job_driver, blk_bs(blk), 0, - BLOCK_JOB_DEFAULT, block_job_cb, NULL, &errp); + job = block_job_create(id, &test_block_job_driver, blk_bs(blk), + 0, BLK_PERM_ALL, 0, BLOCK_JOB_DEFAULT, block_job_cb, + NULL, &errp); if (should_succeed) { g_assert_null(errp); g_assert_nonnull(job); From b5411555877121b74cbe0a12a867d16d861746b1 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 17 Jan 2017 15:56:16 +0100 Subject: [PATCH 23/46] block: Add BdrvChildRole.get_parent_desc() For meaningful error messages in the permission system, we need to get some human-readable description of the parent of a BdrvChild. 
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block.c | 9 +++++++++ block/block-backend.c | 21 +++++++++++++++++++++ include/block/block_int.h | 6 ++++++ 3 files changed, 36 insertions(+) diff --git a/block.c b/block.c index 5f2dd6fa47..e03cc5da42 100644 --- a/block.c +++ b/block.c @@ -707,6 +707,12 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) return 0; } +static char *bdrv_child_get_parent_desc(BdrvChild *c) +{ + BlockDriverState *parent = c->opaque; + return g_strdup(bdrv_get_device_or_node_name(parent)); +} + static void bdrv_child_cb_drained_begin(BdrvChild *child) { BlockDriverState *bs = child->opaque; @@ -774,6 +780,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options, } const BdrvChildRole child_file = { + .get_parent_desc = bdrv_child_get_parent_desc, .inherit_options = bdrv_inherited_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, @@ -794,6 +801,7 @@ static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options, } const BdrvChildRole child_format = { + .get_parent_desc = bdrv_child_get_parent_desc, .inherit_options = bdrv_inherited_fmt_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, @@ -824,6 +832,7 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options, } const BdrvChildRole child_backing = { + .get_parent_desc = bdrv_child_get_parent_desc, .inherit_options = bdrv_backing_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, diff --git a/block/block-backend.c b/block/block-backend.c index fcc42b591e..38a3858e96 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -80,6 +80,7 @@ static const AIOCBInfo block_backend_aiocb_info = { static void drive_info_del(DriveInfo *dinfo); static BlockBackend *bdrv_first_blk(BlockDriverState *bs); +static char *blk_get_attached_dev_id(BlockBackend 
*blk); /* All BlockBackends */ static QTAILQ_HEAD(, BlockBackend) block_backends = @@ -102,6 +103,25 @@ static void blk_root_drained_end(BdrvChild *child); static void blk_root_change_media(BdrvChild *child, bool load); static void blk_root_resize(BdrvChild *child); +static char *blk_root_get_parent_desc(BdrvChild *child) +{ + BlockBackend *blk = child->opaque; + char *dev_id; + + if (blk->name) { + return g_strdup(blk->name); + } + + dev_id = blk_get_attached_dev_id(blk); + if (*dev_id) { + return dev_id; + } else { + /* TODO Callback into the BB owner for something more detailed */ + g_free(dev_id); + return g_strdup("a block device"); + } +} + static const char *blk_root_get_name(BdrvChild *child) { return blk_name(child->opaque); @@ -113,6 +133,7 @@ static const BdrvChildRole child_root = { .change_media = blk_root_change_media, .resize = blk_root_resize, .get_name = blk_root_get_name, + .get_parent_desc = blk_root_get_parent_desc, .drained_begin = blk_root_drained_begin, .drained_end = blk_root_drained_end, diff --git a/include/block/block_int.h b/include/block/block_int.h index e00d0f4c8c..3177b9f496 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -452,6 +452,12 @@ struct BdrvChildRole { * name), or NULL if the parent can't provide a better name. */ const char* (*get_name)(BdrvChild *child); + /* Returns a malloced string that describes the parent of the child for a + * human reader. This could be a node-name, BlockBackend name, qdev ID or + * QOM path of the device owning the BlockBackend, job type and ID etc. The + * caller is responsible for freeing the memory. 
*/ + char* (*get_parent_desc)(BdrvChild *child); + /* * If this pair of functions is implemented, the parent doesn't issue new * requests after returning from .drained_begin() until .drained_end() is From d083319fe007e100b38995d0ea254845c8efa433 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 16 Jan 2017 18:26:20 +0100 Subject: [PATCH 24/46] block: Include details on permission errors in message Instead of just telling that there was some conflict, we can be specific and tell which permissions were in conflict and which way the conflict is. Signed-off-by: Kevin Wolf Acked-by: Fam Zheng Reviewed-by: Max Reitz --- block.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/block.c b/block.c index e03cc5da42..f72a67f230 100644 --- a/block.c +++ b/block.c @@ -1462,6 +1462,43 @@ static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, *shared_perm = cumulative_shared_perms; } +static char *bdrv_child_user_desc(BdrvChild *c) +{ + if (c->role->get_parent_desc) { + return c->role->get_parent_desc(c); + } + + return g_strdup("another user"); +} + +static char *bdrv_perm_names(uint64_t perm) +{ + struct perm_name { + uint64_t perm; + const char *name; + } permissions[] = { + { BLK_PERM_CONSISTENT_READ, "consistent read" }, + { BLK_PERM_WRITE, "write" }, + { BLK_PERM_WRITE_UNCHANGED, "write unchanged" }, + { BLK_PERM_RESIZE, "resize" }, + { BLK_PERM_GRAPH_MOD, "change children" }, + { 0, NULL } + }; + + char *result = g_strdup(""); + struct perm_name *p; + + for (p = permissions; p->name; p++) { + if (perm & p->perm) { + char *old = result; + result = g_strdup_printf("%s%s%s", old, *old ? ", " : "", p->name); + g_free(old); + } + } + + return result; +} + /* * Checks whether a new reference to @bs can be added if the new user requires * @new_used_perm/@new_shared_perm as its permissions. 
If @ignore_child is set, @@ -1486,17 +1523,25 @@ static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm, continue; } - if ((new_used_perm & c->shared_perm) != new_used_perm || - (c->perm & new_shared_perm) != c->perm) - { - const char *user = NULL; - if (c->role->get_name) { - user = c->role->get_name(c); - if (user && !*user) { - user = NULL; - } - } - error_setg(errp, "Conflicts with %s", user ?: "another operation"); + if ((new_used_perm & c->shared_perm) != new_used_perm) { + char *user = bdrv_child_user_desc(c); + char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm); + error_setg(errp, "Conflicts with use by %s as '%s', which does not " + "allow '%s' on %s", + user, c->name, perm_names, bdrv_get_node_name(c->bs)); + g_free(user); + g_free(perm_names); + return -EPERM; + } + + if ((c->perm & new_shared_perm) != c->perm) { + char *user = bdrv_child_user_desc(c); + char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm); + error_setg(errp, "Conflicts with use by %s as '%s', which uses " + "'%s' on %s", + user, c->name, perm_names, bdrv_get_node_name(c->bs)); + g_free(user); + g_free(perm_names); return -EPERM; } From 26de9438c1b6013532fb95de0720e2696588332f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 17 Jan 2017 13:39:34 +0100 Subject: [PATCH 25/46] block: Add BdrvChildRole.stay_at_node When the parents' child links are updated in bdrv_append() or bdrv_replace_in_backing_chain(), this should affect all child links of BlockBackends or other nodes, but not on child links held for other purposes (like for setting permissions). This patch allows to control the behaviour per BdrvChildRole. 
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block.c | 3 +++ include/block/block_int.h | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/block.c b/block.c index f72a67f230..9e538a5d41 100644 --- a/block.c +++ b/block.c @@ -2853,6 +2853,9 @@ static void change_parent_backing_link(BlockDriverState *from, BdrvChild *c, *next, *to_c; QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { + if (c->role->stay_at_node) { + continue; + } if (c->role == &child_backing) { /* @from is generally not allowed to be a backing file, except for * when @to is the overlay. In that case, @from may not be replaced diff --git a/include/block/block_int.h b/include/block/block_int.h index 3177b9f496..a0d9328b59 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -441,6 +441,10 @@ typedef struct BdrvAioNotifier { } BdrvAioNotifier; struct BdrvChildRole { + /* If true, bdrv_replace_in_backing_chain() doesn't change the node this + * BdrvChild points to. */ + bool stay_at_node; + void (*inherit_options)(int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options); From 76d554e20bd0a965ac22d6155a129be12fac2667 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 17 Jan 2017 11:56:42 +0100 Subject: [PATCH 26/46] blockjob: Add permissions to block_job_add_bdrv() Block jobs don't actually do I/O through the reference they create with block_job_add_bdrv(), but they might want to use the permission system to express what the block job does to intermediate nodes. This adds permissions to block_job_add_bdrv() to provide the means to request permissions. 
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block/backup.c | 4 +++- block/commit.c | 8 ++++++-- block/mirror.c | 9 +++++++-- block/stream.c | 4 +++- blockjob.c | 36 ++++++++++++++++++++++++++++++------ include/block/blockjob.h | 5 ++++- 6 files changed, 53 insertions(+), 13 deletions(-) diff --git a/block/backup.c b/block/backup.c index c7596840e4..405f271395 100644 --- a/block/backup.c +++ b/block/backup.c @@ -657,7 +657,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); } - block_job_add_bdrv(&job->common, target); + /* FIXME Use real permissions */ + block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, + &error_abort); job->common.len = len; block_job_txn_add_job(txn, &job->common); diff --git a/block/commit.c b/block/commit.c index 60d29a9c0f..b69586ff7b 100644 --- a/block/commit.c +++ b/block/commit.c @@ -267,13 +267,17 @@ void commit_start(const char *job_id, BlockDriverState *bs, * disappear from the chain after this operation. 
*/ assert(bdrv_chain_contains(top, base)); for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) { - block_job_add_bdrv(&s->common, iter); + /* FIXME Use real permissions */ + block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + BLK_PERM_ALL, &error_abort); } /* overlay_bs must be blocked because it needs to be modified to * update the backing image string, but if it's the root node then * don't block it again */ if (bs != overlay_bs) { - block_job_add_bdrv(&s->common, overlay_bs); + /* FIXME Use real permissions */ + block_job_add_bdrv(&s->common, "overlay of top", overlay_bs, 0, + BLK_PERM_ALL, &error_abort); } /* FIXME Use real permissions */ diff --git a/block/mirror.c b/block/mirror.c index 18128e6163..4d325f1811 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1052,13 +1052,18 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, return; } - block_job_add_bdrv(&s->common, target); + /* FIXME Use real permissions */ + block_job_add_bdrv(&s->common, "target", target, 0, BLK_PERM_ALL, + &error_abort); + /* In commit_active_start() all intermediate nodes disappear, so * any jobs in them must be blocked */ if (bdrv_chain_contains(bs, target)) { BlockDriverState *iter; for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) { - block_job_add_bdrv(&s->common, iter); + /* FIXME Use real permissions */ + block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + BLK_PERM_ALL, &error_abort); } } diff --git a/block/stream.c b/block/stream.c index 7f49279b38..ba8650f0a4 100644 --- a/block/stream.c +++ b/block/stream.c @@ -248,7 +248,9 @@ void stream_start(const char *job_id, BlockDriverState *bs, /* Block all intermediate nodes between bs and base, because they * will disappear from the chain after this operation */ for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) { - block_job_add_bdrv(&s->common, iter); + /* FIXME Use real permissions */ + block_job_add_bdrv(&s->common, 
"intermediate node", iter, 0, + BLK_PERM_ALL, &error_abort); } s->base = base; diff --git a/blockjob.c b/blockjob.c index 27833c7c0d..4216cdeebf 100644 --- a/blockjob.c +++ b/blockjob.c @@ -55,6 +55,19 @@ struct BlockJobTxn { static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs); +static char *child_job_get_parent_desc(BdrvChild *c) +{ + BlockJob *job = c->opaque; + return g_strdup_printf("%s job '%s'", + BlockJobType_lookup[job->driver->job_type], + job->id); +} + +static const BdrvChildRole child_job = { + .get_parent_desc = child_job_get_parent_desc, + .stay_at_node = true, +}; + BlockJob *block_job_next(BlockJob *job) { if (!job) { @@ -115,11 +128,22 @@ static void block_job_detach_aio_context(void *opaque) block_job_unref(job); } -void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs) +int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + uint64_t perm, uint64_t shared_perm, Error **errp) { - job->nodes = g_slist_prepend(job->nodes, bs); + BdrvChild *c; + + c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm, + job, errp); + if (c == NULL) { + return -EPERM; + } + + job->nodes = g_slist_prepend(job->nodes, c); bdrv_ref(bs); bdrv_op_block_all(bs, job->blocker); + + return 0; } void *block_job_create(const char *job_id, const BlockJobDriver *driver, @@ -171,7 +195,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, job = g_malloc0(driver->instance_size); error_setg(&job->blocker, "block device is in use by block job: %s", BlockJobType_lookup[driver->job_type]); - block_job_add_bdrv(job, bs); + block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); job->driver = driver; @@ -238,9 +262,9 @@ void block_job_unref(BlockJob *job) BlockDriverState *bs = blk_bs(job->blk); bs->job = NULL; for (l = job->nodes; l; l = l->next) { - bs = l->data; - bdrv_op_unblock_all(bs, job->blocker); - 
bdrv_unref(bs); + BdrvChild *c = l->data; + bdrv_op_unblock_all(c->bs, job->blocker); + bdrv_root_unref_child(c); } g_slist_free(job->nodes); blk_remove_aio_context_notifier(job->blk, diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 1acb256223..9d65ef80b8 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -169,13 +169,16 @@ BlockJob *block_job_get(const char *id); /** * block_job_add_bdrv: * @job: A block job + * @name: The name to assign to the new BdrvChild * @bs: A BlockDriverState that is involved in @job + * @perm, @shared_perm: Permissions to request on the node * * Add @bs to the list of BlockDriverState that are involved in * @job. This means that all operations will be blocked on @bs while * @job exists. */ -void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs); +int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + uint64_t perm, uint64_t shared_perm, Error **errp); /** * block_job_set_speed: From 8dfba2797761d8a43744e4e6571c8175e448a478 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 16 Jan 2017 16:22:34 +0100 Subject: [PATCH 27/46] commit: Use real permissions in commit block job This is probably one of the most interesting conversions to the new op blocker system because a commit block job intentionally leaves some intermediate block nodes in the backing chain that aren't valid on their own any more; only the whole chain together results in a valid view. In order to provide the 'consistent read' permission to the parents of the 'top' node of the commit job, a new filter block driver is inserted above 'top' which doesn't require 'consistent read' on its backing chain. Subsequently, the commit job can block 'consistent read' on all intermediate nodes without causing a conflict. 
Signed-off-by: Kevin Wolf Acked-by: Fam Zheng Reviewed-by: Max Reitz --- block/commit.c | 117 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 97 insertions(+), 20 deletions(-) diff --git a/block/commit.c b/block/commit.c index b69586ff7b..8de4473520 100644 --- a/block/commit.c +++ b/block/commit.c @@ -36,6 +36,7 @@ typedef struct CommitBlockJob { BlockJob common; RateLimit limit; BlockDriverState *active; + BlockDriverState *commit_top_bs; BlockBackend *top; BlockBackend *base; BlockdevOnError on_error; @@ -83,12 +84,23 @@ static void commit_complete(BlockJob *job, void *opaque) BlockDriverState *active = s->active; BlockDriverState *top = blk_bs(s->top); BlockDriverState *base = blk_bs(s->base); - BlockDriverState *overlay_bs = bdrv_find_overlay(active, top); + BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs); int ret = data->ret; + bool remove_commit_top_bs = false; + + /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before + * the normal backing chain can be restored. */ + blk_unref(s->base); if (!block_job_is_cancelled(&s->common) && ret == 0) { /* success */ - ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str); + ret = bdrv_drop_intermediate(active, s->commit_top_bs, base, + s->backing_file_str); + } else if (overlay_bs) { + /* XXX Can (or should) we somehow keep 'consistent read' blocked even + * after the failed/cancelled commit job is gone? If we already wrote + * something to base, the intermediate images aren't valid any more. */ + remove_commit_top_bs = true; } /* restore base open flags here if appropriate (e.g., change the base back @@ -102,9 +114,15 @@ static void commit_complete(BlockJob *job, void *opaque) } g_free(s->backing_file_str); blk_unref(s->top); - blk_unref(s->base); block_job_completed(&s->common, ret); g_free(data); + + /* If bdrv_drop_intermediate() didn't already do that, remove the commit + * filter driver from the backing chain. 
Do this as the final step so that + * the 'consistent read' permission can be granted. */ + if (remove_commit_top_bs) { + bdrv_set_backing_hd(overlay_bs, top); + } } static void coroutine_fn commit_run(void *opaque) @@ -208,6 +226,34 @@ static const BlockJobDriver commit_job_driver = { .start = commit_run, }; +static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); +} + +static void bdrv_commit_top_close(BlockDriverState *bs) +{ +} + +static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + *nperm = 0; + *nshared = BLK_PERM_ALL; +} + +/* Dummy node that provides consistent read to its users without requiring it + * from its backing file and that allows writes on the backing file chain. */ +static BlockDriver bdrv_commit_top = { + .format_name = "commit_top", + .bdrv_co_preadv = bdrv_commit_top_preadv, + .bdrv_close = bdrv_commit_top_close, + .bdrv_child_perm = bdrv_commit_top_child_perm, +}; + void commit_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, BlockDriverState *top, int64_t speed, BlockdevOnError on_error, const char *backing_file_str, @@ -219,6 +265,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, int orig_base_flags; BlockDriverState *iter; BlockDriverState *overlay_bs; + BlockDriverState *commit_top_bs = NULL; Error *local_err = NULL; int ret; @@ -235,7 +282,6 @@ void commit_start(const char *job_id, BlockDriverState *bs, return; } - /* FIXME Use real permissions */ s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL, speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp); if (!s) { @@ -262,34 +308,62 @@ void commit_start(const char *job_id, BlockDriverState *bs, } } + /* Insert commit_top block node above top, so we can block consistent 
read + * on the backing chain below it */ + commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, 0, errp); + if (commit_top_bs == NULL) { + goto fail; + } + + bdrv_set_backing_hd(commit_top_bs, top); + bdrv_set_backing_hd(overlay_bs, commit_top_bs); + + s->commit_top_bs = commit_top_bs; + bdrv_unref(commit_top_bs); /* Block all nodes between top and base, because they will * disappear from the chain after this operation. */ assert(bdrv_chain_contains(top, base)); - for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) { - /* FIXME Use real permissions */ - block_job_add_bdrv(&s->common, "intermediate node", iter, 0, - BLK_PERM_ALL, &error_abort); - } - /* overlay_bs must be blocked because it needs to be modified to - * update the backing image string, but if it's the root node then - * don't block it again */ - if (bs != overlay_bs) { - /* FIXME Use real permissions */ - block_job_add_bdrv(&s->common, "overlay of top", overlay_bs, 0, - BLK_PERM_ALL, &error_abort); + for (iter = top; iter != base; iter = backing_bs(iter)) { + /* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves + * at s->base (if writes are blocked for a node, they are also blocked + * for its backing file). The other options would be a second filter + * driver above s->base. */ + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE, + errp); + if (ret < 0) { + goto fail; + } } - /* FIXME Use real permissions */ - s->base = blk_new(0, BLK_PERM_ALL); + ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp); + if (ret < 0) { + goto fail; + } + + /* overlay_bs must be blocked because it needs to be modified to + * update the backing image string. 
*/ + ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs, + BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp); + if (ret < 0) { + goto fail; + } + + s->base = blk_new(BLK_PERM_CONSISTENT_READ + | BLK_PERM_WRITE + | BLK_PERM_RESIZE, + BLK_PERM_CONSISTENT_READ + | BLK_PERM_GRAPH_MOD + | BLK_PERM_WRITE_UNCHANGED); ret = blk_insert_bs(s->base, base, errp); if (ret < 0) { goto fail; } - /* FIXME Use real permissions */ + /* Required permissions are already taken with block_job_add_bdrv() */ s->top = blk_new(0, BLK_PERM_ALL); - ret = blk_insert_bs(s->top, top, errp); + ret = blk_insert_bs(s->top, top, errp); if (ret < 0) { goto fail; } @@ -314,6 +388,9 @@ fail: if (s->top) { blk_unref(s->top); } + if (commit_top_bs) { + bdrv_set_backing_hd(overlay_bs, top); + } block_job_unref(&s->common); } From d3f06759222e70979965e3afccfd88eddd3cc454 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 19 Jan 2017 18:16:03 +0100 Subject: [PATCH 28/46] commit: Use real permissions for HMP 'commit' This is a little simpler than the commit block job because it's synchronous and only commits into the immediate backing file, but otherwise doing more or less the same.
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block/commit.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/block/commit.c b/block/commit.c index 8de4473520..f18026b61f 100644 --- a/block/commit.c +++ b/block/commit.c @@ -401,11 +401,14 @@ fail: int bdrv_commit(BlockDriverState *bs) { BlockBackend *src, *backing; + BlockDriverState *backing_file_bs = NULL; + BlockDriverState *commit_top_bs = NULL; BlockDriver *drv = bs->drv; int64_t sector, total_sectors, length, backing_length; int n, ro, open_flags; int ret = 0; uint8_t *buf = NULL; + Error *local_err = NULL; if (!drv) return -ENOMEDIUM; @@ -428,17 +431,31 @@ int bdrv_commit(BlockDriverState *bs) } } - /* FIXME Use real permissions */ - src = blk_new(0, BLK_PERM_ALL); - backing = blk_new(0, BLK_PERM_ALL); + src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); + backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); - ret = blk_insert_bs(src, bs, NULL); + ret = blk_insert_bs(src, bs, &local_err); if (ret < 0) { + error_report_err(local_err); goto ro_cleanup; } - ret = blk_insert_bs(backing, bs->backing->bs, NULL); + /* Insert commit_top block node above backing, so we can write to it */ + backing_file_bs = backing_bs(bs); + + commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR, + &local_err); + if (commit_top_bs == NULL) { + error_report_err(local_err); + goto ro_cleanup; + } + + bdrv_set_backing_hd(commit_top_bs, backing_file_bs); + bdrv_set_backing_hd(bs, commit_top_bs); + + ret = blk_insert_bs(backing, backing_file_bs, &local_err); if (ret < 0) { + error_report_err(local_err); goto ro_cleanup; } @@ -512,8 +529,12 @@ int bdrv_commit(BlockDriverState *bs) ro_cleanup: qemu_vfree(buf); - blk_unref(src); blk_unref(backing); + if (backing_file_bs) { + bdrv_set_backing_hd(bs, backing_file_bs); + } + bdrv_unref(commit_top_bs); + blk_unref(src); if (ro) { /* ignoring error return here */ From 
4e9e4323d5ec07a07f8db9317e1842a5e00a14e2 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 25 Jan 2017 11:39:04 +0100 Subject: [PATCH 29/46] backup: Use real permissions in backup block job The backup block job doesn't have very complicated requirements: It needs to read from the source and write to the target, but it's fine with either side being changed. The only restriction is that we can't resize the image because the job uses a cached value. qemu-iotests 055 needs to be changed because it used a target which was already attached to a virtio-blk device. The permission system correctly forbids this (virtio-blk can't accept another writer with its default share-rw=off). Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block/backup.c | 15 ++++++++++----- tests/qemu-iotests/055 | 11 +++++++---- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/block/backup.c b/block/backup.c index 405f271395..d1ab617c7e 100644 --- a/block/backup.c +++ b/block/backup.c @@ -618,15 +618,20 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, goto error; } - /* FIXME Use real permissions */ - job = block_job_create(job_id, &backup_job_driver, bs, 0, BLK_PERM_ALL, + /* job->common.len is fixed, so we can't allow resize */ + job = block_job_create(job_id, &backup_job_driver, bs, + BLK_PERM_CONSISTENT_READ, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD, speed, creation_flags, cb, opaque, errp); if (!job) { goto error; } - /* FIXME Use real permissions */ - job->target = blk_new(0, BLK_PERM_ALL); + /* The target must match the source in size, so no resize here either */ + job->target = blk_new(BLK_PERM_WRITE, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD); ret = blk_insert_bs(job->target, target, errp); if (ret < 0) { goto error; @@ -657,7 +662,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, job->cluster_size 
= MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); } - /* FIXME Use real permissions */ + /* Required permissions are already taken with target's blk_new() */ block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, &error_abort); job->common.len = len; diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 index 1d3fd04b65..aafcd249f6 100755 --- a/tests/qemu-iotests/055 +++ b/tests/qemu-iotests/055 @@ -48,7 +48,8 @@ class TestSingleDrive(iotests.QMPTestCase): def setUp(self): qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) - self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.add_drive(blockdev_target_img, interface="none") if iotests.qemu_default_machine == 'pc': self.vm.add_drive(None, 'media=cdrom', 'ide') self.vm.launch() @@ -164,7 +165,8 @@ class TestSetSpeed(iotests.QMPTestCase): def setUp(self): qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) - self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.add_drive(blockdev_target_img, interface="none") self.vm.launch() def tearDown(self): @@ -247,7 +249,8 @@ class TestSingleTransaction(iotests.QMPTestCase): def setUp(self): qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) - self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.add_drive(blockdev_target_img, interface="none") if iotests.qemu_default_machine == 'pc': self.vm.add_drive(None, 'media=cdrom', 'ide') self.vm.launch() @@ -460,7 +463,7 @@ class TestDriveCompression(iotests.QMPTestCase): qemu_img('create', '-f', fmt, blockdev_target_img, str(TestDriveCompression.image_len), *args) - self.vm.add_drive(blockdev_target_img, format=fmt) + self.vm.add_drive(blockdev_target_img, format=fmt, interface="none") self.vm.launch() From 
dd65a52e4aa4a0adfedf0ed9a35da1960f359fe1 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Feb 2017 12:46:27 +0100 Subject: [PATCH 30/46] block: Fix pending requests check in bdrv_append() bdrv_append() cares about isolation of the node that it modifies, but not about activity in some subtree below it. Instead of using the recursive bdrv_requests_pending(), directly check bs->in_flight, which considers only the node in question. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 9e538a5d41..5189c7c55c 100644 --- a/block.c +++ b/block.c @@ -2897,8 +2897,8 @@ static void change_parent_backing_link(BlockDriverState *from, */ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) { - assert(!bdrv_requests_pending(bs_top)); - assert(!bdrv_requests_pending(bs_new)); + assert(!atomic_read(&bs_top->in_flight)); + assert(!atomic_read(&bs_new->in_flight)); bdrv_ref(bs_top); From db95dbba3b7b09cd11ffaf3a8453c2500e807f80 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 8 Feb 2017 11:28:52 +0100 Subject: [PATCH 31/46] block: BdrvChildRole.attach/detach() callbacks Backing files are somewhat special compared to other kinds of children because they are attached and detached using bdrv_set_backing_hd() rather than the normal set of functions, which does a few more things like setting backing blockers, toggling the BDRV_O_NO_BACKING flag, setting parent_bs->backing_file, etc. These special features are a reason why change_parent_backing_link() can't handle backing files yet. With abstracting the additional features into .attach/.detach callbacks, we get a step closer to a function that can actually deal with this. 
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block.c | 95 ++++++++++++++++++++++++--------------- include/block/block_int.h | 3 ++ 2 files changed, 63 insertions(+), 35 deletions(-) diff --git a/block.c b/block.c index 5189c7c55c..698a5c7e0b 100644 --- a/block.c +++ b/block.c @@ -807,6 +807,57 @@ const BdrvChildRole child_format = { .drained_end = bdrv_child_cb_drained_end, }; +static void bdrv_backing_attach(BdrvChild *c) +{ + BlockDriverState *parent = c->opaque; + BlockDriverState *backing_hd = c->bs; + + assert(!parent->backing_blocker); + error_setg(&parent->backing_blocker, + "node is used as backing hd of '%s'", + bdrv_get_device_or_node_name(parent)); + + parent->open_flags &= ~BDRV_O_NO_BACKING; + pstrcpy(parent->backing_file, sizeof(parent->backing_file), + backing_hd->filename); + pstrcpy(parent->backing_format, sizeof(parent->backing_format), + backing_hd->drv ? backing_hd->drv->format_name : ""); + + bdrv_op_block_all(backing_hd, parent->backing_blocker); + /* Otherwise we won't be able to commit or stream */ + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, + parent->backing_blocker); + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, + parent->backing_blocker); + /* + * We do backup in 3 ways: + * 1. drive backup + * The target bs is new opened, and the source is top BDS + * 2. blockdev backup + * Both the source and the target are top BDSes. + * 3. internal backup(used for block replication) + * Both the source and the target are backing file + * + * In case 1 and 2, neither the source nor the target is the backing file. + * In case 3, we will block the top BDS, so there is only one block job + * for the top BDS and its backing chain. 
+ */ + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, + parent->backing_blocker); + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, + parent->backing_blocker); +} + +static void bdrv_backing_detach(BdrvChild *c) +{ + BlockDriverState *parent = c->opaque; + + assert(parent->backing_blocker); + bdrv_op_unblock_all(c->bs, parent->backing_blocker); + error_free(parent->backing_blocker); + parent->backing_blocker = NULL; +} + /* * Returns the options and flags that bs->backing should get, based on the * given options and flags for the parent BDS @@ -833,6 +884,8 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options, const BdrvChildRole child_backing = { .get_parent_desc = bdrv_child_get_parent_desc, + .attach = bdrv_backing_attach, + .detach = bdrv_backing_detach, .inherit_options = bdrv_backing_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, @@ -1670,6 +1723,9 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs, if (old_bs->quiesce_counter && child->role->drained_end) { child->role->drained_end(child); } + if (child->role->detach) { + child->role->detach(child); + } QLIST_REMOVE(child, next_parent); /* Update permissions for old node. 
This is guaranteed to succeed @@ -1693,6 +1749,10 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs, bdrv_check_perm(new_bs, perm, shared_perm, &error_abort); } bdrv_set_perm(new_bs, perm, shared_perm); + + if (child->role->attach) { + child->role->attach(child); + } } } @@ -1830,52 +1890,17 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) } if (bs->backing) { - assert(bs->backing_blocker); - bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker); bdrv_unref_child(bs, bs->backing); - } else if (backing_hd) { - error_setg(&bs->backing_blocker, - "node is used as backing hd of '%s'", - bdrv_get_device_or_node_name(bs)); } if (!backing_hd) { - error_free(bs->backing_blocker); - bs->backing_blocker = NULL; bs->backing = NULL; goto out; } /* FIXME Error handling */ bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing, &error_abort); - bs->open_flags &= ~BDRV_O_NO_BACKING; - pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); - pstrcpy(bs->backing_format, sizeof(bs->backing_format), - backing_hd->drv ? backing_hd->drv->format_name : ""); - bdrv_op_block_all(backing_hd, bs->backing_blocker); - /* Otherwise we won't be able to commit or stream */ - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, - bs->backing_blocker); - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, - bs->backing_blocker); - /* - * We do backup in 3 ways: - * 1. drive backup - * The target bs is new opened, and the source is top BDS - * 2. blockdev backup - * Both the source and the target are top BDSes. - * 3. internal backup(used for block replication) - * Both the source and the target are backing file - * - * In case 1 and 2, neither the source nor the target is the backing file. - * In case 3, we will block the top BDS, so there is only one block job - * for the top BDS and its backing chain. 
- */ - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, - bs->backing_blocker); - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, - bs->backing_blocker); out: bdrv_refresh_limits(bs, NULL); } diff --git a/include/block/block_int.h b/include/block/block_int.h index a0d9328b59..a5c704bb5e 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -472,6 +472,9 @@ struct BdrvChildRole { */ void (*drained_begin)(BdrvChild *child); void (*drained_end)(BdrvChild *child); + + void (*attach)(BdrvChild *child); + void (*detach)(BdrvChild *child); }; extern const BdrvChildRole child_file; From 3e44c8e08a4b84ec1f4f1eb249d33005bb9cf572 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Feb 2017 12:51:18 +0100 Subject: [PATCH 32/46] block: Allow backing file links in change_parent_backing_link() Now that the backing file child role implements .attach/.detach callbacks, nothing prevents us from modifying the graph even if that involves changing backing file links. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/block.c b/block.c index 698a5c7e0b..79d9ad0ad6 100644 --- a/block.c +++ b/block.c @@ -2882,9 +2882,9 @@ static void change_parent_backing_link(BlockDriverState *from, continue; } if (c->role == &child_backing) { - /* @from is generally not allowed to be a backing file, except for - * when @to is the overlay. In that case, @from may not be replaced - * by @to as @to's backing node. */ + /* If @from is a backing file of @to, ignore the child to avoid + * creating a loop. We only want to change the pointer of other + * parents. */ QLIST_FOREACH(to_c, &to->children, next) { if (to_c == c) { break; @@ -2895,7 +2895,6 @@ static void change_parent_backing_link(BlockDriverState *from, } } - assert(c->role != &child_backing); bdrv_ref(to); /* FIXME Are we sure that bdrv_replace_child() can't run into * &error_abort because of permissions? 
*/ From bbc02b90bcba371818dbffec89933072f9406945 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 28 Feb 2017 12:45:58 +0100 Subject: [PATCH 33/46] blockjob: Factor out block_job_remove_all_bdrv() In some cases, we want to remove op blockers on intermediate nodes before the whole block job transaction has completed (because they block restoring the final graph state during completion). Provide a function for this. The whole block job lifecycle is a bit messed up and it's hard to actually do all things in the right order, but I'll leave simplifying this for another day. Signed-off-by: Kevin Wolf Acked-by: Fam Zheng Reviewed-by: Max Reitz --- blockjob.c | 20 +++++++++++++------- include/block/blockjob.h | 9 +++++++++ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/blockjob.c b/blockjob.c index 4216cdeebf..69126af97f 100644 --- a/blockjob.c +++ b/blockjob.c @@ -128,6 +128,18 @@ static void block_job_detach_aio_context(void *opaque) block_job_unref(job); } +void block_job_remove_all_bdrv(BlockJob *job) +{ + GSList *l; + for (l = job->nodes; l; l = l->next) { + BdrvChild *c = l->data; + bdrv_op_unblock_all(c->bs, job->blocker); + bdrv_root_unref_child(c); + } + g_slist_free(job->nodes); + job->nodes = NULL; +} + int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, uint64_t perm, uint64_t shared_perm, Error **errp) { @@ -258,15 +270,9 @@ void block_job_ref(BlockJob *job) void block_job_unref(BlockJob *job) { if (--job->refcnt == 0) { - GSList *l; BlockDriverState *bs = blk_bs(job->blk); bs->job = NULL; - for (l = job->nodes; l; l = l->next) { - BdrvChild *c = l->data; - bdrv_op_unblock_all(c->bs, job->blocker); - bdrv_root_unref_child(c); - } - g_slist_free(job->nodes); + block_job_remove_all_bdrv(job); blk_remove_aio_context_notifier(job->blk, block_job_attached_aio_context, block_job_detach_aio_context, job); diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 9d65ef80b8..9e906f7d7e 100644 --- 
a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -180,6 +180,15 @@ BlockJob *block_job_get(const char *id); int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, uint64_t perm, uint64_t shared_perm, Error **errp); +/** + * block_job_remove_all_bdrv: + * @job: The block job + * + * Remove all BlockDriverStates from the list of nodes that are involved in the + * job. This removes the blockers added with block_job_add_bdrv(). + */ +void block_job_remove_all_bdrv(BlockJob *job); + /** * block_job_set_speed: * @job: The job to set the speed for. From 4ef85a9c233936d9ac1400d67b066353a6e9f32f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 25 Jan 2017 19:16:34 +0100 Subject: [PATCH 34/46] mirror: Use real permissions in mirror/active commit block job The mirror block job is mainly used for two different scenarios: Mirroring to an otherwise unused, independent target node, or for active commit where the target node is part of the backing chain of the source. Similarly to the commit block job patch, we need to insert a new filter node to keep the permissions correct during active commit. Note that one change this implies is that job->blk points to mirror_top_bs as its root now, and mirror_top_bs (rather than the actual source node) contains the bs->job pointer. This requires qemu-img commit to get the job by name now rather than just taking bs->job. 
Signed-off-by: Kevin Wolf Acked-by: Fam Zheng Acked-by: Max Reitz --- block/mirror.c | 216 +++++++++++++++++++++++++++++++------ qemu-img.c | 6 +- tests/qemu-iotests/141 | 2 +- tests/qemu-iotests/141.out | 4 +- 4 files changed, 190 insertions(+), 38 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 4d325f1811..40e8bcccce 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -38,7 +38,10 @@ typedef struct MirrorBlockJob { BlockJob common; RateLimit limit; BlockBackend *target; + BlockDriverState *mirror_top_bs; + BlockDriverState *source; BlockDriverState *base; + /* The name of the graph node to replace */ char *replaces; /* The BDS to replace */ @@ -327,7 +330,7 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s, static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) { - BlockDriverState *source = blk_bs(s->common.blk); + BlockDriverState *source = s->source; int64_t sector_num, first_chunk; uint64_t delay_ns = 0; /* At least the first dirty chunk is mirrored in one iteration. */ @@ -497,12 +500,25 @@ static void mirror_exit(BlockJob *job, void *opaque) MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); MirrorExitData *data = opaque; AioContext *replace_aio_context = NULL; - BlockDriverState *src = blk_bs(s->common.blk); + BlockDriverState *src = s->source; BlockDriverState *target_bs = blk_bs(s->target); + BlockDriverState *mirror_top_bs = s->mirror_top_bs; /* Make sure that the source BDS doesn't go away before we called * block_job_completed(). */ bdrv_ref(src); + bdrv_ref(mirror_top_bs); + + /* We don't access the source any more. Dropping any WRITE/RESIZE is + * required before it could become a backing file of target_bs. */ + bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, + &error_abort); + if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { + BlockDriverState *backing = s->is_none_mode ? 
src : s->base; + if (backing_bs(target_bs) != backing) { + bdrv_set_backing_hd(target_bs, backing); + } + } if (s->to_replace) { replace_aio_context = bdrv_get_aio_context(s->to_replace); @@ -524,13 +540,6 @@ static void mirror_exit(BlockJob *job, void *opaque) bdrv_drained_begin(target_bs); bdrv_replace_in_backing_chain(to_replace, target_bs); bdrv_drained_end(target_bs); - - /* We just changed the BDS the job BB refers to, so switch the BB back - * so the cleanup does the right thing. We don't need any permissions - * any more now. */ - blk_remove_bs(job->blk); - blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort); - blk_insert_bs(job->blk, src, &error_abort); } if (s->to_replace) { bdrv_op_unblock_all(s->to_replace, s->replace_blocker); @@ -543,9 +552,26 @@ static void mirror_exit(BlockJob *job, void *opaque) g_free(s->replaces); blk_unref(s->target); s->target = NULL; + + /* Remove the mirror filter driver from the graph. Before this, get rid of + * the blockers on the intermediate nodes so that the resulting state is + * valid. */ + block_job_remove_all_bdrv(job); + bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs)); + + /* We just changed the BDS the job BB refers to (with either or both of the + * bdrv_replace_in_backing_chain() calls), so switch the BB back so the + * cleanup does the right thing. We don't need any permissions any more + * now. 
*/ + blk_remove_bs(job->blk); + blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort); + blk_insert_bs(job->blk, mirror_top_bs, &error_abort); + block_job_completed(&s->common, data->ret); + g_free(data); bdrv_drained_end(src); + bdrv_unref(mirror_top_bs); bdrv_unref(src); } @@ -565,7 +591,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) { int64_t sector_num, end; BlockDriverState *base = s->base; - BlockDriverState *bs = blk_bs(s->common.blk); + BlockDriverState *bs = s->source; BlockDriverState *target_bs = blk_bs(s->target); int ret, n; @@ -647,7 +673,7 @@ static void coroutine_fn mirror_run(void *opaque) { MirrorBlockJob *s = opaque; MirrorExitData *data; - BlockDriverState *bs = blk_bs(s->common.blk); + BlockDriverState *bs = s->source; BlockDriverState *target_bs = blk_bs(s->target); bool need_drain = true; int64_t length; @@ -879,9 +905,8 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) static void mirror_complete(BlockJob *job, Error **errp) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); - BlockDriverState *src, *target; + BlockDriverState *target; - src = blk_bs(job->blk); target = blk_bs(s->target); if (!s->synced) { @@ -913,6 +938,10 @@ static void mirror_complete(BlockJob *job, Error **errp) replace_aio_context = bdrv_get_aio_context(s->to_replace); aio_context_acquire(replace_aio_context); + /* TODO Translate this into permission system. Current definition of + * GRAPH_MOD would require to request it for the parents; they might + * not even be BlockDriverStates, however, so a BdrvChild can't address + * them. May need redefinition of GRAPH_MOD. 
*/ error_setg(&s->replace_blocker, "block device is in use by block-job-complete"); bdrv_op_block_all(s->to_replace, s->replace_blocker); @@ -921,13 +950,6 @@ static void mirror_complete(BlockJob *job, Error **errp) aio_context_release(replace_aio_context); } - if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { - BlockDriverState *backing = s->is_none_mode ? src : s->base; - if (backing_bs(target) != backing) { - bdrv_set_backing_hd(target, backing); - } - } - s->should_complete = true; block_job_enter(&s->common); } @@ -983,6 +1005,77 @@ static const BlockJobDriver commit_active_job_driver = { .drain = mirror_drain, }; +static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); +} + +static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags); +} + +static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs) +{ + return bdrv_co_flush(bs->backing->bs); +} + +static int64_t coroutine_fn bdrv_mirror_top_get_block_status( + BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, + BlockDriverState **file) +{ + *pnum = nb_sectors; + *file = bs->backing->bs; + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | + (sector_num << BDRV_SECTOR_BITS); +} + +static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int count, BdrvRequestFlags flags) +{ + return bdrv_co_pwrite_zeroes(bs->backing, offset, count, flags); +} + +static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs, + int64_t offset, int count) +{ + return bdrv_co_pdiscard(bs->backing->bs, offset, count); +} + +static void bdrv_mirror_top_close(BlockDriverState *bs) +{ +} + +static void bdrv_mirror_top_child_perm(BlockDriverState *bs, 
BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + /* Must be able to forward guest writes to the real image */ + *nperm = 0; + if (perm & BLK_PERM_WRITE) { + *nperm |= BLK_PERM_WRITE; + } + + *nshared = BLK_PERM_ALL; +} + +/* Dummy node that provides consistent read to its users without requiring it + * from its backing file and that allows writes on the backing file chain. */ +static BlockDriver bdrv_mirror_top = { + .format_name = "mirror_top", + .bdrv_co_preadv = bdrv_mirror_top_preadv, + .bdrv_co_pwritev = bdrv_mirror_top_pwritev, + .bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes, + .bdrv_co_pdiscard = bdrv_mirror_top_pdiscard, + .bdrv_co_flush = bdrv_mirror_top_flush, + .bdrv_co_get_block_status = bdrv_mirror_top_get_block_status, + .bdrv_close = bdrv_mirror_top_close, + .bdrv_child_perm = bdrv_mirror_top_child_perm, +}; + static void mirror_start_job(const char *job_id, BlockDriverState *bs, int creation_flags, BlockDriverState *target, const char *replaces, int64_t speed, @@ -998,6 +1091,9 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, bool auto_complete) { MirrorBlockJob *s; + BlockDriverState *mirror_top_bs; + bool target_graph_mod; + bool target_is_backing; int ret; if (granularity == 0) { @@ -1015,20 +1111,55 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, buf_size = DEFAULT_MIRROR_BUF_SIZE; } - /* FIXME Use real permissions */ - s = block_job_create(job_id, driver, bs, 0, BLK_PERM_ALL, speed, - creation_flags, cb, opaque, errp); - if (!s) { + /* In the case of active commit, add dummy driver to provide consistent + * reads on the top, while disabling it in the intermediate nodes, and make + * the backing chain writable. 
*/ + mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, NULL, BDRV_O_RDWR, + errp); + if (mirror_top_bs == NULL) { return; } + mirror_top_bs->total_sectors = bs->total_sectors; - /* FIXME Use real permissions */ - s->target = blk_new(0, BLK_PERM_ALL); + /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep + * it alive until block_job_create() even if bs has no parent. */ + bdrv_ref(mirror_top_bs); + bdrv_drained_begin(bs); + bdrv_append(mirror_top_bs, bs); + bdrv_drained_end(bs); + + /* Make sure that the source is not resized while the job is running */ + s = block_job_create(job_id, driver, mirror_top_bs, + BLK_PERM_CONSISTENT_READ, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed, + creation_flags, cb, opaque, errp); + bdrv_unref(mirror_top_bs); + if (!s) { + goto fail; + } + s->source = bs; + s->mirror_top_bs = mirror_top_bs; + + /* No resize for the target either; while the mirror is still running, a + * consistent read isn't necessarily possible. We could possibly allow + * writes and graph modifications, though it would likely defeat the + * purpose of a mirror, so leave them blocked for now. + * + * In the case of active commit, things look a bit different, though, + * because the target is an already populated backing file in active use. + * We can allow anything except resize there.*/ + target_is_backing = bdrv_chain_contains(bs, target); + target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN); + s->target = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE | + (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0), + BLK_PERM_WRITE_UNCHANGED | + (target_is_backing ? 
BLK_PERM_CONSISTENT_READ | + BLK_PERM_WRITE | + BLK_PERM_GRAPH_MOD : 0)); ret = blk_insert_bs(s->target, target, errp); if (ret < 0) { - blk_unref(s->target); - block_job_unref(&s->common); - return; + goto fail; } s->replaces = g_strdup(replaces); @@ -1052,23 +1183,40 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, return; } - /* FIXME Use real permissions */ + /* Required permissions are already taken with blk_new() */ block_job_add_bdrv(&s->common, "target", target, 0, BLK_PERM_ALL, &error_abort); /* In commit_active_start() all intermediate nodes disappear, so * any jobs in them must be blocked */ - if (bdrv_chain_contains(bs, target)) { + if (target_is_backing) { BlockDriverState *iter; for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) { - /* FIXME Use real permissions */ - block_job_add_bdrv(&s->common, "intermediate node", iter, 0, - BLK_PERM_ALL, &error_abort); + /* XXX BLK_PERM_WRITE needs to be allowed so we don't block + * ourselves at s->base (if writes are blocked for a node, they are + * also blocked for its backing file). The other options would be a + * second filter driver above s->base (== target). */ + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE, + errp); + if (ret < 0) { + goto fail; + } } } trace_mirror_start(bs, s, opaque); block_job_start(&s->common); + return; + +fail: + if (s) { + g_free(s->replaces); + blk_unref(s->target); + block_job_unref(&s->common); + } + + bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs)); } void mirror_start(const char *job_id, BlockDriverState *bs, diff --git a/qemu-img.c b/qemu-img.c index a48a471042..0c76d4caa7 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -814,6 +814,8 @@ static void run_block_job(BlockJob *job, Error **errp) { AioContext *aio_context = blk_get_aio_context(job->blk); + /* FIXME In error cases, the job simply goes away and we access a dangling + * pointer below. 
*/ aio_context_acquire(aio_context); do { aio_poll(aio_context, true); @@ -835,6 +837,7 @@ static int img_commit(int argc, char **argv) const char *filename, *fmt, *cache, *base; BlockBackend *blk; BlockDriverState *bs, *base_bs; + BlockJob *job; bool progress = false, quiet = false, drop = false; bool writethrough; Error *local_err = NULL; @@ -970,7 +973,8 @@ static int img_commit(int argc, char **argv) bdrv_ref(bs); } - run_block_job(bs->job, &local_err); + job = block_job_get("commit"); + run_block_job(job, &local_err); if (local_err) { goto unref_backing; } diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141 index 3ba79f027a..6d8f0a1a84 100755 --- a/tests/qemu-iotests/141 +++ b/tests/qemu-iotests/141 @@ -67,7 +67,7 @@ test_blockjob() _send_qemu_cmd $QEMU_HANDLE \ "{'execute': 'x-blockdev-del', 'arguments': {'node-name': 'drv0'}}" \ - 'error' + 'error' | _filter_generated_node_ids _send_qemu_cmd $QEMU_HANDLE \ "{'execute': 'block-job-cancel', diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out index 195ca1a604..82e763b68d 100644 --- a/tests/qemu-iotests/141.out +++ b/tests/qemu-iotests/141.out @@ -20,7 +20,7 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t. 
Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} {"return": {}} -{"error": {"class": "GenericError", "desc": "Node drv0 is in use"}} +{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} {"return": {}} @@ -30,7 +30,7 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t. {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} {"return": {}} -{"error": {"class": "GenericError", "desc": "Node drv0 is in use"}} +{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} {"return": {}} From a170a91fd3eab6155da39e740381867e80bcc93e Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Feb 2017 13:34:18 +0100 Subject: [PATCH 35/46] stream: Use real permissions in streaming block job The correct permissions are relatively obvious here (and explained in code comments). For intermediate streaming, we need to reopen the top node read-write before creating the job now because the permissions system catches attempts to get the BLK_PERM_WRITE_UNCHANGED permission on a read-only node. 
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block/stream.c | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/block/stream.c b/block/stream.c index ba8650f0a4..b9c2f43c57 100644 --- a/block/stream.c +++ b/block/stream.c @@ -84,6 +84,8 @@ static void stream_complete(BlockJob *job, void *opaque) /* Reopen the image back in read-only mode if necessary */ if (s->bs_flags != bdrv_get_flags(bs)) { + /* Give up write permissions before making it read-only */ + blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort); bdrv_reopen(bs, s->bs_flags, NULL); } @@ -229,28 +231,35 @@ void stream_start(const char *job_id, BlockDriverState *bs, BlockDriverState *iter; int orig_bs_flags; - /* FIXME Use real permissions */ - s = block_job_create(job_id, &stream_job_driver, bs, 0, BLK_PERM_ALL, - speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp); - if (!s) { - return; - } - /* Make sure that the image is opened in read-write mode */ orig_bs_flags = bdrv_get_flags(bs); if (!(orig_bs_flags & BDRV_O_RDWR)) { if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) { - block_job_unref(&s->common); return; } } - /* Block all intermediate nodes between bs and base, because they - * will disappear from the chain after this operation */ + /* Prevent concurrent jobs trying to modify the graph structure here, we + * already have our own plans. Also don't allow resize as the image size is + * queried only at the job start and then cached. */ + s = block_job_create(job_id, &stream_job_driver, bs, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_GRAPH_MOD, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE, + speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp); + if (!s) { + goto fail; + } + + /* Block all intermediate nodes between bs and base, because they will + * disappear from the chain after this operation. 
The streaming job reads + * every block only once, assuming that it doesn't change, so block writes + * and resizes. */ for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) { - /* FIXME Use real permissions */ block_job_add_bdrv(&s->common, "intermediate node", iter, 0, - BLK_PERM_ALL, &error_abort); + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED, + &error_abort); } s->base = base; @@ -260,4 +269,10 @@ void stream_start(const char *job_id, BlockDriverState *bs, s->on_error = on_error; trace_stream_start(bs, base, s); block_job_start(&s->common); + return; + +fail: + if (orig_bs_flags != bdrv_get_flags(bs)) { + bdrv_reopen(bs, s->bs_flags, NULL); + } } From 6cdbceb12cf955398df48eda94a45ca41e956c78 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 20 Feb 2017 18:10:05 +0100 Subject: [PATCH 36/46] mirror: Add filter-node-name to blockdev-mirror Management tools need to be able to know about every node in the graph and need a way to address them. Changing the graph structure was okay because libvirt doesn't really manage the node level yet, but future libvirt versions need to deal with both new and old versions of qemu. This new option to blockdev-mirror allows the client to set a node-name for the automatically inserted filter driver, and at the same time serves as a witness for a future libvirt that this version of qemu does automatically insert a filter driver.
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block/mirror.c | 14 ++++++++------ blockdev.c | 12 +++++++++++- include/block/block_int.h | 5 ++++- qapi/block-core.json | 8 +++++++- 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 40e8bcccce..f6d988df3d 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1088,7 +1088,7 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, void *opaque, Error **errp, const BlockJobDriver *driver, bool is_none_mode, BlockDriverState *base, - bool auto_complete) + bool auto_complete, const char *filter_node_name) { MirrorBlockJob *s; BlockDriverState *mirror_top_bs; @@ -1114,8 +1114,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, /* In the case of active commit, add dummy driver to provide consistent * reads on the top, while disabling it in the intermediate nodes, and make * the backing chain writable. */ - mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, NULL, BDRV_O_RDWR, - errp); + mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name, + BDRV_O_RDWR, errp); if (mirror_top_bs == NULL) { return; } @@ -1225,7 +1225,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, - bool unmap, Error **errp) + bool unmap, const char *filter_node_name, Error **errp) { bool is_none_mode; BlockDriverState *base; @@ -1239,7 +1239,8 @@ void mirror_start(const char *job_id, BlockDriverState *bs, mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces, speed, granularity, buf_size, backing_mode, on_source_error, on_target_error, unmap, NULL, NULL, errp, - &mirror_job_driver, is_none_mode, base, false); + &mirror_job_driver, is_none_mode, base, false, + filter_node_name); } void commit_active_start(const char *job_id, BlockDriverState *bs, @@ -1260,7 +1261,8 @@ void 
commit_active_start(const char *job_id, BlockDriverState *bs, mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0, MIRROR_LEAVE_BACKING_CHAIN, on_error, on_error, true, cb, opaque, &local_err, - &commit_active_job_driver, false, base, auto_complete); + &commit_active_job_driver, false, base, auto_complete, + NULL); if (local_err) { error_propagate(errp, local_err); goto error_restore_flags; diff --git a/blockdev.c b/blockdev.c index 0a0226bca1..e592180b62 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3366,6 +3366,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, bool has_on_target_error, BlockdevOnError on_target_error, bool has_unmap, bool unmap, + bool has_filter_node_name, + const char *filter_node_name, Error **errp) { @@ -3387,6 +3389,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, if (!has_unmap) { unmap = true; } + if (!has_filter_node_name) { + filter_node_name = NULL; + } if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity", @@ -3416,7 +3421,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, mirror_start(job_id, bs, target, has_replaces ? 
replaces : NULL, speed, granularity, buf_size, sync, backing_mode, - on_source_error, on_target_error, unmap, errp); + on_source_error, on_target_error, unmap, filter_node_name, + errp); } void qmp_drive_mirror(DriveMirror *arg, Error **errp) @@ -3554,6 +3560,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) arg->has_on_source_error, arg->on_source_error, arg->has_on_target_error, arg->on_target_error, arg->has_unmap, arg->unmap, + false, NULL, &local_err); bdrv_unref(target_bs); error_propagate(errp, local_err); @@ -3572,6 +3579,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, BlockdevOnError on_source_error, bool has_on_target_error, BlockdevOnError on_target_error, + bool has_filter_node_name, + const char *filter_node_name, Error **errp) { BlockDriverState *bs; @@ -3603,6 +3612,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, has_on_source_error, on_source_error, has_on_target_error, on_target_error, true, true, + has_filter_node_name, filter_node_name, &local_err); error_propagate(errp, local_err); diff --git a/include/block/block_int.h b/include/block/block_int.h index a5c704bb5e..563b30c3ee 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -824,6 +824,9 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. * @unmap: Whether to unmap target where source sectors only contain zeroes. + * @filter_node_name: The node name that should be assigned to the filter + * driver that the mirror job inserts into the graph above @bs. NULL means that + * a node name should be autogenerated. * @errp: Error object. * * Start a mirroring operation on @bs. 
Clusters that are allocated @@ -837,7 +840,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, - bool unmap, Error **errp); + bool unmap, const char *filter_node_name, Error **errp); /* * backup_job_create: diff --git a/qapi/block-core.json b/qapi/block-core.json index cf24c04242..f0fa34c434 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1671,6 +1671,11 @@ # default 'report' (no limitations, since this applies to # a different block device than @device). # +# @filter-node-name: #optional the node name that should be assigned to the +# filter driver that the mirror job inserts into the graph +# above @device. If this option is not given, a node name is +# autogenerated. (Since: 2.9) +# # Returns: nothing on success. # # Since: 2.6 @@ -1690,7 +1695,8 @@ 'sync': 'MirrorSyncMode', '*speed': 'int', '*granularity': 'uint32', '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', - '*on-target-error': 'BlockdevOnError' } } + '*on-target-error': 'BlockdevOnError', + '*filter-node-name': 'str' } } ## # @block_set_io_throttle: From 0db832f42e445398b2815cd740e9cd915e7dd644 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 20 Feb 2017 18:10:05 +0100 Subject: [PATCH 37/46] commit: Add filter-node-name to block-commit Management tools need to be able to know about every node in the graph and need a way to address them. Changing the graph structure was okay because libvirt doesn't really manage the node level yet, but future libvirt versions need to deal with both new and old versions of qemu. This new option to block-commit allows the client to set a node-name for the automatically inserted filter driver, and at the same time serves as a witness for a future libvirt that this version of qemu does automatically insert a filter driver.
Signed-off-by: Kevin Wolf Acked-by: Fam Zheng Reviewed-by: Max Reitz --- block/commit.c | 5 +++-- block/mirror.c | 3 ++- block/replication.c | 2 +- blockdev.c | 10 +++++++--- include/block/block_int.h | 13 ++++++++++--- qapi/block-core.json | 8 +++++++- qemu-img.c | 4 ++-- 7 files changed, 32 insertions(+), 13 deletions(-) diff --git a/block/commit.c b/block/commit.c index f18026b61f..1e0f5318a4 100644 --- a/block/commit.c +++ b/block/commit.c @@ -257,7 +257,7 @@ static BlockDriver bdrv_commit_top = { void commit_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, BlockDriverState *top, int64_t speed, BlockdevOnError on_error, const char *backing_file_str, - Error **errp) + const char *filter_node_name, Error **errp) { CommitBlockJob *s; BlockReopenQueue *reopen_queue = NULL; @@ -310,7 +310,8 @@ void commit_start(const char *job_id, BlockDriverState *bs, /* Insert commit_top block node above top, so we can block consistent read * on the backing chain below it */ - commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, 0, errp); + commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0, + errp); if (commit_top_bs == NULL) { goto fail; } diff --git a/block/mirror.c b/block/mirror.c index f6d988df3d..869212daac 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1246,6 +1246,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, void commit_active_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, int creation_flags, int64_t speed, BlockdevOnError on_error, + const char *filter_node_name, BlockCompletionFunc *cb, void *opaque, Error **errp, bool auto_complete) { @@ -1262,7 +1263,7 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, MIRROR_LEAVE_BACKING_CHAIN, on_error, on_error, true, cb, opaque, &local_err, &commit_active_job_driver, false, base, auto_complete, - NULL); + filter_node_name); if (local_err) { error_propagate(errp, local_err); goto error_restore_flags; diff 
--git a/block/replication.c b/block/replication.c index 91465cbae9..22f170fd33 100644 --- a/block/replication.c +++ b/block/replication.c @@ -644,7 +644,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) s->replication_state = BLOCK_REPLICATION_FAILOVER; commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs, BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT, - replication_done, bs, errp, true); + NULL, replication_done, bs, errp, true); break; default: aio_context_release(aio_context); diff --git a/blockdev.c b/blockdev.c index e592180b62..ff781d9df3 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3032,6 +3032,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, bool has_top, const char *top, bool has_backing_file, const char *backing_file, bool has_speed, int64_t speed, + bool has_filter_node_name, const char *filter_node_name, Error **errp) { BlockDriverState *bs; @@ -3047,6 +3048,9 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, if (!has_speed) { speed = 0; } + if (!has_filter_node_name) { + filter_node_name = NULL; + } /* Important Note: * libvirt relies on the DeviceNotFound error class in order to probe for @@ -3121,8 +3125,8 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, goto out; } commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, - BLOCK_JOB_DEFAULT, speed, on_error, NULL, NULL, - &local_err, false); + BLOCK_JOB_DEFAULT, speed, on_error, + filter_node_name, NULL, NULL, &local_err, false); } else { BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs); if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { @@ -3130,7 +3134,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, } commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed, on_error, has_backing_file ? 
backing_file : NULL, - &local_err); + filter_node_name, &local_err); } if (local_err != NULL) { error_propagate(errp, local_err); diff --git a/include/block/block_int.h b/include/block/block_int.h index 563b30c3ee..a57c0bfb55 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -780,13 +780,16 @@ void stream_start(const char *job_id, BlockDriverState *bs, * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @on_error: The action to take upon error. * @backing_file_str: String to use as the backing file in @top's overlay + * @filter_node_name: The node name that should be assigned to the filter + * driver that the commit job inserts into the graph above @top. NULL means + * that a node name should be autogenerated. * @errp: Error object. * */ void commit_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, BlockDriverState *top, int64_t speed, BlockdevOnError on_error, const char *backing_file_str, - Error **errp); + const char *filter_node_name, Error **errp); /** * commit_active_start: * @job_id: The id of the newly-created job, or %NULL to use the @@ -797,6 +800,9 @@ void commit_start(const char *job_id, BlockDriverState *bs, * See @BlockJobCreateFlags * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @on_error: The action to take upon error. + * @filter_node_name: The node name that should be assigned to the filter + * driver that the commit job inserts into the graph above @bs. NULL means that + * a node name should be autogenerated. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. * @errp: Error object. 
@@ -806,8 +812,9 @@ void commit_start(const char *job_id, BlockDriverState *bs, void commit_active_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, int creation_flags, int64_t speed, BlockdevOnError on_error, - BlockCompletionFunc *cb, - void *opaque, Error **errp, bool auto_complete); + const char *filter_node_name, + BlockCompletionFunc *cb, void *opaque, Error **errp, + bool auto_complete); /* * mirror_start: * @job_id: The id of the newly-created job, or %NULL to use the diff --git a/qapi/block-core.json b/qapi/block-core.json index f0fa34c434..5cc992fb8f 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1304,6 +1304,11 @@ # # @speed: #optional the maximum speed, in bytes per second # +# @filter-node-name: #optional the node name that should be assigned to the +# filter driver that the commit job inserts into the graph +# above @top. If this option is not given, a node name is +# autogenerated. (Since: 2.9) +# # Returns: Nothing on success # If commit or stream is already active on this device, DeviceInUse # If @device does not exist, DeviceNotFound @@ -1323,7 +1328,8 @@ ## { 'command': 'block-commit', 'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', '*top': 'str', - '*backing-file': 'str', '*speed': 'int' } } + '*backing-file': 'str', '*speed': 'int', + '*filter-node-name': 'str' } } ## # @drive-backup: diff --git a/qemu-img.c b/qemu-img.c index 0c76d4caa7..98b836b030 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -958,8 +958,8 @@ static int img_commit(int argc, char **argv) aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); commit_active_start("commit", bs, base_bs, BLOCK_JOB_DEFAULT, 0, - BLOCKDEV_ON_ERROR_REPORT, common_block_job_cb, &cbi, - &local_err, false); + BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb, + &cbi, &local_err, false); aio_context_release(aio_context); if (local_err) { goto done; From 887354bd13ecb7ff68ec26892806c97512b77877 Mon Sep 17 00:00:00 2001 From: Kevin 
Wolf Date: Fri, 10 Feb 2017 16:24:56 +0100 Subject: [PATCH 38/46] hmp: Request permissions in qemu-io The HMP command 'qemu-io' is a bit tricky because it wants to work on the original BlockBackend, but additional permissions could be required. The details are explained in a comment in the code, but in summary, just request whatever permissions the current qemu-io command needs. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block/block-backend.c | 6 ++++++ hmp.c | 26 +++++++++++++++++++++++++- include/qemu-io.h | 1 + include/sysemu/block-backend.h | 1 + qemu-io-cmds.c | 28 ++++++++++++++++++++++++++++ 5 files changed, 61 insertions(+), 1 deletion(-) diff --git a/block/block-backend.c b/block/block-backend.c index 38a3858e96..daa7908d01 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -584,6 +584,12 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, return 0; } +void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm) +{ + *perm = blk->perm; + *shared_perm = blk->shared_perm; +} + static int blk_do_attach_dev(BlockBackend *blk, void *dev) { if (blk->dev) { diff --git a/hmp.c b/hmp.c index e219f97239..7b44e64c84 100644 --- a/hmp.c +++ b/hmp.c @@ -2051,7 +2051,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) if (!blk) { BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); if (bs) { - /* FIXME Use real permissions */ blk = local_blk = blk_new(0, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, &err); if (ret < 0) { @@ -2065,6 +2064,31 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) aio_context = blk_get_aio_context(blk); aio_context_acquire(aio_context); + /* + * Notably absent: Proper permission management. This is sad, but it seems + * almost impossible to achieve without changing the semantics and thereby + * limiting the use cases of the qemu-io HMP command. 
+ * + * In an ideal world we would unconditionally create a new BlockBackend for + * qemuio_command(), but we have commands like 'reopen' and want them to + * take effect on the exact BlockBackend whose name the user passed instead + * of just on a temporary copy of it. + * + * Another problem is that deleting the temporary BlockBackend involves + * draining all requests on it first, but some qemu-iotests cases want to + * issue multiple aio_read/write requests and expect them to complete in + * the background while the monitor has already returned. + * + * This is also what prevents us from saving the original permissions and + * restoring them later: We can't revoke permissions until all requests + * have completed, and we don't know when that is nor can we really let + * anything else run before we have revoked them to avoid race conditions. + * + * What happens now is that command() in qemu-io-cmds.c can extend the + * permissions if necessary for the qemu-io command. And they simply stay + * extended, possibly resulting in a read-only guest device keeping write + * permissions. Ugly, but it appears to be the lesser evil.
+ */ qemuio_command(blk, command); aio_context_release(aio_context); diff --git a/include/qemu-io.h b/include/qemu-io.h index 4d402b9b01..196fde0f3a 100644 --- a/include/qemu-io.h +++ b/include/qemu-io.h @@ -36,6 +36,7 @@ typedef struct cmdinfo { const char *args; const char *oneline; helpfunc_t help; + uint64_t perm; } cmdinfo_t; extern bool qemuio_misalign; diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index b23f6830db..096c17fce0 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -107,6 +107,7 @@ bool bdrv_has_blk(BlockDriverState *bs); bool bdrv_is_root_node(BlockDriverState *bs); int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, Error **errp); +void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm); void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); void blk_iostatus_enable(BlockBackend *blk); diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 7ac1576d4c..2c48f9ce1a 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -83,6 +83,29 @@ static int command(BlockBackend *blk, const cmdinfo_t *ct, int argc, } return 0; } + + /* Request additional permissions if necessary for this command. The caller + * is responsible for restoring the original permissions afterwards if this + * is what it wants. 
*/ + if (ct->perm && blk_is_available(blk)) { + uint64_t orig_perm, orig_shared_perm; + blk_get_perm(blk, &orig_perm, &orig_shared_perm); + + if (ct->perm & ~orig_perm) { + uint64_t new_perm; + Error *local_err = NULL; + int ret; + + new_perm = orig_perm | ct->perm; + + ret = blk_set_perm(blk, new_perm, orig_shared_perm, &local_err); + if (ret < 0) { + error_report_err(local_err); + return 0; + } + } + } + optind = 0; return ct->cfunc(blk, argc, argv); } @@ -918,6 +941,7 @@ static const cmdinfo_t write_cmd = { .name = "write", .altname = "w", .cfunc = write_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-bcCfquz] [-P pattern] off len", @@ -1093,6 +1117,7 @@ static int writev_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t writev_cmd = { .name = "writev", .cfunc = writev_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-Cfq] [-P pattern] off len [len..]", @@ -1392,6 +1417,7 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t aio_write_cmd = { .name = "aio_write", .cfunc = aio_write_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-Cfiquz] [-P pattern] off len [len..]", @@ -1556,6 +1582,7 @@ static const cmdinfo_t truncate_cmd = { .name = "truncate", .altname = "t", .cfunc = truncate_f, + .perm = BLK_PERM_WRITE | BLK_PERM_RESIZE, .argmin = 1, .argmax = 1, .args = "off", @@ -1653,6 +1680,7 @@ static const cmdinfo_t discard_cmd = { .name = "discard", .altname = "d", .cfunc = discard_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-Cq] off len", From 6f5ef23a3ff09919b73eef8196969685cb2383ee Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Feb 2017 14:45:37 +0100 Subject: [PATCH 39/46] migration/block: Use real permissions Request BLK_PERM_CONSISTENT_READ for the source of block migration, and handle potential permission errors as good as we can in this place (which is not very good, but it matches the other failure cases). 
Signed-off-by: Kevin Wolf Acked-by: Fam Zheng Reviewed-by: Max Reitz --- migration/block.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/migration/block.c b/migration/block.c index d2599360a0..1941bc2402 100644 --- a/migration/block.c +++ b/migration/block.c @@ -379,7 +379,7 @@ static void unset_dirty_tracking(void) } } -static void init_blk_migration(QEMUFile *f) +static int init_blk_migration(QEMUFile *f) { BlockDriverState *bs; BlkMigDevState *bmds; @@ -390,6 +390,8 @@ static void init_blk_migration(QEMUFile *f) BlkMigDevState *bmds; BlockDriverState *bs; } *bmds_bs; + Error *local_err = NULL; + int ret; block_mig_state.submitted = 0; block_mig_state.read_done = 0; @@ -411,12 +413,12 @@ static void init_blk_migration(QEMUFile *f) sectors = bdrv_nb_sectors(bs); if (sectors <= 0) { + ret = sectors; goto out; } bmds = g_new0(BlkMigDevState, 1); - /* FIXME Use real permissions */ - bmds->blk = blk_new(0, BLK_PERM_ALL); + bmds->blk = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); bmds->blk_name = g_strdup(bdrv_get_device_name(bs)); bmds->bulk_completed = 0; bmds->total_sectors = sectors; @@ -446,7 +448,11 @@ static void init_blk_migration(QEMUFile *f) BlockDriverState *bs = bmds_bs[i].bs; if (bmds) { - blk_insert_bs(bmds->blk, bs, &error_abort); + ret = blk_insert_bs(bmds->blk, bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto out; + } alloc_aio_bitmap(bmds); error_setg(&bmds->blocker, "block device is in use by migration"); @@ -454,8 +460,10 @@ static void init_blk_migration(QEMUFile *f) } } + ret = 0; out: g_free(bmds_bs); + return ret; } /* Called with no lock taken. 
*/ @@ -706,7 +714,11 @@ static int block_save_setup(QEMUFile *f, void *opaque) block_mig_state.submitted, block_mig_state.transferred); qemu_mutex_lock_iothread(); - init_blk_migration(f); + ret = init_blk_migration(f); + if (ret < 0) { + qemu_mutex_unlock_iothread(); + return ret; + } /* start track dirty blocks */ ret = set_dirty_tracking(); From 8a7ce4f9338c475df1afc12502af704e4300a3e0 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Feb 2017 15:43:38 +0100 Subject: [PATCH 40/46] nbd/server: Use real permissions for NBD exports NBD can't cope with device size changes, so resize must be forbidden, but otherwise we can tolerate anything. Depending on whether the export is writable or not, we only require consistent reads and writes. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- nbd/server.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index 89362ba760..924a1fe2db 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -891,10 +891,17 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size, { BlockBackend *blk; NBDExport *exp = g_malloc0(sizeof(NBDExport)); + uint64_t perm; int ret; - /* FIXME Use real permissions */ - blk = blk_new(0, BLK_PERM_ALL); + /* Don't allow resize while the NBD server is running, otherwise we don't + * care what happens with the node. 
*/ + perm = BLK_PERM_CONSISTENT_READ; + if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) { + perm |= BLK_PERM_WRITE; + } + blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { goto fail; From 2807c0cd439321dbac118b895cdd2b595f14bf4b Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Feb 2017 15:48:04 +0100 Subject: [PATCH 41/46] tests: Remove FIXME comments Not requesting any permissions is actually correct for these test cases because no actual I/O or other operation covered by the permission system is performed. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- tests/test-blockjob.c | 2 +- tests/test-throttle.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c index 1afe17b449..740e740398 100644 --- a/tests/test-blockjob.c +++ b/tests/test-blockjob.c @@ -54,7 +54,7 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id, * BlockDriverState inserted. 
*/ static BlockBackend *create_blk(const char *name) { - /* FIXME Use real permissions */ + /* No I/O is performed on this device */ BlockBackend *blk = blk_new(0, BLK_PERM_ALL); BlockDriverState *bs; diff --git a/tests/test-throttle.c b/tests/test-throttle.c index 5846433c9f..bd7c501b2e 100644 --- a/tests/test-throttle.c +++ b/tests/test-throttle.c @@ -593,7 +593,7 @@ static void test_groups(void) BlockBackend *blk1, *blk2, *blk3; BlockBackendPublic *blkp1, *blkp2, *blkp3; - /* FIXME Use real permissions */ + /* No actual I/O is performed on these devices */ blk1 = blk_new(0, BLK_PERM_ALL); blk2 = blk_new(0, BLK_PERM_ALL); blk3 = blk_new(0, BLK_PERM_ALL); From 85c97ca7a10b93216bc95052e9dabe3a4bb8736a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Feb 2017 15:58:43 +0100 Subject: [PATCH 42/46] block: Pass BdrvChild to bdrv_aligned_preadv/pwritev and copy-on-read This is where we want to check the permissions, so we need to have the BdrvChild around where they are stored. Signed-off-by: Kevin Wolf Acked-by: Fam Zheng Reviewed-by: Max Reitz --- block/io.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/block/io.c b/block/io.c index d5c45447fd..2592ca1bd4 100644 --- a/block/io.c +++ b/block/io.c @@ -925,9 +925,11 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset, return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov); } -static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, +static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, unsigned int bytes, QEMUIOVector *qiov) { + BlockDriverState *bs = child->bs; + /* Perform I/O through a temporary buffer so that users who scribble over * their read buffer while the operation is in progress do not end up * modifying the image file. 
This is critical for zero-copy guest I/O @@ -1001,10 +1003,11 @@ err: * handles copy on read, zeroing after EOF, and fragmentation of large * reads; any other features must be implemented by the caller. */ -static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, +static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, int64_t align, QEMUIOVector *qiov, int flags) { + BlockDriverState *bs = child->bs; int64_t total_bytes, max_bytes; int ret = 0; uint64_t bytes_remaining = bytes; @@ -1050,7 +1053,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, } if (!ret || pnum != nb_sectors) { - ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov); + ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov); goto out; } } @@ -1158,7 +1161,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, } tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); - ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align, + ret = bdrv_aligned_preadv(child, &req, offset, bytes, align, use_local_qiov ? &local_qiov : qiov, flags); tracked_request_end(&req); @@ -1306,10 +1309,11 @@ fail: * Forwards an already correctly aligned write request to the BlockDriver, * after possibly fragmenting it. 
*/ -static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, +static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, int64_t align, QEMUIOVector *qiov, int flags) { + BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; bool waited; int ret; @@ -1397,12 +1401,13 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, return ret; } -static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, +static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes, BdrvRequestFlags flags, BdrvTrackedRequest *req) { + BlockDriverState *bs = child->bs; uint8_t *buf = NULL; QEMUIOVector local_qiov; struct iovec iov; @@ -1430,7 +1435,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, mark_request_serialising(req, align); wait_serialising_requests(req); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); - ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align, + ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align, align, &local_qiov, 0); if (ret < 0) { goto fail; @@ -1438,7 +1443,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); memset(buf + head_padding_bytes, 0, zero_bytes); - ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align, + ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align, align, &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE); if (ret < 0) { @@ -1452,7 +1457,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, if (bytes >= align) { /* Write the aligned part in the middle. 
*/ uint64_t aligned_bytes = bytes & ~(align - 1); - ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, align, + ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align, NULL, flags); if (ret < 0) { goto fail; @@ -1468,7 +1473,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, mark_request_serialising(req, align); wait_serialising_requests(req); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); - ret = bdrv_aligned_preadv(bs, req, offset, align, + ret = bdrv_aligned_preadv(child, req, offset, align, align, &local_qiov, 0); if (ret < 0) { goto fail; @@ -1476,7 +1481,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); memset(buf, 0, bytes); - ret = bdrv_aligned_pwritev(bs, req, offset, align, align, + ret = bdrv_aligned_pwritev(child, req, offset, align, align, &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE); } fail: @@ -1523,7 +1528,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE); if (!qiov) { - ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req); + ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req); goto out; } @@ -1542,7 +1547,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, qemu_iovec_init_external(&head_qiov, &head_iov, 1); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); - ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align, + ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align, align, &head_qiov, 0); if (ret < 0) { goto fail; @@ -1584,8 +1589,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, qemu_iovec_init_external(&tail_qiov, &tail_iov, 1); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); - ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align, - align, &tail_qiov, 0); + ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1), + align, align, &tail_qiov, 0); if (ret < 0) { goto 
fail; } @@ -1603,7 +1608,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, bytes = ROUND_UP(bytes, align); } - ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, align, + ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, use_local_qiov ? &local_qiov : qiov, flags); From afa4b293230c0bad3dfbfa9c0ff3f7bdfab40430 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Feb 2017 16:49:53 +0100 Subject: [PATCH 43/46] block: Assertions for write permissions This adds assertions that ensure that the necessary write permissions have been granted before someone attempts to write to a node. Signed-off-by: Kevin Wolf Acked-by: Fam Zheng Reviewed-by: Max Reitz --- block/io.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/io.c b/block/io.c index 2592ca1bd4..4c797454a4 100644 --- a/block/io.c +++ b/block/io.c @@ -945,6 +945,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, size_t skip_bytes; int ret; + assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE)); + /* Cover entire cluster so no additional backing file I/O is required when * allocating cluster in the image file. */ @@ -1336,6 +1338,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, assert(!waited || !req->serialising); assert(req->overlap_offset <= offset); assert(offset + bytes <= req->overlap_offset + req->overlap_bytes); + assert(child->perm & BLK_PERM_WRITE); ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req); From c8f6d58edb0defbe1e90d44419ec8ec6d711c341 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 17 Feb 2017 14:52:00 +0100 Subject: [PATCH 44/46] block: Assertions for resize permission This adds an assertion that ensures that the necessary resize permission has been granted before bdrv_truncate() is called. 
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng --- block.c | 3 +++ block/io.c | 1 + 2 files changed, 4 insertions(+) diff --git a/block.c b/block.c index 79d9ad0ad6..74ac7dcf74 100644 --- a/block.c +++ b/block.c @@ -3122,6 +3122,9 @@ int bdrv_truncate(BdrvChild *child, int64_t offset) BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; int ret; + + assert(child->perm & BLK_PERM_RESIZE); + if (!drv) return -ENOMEDIUM; if (!drv->bdrv_truncate) diff --git a/block/io.c b/block/io.c index 4c797454a4..8f38d46de0 100644 --- a/block/io.c +++ b/block/io.c @@ -1339,6 +1339,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, assert(req->overlap_offset <= offset); assert(offset + bytes <= req->overlap_offset + req->overlap_bytes); assert(child->perm & BLK_PERM_WRITE); + assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE); ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req); From 12fa4af61fb2a08b156134c3b6717534c637c995 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 17 Feb 2017 20:42:32 +0100 Subject: [PATCH 45/46] block: Add Error parameter to bdrv_set_backing_hd() Not all callers of bdrv_set_backing_hd() know for sure that attaching the backing file will be allowed by the permission system. Return the error from the function rather than aborting. Signed-off-by: Kevin Wolf Acked-by: Fam Zheng Reviewed-by: Max Reitz --- block.c | 30 +++++++++++++++++++++++------- block/commit.c | 14 +++++++------- block/mirror.c | 7 ++++++- block/stream.c | 9 ++++++++- block/vvfat.c | 2 +- include/block/block.h | 3 ++- 6 files changed, 47 insertions(+), 18 deletions(-) diff --git a/block.c b/block.c index 74ac7dcf74..6440b61be6 100644 --- a/block.c +++ b/block.c @@ -1883,7 +1883,8 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs) * Sets the backing file link of a BDS. A new reference is created; callers * which don't need their own reference any more must call bdrv_unref(). 
*/ -void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp) { if (backing_hd) { bdrv_ref(backing_hd); @@ -1897,9 +1898,12 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) bs->backing = NULL; goto out; } - /* FIXME Error handling */ + bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing, - &error_abort); + errp); + if (!bs->backing) { + bdrv_unref(backing_hd); + } out: bdrv_refresh_limits(bs, NULL); @@ -1983,8 +1987,12 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, /* Hook up the backing file link; drop our reference, bs owns the * backing_hd reference now */ - bdrv_set_backing_hd(bs, backing_hd); + bdrv_set_backing_hd(bs, backing_hd, &local_err); bdrv_unref(backing_hd); + if (local_err) { + ret = -EINVAL; + goto free_exit; + } qdict_del(parent_options, bdref_key); @@ -2818,7 +2826,7 @@ static void bdrv_close(BlockDriverState *bs) bs->drv->bdrv_close(bs); bs->drv = NULL; - bdrv_set_backing_hd(bs, NULL); + bdrv_set_backing_hd(bs, NULL, &error_abort); if (bs->file != NULL) { bdrv_unref_child(bs, bs->file); @@ -2927,7 +2935,8 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) bdrv_ref(bs_top); change_parent_backing_link(bs_top, bs_new); - bdrv_set_backing_hd(bs_new, bs_top); + /* FIXME Error handling */ + bdrv_set_backing_hd(bs_new, bs_top, &error_abort); bdrv_unref(bs_top); /* bs_new is now referenced by its new parents, we don't need the @@ -3075,6 +3084,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, BlockDriverState *base, const char *backing_file_str) { BlockDriverState *new_top_bs = NULL; + Error *local_err = NULL; int ret = -EIO; if (!top->drv || !base->drv) { @@ -3107,7 +3117,13 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, if (ret) { goto exit; } - bdrv_set_backing_hd(new_top_bs, 
base); + + bdrv_set_backing_hd(new_top_bs, base, &local_err); + if (local_err) { + ret = -EPERM; + error_report_err(local_err); + goto exit; + } ret = 0; exit: diff --git a/block/commit.c b/block/commit.c index 1e0f5318a4..22a0a4db98 100644 --- a/block/commit.c +++ b/block/commit.c @@ -121,7 +121,7 @@ static void commit_complete(BlockJob *job, void *opaque) * filter driver from the backing chain. Do this as the final step so that * the 'consistent read' permission can be granted. */ if (remove_commit_top_bs) { - bdrv_set_backing_hd(overlay_bs, top); + bdrv_set_backing_hd(overlay_bs, top, &error_abort); } } @@ -316,8 +316,8 @@ void commit_start(const char *job_id, BlockDriverState *bs, goto fail; } - bdrv_set_backing_hd(commit_top_bs, top); - bdrv_set_backing_hd(overlay_bs, commit_top_bs); + bdrv_set_backing_hd(commit_top_bs, top, &error_abort); + bdrv_set_backing_hd(overlay_bs, commit_top_bs, &error_abort); s->commit_top_bs = commit_top_bs; bdrv_unref(commit_top_bs); @@ -390,7 +390,7 @@ fail: blk_unref(s->top); } if (commit_top_bs) { - bdrv_set_backing_hd(overlay_bs, top); + bdrv_set_backing_hd(overlay_bs, top, &error_abort); } block_job_unref(&s->common); } @@ -451,8 +451,8 @@ int bdrv_commit(BlockDriverState *bs) goto ro_cleanup; } - bdrv_set_backing_hd(commit_top_bs, backing_file_bs); - bdrv_set_backing_hd(bs, commit_top_bs); + bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort); + bdrv_set_backing_hd(bs, commit_top_bs, &error_abort); ret = blk_insert_bs(backing, backing_file_bs, &local_err); if (ret < 0) { @@ -532,7 +532,7 @@ ro_cleanup: blk_unref(backing); if (backing_file_bs) { - bdrv_set_backing_hd(bs, backing_file_bs); + bdrv_set_backing_hd(bs, backing_file_bs, &error_abort); } bdrv_unref(commit_top_bs); blk_unref(src); diff --git a/block/mirror.c b/block/mirror.c index 869212daac..8497e0db83 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -503,6 +503,7 @@ static void mirror_exit(BlockJob *job, void *opaque) BlockDriverState *src = 
s->source; BlockDriverState *target_bs = blk_bs(s->target); BlockDriverState *mirror_top_bs = s->mirror_top_bs; + Error *local_err = NULL; /* Make sure that the source BDS doesn't go away before we called * block_job_completed(). */ @@ -516,7 +517,11 @@ static void mirror_exit(BlockJob *job, void *opaque) if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { BlockDriverState *backing = s->is_none_mode ? src : s->base; if (backing_bs(target_bs) != backing) { - bdrv_set_backing_hd(target_bs, backing); + bdrv_set_backing_hd(target_bs, backing, &local_err); + if (local_err) { + error_report_err(local_err); + data->ret = -EPERM; + } } } diff --git a/block/stream.c b/block/stream.c index b9c2f43c57..0113710845 100644 --- a/block/stream.c +++ b/block/stream.c @@ -68,6 +68,7 @@ static void stream_complete(BlockJob *job, void *opaque) StreamCompleteData *data = opaque; BlockDriverState *bs = blk_bs(job->blk); BlockDriverState *base = s->base; + Error *local_err = NULL; if (!block_job_is_cancelled(&s->common) && data->reached_end && data->ret == 0) { @@ -79,9 +80,15 @@ static void stream_complete(BlockJob *job, void *opaque) } } data->ret = bdrv_change_backing_file(bs, base_id, base_fmt); - bdrv_set_backing_hd(bs, base); + bdrv_set_backing_hd(bs, base, &local_err); + if (local_err) { + error_report_err(local_err); + data->ret = -EPERM; + goto out; + } } +out: /* Reopen the image back in read-only mode if necessary */ if (s->bs_flags != bdrv_get_flags(bs)) { /* Give up write permissions before making it read-only */ diff --git a/block/vvfat.c b/block/vvfat.c index 72b482cb1f..aa61c329e7 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -3041,7 +3041,7 @@ static int enable_write_target(BlockDriverState *bs, Error **errp) &error_abort); *(void**) backing->opaque = s; - bdrv_set_backing_hd(s->bs, backing); + bdrv_set_backing_hd(s->bs, backing, &error_abort); bdrv_unref(backing); return 0; diff --git a/include/block/block.h b/include/block/block.h index 07f7561886..eac286124d 
100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -247,7 +247,8 @@ BdrvChild *bdrv_open_child(const char *filename, BlockDriverState* parent, const BdrvChildRole *child_role, bool allow_none, Error **errp); -void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp); int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, const char *bdref_key, Error **errp); BlockDriverState *bdrv_open(const char *filename, const char *reference, From b2c2832c6140cfe3ddc0de2d77eeb0b77dea8fd3 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 20 Feb 2017 12:46:42 +0100 Subject: [PATCH 46/46] block: Add Error parameter to bdrv_append() Aborting on error in bdrv_append() isn't correct. This patch fixes it and lets the callers handle failures. Test case 085 needs a reference output update. This is caused by the reversed order of bdrv_set_backing_hd() and change_parent_backing_link() in bdrv_append(): When the backing file of the new node is set, the parent nodes are still pointing to the old top, so the backing blocker is now initialised with the node name rather than the BlockBackend name. 
Signed-off-by: Kevin Wolf Acked-by: Fam Zheng Reviewed-by: Max Reitz --- block.c | 23 +++++++++++++++++------ block/mirror.c | 9 ++++++++- blockdev.c | 18 +++++++++++++++--- include/block/block.h | 3 ++- tests/qemu-iotests/085.out | 2 +- 5 files changed, 43 insertions(+), 12 deletions(-) diff --git a/block.c b/block.c index 6440b61be6..f293ccb5af 100644 --- a/block.c +++ b/block.c @@ -2087,6 +2087,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, int64_t total_size; QemuOpts *opts = NULL; BlockDriverState *bs_snapshot; + Error *local_err = NULL; int ret; /* if snapshot, we create a temporary backing file and open it @@ -2136,7 +2137,12 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, * call bdrv_unref() on it), so in order to be able to return one, we have * to increase bs_snapshot's refcount here */ bdrv_ref(bs_snapshot); - bdrv_append(bs_snapshot, bs); + bdrv_append(bs_snapshot, bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto out; + } g_free(tmp_filename); return bs_snapshot; @@ -2927,20 +2933,25 @@ static void change_parent_backing_link(BlockDriverState *from, * parents of bs_top after bdrv_append() returns. If the caller needs to keep a * reference of its own, it must call bdrv_ref(). */ -void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp) { + Error *local_err = NULL; + assert(!atomic_read(&bs_top->in_flight)); assert(!atomic_read(&bs_new->in_flight)); - bdrv_ref(bs_top); + bdrv_set_backing_hd(bs_new, bs_top, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } change_parent_backing_link(bs_top, bs_new); - /* FIXME Error handling */ - bdrv_set_backing_hd(bs_new, bs_top, &error_abort); - bdrv_unref(bs_top); /* bs_new is now referenced by its new parents, we don't need the * additional reference any more. 
*/ +out: bdrv_unref(bs_new); } diff --git a/block/mirror.c b/block/mirror.c index 8497e0db83..57f26c33a4 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1099,6 +1099,7 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, BlockDriverState *mirror_top_bs; bool target_graph_mod; bool target_is_backing; + Error *local_err = NULL; int ret; if (granularity == 0) { @@ -1130,9 +1131,15 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, * it alive until block_job_create() even if bs has no parent. */ bdrv_ref(mirror_top_bs); bdrv_drained_begin(bs); - bdrv_append(mirror_top_bs, bs); + bdrv_append(mirror_top_bs, bs, &local_err); bdrv_drained_end(bs); + if (local_err) { + bdrv_unref(mirror_top_bs); + error_propagate(errp, local_err); + return; + } + /* Make sure that the source is not resized while the job is running */ s = block_job_create(job_id, driver, mirror_top_bs, BLK_PERM_CONSISTENT_READ, diff --git a/blockdev.c b/blockdev.c index ff781d9df3..8eb4e84fe0 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1768,6 +1768,17 @@ static void external_snapshot_prepare(BlkActionState *common, if (!state->new_bs->drv->supports_backing) { error_setg(errp, "The snapshot does not support backing images"); + return; + } + + /* This removes our old bs and adds the new bs. This is an operation that + * can fail, so we need to do it in .prepare; undoing it for abort is + * always possible. 
*/ + bdrv_ref(state->new_bs); + bdrv_append(state->new_bs, state->old_bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; } } @@ -1778,8 +1789,6 @@ static void external_snapshot_commit(BlkActionState *common) bdrv_set_aio_context(state->new_bs, state->aio_context); - /* This removes our old bs and adds the new bs */ - bdrv_append(state->new_bs, state->old_bs); /* We don't need (or want) to use the transactional * bdrv_reopen_multiple() across all the entries at once, because we * don't want to abort all of them if one of them fails the reopen */ @@ -1794,7 +1803,9 @@ static void external_snapshot_abort(BlkActionState *common) ExternalSnapshotState *state = DO_UPCAST(ExternalSnapshotState, common, common); if (state->new_bs) { - bdrv_unref(state->new_bs); + if (state->new_bs->backing) { + bdrv_replace_in_backing_chain(state->new_bs, state->old_bs); + } } } @@ -1805,6 +1816,7 @@ static void external_snapshot_clean(BlkActionState *common) if (state->aio_context) { bdrv_drained_end(state->old_bs); aio_context_release(state->aio_context); + bdrv_unref(state->new_bs); } } diff --git a/include/block/block.h b/include/block/block.h index eac286124d..c7c4a3ac3a 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -236,7 +236,8 @@ int bdrv_create(BlockDriver *drv, const char* filename, QemuOpts *opts, Error **errp); int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); BlockDriverState *bdrv_new(void); -void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top); +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp); void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new); diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out index 08e4bb7218..182acb42cf 100644 --- a/tests/qemu-iotests/085.out +++ b/tests/qemu-iotests/085.out @@ -74,7 +74,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ === 
Invalid command - snapshot node used as backing hd === -{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'virtio0'"}} +{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'snap_12'"}} === Invalid command - snapshot node has a backing image ===