From a50c1f57e491ba17c0fcb7d285dfe04770be0a70 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 10 Apr 2020 14:18:15 +0200 Subject: [PATCH 01/15] qcow2: Add incompatibility note between backing files and raw external data files Backing files and raw external data files are mutually exclusive. The documentation of the raw external data bit (in autoclear_features) already indicates that, but we should also mention it on the other side. Suggested-by: Eric Blake Signed-off-by: Alberto Garcia Message-Id: <20200410121816.8334-1-berto@igalia.com> Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- docs/interop/qcow2.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt index 640e0eca40..298a031310 100644 --- a/docs/interop/qcow2.txt +++ b/docs/interop/qcow2.txt @@ -25,6 +25,9 @@ The first cluster of a qcow2 image contains the file header: is stored (NB: The string is not null terminated). 0 if the image doesn't have a backing file. + Note: backing files are incompatible with raw external data + files (auto-clear feature bit 1). + 16 - 19: backing_file_size Length of the backing file name in bytes. Must not be longer than 1023 bytes. Undefined if the image doesn't have From 3fb61087074474b088fee23bb81ffd258cb9cb2a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 9 Apr 2020 15:10:06 -0400 Subject: [PATCH 02/15] qemu-iotests: allow qcow2 external discarded clusters to contain stale data Test 244 checks the expected behavior of qcow2 external data files with respect to zero and discarded clusters. Filesystems however are free to ignore discard requests, and this seems to be the case for overlayfs. Relax the tests to skip checks on the external data file for discarded areas, which implies not using qemu-img compare in the data_file_raw=on case. This fixes docker tests on RHEL8. Cc: Kevin Wolf Cc: qemu-block@nongnu.org Signed-off-by: Paolo Bonzini Message-Id: <20200409191006.24429-1-pbonzini@redhat.com> Signed-off-by: Kevin Wolf --- tests/qemu-iotests/244 | 10 ++++++++-- tests/qemu-iotests/244.out | 9 ++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/qemu-iotests/244 b/tests/qemu-iotests/244 index 2ec1815e6f..efe3c0428b 100755 --- a/tests/qemu-iotests/244 +++ b/tests/qemu-iotests/244 @@ -143,7 +143,6 @@ $QEMU_IO -c 'read -P 0 0 1M' \ echo $QEMU_IO -c 'read -P 0 0 1M' \ -c 'read -P 0x11 1M 1M' \ - -c 'read -P 0 2M 2M' \ -c 'read -P 0x11 4M 1M' \ -c 'read -P 0 5M 1M' \ -f raw "$TEST_IMG.data" | @@ -180,8 +179,15 @@ $QEMU_IO -c 'read -P 0 0 1M' \ -f $IMGFMT "$TEST_IMG" | _filter_qemu_io +# Discarded clusters are only marked as such in the qcow2 metadata, but +# they can contain stale data in the external data file. Instead, zero +# clusters must be zeroed in the external data file too. echo -$QEMU_IMG compare "$TEST_IMG" "$TEST_IMG.data" +$QEMU_IO -c 'read -P 0 0 1M' \ + -c 'read -P 0x11 1M 1M' \ + -c 'read -P 0 3M 3M' \ + -f raw "$TEST_IMG".data | + _filter_qemu_io echo -n "qcow2 file size after I/O: " du -b $TEST_IMG | cut -f1 diff --git a/tests/qemu-iotests/244.out b/tests/qemu-iotests/244.out index 56329deb4b..dbab7359a9 100644 --- a/tests/qemu-iotests/244.out +++ b/tests/qemu-iotests/244.out @@ -74,8 +74,6 @@ read 1048576/1048576 bytes at offset 0 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 1048576/1048576 bytes at offset 1048576 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 2097152/2097152 bytes at offset 2097152 -2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 1048576/1048576 bytes at offset 4194304 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 1048576/1048576 bytes at offset 5242880 @@ -108,7 +106,12 @@ read 1048576/1048576 bytes at offset 1048576 read 4194304/4194304 bytes at offset 2097152 4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -Images are identical. +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 3145728/3145728 bytes at offset 3145728 +3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) qcow2 file size after I/O: 327680 === bdrv_co_block_status test for file and offset=0 === From 92b92799dc8662b6f71809100a4aabc1ae408ebb Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 24 Apr 2020 14:54:39 +0200 Subject: [PATCH 03/15] block: Add flags to BlockDriver.bdrv_co_truncate() This adds a new BdrvRequestFlags parameter to the .bdrv_co_truncate() driver callbacks, and a supported_truncate_flags field in BlockDriverState that allows drivers to advertise support for request flags in the context of truncate. For now, we always pass 0 and no drivers declare support for any flag. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Alberto Garcia Reviewed-by: Max Reitz Message-Id: <20200424125448.63318-2-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/crypto.c | 3 ++- block/file-posix.c | 2 +- block/file-win32.c | 2 +- block/gluster.c | 1 + block/io.c | 8 +++++++- block/iscsi.c | 2 +- block/nfs.c | 3 ++- block/qcow2.c | 2 +- block/qed.c | 1 + block/raw-format.c | 2 +- block/rbd.c | 1 + block/sheepdog.c | 4 ++-- block/ssh.c | 2 +- include/block/block_int.h | 10 +++++++++- tests/test-block-iothread.c | 3 ++- 15 files changed, 33 insertions(+), 13 deletions(-) diff --git a/block/crypto.c b/block/crypto.c index d577f89659..3721a8495c 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -299,7 +299,8 @@ static int block_crypto_co_create_generic(BlockDriverState *bs, static int coroutine_fn block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, - PreallocMode prealloc, Error **errp) + PreallocMode prealloc, BdrvRequestFlags flags, + Error **errp) { BlockCrypto *crypto = bs->opaque; uint64_t payload_offset = diff --git a/block/file-posix.c b/block/file-posix.c index 094e3b0212..58326a0a60 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2080,7 +2080,7 @@ raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset, static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, PreallocMode prealloc, - Error **errp) + BdrvRequestFlags flags, Error **errp) { BDRVRawState *s = bs->opaque; struct stat st; diff --git a/block/file-win32.c b/block/file-win32.c index 15859839a1..a6b0dda5c3 100644 --- a/block/file-win32.c +++ b/block/file-win32.c @@ -469,7 +469,7 @@ static void raw_close(BlockDriverState *bs) static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, PreallocMode prealloc, - Error **errp) + BdrvRequestFlags flags, Error **errp) { BDRVRawState *s = bs->opaque; LONG low, high; diff --git a/block/gluster.c b/block/gluster.c index 0aa1f2cda4..d06df900f6 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -1228,6 +1228,7 @@ static coroutine_fn int qemu_gluster_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, PreallocMode prealloc, + BdrvRequestFlags flags, Error **errp) { BDRVGlusterState *s = bs->opaque; diff --git a/block/io.c b/block/io.c index aba67f66b9..04ac5cf023 100644 --- a/block/io.c +++ b/block/io.c @@ -3344,6 +3344,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; BdrvTrackedRequest req; + BdrvRequestFlags flags = 0; int64_t old_size, new_bytes; int ret; @@ -3394,7 +3395,12 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, } if (drv->bdrv_co_truncate) { - ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, errp); + if (flags & ~bs->supported_truncate_flags) { + error_setg(errp, "Block driver does not support requested flags"); + ret = -ENOTSUP; + goto out; + } + ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp); } else if (bs->file && drv->is_filter) { ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); } else { diff --git a/block/iscsi.c b/block/iscsi.c index 0b4b7210df..914a1de9fb 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -2124,7 +2124,7 @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state) static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, PreallocMode prealloc, - Error **errp) + BdrvRequestFlags flags, Error **errp) { IscsiLun *iscsilun = bs->opaque; int64_t cur_length; diff --git a/block/nfs.c b/block/nfs.c index cc2413d5ab..2393fbfe6b 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -755,7 +755,8 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs) static int coroutine_fn nfs_file_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, - PreallocMode prealloc, Error **errp) + PreallocMode prealloc, BdrvRequestFlags flags, + Error **errp) { NFSClient *client = bs->opaque; int ret; diff --git a/block/qcow2.c b/block/qcow2.c index b524b0c53f..0b406b22fb 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3964,7 +3964,7 @@ fail: static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, PreallocMode prealloc, - Error **errp) + BdrvRequestFlags flags, Error **errp) { BDRVQcow2State *s = bs->opaque; uint64_t old_length; diff --git a/block/qed.c b/block/qed.c index 1af9b3cb1d..fb6100bd20 100644 --- a/block/qed.c +++ b/block/qed.c @@ -1467,6 +1467,7 @@ static int coroutine_fn bdrv_qed_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, PreallocMode prealloc, + BdrvRequestFlags flags, Error **errp) { BDRVQEDState *s = bs->opaque; diff --git a/block/raw-format.c b/block/raw-format.c index 93b25e1b6b..9331368f43 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -371,7 +371,7 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, PreallocMode prealloc, - Error **errp) + BdrvRequestFlags flags, Error **errp) { BDRVRawState *s = bs->opaque; diff --git a/block/rbd.c b/block/rbd.c index e637639a07..f2d52091c7 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -1108,6 +1108,7 @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, PreallocMode prealloc, + BdrvRequestFlags flags, Error **errp) { int r; diff --git a/block/sheepdog.c b/block/sheepdog.c index 5f3aead038..76729f40a4 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -2281,7 +2281,7 @@ static int64_t sd_getlength(BlockDriverState *bs) static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, PreallocMode prealloc, - Error **errp) + BdrvRequestFlags flags, Error **errp) { BDRVSheepdogState *s = bs->opaque; int ret, fd; @@ -2597,7 +2597,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, assert(!flags); if (offset > s->inode.vdi_size) { - ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, NULL); + ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, 0, NULL); if (ret < 0) { return ret; } diff --git a/block/ssh.c b/block/ssh.c index 84e92821c0..9eb33df859 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -1298,7 +1298,7 @@ static int64_t ssh_getlength(BlockDriverState *bs) static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, PreallocMode prealloc, - Error **errp) + BdrvRequestFlags flags, Error **errp) { BDRVSSHState *s = bs->opaque; diff --git a/include/block/block_int.h b/include/block/block_int.h index 4c3587ea19..92335f33c7 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -355,7 +355,7 @@ struct BlockDriver { */ int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset, bool exact, PreallocMode prealloc, - Error **errp); + BdrvRequestFlags flags, Error **errp); int64_t (*bdrv_getlength)(BlockDriverState *bs); bool has_variable_length; @@ -847,6 +847,14 @@ struct BlockDriverState { /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA, * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */ unsigned int supported_zero_flags; + /* + * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE). + * + * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure + * that any added space reads as all zeros. If this can't be guaranteed, + * the operation must fail. + */ + unsigned int supported_truncate_flags; /* the following member gives a name to every node on the bs graph. */ char node_name[32]; diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c index 0c861809f0..2f3b76323d 100644 --- a/tests/test-block-iothread.c +++ b/tests/test-block-iothread.c @@ -46,7 +46,8 @@ static int coroutine_fn bdrv_test_co_pdiscard(BlockDriverState *bs, static int coroutine_fn bdrv_test_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, - PreallocMode prealloc, Error **errp) + PreallocMode prealloc, BdrvRequestFlags flags, + Error **errp) { return 0; } From 7b8e4857426f2e2de2441749996c6161b550bada Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 24 Apr 2020 14:54:40 +0200 Subject: [PATCH 04/15] block: Add flags to bdrv(_co)_truncate() Now that block drivers can support flags for .bdrv_co_truncate, expose the parameter in the node level interfaces bdrv_co_truncate() and bdrv_truncate(). Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Alberto Garcia Reviewed-by: Max Reitz Message-Id: <20200424125448.63318-3-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/block-backend.c | 2 +- block/crypto.c | 2 +- block/io.c | 12 +++++++----- block/parallels.c | 6 +++--- block/qcow.c | 4 ++-- block/qcow2-refcount.c | 2 +- block/qcow2.c | 15 +++++++++------ block/raw-format.c | 2 +- block/vhdx-log.c | 2 +- block/vhdx.c | 2 +- block/vmdk.c | 2 +- include/block/block.h | 5 +++-- tests/test-block-iothread.c | 6 +++--- 13 files changed, 34 insertions(+), 28 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 38ae413826..8be20060d3 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -2144,7 +2144,7 @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, return -ENOMEDIUM; } - return bdrv_truncate(blk->root, offset, exact, prealloc, errp); + return bdrv_truncate(blk->root, offset, exact, prealloc, 0, errp); } int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, diff --git a/block/crypto.c b/block/crypto.c index 3721a8495c..ab33545c92 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -313,7 +313,7 @@ block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, offset += payload_offset; - return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); + return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp); } static void block_crypto_close(BlockDriverState *bs) diff --git a/block/io.c b/block/io.c index 04ac5cf023..795075954e 100644 --- a/block/io.c +++ b/block/io.c @@ -3339,12 +3339,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs) * 'offset' bytes in length. */ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, - PreallocMode prealloc, Error **errp) + PreallocMode prealloc, BdrvRequestFlags flags, + Error **errp) { BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; BdrvTrackedRequest req; - BdrvRequestFlags flags = 0; int64_t old_size, new_bytes; int ret; @@ -3402,7 +3402,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, } ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp); } else if (bs->file && drv->is_filter) { - ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); + ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp); } else { error_setg(errp, "Image format driver does not support resize"); ret = -ENOTSUP; @@ -3435,6 +3435,7 @@ typedef struct TruncateCo { int64_t offset; bool exact; PreallocMode prealloc; + BdrvRequestFlags flags; Error **errp; int ret; } TruncateCo; @@ -3443,12 +3444,12 @@ static void coroutine_fn bdrv_truncate_co_entry(void *opaque) { TruncateCo *tco = opaque; tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->exact, - tco->prealloc, tco->errp); + tco->prealloc, tco->flags, tco->errp); aio_wait_kick(); } int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, - PreallocMode prealloc, Error **errp) + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp) { Coroutine *co; TruncateCo tco = { @@ -3456,6 +3457,7 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, .offset = offset, .exact = exact, .prealloc = prealloc, + .flags = flags, .errp = errp, .ret = NOT_DONE, }; diff --git a/block/parallels.c b/block/parallels.c index 6d4ed77f16..2be92cf417 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -203,7 +203,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, } else { ret = bdrv_truncate(bs->file, (s->data_end + space) << BDRV_SECTOR_BITS, - false, PREALLOC_MODE_OFF, NULL); + false, PREALLOC_MODE_OFF, 0, NULL); } if (ret < 0) { return ret; @@ -493,7 +493,7 @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs, * That means we have to pass exact=true. */ ret = bdrv_truncate(bs->file, res->image_end_offset, true, - PREALLOC_MODE_OFF, &local_err); + PREALLOC_MODE_OFF, 0, &local_err); if (ret < 0) { error_report_err(local_err); res->check_errors++; @@ -889,7 +889,7 @@ static void parallels_close(BlockDriverState *bs) /* errors are ignored, so we might as well pass exact=true */ bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, true, - PREALLOC_MODE_OFF, NULL); + PREALLOC_MODE_OFF, 0, NULL); } g_free(s->bat_dirty_bmap); diff --git a/block/qcow.c b/block/qcow.c index 8973e4e565..6b5f2269f0 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -480,7 +480,7 @@ static int get_cluster_offset(BlockDriverState *bs, return -E2BIG; } ret = bdrv_truncate(bs->file, cluster_offset + s->cluster_size, - false, PREALLOC_MODE_OFF, NULL); + false, PREALLOC_MODE_OFF, 0, NULL); if (ret < 0) { return ret; } @@ -1035,7 +1035,7 @@ static int qcow_make_empty(BlockDriverState *bs) l1_length) < 0) return -1; ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, false, - PREALLOC_MODE_OFF, NULL); + PREALLOC_MODE_OFF, 0, NULL); if (ret < 0) return ret; diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 7ef1c0e42a..d9650b9b6c 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -2018,7 +2018,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, } ret = bdrv_truncate(bs->file, offset + s->cluster_size, false, - PREALLOC_MODE_OFF, &local_err); + PREALLOC_MODE_OFF, 0, &local_err); if (ret < 0) { error_report_err(local_err); goto resize_fail; diff --git a/block/qcow2.c b/block/qcow2.c index 0b406b22fb..c5b0711357 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3095,7 +3095,7 @@ static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, mode = PREALLOC_MODE_OFF; } ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false, - mode, errp); + mode, 0, errp); if (ret < 0) { return ret; } @@ -4061,7 +4061,7 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, * always fulfilled, so there is no need to pass it on.) */ bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size, - false, PREALLOC_MODE_OFF, &local_err); + false, PREALLOC_MODE_OFF, 0, &local_err); if (local_err) { warn_reportf_err(local_err, "Failed to truncate the tail of the image: "); @@ -4083,7 +4083,8 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, * file should be resized to the exact target size, too, * so we pass @exact here. */ - ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, errp); + ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, 0, + errp); if (ret < 0) { goto fail; } @@ -4169,7 +4170,8 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, new_file_size = allocation_start + nb_new_data_clusters * s->cluster_size; /* Image file grows, so @exact does not matter */ - ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, errp); + ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0, + errp); if (ret < 0) { error_prepend(errp, "Failed to resize underlying file: "); qcow2_free_clusters(bs, allocation_start, @@ -4348,7 +4350,8 @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs, if (len < 0) { return len; } - return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, NULL); + return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, 0, + NULL); } if (offset_into_cluster(s, offset)) { @@ -4563,7 +4566,7 @@ static int make_completely_empty(BlockDriverState *bs) } ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false, - PREALLOC_MODE_OFF, &local_err); + PREALLOC_MODE_OFF, 0, &local_err); if (ret < 0) { error_report_err(local_err); goto fail; diff --git a/block/raw-format.c b/block/raw-format.c index 9331368f43..3465c9a865 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -387,7 +387,7 @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, s->size = offset; offset += s->offset; - return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); + return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp); } static void raw_eject(BlockDriverState *bs, bool eject_flag) diff --git a/block/vhdx-log.c b/block/vhdx-log.c index 13a49c2a33..404fb5f3cb 100644 --- a/block/vhdx-log.c +++ b/block/vhdx-log.c @@ -558,7 +558,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, goto exit; } ret = bdrv_truncate(bs->file, new_file_size, false, - PREALLOC_MODE_OFF, NULL); + PREALLOC_MODE_OFF, 0, NULL); if (ret < 0) { goto exit; } diff --git a/block/vhdx.c b/block/vhdx.c index e16fdc2f2d..3a33eda99c 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1264,7 +1264,7 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s, } return bdrv_truncate(bs->file, *new_offset + s->block_size, false, - PREALLOC_MODE_OFF, NULL); + PREALLOC_MODE_OFF, 0, NULL); } /* diff --git a/block/vmdk.c b/block/vmdk.c index 218d9c9800..5de99fe813 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -2077,7 +2077,7 @@ vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, } length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE); ret = bdrv_truncate(s->extents[i].file, length, false, - PREALLOC_MODE_OFF, NULL); + PREALLOC_MODE_OFF, 0, NULL); if (ret < 0) { return ret; } diff --git a/include/block/block.h b/include/block/block.h index b05995fe9c..8b62429aa4 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -339,9 +339,10 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, void bdrv_refresh_filename(BlockDriverState *bs); int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, - PreallocMode prealloc, Error **errp); + PreallocMode prealloc, BdrvRequestFlags flags, + Error **errp); int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, - PreallocMode prealloc, Error **errp); + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); int64_t bdrv_nb_sectors(BlockDriverState *bs); int64_t bdrv_getlength(BlockDriverState *bs); diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c index 2f3b76323d..71e9bce3b1 100644 --- a/tests/test-block-iothread.c +++ b/tests/test-block-iothread.c @@ -186,18 +186,18 @@ static void test_sync_op_truncate(BdrvChild *c) int ret; /* Normal success path */ - ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL); + ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL); g_assert_cmpint(ret, ==, 0); /* Early error: Negative offset */ - ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, NULL); + ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, 0, NULL); g_assert_cmpint(ret, ==, -EINVAL); /* Error: Read-only image */ c->bs->read_only = true; c->bs->open_flags &= ~BDRV_O_RDWR; - ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL); + ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL); g_assert_cmpint(ret, ==, -EACCES); c->bs->read_only = false; From 8c6242b6f383e43fd11d2c50f8bcdd2bba1100fc Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 24 Apr 2020 14:54:41 +0200 Subject: [PATCH 05/15] block-backend: Add flags to blk_truncate() Now that node level interface bdrv_truncate() supports passing request flags to the block driver, expose this on the BlockBackend level, too. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Alberto Garcia Reviewed-by: Max Reitz Message-Id: <20200424125448.63318-4-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 3 ++- block/block-backend.c | 4 ++-- block/commit.c | 4 ++-- block/crypto.c | 2 +- block/mirror.c | 2 +- block/qcow2.c | 4 ++-- block/qed.c | 2 +- block/vdi.c | 2 +- block/vhdx.c | 4 ++-- block/vmdk.c | 6 +++--- block/vpc.c | 2 +- blockdev.c | 2 +- include/sysemu/block-backend.h | 2 +- qemu-img.c | 2 +- qemu-io-cmds.c | 2 +- 15 files changed, 22 insertions(+), 21 deletions(-) diff --git a/block.c b/block.c index c11385ae05..301ec588bd 100644 --- a/block.c +++ b/block.c @@ -548,7 +548,8 @@ static int64_t create_file_fallback_truncate(BlockBackend *blk, int64_t size; int ret; - ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, &local_err); + ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0, + &local_err); if (ret < 0 && ret != -ENOTSUP) { error_propagate(errp, local_err); return ret; diff --git a/block/block-backend.c b/block/block-backend.c index 8be20060d3..17ed6d8c5b 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -2137,14 +2137,14 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, } int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, - PreallocMode prealloc, Error **errp) + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp) { if (!blk_is_available(blk)) { error_setg(errp, "No medium inserted"); return -ENOMEDIUM; } - return bdrv_truncate(blk->root, offset, exact, prealloc, 0, errp); + return bdrv_truncate(blk->root, offset, exact, prealloc, flags, errp); } int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, diff --git a/block/commit.c b/block/commit.c index 8e672799af..87f6096d90 100644 --- a/block/commit.c +++ b/block/commit.c @@ -133,7 +133,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) } if (base_len < len) { - ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, NULL); + ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL); if (ret) { goto out; } @@ -458,7 +458,7 @@ int bdrv_commit(BlockDriverState *bs) * grow the backing file image if possible. If not possible, * we must return an error */ if (length > backing_length) { - ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, + ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, 0, &local_err); if (ret < 0) { error_report_err(local_err); diff --git a/block/crypto.c b/block/crypto.c index ab33545c92..e02f343590 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -115,7 +115,7 @@ static ssize_t block_crypto_init_func(QCryptoBlock *block, * which will be used by the crypto header */ return blk_truncate(data->blk, data->size + headerlen, false, - data->prealloc, errp); + data->prealloc, 0, errp); } diff --git a/block/mirror.c b/block/mirror.c index c26fd9260d..aca95c9bc9 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -900,7 +900,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) if (s->bdev_length > base_length) { ret = blk_truncate(s->target, s->bdev_length, false, - PREALLOC_MODE_OFF, NULL); + PREALLOC_MODE_OFF, 0, NULL); if (ret < 0) { goto immediate_exit; } diff --git a/block/qcow2.c b/block/qcow2.c index c5b0711357..9cfbdfc939 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3511,7 +3511,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) /* Okay, now that we have a valid image, let's give it the right size */ ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation, - errp); + 0, errp); if (ret < 0) { error_prepend(errp, "Could not resize image: "); goto out; @@ -5374,7 +5374,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, * Amending image options should ensure that the image has * exactly the given new values, so pass exact=true here. */ - ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, errp); + ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, 0, errp); blk_unref(blk); if (ret < 0) { return ret; diff --git a/block/qed.c b/block/qed.c index fb6100bd20..b0fdb8f565 100644 --- a/block/qed.c +++ b/block/qed.c @@ -677,7 +677,7 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, * The QED format associates file length with allocation status, * so a new file (which is empty) must have a length of 0. */ - ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, errp); + ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, 0, errp); if (ret < 0) { goto out; } diff --git a/block/vdi.c b/block/vdi.c index e1a11f2aa0..0c7835ae70 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -875,7 +875,7 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, if (image_type == VDI_TYPE_STATIC) { ret = blk_truncate(blk, offset + blocks * block_size, false, - PREALLOC_MODE_OFF, errp); + PREALLOC_MODE_OFF, 0, errp); if (ret < 0) { error_prepend(errp, "Failed to statically allocate file"); goto exit; diff --git a/block/vhdx.c b/block/vhdx.c index 3a33eda99c..45be0a4321 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1703,13 +1703,13 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s, /* All zeroes, so we can just extend the file - the end of the BAT * is the furthest thing we have written yet */ ret = blk_truncate(blk, data_file_offset, false, PREALLOC_MODE_OFF, - errp); + 0, errp); if (ret < 0) { goto exit; } } else if (type == VHDX_TYPE_FIXED) { ret = blk_truncate(blk, data_file_offset + image_size, false, - PREALLOC_MODE_OFF, errp); + PREALLOC_MODE_OFF, 0, errp); if (ret < 0) { goto exit; } diff --git a/block/vmdk.c b/block/vmdk.c index 5de99fe813..8ec18f35a5 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -2118,7 +2118,7 @@ static int vmdk_init_extent(BlockBackend *blk, int gd_buf_size; if (flat) { - ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, errp); + ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, 0, errp); goto exit; } magic = cpu_to_be32(VMDK4_MAGIC); @@ -2182,7 +2182,7 @@ static int vmdk_init_extent(BlockBackend *blk, } ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, false, - PREALLOC_MODE_OFF, errp); + PREALLOC_MODE_OFF, 0, errp); if (ret < 0) { goto exit; } @@ -2523,7 +2523,7 @@ static int coroutine_fn vmdk_co_do_create(int64_t size, /* bdrv_pwrite write padding zeros to align to sector, we don't need that * for description file */ if (desc_offset == 0) { - ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, errp); + ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, 0, errp); if (ret < 0) { goto exit; } diff --git a/block/vpc.c b/block/vpc.c index d8141b52da..2d1eade146 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -898,7 +898,7 @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf, /* Add footer to total size */ total_size += HEADER_SIZE; - ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, errp); + ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, 0, errp); if (ret < 0) { return ret; } diff --git a/blockdev.c b/blockdev.c index 9da960b1e7..dc1a0c7c2f 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2741,7 +2741,7 @@ void qmp_block_resize(bool has_device, const char *device, } bdrv_drained_begin(bs); - ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, errp); + ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp); bdrv_drained_end(bs); out: diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 9bbdbd63d7..34de7faa81 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -237,7 +237,7 @@ int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, int bytes); int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, - PreallocMode prealloc, Error **errp); + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes); int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, int64_t pos, int size); diff --git a/qemu-img.c b/qemu-img.c index a2369766f0..6a4327aaba 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -3897,7 +3897,7 @@ static int img_resize(int argc, char **argv) * resizing, so pass @exact=true. It is of no use to report * success when the image has not actually been resized. */ - ret = blk_truncate(blk, total_size, true, prealloc, &err); + ret = blk_truncate(blk, total_size, true, prealloc, 0, &err); if (!ret) { qprintf(quiet, "Image resized.\n"); } else { diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 1b7e700020..851f07e8f8 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -1715,7 +1715,7 @@ static int truncate_f(BlockBackend *blk, int argc, char **argv) * exact=true. It is better to err on the "emit more errors" side * than to be overly permissive. */ - ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, &local_err); + ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, 0, &local_err); if (ret < 0) { error_report_err(local_err); return ret; From f01643fb8b47e8a70c04bbf45e0f12a9e5bc54de Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 24 Apr 2020 14:54:42 +0200 Subject: [PATCH 06/15] qcow2: Support BDRV_REQ_ZERO_WRITE for truncate If BDRV_REQ_ZERO_WRITE is set and we're extending the image, calling qcow2_cluster_zeroize() with flags=0 does the right thing: It doesn't undo any previous preallocation, but just adds the zero flag to all relevant L2 entries. If an external data file is in use, a write_zeroes request to the data file is made instead. Signed-off-by: Kevin Wolf Message-Id: <20200424125448.63318-5-kwolf@redhat.com> Reviewed-by: Eric Blake Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 2 +- block/qcow2.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 17f1363279..4b5fc8c4a7 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1795,7 +1795,7 @@ int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset, /* Caller must pass aligned values, except at image end */ assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) || - end_offset == bs->total_sectors << BDRV_SECTOR_BITS); + end_offset >= bs->total_sectors << BDRV_SECTOR_BITS); /* The zero flag is only supported by version 3 and newer */ if (s->qcow_version < 3) { diff --git a/block/qcow2.c b/block/qcow2.c index 9cfbdfc939..98065d7808 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1726,6 +1726,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, bs->supported_zero_flags = header.version >= 3 ? BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK : 0; + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; /* Repair image if dirty */ if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && @@ -4214,6 +4215,39 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, g_assert_not_reached(); } + if ((flags & BDRV_REQ_ZERO_WRITE) && offset > old_length) { + uint64_t zero_start = QEMU_ALIGN_UP(old_length, s->cluster_size); + + /* + * Use zero clusters as much as we can. qcow2_cluster_zeroize() + * requires a cluster-aligned start. The end may be unaligned if it is + * at the end of the image (which it is here). + */ + ret = qcow2_cluster_zeroize(bs, zero_start, offset - zero_start, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to zero out new clusters"); + goto fail; + } + + /* Write explicit zeros for the unaligned head */ + if (zero_start > old_length) { + uint64_t len = zero_start - old_length; + uint8_t *buf = qemu_blockalign0(bs, len); + QEMUIOVector qiov; + qemu_iovec_init_buf(&qiov, buf, len); + + qemu_co_mutex_unlock(&s->lock); + ret = qcow2_co_pwritev_part(bs, old_length, len, &qiov, 0, 0); + qemu_co_mutex_lock(&s->lock); + + qemu_vfree(buf); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to zero out the new area"); + goto fail; + } + } + } + if (prealloc != PREALLOC_MODE_OFF) { /* Flush metadata before actually changing the image size */ ret = qcow2_write_caches(bs); From 1ddaabaecb7eaeb6d8948a32340af95db44c54a1 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 24 Apr 2020 14:54:43 +0200 Subject: [PATCH 07/15] raw-format: Support BDRV_REQ_ZERO_WRITE for truncate The raw format driver can simply forward the flag and let its bs->file child take care of actually providing the zeros. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200424125448.63318-6-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/raw-format.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/raw-format.c b/block/raw-format.c index 3465c9a865..351f2d91c6 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -387,7 +387,7 @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, s->size = offset; offset += s->offset; - return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp); + return bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp); } static void raw_eject(BlockDriverState *bs, bool eject_flag) @@ -445,6 +445,8 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & bs->file->bs->supported_zero_flags); + bs->supported_truncate_flags = bs->file->bs->supported_truncate_flags & + BDRV_REQ_ZERO_WRITE; if (bs->probed && !bdrv_is_read_only(bs)) { bdrv_refresh_filename(bs->file->bs); From 2f0c6e7a650de133eccd94e9bb6cf7b2070f07f1 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 24 Apr 2020 14:54:44 +0200 Subject: [PATCH 08/15] file-posix: Support BDRV_REQ_ZERO_WRITE for truncate For regular files, we always get BDRV_REQ_ZERO_WRITE behaviour from the OS, so we can advertise the flag and just ignore it. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Alberto Garcia Reviewed-by: Max Reitz Message-Id: <20200424125448.63318-7-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/file-posix.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/file-posix.c b/block/file-posix.c index 58326a0a60..bf09ad8bc0 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -702,6 +702,10 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, #endif bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; + if (S_ISREG(st.st_mode)) { + /* When extending regular files, we get zeros from the OS */ + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; + } ret = 0; fail: if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) { From 955c7d6687fefcd903900a1e597fcbc896c661cd Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 24 Apr 2020 14:54:45 +0200 Subject: [PATCH 09/15] block: truncate: Don't make backing file data visible When extending the size of an image that has a backing file larger than its old size, make sure that the backing file data doesn't become visible in the guest, but the added area is properly zeroed out. Consider the following scenario where the overlay is shorter than its backing file: base.qcow2: AAAAAAAA overlay.qcow2: BBBB When resizing (extending) overlay.qcow2, the new blocks should not stay unallocated and make the additional As from base.qcow2 visible like before this patch, but zeros should be read. A similar case happens with the various variants of a commit job when an intermediate file is short (- for unallocated): base.qcow2: A-A-AAAA mid.qcow2: BB-B top.qcow2: C--C--C- After commit top.qcow2 to mid.qcow2, the following happens: mid.qcow2: CB-C00C0 (correct result) mid.qcow2: CB-C--C- (before this fix) Without the fix, blocks that previously read as zeros on top.qcow2 suddenly turn into A. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200424125448.63318-8-kwolf@redhat.com> Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- block/io.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/block/io.c b/block/io.c index 795075954e..a4f9714230 100644 --- a/block/io.c +++ b/block/io.c @@ -3394,6 +3394,31 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, goto out; } + /* + * If the image has a backing file that is large enough that it would + * provide data for the new area, we cannot leave it unallocated because + * then the backing file content would become visible. Instead, zero-fill + * the new area. + * + * Note that if the image has a backing file, but was opened without the + * backing file, taking care of keeping things consistent with that backing + * file is the user's responsibility. + */ + if (new_bytes && bs->backing) { + int64_t backing_len; + + backing_len = bdrv_getlength(backing_bs(bs)); + if (backing_len < 0) { + ret = backing_len; + error_setg_errno(errp, -ret, "Could not get backing file size"); + goto out; + } + + if (backing_len > old_size) { + flags |= BDRV_REQ_ZERO_WRITE; + } + } + if (drv->bdrv_co_truncate) { if (flags & ~bs->supported_truncate_flags) { error_setg(errp, "Block driver does not support requested flags"); From fd586ce8bee50d98773436214dc9e644ddda54aa Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 24 Apr 2020 14:54:46 +0200 Subject: [PATCH 10/15] iotests: Filter testfiles out in filter_img_info() We want to keep TEST_IMG for the full path of the main test image, but filter_testfiles() must be called for other test images before replacing other things like the image format because the test directory path could contain the format as a substring. Insert a filter_testfiles() call between both. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200424125448.63318-9-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- tests/qemu-iotests/iotests.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 7bc4934cd2..5f8c263d59 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -338,8 +338,9 @@ def filter_img_info(output, filename): for line in output.split('\n'): if 'disk size' in line or 'actual-size' in line: continue - line = line.replace(filename, 'TEST_IMG') \ - .replace(imgfmt, 'IMGFMT') + line = line.replace(filename, 'TEST_IMG') + line = filter_testfiles(line) + line = line.replace(imgfmt, 'IMGFMT') line = re.sub('iters: [0-9]+', 'iters: XXX', line) line = re.sub('uuid: [-a-f0-9]+', 'uuid: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', line) line = re.sub('cid: [0-9]+', 'cid: XXXXXXXXXX', line) From bf03dede475e29a16f9188ea85a4d77cd3dcf2b7 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 24 Apr 2020 14:54:47 +0200 Subject: [PATCH 11/15] iotests: Test committing to short backing file Signed-off-by: Kevin Wolf Message-Id: <20200424125448.63318-10-kwolf@redhat.com> Reviewed-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- tests/qemu-iotests/274 | 155 +++++++++++++++++++++ tests/qemu-iotests/274.out | 268 +++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/group | 1 + 3 files changed, 424 insertions(+) create mode 100755 tests/qemu-iotests/274 create mode 100644 tests/qemu-iotests/274.out diff --git a/tests/qemu-iotests/274 b/tests/qemu-iotests/274 new file mode 100755 index 0000000000..e951f723b8 --- /dev/null +++ b/tests/qemu-iotests/274 @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2019 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# Creator/Owner: Kevin Wolf +# +# Some tests for short backing files and short overlays + +import iotests + +iotests.verify_image_format(supported_fmts=['qcow2']) +iotests.verify_platform(['linux']) + +size_short = 1 * 1024 * 1024 +size_long = 2 * 1024 * 1024 +size_diff = size_long - size_short + +def create_chain() -> None: + iotests.qemu_img_log('create', '-f', iotests.imgfmt, base, + str(size_long)) + iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', base, mid, + str(size_short)) + iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', mid, top, + str(size_long)) + + iotests.qemu_io_log('-c', 'write -P 1 0 %d' % size_long, base) + +def create_vm() -> iotests.VM: + vm = iotests.VM() + vm.add_blockdev('file,filename=%s,node-name=base-file' % base) + vm.add_blockdev('%s,file=base-file,node-name=base' % iotests.imgfmt) + vm.add_blockdev('file,filename=%s,node-name=mid-file' % mid) + vm.add_blockdev('%s,file=mid-file,node-name=mid,backing=base' + % iotests.imgfmt) + vm.add_drive(top, 'backing=mid,node-name=top') + return vm + +with iotests.FilePath('base') as base, \ + iotests.FilePath('mid') as mid, \ + iotests.FilePath('top') as top: + + iotests.log('== Commit tests ==') + + create_chain() + + iotests.log('=== Check visible data ===') + + iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, top) + iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), top) + + iotests.log('=== Checking allocation status ===') + + iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short, + '-c', 'alloc %d %d' % (size_short, size_diff), + base) + + iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short, + '-c', 'alloc %d %d' % (size_short, size_diff), + mid) + + iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short, + '-c', 'alloc %d %d' % (size_short, size_diff), + top) + + iotests.log('=== Checking map ===') + + iotests.qemu_img_log('map', '--output=json', base) + iotests.qemu_img_log('map', '--output=human', base) + iotests.qemu_img_log('map', '--output=json', mid) + iotests.qemu_img_log('map', '--output=human', mid) + iotests.qemu_img_log('map', '--output=json', top) + iotests.qemu_img_log('map', '--output=human', top) + + iotests.log('=== Testing qemu-img commit (top -> mid) ===') + + iotests.qemu_img_log('commit', top) + iotests.img_info_log(mid) + iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid) + iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid) + + iotests.log('=== Testing HMP commit (top -> mid) ===') + + create_chain() + with create_vm() as vm: + vm.launch() + vm.qmp_log('human-monitor-command', command_line='commit drive0') + + iotests.img_info_log(mid) + iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid) + iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid) + + iotests.log('=== Testing QMP active commit (top -> mid) ===') + + create_chain() + with create_vm() as vm: + vm.launch() + vm.qmp_log('block-commit', device='top', base_node='mid', + job_id='job0', auto_dismiss=False) + vm.run_job('job0', wait=5) + + iotests.img_info_log(mid) + iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid) + iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid) + + + iotests.log('== Resize tests ==') + + # Use different sizes for different allocation modes: + # + # We want to have at least one test where 32 bit truncation in the size of + # the overlapping area becomes visible. This is covered by the + # prealloc='off' case (1G to 6G is an overlap of 5G). + # + # However, we can only do this for modes that don't preallocate data + # because otherwise we might run out of space on the test host. + # + # We also want to test some unaligned combinations. + for (prealloc, base_size, top_size_old, top_size_new, off) in [ + ('off', '6G', '1G', '8G', '5G'), + ('metadata', '32G', '30G', '33G', '31G'), + ('falloc', '10M', '5M', '15M', '9M'), + ('full', '16M', '8M', '12M', '11M'), + ('off', '384k', '253k', '512k', '253k'), + ('off', '400k', '256k', '512k', '336k'), + ('off', '512k', '256k', '500k', '436k')]: + + iotests.log('=== preallocation=%s ===' % prealloc) + iotests.qemu_img_log('create', '-f', iotests.imgfmt, base, base_size) + iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', base, top, + top_size_old) + iotests.qemu_io_log('-c', 'write -P 1 %s 64k' % off, base) + + # After this, top_size_old to base_size should be allocated/zeroed. + # + # In theory, leaving base_size to top_size_new unallocated would be + # correct, but in practice, if we zero out anything, we zero out + # everything up to top_size_new. + iotests.qemu_img_log('resize', '-f', iotests.imgfmt, + '--preallocation', prealloc, top, top_size_new) + iotests.qemu_io_log('-c', 'read -P 0 %s 64k' % off, top) + iotests.qemu_io_log('-c', 'map', top) + iotests.qemu_img_log('map', '--output=json', top) diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out new file mode 100644 index 0000000000..1a796fd07c --- /dev/null +++ b/tests/qemu-iotests/274.out @@ -0,0 +1,268 @@ +== Commit tests == +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +wrote 2097152/2097152 bytes at offset 0 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Check visible data === +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Checking allocation status === +1048576/1048576 bytes allocated at offset 0 bytes +1048576/1048576 bytes allocated at offset 1 MiB + +0/1048576 bytes allocated at offset 0 bytes +0/0 bytes allocated at offset 1 MiB + +0/1048576 bytes allocated at offset 0 bytes +0/1048576 bytes allocated at offset 1 MiB + +=== Checking map === +[{ "start": 0, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": 327680}] + +Offset Length Mapped to File +0 0x200000 0x50000 TEST_DIR/PID-base + +[{ "start": 0, "length": 1048576, "depth": 1, "zero": false, "data": true, "offset": 327680}] + +Offset Length Mapped to File +0 0x100000 0x50000 TEST_DIR/PID-base + +[{ "start": 0, "length": 1048576, "depth": 2, "zero": false, "data": true, "offset": 327680}, +{ "start": 1048576, "length": 1048576, "depth": 0, "zero": true, "data": false}] + +Offset Length Mapped to File +0 0x100000 0x50000 TEST_DIR/PID-base + +=== Testing qemu-img commit (top -> mid) === +Image committed. + +image: TEST_IMG +file format: IMGFMT +virtual size: 2 MiB (2097152 bytes) +cluster_size: 65536 +backing file: TEST_DIR/PID-base +Format specific information: + compat: 1.1 + lazy refcounts: false + refcount bits: 16 + corrupt: false + +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Testing HMP commit (top -> mid) === +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +wrote 2097152/2097152 bytes at offset 0 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +{"execute": "human-monitor-command", "arguments": {"command-line": "commit drive0"}} +{"return": ""} +image: TEST_IMG +file format: IMGFMT +virtual size: 2 MiB (2097152 bytes) +cluster_size: 65536 +backing file: TEST_DIR/PID-base +Format specific information: + compat: 1.1 + lazy refcounts: false + refcount bits: 16 + corrupt: false + +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Testing QMP active commit (top -> mid) === +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +wrote 2097152/2097152 bytes at offset 0 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +{"execute": "block-commit", "arguments": {"auto-dismiss": false, "base-node": "mid", "device": "top", "job-id": "job0"}} +{"return": {}} +{"execute": "job-complete", "arguments": {"id": "job0"}} +{"return": {}} +{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"execute": "job-dismiss", "arguments": {"id": "job0"}} +{"return": {}} +image: TEST_IMG +file format: IMGFMT +virtual size: 2 MiB (2097152 bytes) +cluster_size: 65536 +backing file: TEST_DIR/PID-base +Format specific information: + compat: 1.1 + lazy refcounts: false + refcount bits: 16 + corrupt: false + +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== Resize tests == +=== preallocation=off === +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=6442450944 cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=1073741824 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +wrote 65536/65536 bytes at offset 5368709120 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +Image resized. + +read 65536/65536 bytes at offset 5368709120 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +1 GiB (0x40000000) bytes not allocated at offset 0 bytes (0x0) +7 GiB (0x1c0000000) bytes allocated at offset 1 GiB (0x40000000) + +[{ "start": 0, "length": 1073741824, "depth": 1, "zero": true, "data": false}, +{ "start": 1073741824, "length": 7516192768, "depth": 0, "zero": true, "data": false}] + +=== preallocation=metadata === +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=34359738368 cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=32212254720 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +wrote 65536/65536 bytes at offset 33285996544 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +Image resized. + +read 65536/65536 bytes at offset 33285996544 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +30 GiB (0x780000000) bytes not allocated at offset 0 bytes (0x0) +3 GiB (0xc0000000) bytes allocated at offset 30 GiB (0x780000000) + +[{ "start": 0, "length": 32212254720, "depth": 1, "zero": true, "data": false}, +{ "start": 32212254720, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 327680}, +{ "start": 32749125632, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 537264128}, +{ "start": 33285996544, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 1074200576}, +{ "start": 33822867456, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 1611137024}, +{ "start": 34359738368, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2148139008}, +{ "start": 34896609280, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2685075456}] + +=== preallocation=falloc === +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=10485760 cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=5242880 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +wrote 65536/65536 bytes at offset 9437184 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +Image resized. + +read 65536/65536 bytes at offset 9437184 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +5 MiB (0x500000) bytes not allocated at offset 0 bytes (0x0) +10 MiB (0xa00000) bytes allocated at offset 5 MiB (0x500000) + +[{ "start": 0, "length": 5242880, "depth": 1, "zero": true, "data": false}, +{ "start": 5242880, "length": 10485760, "depth": 0, "zero": true, "data": false, "offset": 327680}] + +=== preallocation=full === +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=16777216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=8388608 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +wrote 65536/65536 bytes at offset 11534336 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +Image resized. + +read 65536/65536 bytes at offset 11534336 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +8 MiB (0x800000) bytes not allocated at offset 0 bytes (0x0) +4 MiB (0x400000) bytes allocated at offset 8 MiB (0x800000) + +[{ "start": 0, "length": 8388608, "depth": 1, "zero": true, "data": false}, +{ "start": 8388608, "length": 4194304, "depth": 0, "zero": true, "data": false, "offset": 327680}] + +=== preallocation=off === +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=393216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=259072 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +wrote 65536/65536 bytes at offset 259072 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +Image resized. + +read 65536/65536 bytes at offset 259072 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +192 KiB (0x30000) bytes not allocated at offset 0 bytes (0x0) +320 KiB (0x50000) bytes allocated at offset 192 KiB (0x30000) + +[{ "start": 0, "length": 196608, "depth": 1, "zero": true, "data": false}, +{ "start": 196608, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": 327680}, +{ "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}] + +=== preallocation=off === +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=409600 cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=262144 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +wrote 65536/65536 bytes at offset 344064 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +Image resized. + +read 65536/65536 bytes at offset 344064 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0) +256 KiB (0x40000) bytes allocated at offset 256 KiB (0x40000) + +[{ "start": 0, "length": 262144, "depth": 1, "zero": true, "data": false}, +{ "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}] + +=== preallocation=off === +Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=524288 cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=262144 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +wrote 65536/65536 bytes at offset 446464 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +Image resized. + +read 65536/65536 bytes at offset 446464 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0) +244 KiB (0x3d000) bytes allocated at offset 256 KiB (0x40000) + +[{ "start": 0, "length": 262144, "depth": 1, "zero": true, "data": false}, +{ "start": 262144, "length": 249856, "depth": 0, "zero": true, "data": false}] + diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 435dccd5af..1710470e70 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -286,6 +286,7 @@ 270 rw backing quick 272 rw 273 backing quick +274 rw backing 277 rw quick 279 rw backing quick 280 rw migration quick From eb8a0cf3ba26611f3981f8f45ac6a868975a68cc Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 24 Apr 2020 16:27:01 +0200 Subject: [PATCH 12/15] qcow2: Forward ZERO_WRITE flag for full preallocation The BDRV_REQ_ZERO_WRITE is currently implemented in a way that first the image is possibly preallocated and then the zero flag is added to all clusters. This means that a copy-on-write operation may be needed when writing to these clusters, despite having used preallocation, negating one of the major benefits of preallocation. Instead, try to forward the BDRV_REQ_ZERO_WRITE to the protocol driver, and if the protocol driver can ensure that the new area reads as zeros, we can skip setting the zero flag in the qcow2 layer. Unfortunately, the same approach doesn't work for metadata preallocation, so we'll still set the zero flag there. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Message-Id: <20200424142701.67053-1-kwolf@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block/qcow2.c | 22 +++++++++++++++++++--- tests/qemu-iotests/274.out | 4 ++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 98065d7808..2ba0b17c39 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -4170,9 +4170,25 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, /* Allocate the data area */ new_file_size = allocation_start + nb_new_data_clusters * s->cluster_size; - /* Image file grows, so @exact does not matter */ - ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0, - errp); + /* + * Image file grows, so @exact does not matter. + * + * If we need to zero out the new area, try first whether the protocol + * driver can already take care of this. + */ + if (flags & BDRV_REQ_ZERO_WRITE) { + ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, + BDRV_REQ_ZERO_WRITE, NULL); + if (ret >= 0) { + flags &= ~BDRV_REQ_ZERO_WRITE; + } + } else { + ret = -1; + } + if (ret < 0) { + ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0, + errp); + } if (ret < 0) { error_prepend(errp, "Failed to resize underlying file: "); qcow2_free_clusters(bs, allocation_start, diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out index 1a796fd07c..9d6fdeb1f7 100644 --- a/tests/qemu-iotests/274.out +++ b/tests/qemu-iotests/274.out @@ -187,7 +187,7 @@ read 65536/65536 bytes at offset 9437184 10 MiB (0xa00000) bytes allocated at offset 5 MiB (0x500000) [{ "start": 0, "length": 5242880, "depth": 1, "zero": true, "data": false}, -{ "start": 5242880, "length": 10485760, "depth": 0, "zero": true, "data": false, "offset": 327680}] +{ "start": 5242880, "length": 10485760, "depth": 0, "zero": false, "data": true, "offset": 327680}] === preallocation=full === Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=16777216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 @@ -206,7 +206,7 @@ read 65536/65536 bytes at offset 11534336 4 MiB (0x400000) bytes allocated at offset 8 MiB (0x800000) [{ "start": 0, "length": 8388608, "depth": 1, "zero": true, "data": false}, -{ "start": 8388608, "length": 4194304, "depth": 0, "zero": true, "data": false, "offset": 327680}] +{ "start": 8388608, "length": 4194304, "depth": 0, "zero": false, "data": true, "offset": 327680}] === preallocation=off === Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=393216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 From 6cf94132293adb5c76d336ab7ff5924afe2cb147 Mon Sep 17 00:00:00 2001 From: Andrzej Jakowski Date: Mon, 30 Mar 2020 09:46:56 -0700 Subject: [PATCH 13/15] nvme: introduce PMR support from NVMe 1.4 spec This patch introduces support for PMR that has been defined as part of NVMe 1.4 spec. User can now specify a pmrdev option that should point to HostMemoryBackend. pmrdev memory region will subsequently be exposed as PCI BAR 2 in emulated NVMe device. Guest OS can perform mmio read and writes to the PMR region that will stay persistent across system reboot. Signed-off-by: Andrzej Jakowski Reviewed-by: Klaus Jensen Reviewed-by: Stefan Hajnoczi Message-Id: <20200330164656.9348-1-andrzej.jakowski@linux.intel.com> Reviewed-by: Keith Busch Signed-off-by: Kevin Wolf --- hw/block/Makefile.objs | 2 +- hw/block/nvme.c | 109 ++++++++++++++++++++++++++ hw/block/nvme.h | 2 + hw/block/trace-events | 4 + include/block/nvme.h | 172 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 288 insertions(+), 1 deletion(-) diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs index 4b4a2b338d..47960b5f0d 100644 --- a/hw/block/Makefile.objs +++ b/hw/block/Makefile.objs @@ -7,12 +7,12 @@ common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o common-obj-$(CONFIG_XEN) += xen-block.o common-obj-$(CONFIG_ECC) += ecc.o common-obj-$(CONFIG_ONENAND) += onenand.o -common-obj-$(CONFIG_NVME_PCI) += nvme.o common-obj-$(CONFIG_SWIM) += swim.o common-obj-$(CONFIG_SH4) += tc58128.o obj-$(CONFIG_VIRTIO_BLK) += virtio-blk.o obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk.o +obj-$(CONFIG_NVME_PCI) += nvme.o obj-y += dataplane/ diff --git a/hw/block/nvme.c b/hw/block/nvme.c index d28335cbf3..9b453423cf 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -19,10 +19,19 @@ * -drive file=,if=none,id= * -device nvme,drive=,serial=,id=, \ * cmb_size_mb=, \ + * [pmrdev=,] \ * num_queues= * * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. + * + * cmb_size_mb= and pmrdev= options are mutually exclusive due to limitation + * in available BAR's. cmb_size_mb= will take precedence over pmrdev= when + * both provided. + * Enabling pmr emulation can be achieved by pointing to memory-backend-file. + * For example: + * -object memory-backend-file,id=,share=on,mem-path=, \ + * size= .... -device nvme,...,pmrdev= */ #include "qemu/osdep.h" @@ -35,7 +44,9 @@ #include "sysemu/sysemu.h" #include "qapi/error.h" #include "qapi/visitor.h" +#include "sysemu/hostmem.h" #include "sysemu/block-backend.h" +#include "exec/ram_addr.h" #include "qemu/log.h" #include "qemu/module.h" @@ -1141,6 +1152,26 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly, "invalid write to read only CMBSZ, ignored"); return; + case 0xE00: /* PMRCAP */ + NVME_GUEST_ERR(nvme_ub_mmiowr_pmrcap_readonly, + "invalid write to PMRCAP register, ignored"); + return; + case 0xE04: /* TODO PMRCTL */ + break; + case 0xE08: /* PMRSTS */ + NVME_GUEST_ERR(nvme_ub_mmiowr_pmrsts_readonly, + "invalid write to PMRSTS register, ignored"); + return; + case 0xE0C: /* PMREBS */ + NVME_GUEST_ERR(nvme_ub_mmiowr_pmrebs_readonly, + "invalid write to PMREBS register, ignored"); + return; + case 0xE10: /* PMRSWTP */ + NVME_GUEST_ERR(nvme_ub_mmiowr_pmrswtp_readonly, + "invalid write to PMRSWTP register, ignored"); + return; + case 0xE14: /* TODO PMRMSC */ + break; default: NVME_GUEST_ERR(nvme_ub_mmiowr_invalid, "invalid MMIO write," @@ -1169,6 +1200,16 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) } if (addr < sizeof(n->bar)) { + /* + * When PMRWBM bit 1 is set then read from + * from PMRSTS should ensure prior writes + * made it to persistent media + */ + if (addr == 0xE08 && + (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { + qemu_ram_writeback(n->pmrdev->mr.ram_block, + 0, n->pmrdev->size); + } memcpy(&val, ptr + addr, size); } else { NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs, @@ -1332,6 +1373,23 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) error_setg(errp, "serial property not set"); return; } + + if (!n->cmb_size_mb && n->pmrdev) { + if (host_memory_backend_is_mapped(n->pmrdev)) { + char *path = object_get_canonical_path_component(OBJECT(n->pmrdev)); + error_setg(errp, "can't use already busy memdev: %s", path); + g_free(path); + return; + } + + if (!is_power_of_2(n->pmrdev->size)) { + error_setg(errp, "pmr backend size needs to be power of 2 in size"); + return; + } + + host_memory_backend_set_mapped(n->pmrdev, true); + } + blkconf_blocksizes(&n->conf); if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), false, errp)) { @@ -1415,6 +1473,51 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); + } else if (n->pmrdev) { + /* Controller Capabilities register */ + NVME_CAP_SET_PMRS(n->bar.cap, 1); + + /* PMR Capabities register */ + n->bar.pmrcap = 0; + NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0); + NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0); + NVME_PMRCAP_SET_BIR(n->bar.pmrcap, 2); + NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0); + /* Turn on bit 1 support */ + NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02); + NVME_PMRCAP_SET_PMRTO(n->bar.pmrcap, 0); + NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 0); + + /* PMR Control register */ + n->bar.pmrctl = 0; + NVME_PMRCTL_SET_EN(n->bar.pmrctl, 0); + + /* PMR Status register */ + n->bar.pmrsts = 0; + NVME_PMRSTS_SET_ERR(n->bar.pmrsts, 0); + NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 0); + NVME_PMRSTS_SET_HSTS(n->bar.pmrsts, 0); + NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 0); + + /* PMR Elasticity Buffer Size register */ + n->bar.pmrebs = 0; + NVME_PMREBS_SET_PMRSZU(n->bar.pmrebs, 0); + NVME_PMREBS_SET_RBB(n->bar.pmrebs, 0); + NVME_PMREBS_SET_PMRWBZ(n->bar.pmrebs, 0); + + /* PMR Sustained Write Throughput register */ + n->bar.pmrswtp = 0; + NVME_PMRSWTP_SET_PMRSWTU(n->bar.pmrswtp, 0); + NVME_PMRSWTP_SET_PMRSWTV(n->bar.pmrswtp, 0); + + /* PMR Memory Space Control register */ + n->bar.pmrmsc = 0; + NVME_PMRMSC_SET_CMSE(n->bar.pmrmsc, 0); + NVME_PMRMSC_SET_CBA(n->bar.pmrmsc, 0); + + pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap), + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmrdev->mr); } for (i = 0; i < n->num_namespaces; i++) { @@ -1445,11 +1548,17 @@ static void nvme_exit(PCIDevice *pci_dev) if (n->cmb_size_mb) { g_free(n->cmbuf); } + + if (n->pmrdev) { + host_memory_backend_set_mapped(n->pmrdev, false); + } msix_uninit_exclusive_bar(pci_dev); } static Property nvme_props[] = { DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf), + DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmrdev, TYPE_MEMORY_BACKEND, + HostMemoryBackend *), DEFINE_PROP_STRING("serial", NvmeCtrl, serial), DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0), DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64), diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 557194ee19..6520a9f0be 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -83,6 +83,8 @@ typedef struct NvmeCtrl { uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ char *serial; + HostMemoryBackend *pmrdev; + NvmeNamespace *namespaces; NvmeSQueue **sq; NvmeCQueue **cq; diff --git a/hw/block/trace-events b/hw/block/trace-events index bf6d11b58b..aca54bda14 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -110,6 +110,10 @@ nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CA nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" +nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored" +nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored" +nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored" +nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored" nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" diff --git a/include/block/nvme.h b/include/block/nvme.h index 8fb941c653..5525c8e343 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -15,6 +15,13 @@ typedef struct NvmeBar { uint64_t acq; uint32_t cmbloc; uint32_t cmbsz; + uint8_t padding[3520]; /* not used by QEMU */ + uint32_t pmrcap; + uint32_t pmrctl; + uint32_t pmrsts; + uint32_t pmrebs; + uint32_t pmrswtp; + uint32_t pmrmsc; } NvmeBar; enum NvmeCapShift { @@ -27,6 +34,7 @@ enum NvmeCapShift { CAP_CSS_SHIFT = 37, CAP_MPSMIN_SHIFT = 48, CAP_MPSMAX_SHIFT = 52, + CAP_PMR_SHIFT = 56, }; enum NvmeCapMask { @@ -39,6 +47,7 @@ enum NvmeCapMask { CAP_CSS_MASK = 0xff, CAP_MPSMIN_MASK = 0xf, CAP_MPSMAX_MASK = 0xf, + CAP_PMR_MASK = 0x1, }; #define NVME_CAP_MQES(cap) (((cap) >> CAP_MQES_SHIFT) & CAP_MQES_MASK) @@ -69,6 +78,8 @@ enum NvmeCapMask { << CAP_MPSMIN_SHIFT) #define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & CAP_MPSMAX_MASK)\ << CAP_MPSMAX_SHIFT) +#define NVME_CAP_SET_PMRS(cap, val) (cap |= (uint64_t)(val & CAP_PMR_MASK)\ + << CAP_PMR_SHIFT) enum NvmeCcShift { CC_EN_SHIFT = 0, @@ -205,6 +216,167 @@ enum NvmeCmbszMask { #define NVME_CMBSZ_GETSIZE(cmbsz) \ (NVME_CMBSZ_SZ(cmbsz) * (1 << (12 + 4 * NVME_CMBSZ_SZU(cmbsz)))) +enum NvmePmrcapShift { + PMRCAP_RDS_SHIFT = 3, + PMRCAP_WDS_SHIFT = 4, + PMRCAP_BIR_SHIFT = 5, + PMRCAP_PMRTU_SHIFT = 8, + PMRCAP_PMRWBM_SHIFT = 10, + PMRCAP_PMRTO_SHIFT = 16, + PMRCAP_CMSS_SHIFT = 24, +}; + +enum NvmePmrcapMask { + PMRCAP_RDS_MASK = 0x1, + PMRCAP_WDS_MASK = 0x1, + PMRCAP_BIR_MASK = 0x7, + PMRCAP_PMRTU_MASK = 0x3, + PMRCAP_PMRWBM_MASK = 0xf, + PMRCAP_PMRTO_MASK = 0xff, + PMRCAP_CMSS_MASK = 0x1, +}; + +#define NVME_PMRCAP_RDS(pmrcap) \ + ((pmrcap >> PMRCAP_RDS_SHIFT) & PMRCAP_RDS_MASK) +#define NVME_PMRCAP_WDS(pmrcap) \ + ((pmrcap >> PMRCAP_WDS_SHIFT) & PMRCAP_WDS_MASK) +#define NVME_PMRCAP_BIR(pmrcap) \ + ((pmrcap >> PMRCAP_BIR_SHIFT) & PMRCAP_BIR_MASK) +#define NVME_PMRCAP_PMRTU(pmrcap) \ + ((pmrcap >> PMRCAP_PMRTU_SHIFT) & PMRCAP_PMRTU_MASK) +#define NVME_PMRCAP_PMRWBM(pmrcap) \ + ((pmrcap >> PMRCAP_PMRWBM_SHIFT) & PMRCAP_PMRWBM_MASK) +#define NVME_PMRCAP_PMRTO(pmrcap) \ + ((pmrcap >> PMRCAP_PMRTO_SHIFT) & PMRCAP_PMRTO_MASK) +#define NVME_PMRCAP_CMSS(pmrcap) \ + ((pmrcap >> PMRCAP_CMSS_SHIFT) & PMRCAP_CMSS_MASK) + +#define NVME_PMRCAP_SET_RDS(pmrcap, val) \ + (pmrcap |= (uint64_t)(val & PMRCAP_RDS_MASK) << PMRCAP_RDS_SHIFT) +#define NVME_PMRCAP_SET_WDS(pmrcap, val) \ + (pmrcap |= (uint64_t)(val & PMRCAP_WDS_MASK) << PMRCAP_WDS_SHIFT) +#define NVME_PMRCAP_SET_BIR(pmrcap, val) \ + (pmrcap |= (uint64_t)(val & PMRCAP_BIR_MASK) << PMRCAP_BIR_SHIFT) +#define NVME_PMRCAP_SET_PMRTU(pmrcap, val) \ + (pmrcap |= (uint64_t)(val & PMRCAP_PMRTU_MASK) << PMRCAP_PMRTU_SHIFT) +#define NVME_PMRCAP_SET_PMRWBM(pmrcap, val) \ + (pmrcap |= (uint64_t)(val & PMRCAP_PMRWBM_MASK) << PMRCAP_PMRWBM_SHIFT) +#define NVME_PMRCAP_SET_PMRTO(pmrcap, val) \ + (pmrcap |= (uint64_t)(val & PMRCAP_PMRTO_MASK) << PMRCAP_PMRTO_SHIFT) +#define NVME_PMRCAP_SET_CMSS(pmrcap, val) \ + (pmrcap |= (uint64_t)(val & PMRCAP_CMSS_MASK) << PMRCAP_CMSS_SHIFT) + +enum NvmePmrctlShift { + PMRCTL_EN_SHIFT = 0, +}; + +enum NvmePmrctlMask { + PMRCTL_EN_MASK = 0x1, +}; + +#define NVME_PMRCTL_EN(pmrctl) ((pmrctl >> PMRCTL_EN_SHIFT) & PMRCTL_EN_MASK) + +#define NVME_PMRCTL_SET_EN(pmrctl, val) \ + (pmrctl |= (uint64_t)(val & PMRCTL_EN_MASK) << PMRCTL_EN_SHIFT) + +enum NvmePmrstsShift { + PMRSTS_ERR_SHIFT = 0, + PMRSTS_NRDY_SHIFT = 8, + PMRSTS_HSTS_SHIFT = 9, + PMRSTS_CBAI_SHIFT = 12, +}; + +enum NvmePmrstsMask { + PMRSTS_ERR_MASK = 0xff, + PMRSTS_NRDY_MASK = 0x1, + PMRSTS_HSTS_MASK = 0x7, + PMRSTS_CBAI_MASK = 0x1, +}; + +#define NVME_PMRSTS_ERR(pmrsts) \ + ((pmrsts >> PMRSTS_ERR_SHIFT) & PMRSTS_ERR_MASK) +#define NVME_PMRSTS_NRDY(pmrsts) \ + ((pmrsts >> PMRSTS_NRDY_SHIFT) & PMRSTS_NRDY_MASK) +#define NVME_PMRSTS_HSTS(pmrsts) \ + ((pmrsts >> PMRSTS_HSTS_SHIFT) & PMRSTS_HSTS_MASK) +#define NVME_PMRSTS_CBAI(pmrsts) \ + ((pmrsts >> PMRSTS_CBAI_SHIFT) & PMRSTS_CBAI_MASK) + +#define NVME_PMRSTS_SET_ERR(pmrsts, val) \ + (pmrsts |= (uint64_t)(val & PMRSTS_ERR_MASK) << PMRSTS_ERR_SHIFT) +#define NVME_PMRSTS_SET_NRDY(pmrsts, val) \ + (pmrsts |= (uint64_t)(val & PMRSTS_NRDY_MASK) << PMRSTS_NRDY_SHIFT) +#define NVME_PMRSTS_SET_HSTS(pmrsts, val) \ + (pmrsts |= (uint64_t)(val & PMRSTS_HSTS_MASK) << PMRSTS_HSTS_SHIFT) +#define NVME_PMRSTS_SET_CBAI(pmrsts, val) \ + (pmrsts |= (uint64_t)(val & PMRSTS_CBAI_MASK) << PMRSTS_CBAI_SHIFT) + +enum NvmePmrebsShift { + PMREBS_PMRSZU_SHIFT = 0, + PMREBS_RBB_SHIFT = 4, + PMREBS_PMRWBZ_SHIFT = 8, +}; + +enum NvmePmrebsMask { + PMREBS_PMRSZU_MASK = 0xf, + PMREBS_RBB_MASK = 0x1, + PMREBS_PMRWBZ_MASK = 0xffffff, +}; + +#define NVME_PMREBS_PMRSZU(pmrebs) \ + ((pmrebs >> PMREBS_PMRSZU_SHIFT) & PMREBS_PMRSZU_MASK) +#define NVME_PMREBS_RBB(pmrebs) \ + ((pmrebs >> PMREBS_RBB_SHIFT) & PMREBS_RBB_MASK) +#define NVME_PMREBS_PMRWBZ(pmrebs) \ + ((pmrebs >> PMREBS_PMRWBZ_SHIFT) & PMREBS_PMRWBZ_MASK) + +#define NVME_PMREBS_SET_PMRSZU(pmrebs, val) \ + (pmrebs |= (uint64_t)(val & PMREBS_PMRSZU_MASK) << PMREBS_PMRSZU_SHIFT) +#define NVME_PMREBS_SET_RBB(pmrebs, val) \ + (pmrebs |= (uint64_t)(val & PMREBS_RBB_MASK) << PMREBS_RBB_SHIFT) +#define NVME_PMREBS_SET_PMRWBZ(pmrebs, val) \ + (pmrebs |= (uint64_t)(val & PMREBS_PMRWBZ_MASK) << PMREBS_PMRWBZ_SHIFT) + +enum NvmePmrswtpShift { + PMRSWTP_PMRSWTU_SHIFT = 0, + PMRSWTP_PMRSWTV_SHIFT = 8, +}; + +enum NvmePmrswtpMask { + PMRSWTP_PMRSWTU_MASK = 0xf, + PMRSWTP_PMRSWTV_MASK = 0xffffff, +}; + +#define NVME_PMRSWTP_PMRSWTU(pmrswtp) \ + ((pmrswtp >> PMRSWTP_PMRSWTU_SHIFT) & PMRSWTP_PMRSWTU_MASK) +#define NVME_PMRSWTP_PMRSWTV(pmrswtp) \ + ((pmrswtp >> PMRSWTP_PMRSWTV_SHIFT) & PMRSWTP_PMRSWTV_MASK) + +#define NVME_PMRSWTP_SET_PMRSWTU(pmrswtp, val) \ + (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTU_MASK) << PMRSWTP_PMRSWTU_SHIFT) +#define NVME_PMRSWTP_SET_PMRSWTV(pmrswtp, val) \ + (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTV_MASK) << PMRSWTP_PMRSWTV_SHIFT) + +enum NvmePmrmscShift { + PMRMSC_CMSE_SHIFT = 1, + PMRMSC_CBA_SHIFT = 12, +}; + +enum NvmePmrmscMask { + PMRMSC_CMSE_MASK = 0x1, + PMRMSC_CBA_MASK = 0xfffffffffffff, +}; + +#define NVME_PMRMSC_CMSE(pmrmsc) \ + ((pmrmsc >> PMRMSC_CMSE_SHIFT) & PMRMSC_CMSE_MASK) +#define NVME_PMRMSC_CBA(pmrmsc) \ + ((pmrmsc >> PMRMSC_CBA_SHIFT) & PMRMSC_CBA_MASK) + +#define NVME_PMRMSC_SET_CMSE(pmrmsc, val) \ + (pmrmsc |= (uint64_t)(val & PMRMSC_CMSE_MASK) << PMRMSC_CMSE_SHIFT) +#define NVME_PMRMSC_SET_CBA(pmrmsc, val) \ + (pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT) + typedef struct NvmeCmd { uint8_t opcode; uint8_t fuse; From d6a5beeb2bbf4f5ce6e6396051fb4c5fcced56a4 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 16 Apr 2020 17:04:20 +0200 Subject: [PATCH 14/15] qom: Factor out user_creatable_add_dict() The QMP handler qmp_object_add() and the implementation of --object in qemu-storage-daemon can share most of the code. Currently, qemu-storage-daemon calls qmp_object_add(), but this is not correct because different visitors need to be used. As a first step towards a fix, make qmp_object_add() a wrapper around a new function user_creatable_add_dict() that can get an additional parameter. The handling of "props" is only required for compatibility and not required for the qemu-storage-daemon command line, so it stays in qmp_object_add(). Signed-off-by: Kevin Wolf --- include/qom/object_interfaces.h | 12 ++++++++++++ qom/object_interfaces.c | 27 +++++++++++++++++++++++++++ qom/qom-qmp-cmds.c | 24 +----------------------- 3 files changed, 40 insertions(+), 23 deletions(-) diff --git a/include/qom/object_interfaces.h b/include/qom/object_interfaces.h index 6f92f3cebb..a0037968a4 100644 --- a/include/qom/object_interfaces.h +++ b/include/qom/object_interfaces.h @@ -87,6 +87,18 @@ Object *user_creatable_add_type(const char *type, const char *id, const QDict *qdict, Visitor *v, Error **errp); +/** + * user_creatable_add_dict: + * @qdict: the object definition + * @errp: if an error occurs, a pointer to an area to store the error + * + * Create an instance of the user creatable object that is defined by + * @qdict. The object type is taken from the QDict key 'qom-type', its + * ID from the key 'id'. The remaining entries in @qdict are used to + * initialize the object properties. + */ +void user_creatable_add_dict(QDict *qdict, Error **errp); + /** * user_creatable_add_opts: * @opts: the object definition diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c index 72cb9e32a9..739e3e5172 100644 --- a/qom/object_interfaces.c +++ b/qom/object_interfaces.c @@ -6,6 +6,7 @@ #include "qapi/qmp/qerror.h" #include "qapi/qmp/qjson.h" #include "qapi/qmp/qstring.h" +#include "qapi/qobject-input-visitor.h" #include "qom/object_interfaces.h" #include "qemu/help_option.h" #include "qemu/module.h" @@ -105,6 +106,32 @@ out: return obj; } +void user_creatable_add_dict(QDict *qdict, Error **errp) +{ + Visitor *v; + Object *obj; + g_autofree char *type = NULL; + g_autofree char *id = NULL; + + type = g_strdup(qdict_get_try_str(qdict, "qom-type")); + if (!type) { + error_setg(errp, QERR_MISSING_PARAMETER, "qom-type"); + return; + } + qdict_del(qdict, "qom-type"); + + id = g_strdup(qdict_get_try_str(qdict, "id")); + if (!id) { + error_setg(errp, QERR_MISSING_PARAMETER, "id"); + return; + } + qdict_del(qdict, "id"); + + v = qobject_input_visitor_new(QOBJECT(qdict)); + obj = user_creatable_add_type(type, id, qdict, v, errp); + visit_free(v); + object_unref(obj); +} Object *user_creatable_add_opts(QemuOpts *opts, Error **errp) { diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c index e47ebe8ed1..35db44b50e 100644 --- a/qom/qom-qmp-cmds.c +++ b/qom/qom-qmp-cmds.c @@ -21,7 +21,6 @@ #include "qapi/qapi-commands-qom.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qerror.h" -#include "qapi/qobject-input-visitor.h" #include "qemu/cutils.h" #include "qom/object_interfaces.h" #include "qom/qom-qobject.h" @@ -245,24 +244,6 @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp) { QObject *props; QDict *pdict; - Visitor *v; - Object *obj; - g_autofree char *type = NULL; - g_autofree char *id = NULL; - - type = g_strdup(qdict_get_try_str(qdict, "qom-type")); - if (!type) { - error_setg(errp, QERR_MISSING_PARAMETER, "qom-type"); - return; - } - qdict_del(qdict, "qom-type"); - - id = g_strdup(qdict_get_try_str(qdict, "id")); - if (!id) { - error_setg(errp, QERR_MISSING_PARAMETER, "id"); - return; - } - qdict_del(qdict, "id"); props = qdict_get(qdict, "props"); if (props) { @@ -282,10 +263,7 @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp) qobject_unref(pdict); } - v = qobject_input_visitor_new(QOBJECT(qdict)); - obj = user_creatable_add_type(type, id, qdict, v, errp); - visit_free(v); - object_unref(obj); + user_creatable_add_dict(qdict, errp); } void qmp_object_del(const char *id, Error **errp) From eaae29ef89d498d0eac553c77b554f310a47f809 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 16 Apr 2020 17:26:06 +0200 Subject: [PATCH 15/15] qemu-storage-daemon: Fix non-string --object properties After processing the option string with the keyval parser, we get a QDict that contains only strings. This QDict must be fed to a keyval visitor which converts the strings into the right data types. qmp_object_add(), however, uses the normal QObject input visitor, which expects a QDict where all properties already have the QType that matches the data type required by the QOM object type. Change the --object implementation in qemu-storage-daemon so that it doesn't call qmp_object_add(), but calls user_creatable_add_dict() directly instead and pass it a new keyval boolean that decides which visitor must be used. Reported-by: Coiby Xu Signed-off-by: Kevin Wolf --- include/qom/object_interfaces.h | 6 +++++- qemu-storage-daemon.c | 4 +--- qom/object_interfaces.c | 8 ++++++-- qom/qom-qmp-cmds.c | 2 +- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/include/qom/object_interfaces.h b/include/qom/object_interfaces.h index a0037968a4..65172120fa 100644 --- a/include/qom/object_interfaces.h +++ b/include/qom/object_interfaces.h @@ -90,6 +90,10 @@ Object *user_creatable_add_type(const char *type, const char *id, /** * user_creatable_add_dict: * @qdict: the object definition + * @keyval: if true, use a keyval visitor for processing @qdict (i.e. + * assume that all @qdict values are strings); otherwise, use + * the normal QObject visitor (i.e. assume all @qdict values + * have the QType expected by the QOM object type) * @errp: if an error occurs, a pointer to an area to store the error * * Create an instance of the user creatable object that is defined by @@ -97,7 +101,7 @@ Object *user_creatable_add_type(const char *type, const char *id, * ID from the key 'id'. The remaining entries in @qdict are used to * initialize the object properties. */ -void user_creatable_add_dict(QDict *qdict, Error **errp); +void user_creatable_add_dict(QDict *qdict, bool keyval, Error **errp); /** * user_creatable_add_opts: diff --git a/qemu-storage-daemon.c b/qemu-storage-daemon.c index dd128978cc..9e7adfe3a6 100644 --- a/qemu-storage-daemon.c +++ b/qemu-storage-daemon.c @@ -278,7 +278,6 @@ static void process_options(int argc, char *argv[]) QemuOpts *opts; const char *type; QDict *args; - QObject *ret_data = NULL; /* FIXME The keyval parser rejects 'help' arguments, so we must * unconditionall try QemuOpts first. */ @@ -291,9 +290,8 @@ static void process_options(int argc, char *argv[]) qemu_opts_del(opts); args = keyval_parse(optarg, "qom-type", &error_fatal); - qmp_object_add(args, &ret_data, &error_fatal); + user_creatable_add_dict(args, true, &error_fatal); qobject_unref(args); - qobject_unref(ret_data); break; } default: diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c index 739e3e5172..bc36f96e47 100644 --- a/qom/object_interfaces.c +++ b/qom/object_interfaces.c @@ -106,7 +106,7 @@ out: return obj; } -void user_creatable_add_dict(QDict *qdict, Error **errp) +void user_creatable_add_dict(QDict *qdict, bool keyval, Error **errp) { Visitor *v; Object *obj; @@ -127,7 +127,11 @@ void user_creatable_add_dict(QDict *qdict, Error **errp) } qdict_del(qdict, "id"); - v = qobject_input_visitor_new(QOBJECT(qdict)); + if (keyval) { + v = qobject_input_visitor_new_keyval(QOBJECT(qdict)); + } else { + v = qobject_input_visitor_new(QOBJECT(qdict)); + } obj = user_creatable_add_type(type, id, qdict, v, errp); visit_free(v); object_unref(obj); diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c index 35db44b50e..c5249e44d0 100644 --- a/qom/qom-qmp-cmds.c +++ b/qom/qom-qmp-cmds.c @@ -263,7 +263,7 @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp) qobject_unref(pdict); } - user_creatable_add_dict(qdict, errp); + user_creatable_add_dict(qdict, false, errp); } void qmp_object_del(const char *id, Error **errp)