2006-08-06 01:14:20 +04:00
|
|
|
/*
|
|
|
|
* Block driver for the QCOW version 2 format
|
2007-09-17 01:08:06 +04:00
|
|
|
*
|
2006-08-06 01:14:20 +04:00
|
|
|
* Copyright (c) 2004-2006 Fabrice Bellard
|
2007-09-17 01:08:06 +04:00
|
|
|
*
|
2006-08-06 01:14:20 +04:00
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
|
|
* in the Software without restriction, including without limitation the rights
|
|
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
|
|
* furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice shall be included in
|
|
|
|
* all copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
* THE SOFTWARE.
|
|
|
|
*/
|
2018-02-01 14:18:31 +03:00
|
|
|
|
2016-01-18 21:01:42 +03:00
|
|
|
#include "qemu/osdep.h"
|
2018-06-20 17:48:36 +03:00
|
|
|
|
2018-06-14 22:14:28 +03:00
|
|
|
#include "block/qdict.h"
|
2016-03-08 17:57:05 +03:00
|
|
|
#include "sysemu/block-backend.h"
|
Include qemu/main-loop.h less
In my "build everything" tree, changing qemu/main-loop.h triggers a
recompile of some 5600 out of 6600 objects (not counting tests and
objects that don't depend on qemu/osdep.h). It includes block/aio.h,
which in turn includes qemu/event_notifier.h, qemu/notify.h,
qemu/processor.h, qemu/qsp.h, qemu/queue.h, qemu/thread-posix.h,
qemu/thread.h, qemu/timer.h, and a few more.
Include qemu/main-loop.h only where it's needed. Touching it now
recompiles only some 1700 objects. For block/aio.h and
qemu/event_notifier.h, these numbers drop from 5600 to 2800. For the
others, they shrink only slightly.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20190812052359.30071-21-armbru@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2019-08-12 08:23:50 +03:00
|
|
|
#include "qemu/main-loop.h"
|
2012-12-17 21:20:00 +04:00
|
|
|
#include "qemu/module.h"
|
2018-05-03 22:50:20 +03:00
|
|
|
#include "qcow2.h"
|
2012-12-17 21:20:00 +04:00
|
|
|
#include "qemu/error-report.h"
|
2018-02-01 14:18:31 +03:00
|
|
|
#include "qapi/error.h"
|
2018-02-11 12:36:01 +03:00
|
|
|
#include "qapi/qapi-events-block-core.h"
|
2018-02-01 14:18:35 +03:00
|
|
|
#include "qapi/qmp/qdict.h"
|
|
|
|
#include "qapi/qmp/qstring.h"
|
2012-03-01 21:36:21 +04:00
|
|
|
#include "trace.h"
|
2014-06-05 13:20:59 +04:00
|
|
|
#include "qemu/option_int.h"
|
2016-03-20 20:16:19 +03:00
|
|
|
#include "qemu/cutils.h"
|
2016-03-15 19:22:36 +03:00
|
|
|
#include "qemu/bswap.h"
|
2018-01-11 18:18:08 +03:00
|
|
|
#include "qapi/qobject-input-visitor.h"
|
|
|
|
#include "qapi/qapi-visit-block-core.h"
|
2018-05-03 22:50:20 +03:00
|
|
|
#include "crypto.h"
|
2019-09-16 20:53:24 +03:00
|
|
|
#include "block/aio_task.h"
|
2006-08-06 01:14:20 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
Differences with QCOW:
|
|
|
|
|
|
|
|
- Support for multiple incremental snapshots.
|
|
|
|
- Memory management by reference counts.
|
|
|
|
- Clusters which have a reference count of one have the bit
|
|
|
|
QCOW_OFLAG_COPIED to optimize write performance.
|
2007-09-17 01:08:06 +04:00
|
|
|
- Size of compressed clusters is stored in sectors to reduce bit usage
|
2006-08-06 01:14:20 +04:00
|
|
|
in the cluster offsets.
|
|
|
|
- Support for storing additional data (such as the VM state) in the
|
2007-09-17 12:09:54 +04:00
|
|
|
snapshots.
|
2006-08-06 01:14:20 +04:00
|
|
|
- If a backing store is used, the cluster size is not constrained
|
|
|
|
(could be backported to QCOW).
|
|
|
|
- L2 tables have always a size of one cluster.
|
|
|
|
*/
|
|
|
|
|
2009-03-28 20:55:06 +03:00
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
uint32_t magic;
|
|
|
|
uint32_t len;
|
2013-09-25 20:08:50 +04:00
|
|
|
} QEMU_PACKED QCowExtension;
|
2012-09-20 23:13:28 +04:00
|
|
|
|
2010-12-17 18:02:39 +03:00
|
|
|
#define QCOW2_EXT_MAGIC_END 0
|
2020-07-17 11:14:49 +03:00
|
|
|
#define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xe2792aca
|
2012-04-12 17:20:27 +04:00
|
|
|
#define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
#define QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77
|
2017-06-28 15:05:08 +03:00
|
|
|
#define QCOW2_EXT_MAGIC_BITMAPS 0x23852875
|
2019-01-14 18:48:25 +03:00
|
|
|
#define QCOW2_EXT_MAGIC_DATA_FILE 0x44415441
|
2009-03-28 20:55:06 +03:00
|
|
|
|
2018-11-01 21:27:37 +03:00
|
|
|
static int coroutine_fn
|
|
|
|
qcow2_co_preadv_compressed(BlockDriverState *bs,
|
2021-09-14 15:24:46 +03:00
|
|
|
uint64_t l2_entry,
|
2018-11-01 21:27:37 +03:00
|
|
|
uint64_t offset,
|
|
|
|
uint64_t bytes,
|
2019-06-04 19:15:13 +03:00
|
|
|
QEMUIOVector *qiov,
|
|
|
|
size_t qiov_offset);
|
2018-11-01 21:27:37 +03:00
|
|
|
|
2010-12-17 18:02:39 +03:00
|
|
|
static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
|
2006-08-06 01:14:20 +04:00
|
|
|
{
|
|
|
|
const QCowHeader *cow_header = (const void *)buf;
|
2007-09-17 12:09:54 +04:00
|
|
|
|
2006-08-06 01:14:20 +04:00
|
|
|
if (buf_size >= sizeof(QCowHeader) &&
|
|
|
|
be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
|
2011-12-15 15:20:58 +04:00
|
|
|
be32_to_cpu(cow_header->version) >= 2)
|
2006-08-06 01:14:20 +04:00
|
|
|
return 100;
|
|
|
|
else
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-03-28 20:55:06 +03:00
|
|
|
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
|
|
|
|
uint8_t *buf, size_t buflen,
|
|
|
|
void *opaque, Error **errp)
|
|
|
|
{
|
|
|
|
BlockDriverState *bs = opaque;
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
ssize_t ret;
|
|
|
|
|
|
|
|
if ((offset + buflen) > s->crypto_header.length) {
|
|
|
|
error_setg(errp, "Request for data outside of extension header");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = bdrv_pread(bs->file,
|
|
|
|
s->crypto_header.offset + offset, buf, buflen);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Could not read encryption header");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
|
|
|
|
void *opaque, Error **errp)
|
|
|
|
{
|
|
|
|
BlockDriverState *bs = opaque;
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
int64_t ret;
|
|
|
|
int64_t clusterlen;
|
|
|
|
|
|
|
|
ret = qcow2_alloc_clusters(bs, headerlen);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret,
|
|
|
|
"Cannot allocate cluster for LUKS header size %zu",
|
|
|
|
headerlen);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->crypto_header.length = headerlen;
|
|
|
|
s->crypto_header.offset = ret;
|
|
|
|
|
block: always fill entire LUKS header space with zeros
When initializing the LUKS header the size with default encryption
parameters will currently be 2068480 bytes. This is rounded up to
a multiple of the cluster size, 2081792, with 64k sectors. If the
end of the header is not the same as the end of the cluster we fill
the extra space with zeros. This was forgetting that not even the
space allocated for the header will be fully initialized, as we
only write key material for the first key slot. The space left
for the other 7 slots is never written to.
An optimization to the ref count checking code:
commit a5fff8d4b4d928311a5005efa12d0991fe3b66f9 (refs/bisect/bad)
Author: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Date: Wed Feb 27 16:14:30 2019 +0300
qcow2-refcount: avoid eating RAM
made the assumption that every cluster which was allocated would
have at least some data written to it. This was violated by way
the LUKS header is only partially written, with much space simply
reserved for future use.
Depending on the cluster size this problem was masked by the
logic which wrote zeros between the end of the LUKS header and
the end of the cluster.
$ qemu-img create --object secret,id=cluster_encrypt0,data=123456 \
-f qcow2 -o cluster_size=2k,encrypt.iter-time=1,\
encrypt.format=luks,encrypt.key-secret=cluster_encrypt0 \
cluster_size_check.qcow2 100M
Formatting 'cluster_size_check.qcow2', fmt=qcow2 size=104857600
encrypt.format=luks encrypt.key-secret=cluster_encrypt0
encrypt.iter-time=1 cluster_size=2048 lazy_refcounts=off refcount_bits=16
$ qemu-img check --object secret,id=cluster_encrypt0,data=redhat \
'json:{"driver": "qcow2", "encrypt.format": "luks", \
"encrypt.key-secret": "cluster_encrypt0", \
"file.driver": "file", "file.filename": "cluster_size_check.qcow2"}'
ERROR: counting reference for region exceeding the end of the file by one cluster or more: offset 0x2000 size 0x1f9000
Leaked cluster 4 refcount=1 reference=0
...snip...
Leaked cluster 130 refcount=1 reference=0
1 errors were found on the image.
Data may be corrupted, or further writes to the image may corrupt it.
127 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Image end offset: 268288
The problem only exists when the disk image is entirely empty. Writing
data to the disk image payload will solve the problem by causing the
end of the file to be extended further.
The change fixes it by ensuring that the entire allocated LUKS header
region is fully initialized with zeros. The qemu-img check will still
fail for any pre-existing disk images created prior to this change,
unless at least 1 byte of the payload is written to.
Fully writing zeros to the entire LUKS header is a good idea regardless
as it ensures that space has been allocated on the host filesystem (or
whatever block storage backend is used).
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
Message-Id: <20200207135520.2669430-1-berrange@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2020-02-07 16:55:20 +03:00
|
|
|
/*
|
|
|
|
* Zero fill all space in cluster so it has predictable
|
|
|
|
* content, as we may not initialize some regions of the
|
|
|
|
* header (eg only 1 out of 8 key slots will be initialized)
|
|
|
|
*/
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
clusterlen = size_to_clusters(s, headerlen) * s->cluster_size;
|
2019-01-15 22:39:06 +03:00
|
|
|
assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0);
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
ret = bdrv_pwrite_zeroes(bs->file,
|
block: always fill entire LUKS header space with zeros
When initializing the LUKS header the size with default encryption
parameters will currently be 2068480 bytes. This is rounded up to
a multiple of the cluster size, 2081792, with 64k sectors. If the
end of the header is not the same as the end of the cluster we fill
the extra space with zeros. This was forgetting that not even the
space allocated for the header will be fully initialized, as we
only write key material for the first key slot. The space left
for the other 7 slots is never written to.
An optimization to the ref count checking code:
commit a5fff8d4b4d928311a5005efa12d0991fe3b66f9 (refs/bisect/bad)
Author: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Date: Wed Feb 27 16:14:30 2019 +0300
qcow2-refcount: avoid eating RAM
made the assumption that every cluster which was allocated would
have at least some data written to it. This was violated by way
the LUKS header is only partially written, with much space simply
reserved for future use.
Depending on the cluster size this problem was masked by the
logic which wrote zeros between the end of the LUKS header and
the end of the cluster.
$ qemu-img create --object secret,id=cluster_encrypt0,data=123456 \
-f qcow2 -o cluster_size=2k,encrypt.iter-time=1,\
encrypt.format=luks,encrypt.key-secret=cluster_encrypt0 \
cluster_size_check.qcow2 100M
Formatting 'cluster_size_check.qcow2', fmt=qcow2 size=104857600
encrypt.format=luks encrypt.key-secret=cluster_encrypt0
encrypt.iter-time=1 cluster_size=2048 lazy_refcounts=off refcount_bits=16
$ qemu-img check --object secret,id=cluster_encrypt0,data=redhat \
'json:{"driver": "qcow2", "encrypt.format": "luks", \
"encrypt.key-secret": "cluster_encrypt0", \
"file.driver": "file", "file.filename": "cluster_size_check.qcow2"}'
ERROR: counting reference for region exceeding the end of the file by one cluster or more: offset 0x2000 size 0x1f9000
Leaked cluster 4 refcount=1 reference=0
...snip...
Leaked cluster 130 refcount=1 reference=0
1 errors were found on the image.
Data may be corrupted, or further writes to the image may corrupt it.
127 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Image end offset: 268288
The problem only exists when the disk image is entirely empty. Writing
data to the disk image payload will solve the problem by causing the
end of the file to be extended further.
The change fixes it by ensuring that the entire allocated LUKS header
region is fully initialized with zeros. The qemu-img check will still
fail for any pre-existing disk images created prior to this change,
unless at least 1 byte of the payload is written to.
Fully writing zeros to the entire LUKS header is a good idea regardless
as it ensures that space has been allocated on the host filesystem (or
whatever block storage backend is used).
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
Message-Id: <20200207135520.2669430-1-berrange@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2020-02-07 16:55:20 +03:00
|
|
|
ret,
|
|
|
|
clusterlen, 0);
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Could not zero fill encryption header");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
|
|
|
|
const uint8_t *buf, size_t buflen,
|
|
|
|
void *opaque, Error **errp)
|
|
|
|
{
|
|
|
|
BlockDriverState *bs = opaque;
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
ssize_t ret;
|
|
|
|
|
|
|
|
if ((offset + buflen) > s->crypto_header.length) {
|
|
|
|
error_setg(errp, "Request for data outside of extension header");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = bdrv_pwrite(bs->file,
|
|
|
|
s->crypto_header.offset + offset, buf, buflen);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Could not read encryption header");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2020-06-25 15:55:43 +03:00
|
|
|
static QDict*
|
|
|
|
qcow2_extract_crypto_opts(QemuOpts *opts, const char *fmt, Error **errp)
|
|
|
|
{
|
|
|
|
QDict *cryptoopts_qdict;
|
|
|
|
QDict *opts_qdict;
|
|
|
|
|
|
|
|
/* Extract "encrypt." options into a qdict */
|
|
|
|
opts_qdict = qemu_opts_to_qdict(opts, NULL);
|
|
|
|
qdict_extract_subqdict(opts_qdict, &cryptoopts_qdict, "encrypt.");
|
|
|
|
qobject_unref(opts_qdict);
|
|
|
|
qdict_put_str(cryptoopts_qdict, "format", fmt);
|
|
|
|
return cryptoopts_qdict;
|
|
|
|
}
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
|
2020-03-24 20:42:30 +03:00
|
|
|
/*
|
2009-03-28 20:55:06 +03:00
|
|
|
* read qcow2 extension and fill bs
|
|
|
|
* start reading from start_offset
|
|
|
|
* finish reading upon magic of value 0 or when end_offset reached
|
|
|
|
* unknown magic is skipped (future extension this version knows nothing about)
|
|
|
|
* return 0 upon success, non-0 otherwise
|
|
|
|
*/
|
2010-12-17 18:02:39 +03:00
|
|
|
static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
|
2013-09-05 11:40:43 +04:00
|
|
|
uint64_t end_offset, void **p_feature_table,
|
2017-06-28 15:05:08 +03:00
|
|
|
int flags, bool *need_update_header,
|
|
|
|
Error **errp)
|
2009-03-28 20:55:06 +03:00
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2009-03-28 20:55:06 +03:00
|
|
|
QCowExtension ext;
|
|
|
|
uint64_t offset;
|
2012-02-02 17:52:08 +04:00
|
|
|
int ret;
|
2017-06-28 15:05:08 +03:00
|
|
|
Qcow2BitmapHeaderExt bitmaps_ext;
|
|
|
|
|
|
|
|
if (need_update_header != NULL) {
|
|
|
|
*need_update_header = false;
|
|
|
|
}
|
2009-03-28 20:55:06 +03:00
|
|
|
|
|
|
|
#ifdef DEBUG_EXT
|
2010-12-17 18:02:39 +03:00
|
|
|
printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
|
2009-03-28 20:55:06 +03:00
|
|
|
#endif
|
|
|
|
offset = start_offset;
|
|
|
|
while (offset < end_offset) {
|
|
|
|
|
|
|
|
#ifdef DEBUG_EXT
|
|
|
|
/* Sanity check */
|
|
|
|
if (offset > s->cluster_size)
|
2010-12-17 18:02:39 +03:00
|
|
|
printf("qcow2_read_extension: suspicious offset %lu\n", offset);
|
2009-03-28 20:55:06 +03:00
|
|
|
|
2011-11-22 14:06:25 +04:00
|
|
|
printf("attempting to read extended header in offset %lu\n", offset);
|
2009-03-28 20:55:06 +03:00
|
|
|
#endif
|
|
|
|
|
2016-06-20 19:24:02 +03:00
|
|
|
ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
|
2013-09-05 11:40:43 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
|
|
|
|
"pread fail from offset %" PRIu64, offset);
|
2009-03-28 20:55:06 +03:00
|
|
|
return 1;
|
|
|
|
}
|
2018-10-09 20:24:59 +03:00
|
|
|
ext.magic = be32_to_cpu(ext.magic);
|
|
|
|
ext.len = be32_to_cpu(ext.len);
|
2009-03-28 20:55:06 +03:00
|
|
|
offset += sizeof(ext);
|
|
|
|
#ifdef DEBUG_EXT
|
|
|
|
printf("ext.magic = 0x%x\n", ext.magic);
|
|
|
|
#endif
|
2014-11-25 20:12:40 +03:00
|
|
|
if (offset > end_offset || ext.len > end_offset - offset) {
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg(errp, "Header extension too large");
|
2012-02-22 15:37:13 +04:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2009-03-28 20:55:06 +03:00
|
|
|
switch (ext.magic) {
|
2010-12-17 18:02:39 +03:00
|
|
|
case QCOW2_EXT_MAGIC_END:
|
2009-03-28 20:55:06 +03:00
|
|
|
return 0;
|
2009-03-28 20:55:14 +03:00
|
|
|
|
2010-12-17 18:02:39 +03:00
|
|
|
case QCOW2_EXT_MAGIC_BACKING_FORMAT:
|
2009-03-28 20:55:14 +03:00
|
|
|
if (ext.len >= sizeof(bs->backing_format)) {
|
2014-04-29 21:03:12 +04:00
|
|
|
error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32
|
|
|
|
" too large (>=%zu)", ext.len,
|
|
|
|
sizeof(bs->backing_format));
|
2009-03-28 20:55:14 +03:00
|
|
|
return 2;
|
|
|
|
}
|
2016-06-20 19:24:02 +03:00
|
|
|
ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
|
2013-09-05 11:40:43 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
|
|
|
|
"Could not read format name");
|
2009-03-28 20:55:14 +03:00
|
|
|
return 3;
|
2013-09-05 11:40:43 +04:00
|
|
|
}
|
2009-03-28 20:55:14 +03:00
|
|
|
bs->backing_format[ext.len] = '\0';
|
2015-04-07 16:03:16 +03:00
|
|
|
s->image_backing_format = g_strdup(bs->backing_format);
|
2009-03-28 20:55:14 +03:00
|
|
|
#ifdef DEBUG_EXT
|
|
|
|
printf("Qcow2: Got format extension %s\n", bs->backing_format);
|
|
|
|
#endif
|
|
|
|
break;
|
|
|
|
|
2012-04-12 17:20:27 +04:00
|
|
|
case QCOW2_EXT_MAGIC_FEATURE_TABLE:
|
|
|
|
if (p_feature_table != NULL) {
|
2020-10-30 06:35:12 +03:00
|
|
|
void *feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
|
2016-06-20 19:24:02 +03:00
|
|
|
ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
|
2012-04-12 17:20:27 +04:00
|
|
|
if (ret < 0) {
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
|
|
|
|
"Could not read table");
|
2012-04-12 17:20:27 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
*p_feature_table = feature_table;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
case QCOW2_EXT_MAGIC_CRYPTO_HEADER: {
|
|
|
|
unsigned int cflags = 0;
|
|
|
|
if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
|
|
|
|
error_setg(errp, "CRYPTO header extension only "
|
|
|
|
"expected with LUKS encryption method");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) {
|
|
|
|
error_setg(errp, "CRYPTO header extension size %u, "
|
|
|
|
"but expected size %zu", ext.len,
|
|
|
|
sizeof(Qcow2CryptoHeaderExtension));
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = bdrv_pread(bs->file, offset, &s->crypto_header, ext.len);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret,
|
|
|
|
"Unable to read CRYPTO header extension");
|
|
|
|
return ret;
|
|
|
|
}
|
2018-10-09 20:24:59 +03:00
|
|
|
s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
|
|
|
|
s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
|
|
|
|
if ((s->crypto_header.offset % s->cluster_size) != 0) {
|
|
|
|
error_setg(errp, "Encryption header offset '%" PRIu64 "' is "
|
|
|
|
"not a multiple of cluster size '%u'",
|
|
|
|
s->crypto_header.offset, s->cluster_size);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (flags & BDRV_O_NO_IO) {
|
|
|
|
cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
|
|
|
|
}
|
2017-06-23 19:24:17 +03:00
|
|
|
s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
qcow2_crypto_hdr_read_func,
|
2019-05-06 17:27:41 +03:00
|
|
|
bs, cflags, QCOW2_MAX_THREADS, errp);
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
if (!s->crypto) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
} break;
|
|
|
|
|
2017-06-28 15:05:08 +03:00
|
|
|
case QCOW2_EXT_MAGIC_BITMAPS:
|
|
|
|
if (ext.len != sizeof(bitmaps_ext)) {
|
|
|
|
error_setg_errno(errp, -ret, "bitmaps_ext: "
|
|
|
|
"Invalid extension length");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) {
|
2017-11-23 05:08:17 +03:00
|
|
|
if (s->qcow_version < 3) {
|
|
|
|
/* Let's be a bit more specific */
|
|
|
|
warn_report("This qcow2 v2 image contains bitmaps, but "
|
|
|
|
"they may have been modified by a program "
|
|
|
|
"without persistent bitmap support; so now "
|
|
|
|
"they must all be considered inconsistent");
|
|
|
|
} else {
|
|
|
|
warn_report("a program lacking bitmap support "
|
|
|
|
"modified this file, so all bitmaps are now "
|
|
|
|
"considered inconsistent");
|
|
|
|
}
|
2017-09-11 22:52:46 +03:00
|
|
|
error_printf("Some clusters may be leaked, "
|
|
|
|
"run 'qemu-img check -r' on the image "
|
2017-06-28 15:05:08 +03:00
|
|
|
"file to fix.");
|
|
|
|
if (need_update_header != NULL) {
|
|
|
|
/* Updating is needed to drop invalid bitmap extension. */
|
|
|
|
*need_update_header = true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = bdrv_pread(bs->file, offset, &bitmaps_ext, ext.len);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "bitmaps_ext: "
|
|
|
|
"Could not read ext header");
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bitmaps_ext.reserved32 != 0) {
|
|
|
|
error_setg_errno(errp, -ret, "bitmaps_ext: "
|
|
|
|
"Reserved field is not zero");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-10-09 20:24:59 +03:00
|
|
|
bitmaps_ext.nb_bitmaps = be32_to_cpu(bitmaps_ext.nb_bitmaps);
|
|
|
|
bitmaps_ext.bitmap_directory_size =
|
|
|
|
be64_to_cpu(bitmaps_ext.bitmap_directory_size);
|
|
|
|
bitmaps_ext.bitmap_directory_offset =
|
|
|
|
be64_to_cpu(bitmaps_ext.bitmap_directory_offset);
|
2017-06-28 15:05:08 +03:00
|
|
|
|
|
|
|
if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) {
|
|
|
|
error_setg(errp,
|
|
|
|
"bitmaps_ext: Image has %" PRIu32 " bitmaps, "
|
|
|
|
"exceeding the QEMU supported maximum of %d",
|
|
|
|
bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bitmaps_ext.nb_bitmaps == 0) {
|
|
|
|
error_setg(errp, "found bitmaps extension with zero bitmaps");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2019-12-12 13:01:21 +03:00
|
|
|
if (offset_into_cluster(s, bitmaps_ext.bitmap_directory_offset)) {
|
2017-06-28 15:05:08 +03:00
|
|
|
error_setg(errp, "bitmaps_ext: "
|
|
|
|
"invalid bitmap directory offset");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bitmaps_ext.bitmap_directory_size >
|
|
|
|
QCOW2_MAX_BITMAP_DIRECTORY_SIZE) {
|
|
|
|
error_setg(errp, "bitmaps_ext: "
|
|
|
|
"bitmap directory size (%" PRIu64 ") exceeds "
|
|
|
|
"the maximum supported size (%d)",
|
|
|
|
bitmaps_ext.bitmap_directory_size,
|
|
|
|
QCOW2_MAX_BITMAP_DIRECTORY_SIZE);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->nb_bitmaps = bitmaps_ext.nb_bitmaps;
|
|
|
|
s->bitmap_directory_offset =
|
|
|
|
bitmaps_ext.bitmap_directory_offset;
|
|
|
|
s->bitmap_directory_size =
|
|
|
|
bitmaps_ext.bitmap_directory_size;
|
|
|
|
|
|
|
|
#ifdef DEBUG_EXT
|
|
|
|
printf("Qcow2: Got bitmaps extension: "
|
|
|
|
"offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n",
|
|
|
|
s->bitmap_directory_offset, s->nb_bitmaps);
|
|
|
|
#endif
|
|
|
|
break;
|
|
|
|
|
2019-01-15 21:02:40 +03:00
|
|
|
case QCOW2_EXT_MAGIC_DATA_FILE:
|
|
|
|
{
|
|
|
|
s->image_data_file = g_malloc0(ext.len + 1);
|
|
|
|
ret = bdrv_pread(bs->file, offset, s->image_data_file, ext.len);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret,
|
|
|
|
"ERROR: Could not read data file name");
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
#ifdef DEBUG_EXT
|
|
|
|
printf("Qcow2: Got external data file %s\n", s->image_data_file);
|
|
|
|
#endif
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2009-03-28 20:55:06 +03:00
|
|
|
default:
|
2012-02-02 17:52:08 +04:00
|
|
|
/* unknown magic - save it in case we need to rewrite the header */
|
2017-11-17 19:47:47 +03:00
|
|
|
/* If you add a new feature, make sure to also update the fast
|
|
|
|
* path of qcow2_make_empty() to deal with it. */
|
2012-02-02 17:52:08 +04:00
|
|
|
{
|
|
|
|
Qcow2UnknownHeaderExtension *uext;
|
|
|
|
|
|
|
|
uext = g_malloc0(sizeof(*uext) + ext.len);
|
|
|
|
uext->magic = ext.magic;
|
|
|
|
uext->len = ext.len;
|
|
|
|
QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
|
|
|
|
|
2016-06-20 19:24:02 +03:00
|
|
|
ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
|
2012-02-02 17:52:08 +04:00
|
|
|
if (ret < 0) {
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg_errno(errp, -ret, "ERROR: unknown extension: "
|
|
|
|
"Could not read data");
|
2012-02-02 17:52:08 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
2009-03-28 20:55:06 +03:00
|
|
|
break;
|
|
|
|
}
|
2012-02-22 15:31:47 +04:00
|
|
|
|
|
|
|
offset += ((ext.len + 7) & ~7);
|
2009-03-28 20:55:06 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-02-02 17:52:08 +04:00
|
|
|
static void cleanup_unknown_header_ext(BlockDriverState *bs)
|
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2012-02-02 17:52:08 +04:00
|
|
|
Qcow2UnknownHeaderExtension *uext, *next;
|
|
|
|
|
|
|
|
QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
|
|
|
|
QLIST_REMOVE(uext, next);
|
|
|
|
g_free(uext);
|
|
|
|
}
|
|
|
|
}
|
2009-03-28 20:55:06 +03:00
|
|
|
|
2016-03-16 21:54:33 +03:00
|
|
|
static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
|
|
|
|
uint64_t mask)
|
2012-04-12 17:20:27 +04:00
|
|
|
{
|
2020-01-15 16:56:26 +03:00
|
|
|
g_autoptr(GString) features = g_string_sized_new(60);
|
2014-07-17 13:41:53 +04:00
|
|
|
|
2012-04-12 17:20:27 +04:00
|
|
|
while (table && table->name[0] != '\0') {
|
|
|
|
if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
|
2014-07-17 13:41:53 +04:00
|
|
|
if (mask & (1ULL << table->bit)) {
|
2020-01-15 16:56:26 +03:00
|
|
|
if (features->len > 0) {
|
|
|
|
g_string_append(features, ", ");
|
|
|
|
}
|
|
|
|
g_string_append_printf(features, "%.46s", table->name);
|
2014-07-17 13:41:53 +04:00
|
|
|
mask &= ~(1ULL << table->bit);
|
2012-04-12 17:20:27 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
table++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (mask) {
|
2020-01-15 16:56:26 +03:00
|
|
|
if (features->len > 0) {
|
|
|
|
g_string_append(features, ", ");
|
|
|
|
}
|
|
|
|
g_string_append_printf(features,
|
|
|
|
"Unknown incompatible feature: %" PRIx64, mask);
|
2012-04-12 17:20:27 +04:00
|
|
|
}
|
2014-07-17 13:41:53 +04:00
|
|
|
|
2020-01-15 16:56:26 +03:00
|
|
|
error_setg(errp, "Unsupported qcow2 feature(s): %s", features->str);
|
2012-04-12 17:20:27 +04:00
|
|
|
}
|
|
|
|
|
2012-07-27 12:05:22 +04:00
|
|
|
/*
|
|
|
|
* Sets the dirty bit and flushes afterwards if necessary.
|
|
|
|
*
|
|
|
|
* The incompatible_features bit is only set if the image file header was
|
|
|
|
* updated successfully. Therefore it is not required to check the return
|
|
|
|
* value of this function.
|
|
|
|
*/
|
2012-12-07 21:08:47 +04:00
|
|
|
int qcow2_mark_dirty(BlockDriverState *bs)
|
2012-07-27 12:05:22 +04:00
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2012-07-27 12:05:22 +04:00
|
|
|
uint64_t val;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
assert(s->qcow_version >= 3);
|
|
|
|
|
|
|
|
if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
|
|
|
|
return 0; /* already dirty */
|
|
|
|
}
|
|
|
|
|
|
|
|
val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
|
2016-06-20 21:09:15 +03:00
|
|
|
ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
|
2012-07-27 12:05:22 +04:00
|
|
|
&val, sizeof(val));
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
2015-06-16 15:19:22 +03:00
|
|
|
ret = bdrv_flush(bs->file->bs);
|
2012-07-27 12:05:22 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Only treat image as dirty if the header was updated successfully */
|
|
|
|
s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-07-27 12:05:19 +04:00
|
|
|
/*
|
|
|
|
* Clears the dirty bit and flushes before if necessary. Only call this
|
|
|
|
* function when there are no pending requests, it does not guard against
|
|
|
|
* concurrent requests dirtying the image.
|
|
|
|
*/
|
|
|
|
static int qcow2_mark_clean(BlockDriverState *bs)
|
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2012-07-27 12:05:19 +04:00
|
|
|
|
|
|
|
if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
|
2014-04-03 15:47:50 +04:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
|
|
|
|
|
2018-03-01 19:36:14 +03:00
|
|
|
ret = qcow2_flush_caches(bs);
|
2012-07-27 12:05:19 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
return qcow2_update_header(bs);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-08-30 16:34:24 +04:00
|
|
|
/*
|
|
|
|
* Marks the image as corrupt.
|
|
|
|
*/
|
|
|
|
int qcow2_mark_corrupt(BlockDriverState *bs)
|
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2013-08-30 16:34:24 +04:00
|
|
|
|
|
|
|
s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
|
|
|
|
return qcow2_update_header(bs);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Marks the image as consistent, i.e., unsets the corrupt bit, and flushes
|
|
|
|
* before if necessary.
|
|
|
|
*/
|
|
|
|
int qcow2_mark_consistent(BlockDriverState *bs)
|
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2013-08-30 16:34:24 +04:00
|
|
|
|
|
|
|
if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
|
2018-03-01 19:36:14 +03:00
|
|
|
int ret = qcow2_flush_caches(bs);
|
2013-08-30 16:34:24 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
|
|
|
|
return qcow2_update_header(bs);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-10-11 18:28:06 +03:00
|
|
|
static void qcow2_add_check_result(BdrvCheckResult *out,
|
|
|
|
const BdrvCheckResult *src,
|
|
|
|
bool set_allocation_info)
|
|
|
|
{
|
|
|
|
out->corruptions += src->corruptions;
|
|
|
|
out->leaks += src->leaks;
|
|
|
|
out->check_errors += src->check_errors;
|
|
|
|
out->corruptions_fixed += src->corruptions_fixed;
|
|
|
|
out->leaks_fixed += src->leaks_fixed;
|
|
|
|
|
|
|
|
if (set_allocation_info) {
|
|
|
|
out->image_end_offset = src->image_end_offset;
|
|
|
|
out->bfi = src->bfi;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-03-01 19:36:19 +03:00
|
|
|
static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs,
|
|
|
|
BdrvCheckResult *result,
|
|
|
|
BdrvCheckMode fix)
|
2012-08-09 16:05:55 +04:00
|
|
|
{
|
2019-10-11 18:28:06 +03:00
|
|
|
BdrvCheckResult snapshot_res = {};
|
|
|
|
BdrvCheckResult refcount_res = {};
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
memset(result, 0, sizeof(*result));
|
|
|
|
|
|
|
|
ret = qcow2_check_read_snapshot_table(bs, &snapshot_res, fix);
|
|
|
|
if (ret < 0) {
|
2019-10-11 18:28:07 +03:00
|
|
|
qcow2_add_check_result(result, &snapshot_res, false);
|
2019-10-11 18:28:06 +03:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = qcow2_check_refcounts(bs, &refcount_res, fix);
|
|
|
|
qcow2_add_check_result(result, &refcount_res, true);
|
2019-10-11 18:28:07 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
qcow2_add_check_result(result, &snapshot_res, false);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = qcow2_check_fix_snapshot_table(bs, &snapshot_res, fix);
|
|
|
|
qcow2_add_check_result(result, &snapshot_res, false);
|
2012-08-09 16:05:55 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (fix && result->check_errors == 0 && result->corruptions == 0) {
|
2013-08-30 16:34:30 +04:00
|
|
|
ret = qcow2_mark_clean(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
return qcow2_mark_consistent(bs);
|
2012-08-09 16:05:55 +04:00
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2018-03-01 19:36:19 +03:00
|
|
|
static int coroutine_fn qcow2_co_check(BlockDriverState *bs,
|
|
|
|
BdrvCheckResult *result,
|
|
|
|
BdrvCheckMode fix)
|
|
|
|
{
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
qemu_co_mutex_lock(&s->lock);
|
|
|
|
ret = qcow2_co_check_locked(bs, result, fix);
|
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2018-03-06 19:14:06 +03:00
|
|
|
int qcow2_validate_table(BlockDriverState *bs, uint64_t offset,
|
|
|
|
uint64_t entries, size_t entry_len,
|
|
|
|
int64_t max_size_bytes, const char *table_name,
|
|
|
|
Error **errp)
|
2014-03-26 16:05:44 +04:00
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2014-03-26 16:05:44 +04:00
|
|
|
|
2018-03-06 19:14:06 +03:00
|
|
|
if (entries > max_size_bytes / entry_len) {
|
|
|
|
error_setg(errp, "%s too large", table_name);
|
|
|
|
return -EFBIG;
|
2014-03-26 16:05:44 +04:00
|
|
|
}
|
|
|
|
|
2018-03-06 19:14:06 +03:00
|
|
|
/* Use signed INT64_MAX as the maximum even for uint64_t header fields,
|
|
|
|
* because values will be passed to qemu functions taking int64_t. */
|
|
|
|
if ((INT64_MAX - entries * entry_len < offset) ||
|
|
|
|
(offset_into_cluster(s, offset) != 0)) {
|
|
|
|
error_setg(errp, "%s offset invalid", table_name);
|
2014-03-26 16:05:44 +04:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-03-12 19:48:48 +03:00
|
|
|
static const char *const mutable_opts[] = {
|
|
|
|
QCOW2_OPT_LAZY_REFCOUNTS,
|
|
|
|
QCOW2_OPT_DISCARD_REQUEST,
|
|
|
|
QCOW2_OPT_DISCARD_SNAPSHOT,
|
|
|
|
QCOW2_OPT_DISCARD_OTHER,
|
|
|
|
QCOW2_OPT_OVERLAP,
|
|
|
|
QCOW2_OPT_OVERLAP_TEMPLATE,
|
|
|
|
QCOW2_OPT_OVERLAP_MAIN_HEADER,
|
|
|
|
QCOW2_OPT_OVERLAP_ACTIVE_L1,
|
|
|
|
QCOW2_OPT_OVERLAP_ACTIVE_L2,
|
|
|
|
QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
|
|
|
|
QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
|
|
|
|
QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
|
|
|
|
QCOW2_OPT_OVERLAP_INACTIVE_L1,
|
|
|
|
QCOW2_OPT_OVERLAP_INACTIVE_L2,
|
|
|
|
QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
|
|
|
|
QCOW2_OPT_CACHE_SIZE,
|
|
|
|
QCOW2_OPT_L2_CACHE_SIZE,
|
|
|
|
QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
|
|
|
|
QCOW2_OPT_REFCOUNT_CACHE_SIZE,
|
|
|
|
QCOW2_OPT_CACHE_CLEAN_INTERVAL,
|
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
2013-03-15 13:35:08 +04:00
|
|
|
static QemuOptsList qcow2_runtime_opts = {
|
|
|
|
.name = "qcow2",
|
|
|
|
.head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
|
|
|
|
.desc = {
|
|
|
|
{
|
2013-07-17 16:45:34 +04:00
|
|
|
.name = QCOW2_OPT_LAZY_REFCOUNTS,
|
2013-03-15 13:35:08 +04:00
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Postpone refcount updates",
|
|
|
|
},
|
2013-06-19 15:44:19 +04:00
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_DISCARD_REQUEST,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Pass guest discard requests to the layer below",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_DISCARD_SNAPSHOT,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Generate discard requests when snapshot related space "
|
|
|
|
"is freed",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_DISCARD_OTHER,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Generate discard requests when other clusters are freed",
|
|
|
|
},
|
2013-10-10 13:09:25 +04:00
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP,
|
|
|
|
.type = QEMU_OPT_STRING,
|
|
|
|
.help = "Selects which overlap checks to perform from a range of "
|
|
|
|
"templates (none, constant, cached, all)",
|
|
|
|
},
|
2014-08-20 21:59:35 +04:00
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_TEMPLATE,
|
|
|
|
.type = QEMU_OPT_STRING,
|
|
|
|
.help = "Selects which overlap checks to perform from a range of "
|
|
|
|
"templates (none, constant, cached, all)",
|
|
|
|
},
|
2013-10-10 13:09:25 +04:00
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_MAIN_HEADER,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Check for unintended writes into the main qcow2 header",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_ACTIVE_L1,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Check for unintended writes into the active L1 table",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_ACTIVE_L2,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Check for unintended writes into an active L2 table",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Check for unintended writes into the refcount table",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Check for unintended writes into a refcount block",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Check for unintended writes into the snapshot table",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_INACTIVE_L1,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Check for unintended writes into an inactive L1 table",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_INACTIVE_L2,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Check for unintended writes into an inactive L2 table",
|
|
|
|
},
|
2018-07-05 18:15:15 +03:00
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
|
|
|
|
.type = QEMU_OPT_BOOL,
|
|
|
|
.help = "Check for unintended writes into the bitmap directory",
|
|
|
|
},
|
2014-08-19 00:07:33 +04:00
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_CACHE_SIZE,
|
|
|
|
.type = QEMU_OPT_SIZE,
|
|
|
|
.help = "Maximum combined metadata (L2 tables and refcount blocks) "
|
|
|
|
"cache size",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_L2_CACHE_SIZE,
|
|
|
|
.type = QEMU_OPT_SIZE,
|
|
|
|
.help = "Maximum L2 table cache size",
|
|
|
|
},
|
2018-02-05 17:33:36 +03:00
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
|
|
|
|
.type = QEMU_OPT_SIZE,
|
|
|
|
.help = "Size of each entry in the L2 cache",
|
|
|
|
},
|
2014-08-19 00:07:33 +04:00
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_REFCOUNT_CACHE_SIZE,
|
|
|
|
.type = QEMU_OPT_SIZE,
|
|
|
|
.help = "Maximum refcount block cache size",
|
|
|
|
},
|
2015-08-04 15:14:40 +03:00
|
|
|
{
|
|
|
|
.name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
|
|
|
|
.type = QEMU_OPT_NUMBER,
|
|
|
|
.help = "Clean unused cache entries after this time (in seconds)",
|
|
|
|
},
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
|
|
|
|
"ID of secret providing qcow2 AES key or LUKS passphrase"),
|
2013-03-15 13:35:08 +04:00
|
|
|
{ /* end of list */ }
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
2013-10-10 13:09:26 +04:00
|
|
|
static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
|
2018-07-05 18:15:15 +03:00
|
|
|
[QCOW2_OL_MAIN_HEADER_BITNR] = QCOW2_OPT_OVERLAP_MAIN_HEADER,
|
|
|
|
[QCOW2_OL_ACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L1,
|
|
|
|
[QCOW2_OL_ACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L2,
|
|
|
|
[QCOW2_OL_REFCOUNT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
|
|
|
|
[QCOW2_OL_REFCOUNT_BLOCK_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
|
|
|
|
[QCOW2_OL_SNAPSHOT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
|
|
|
|
[QCOW2_OL_INACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L1,
|
|
|
|
[QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2,
|
|
|
|
[QCOW2_OL_BITMAP_DIRECTORY_BITNR] = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
|
2013-10-10 13:09:26 +04:00
|
|
|
};
|
|
|
|
|
2015-08-04 15:14:40 +03:00
|
|
|
static void cache_clean_timer_cb(void *opaque)
|
|
|
|
{
|
|
|
|
BlockDriverState *bs = opaque;
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2018-02-05 17:33:09 +03:00
|
|
|
qcow2_cache_clean_unused(s->l2_table_cache);
|
|
|
|
qcow2_cache_clean_unused(s->refcount_block_cache);
|
2015-08-04 15:14:40 +03:00
|
|
|
timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
|
|
|
|
(int64_t) s->cache_clean_interval * 1000);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
|
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2015-08-04 15:14:40 +03:00
|
|
|
if (s->cache_clean_interval > 0) {
|
2021-03-29 11:06:03 +03:00
|
|
|
s->cache_clean_timer =
|
|
|
|
aio_timer_new_with_attrs(context, QEMU_CLOCK_VIRTUAL,
|
|
|
|
SCALE_MS, QEMU_TIMER_ATTR_EXTERNAL,
|
|
|
|
cache_clean_timer_cb, bs);
|
2015-08-04 15:14:40 +03:00
|
|
|
timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
|
|
|
|
(int64_t) s->cache_clean_interval * 1000);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void cache_clean_timer_del(BlockDriverState *bs)
|
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2015-08-04 15:14:40 +03:00
|
|
|
if (s->cache_clean_timer) {
|
|
|
|
timer_free(s->cache_clean_timer);
|
|
|
|
s->cache_clean_timer = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void qcow2_detach_aio_context(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
cache_clean_timer_del(bs);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void qcow2_attach_aio_context(BlockDriverState *bs,
|
|
|
|
AioContext *new_context)
|
|
|
|
{
|
|
|
|
cache_clean_timer_init(bs, new_context);
|
|
|
|
}
|
|
|
|
|
2021-02-02 15:49:53 +03:00
|
|
|
static bool read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
|
2015-06-01 19:09:19 +03:00
|
|
|
uint64_t *l2_cache_size,
|
2018-02-05 17:33:36 +03:00
|
|
|
uint64_t *l2_cache_entry_size,
|
2014-08-19 00:07:33 +04:00
|
|
|
uint64_t *refcount_cache_size, Error **errp)
|
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2018-09-26 19:04:43 +03:00
|
|
|
uint64_t combined_cache_size, l2_cache_max_setting;
|
2014-08-19 00:07:33 +04:00
|
|
|
bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set;
|
2019-02-13 19:48:53 +03:00
|
|
|
bool l2_cache_entry_size_set;
|
2018-05-28 18:01:28 +03:00
|
|
|
int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
|
2018-09-26 19:04:43 +03:00
|
|
|
uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
|
2019-08-16 15:17:42 +03:00
|
|
|
uint64_t max_l2_entries = DIV_ROUND_UP(virtual_disk_size, s->cluster_size);
|
|
|
|
/* An L2 table is always one cluster in size so the max cache size
|
|
|
|
* should be a multiple of the cluster size. */
|
2020-07-10 19:12:54 +03:00
|
|
|
uint64_t max_l2_cache = ROUND_UP(max_l2_entries * l2_entry_size(s),
|
2019-08-16 15:17:42 +03:00
|
|
|
s->cluster_size);
|
2014-08-19 00:07:33 +04:00
|
|
|
|
|
|
|
combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
|
|
|
|
l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
|
|
|
|
refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
|
2019-02-13 19:48:53 +03:00
|
|
|
l2_cache_entry_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE);
|
2014-08-19 00:07:33 +04:00
|
|
|
|
|
|
|
combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0);
|
2018-09-26 19:04:43 +03:00
|
|
|
l2_cache_max_setting = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE,
|
|
|
|
DEFAULT_L2_CACHE_MAX_SIZE);
|
2014-08-19 00:07:33 +04:00
|
|
|
*refcount_cache_size = qemu_opt_get_size(opts,
|
|
|
|
QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0);
|
|
|
|
|
2018-02-05 17:33:36 +03:00
|
|
|
*l2_cache_entry_size = qemu_opt_get_size(
|
|
|
|
opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size);
|
|
|
|
|
2018-09-26 19:04:43 +03:00
|
|
|
*l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting);
|
|
|
|
|
2014-08-19 00:07:33 +04:00
|
|
|
if (combined_cache_size_set) {
|
|
|
|
if (l2_cache_size_set && refcount_cache_size_set) {
|
|
|
|
error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE
|
|
|
|
" and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set "
|
2018-07-25 17:27:55 +03:00
|
|
|
"at the same time");
|
2021-02-02 15:49:53 +03:00
|
|
|
return false;
|
2018-09-26 19:04:43 +03:00
|
|
|
} else if (l2_cache_size_set &&
|
|
|
|
(l2_cache_max_setting > combined_cache_size)) {
|
2014-08-19 00:07:33 +04:00
|
|
|
error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed "
|
|
|
|
QCOW2_OPT_CACHE_SIZE);
|
2021-02-02 15:49:53 +03:00
|
|
|
return false;
|
2014-08-19 00:07:33 +04:00
|
|
|
} else if (*refcount_cache_size > combined_cache_size) {
|
|
|
|
error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed "
|
|
|
|
QCOW2_OPT_CACHE_SIZE);
|
2021-02-02 15:49:53 +03:00
|
|
|
return false;
|
2014-08-19 00:07:33 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
if (l2_cache_size_set) {
|
|
|
|
*refcount_cache_size = combined_cache_size - *l2_cache_size;
|
|
|
|
} else if (refcount_cache_size_set) {
|
|
|
|
*l2_cache_size = combined_cache_size - *refcount_cache_size;
|
|
|
|
} else {
|
2018-04-17 15:37:04 +03:00
|
|
|
/* Assign as much memory as possible to the L2 cache, and
|
|
|
|
* use the remainder for the refcount cache */
|
|
|
|
if (combined_cache_size >= max_l2_cache + min_refcount_cache) {
|
|
|
|
*l2_cache_size = max_l2_cache;
|
|
|
|
*refcount_cache_size = combined_cache_size - *l2_cache_size;
|
|
|
|
} else {
|
|
|
|
*refcount_cache_size =
|
|
|
|
MIN(combined_cache_size, min_refcount_cache);
|
|
|
|
*l2_cache_size = combined_cache_size - *refcount_cache_size;
|
|
|
|
}
|
2014-08-19 00:07:33 +04:00
|
|
|
}
|
|
|
|
}
|
2019-02-13 19:48:53 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If the L2 cache is not enough to cover the whole disk then
|
|
|
|
* default to 4KB entries. Smaller entries reduce the cost of
|
|
|
|
* loads and evictions and increase I/O performance.
|
|
|
|
*/
|
|
|
|
if (*l2_cache_size < max_l2_cache && !l2_cache_entry_size_set) {
|
|
|
|
*l2_cache_entry_size = MIN(s->cluster_size, 4096);
|
|
|
|
}
|
|
|
|
|
2018-09-26 19:04:42 +03:00
|
|
|
/* l2_cache_size and refcount_cache_size are ensured to have at least
|
|
|
|
* their minimum values in qcow2_update_options_prepare() */
|
2018-02-05 17:33:36 +03:00
|
|
|
|
|
|
|
if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) ||
|
|
|
|
*l2_cache_entry_size > s->cluster_size ||
|
|
|
|
!is_power_of_2(*l2_cache_entry_size)) {
|
|
|
|
error_setg(errp, "L2 cache entry size must be a power of two "
|
|
|
|
"between %d and the cluster size (%d)",
|
|
|
|
1 << MIN_CLUSTER_BITS, s->cluster_size);
|
2021-02-02 15:49:53 +03:00
|
|
|
return false;
|
2018-02-05 17:33:36 +03:00
|
|
|
}
|
2021-02-02 15:49:53 +03:00
|
|
|
|
|
|
|
return true;
|
2014-08-19 00:07:33 +04:00
|
|
|
}
|
|
|
|
|
2015-04-16 17:16:02 +03:00
|
|
|
typedef struct Qcow2ReopenState {
|
|
|
|
Qcow2Cache *l2_table_cache;
|
|
|
|
Qcow2Cache *refcount_block_cache;
|
2018-02-05 17:33:13 +03:00
|
|
|
int l2_slice_size; /* Number of entries in a slice of the L2 table */
|
2015-04-16 17:16:02 +03:00
|
|
|
bool use_lazy_refcounts;
|
|
|
|
int overlap_check;
|
|
|
|
bool discard_passthrough[QCOW2_DISCARD_MAX];
|
|
|
|
uint64_t cache_clean_interval;
|
2017-06-23 19:24:10 +03:00
|
|
|
QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
|
2015-04-16 17:16:02 +03:00
|
|
|
} Qcow2ReopenState;
|
|
|
|
|
|
|
|
static int qcow2_update_options_prepare(BlockDriverState *bs,
|
|
|
|
Qcow2ReopenState *r,
|
|
|
|
QDict *options, int flags,
|
|
|
|
Error **errp)
|
2015-04-16 12:29:27 +03:00
|
|
|
{
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2015-04-16 12:44:26 +03:00
|
|
|
QemuOpts *opts = NULL;
|
2015-04-16 12:29:27 +03:00
|
|
|
const char *opt_overlap_check, *opt_overlap_check_template;
|
|
|
|
int overlap_check_template = 0;
|
2018-02-05 17:33:36 +03:00
|
|
|
uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size;
|
2015-04-16 12:29:27 +03:00
|
|
|
int i;
|
2017-06-23 19:24:10 +03:00
|
|
|
const char *encryptfmt;
|
|
|
|
QDict *encryptopts = NULL;
|
2015-04-16 12:29:27 +03:00
|
|
|
int ret;
|
|
|
|
|
2017-06-23 19:24:10 +03:00
|
|
|
qdict_extract_subqdict(options, &encryptopts, "encrypt.");
|
|
|
|
encryptfmt = qdict_get_try_str(encryptopts, "format");
|
|
|
|
|
2015-04-16 12:44:26 +03:00
|
|
|
opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
|
2020-07-07 19:06:03 +03:00
|
|
|
if (!qemu_opts_absorb_qdict(opts, options, errp)) {
|
2015-04-16 12:44:26 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* get L2 table/refcount block cache size from command line options */
|
2021-02-02 15:49:53 +03:00
|
|
|
if (!read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size,
|
|
|
|
&refcount_cache_size, errp)) {
|
2015-04-16 12:44:26 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2018-02-05 17:33:36 +03:00
|
|
|
l2_cache_size /= l2_cache_entry_size;
|
2015-04-16 12:44:26 +03:00
|
|
|
if (l2_cache_size < MIN_L2_CACHE_SIZE) {
|
|
|
|
l2_cache_size = MIN_L2_CACHE_SIZE;
|
|
|
|
}
|
|
|
|
if (l2_cache_size > INT_MAX) {
|
|
|
|
error_setg(errp, "L2 cache size too big");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
refcount_cache_size /= s->cluster_size;
|
|
|
|
if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
|
|
|
|
refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
|
|
|
|
}
|
|
|
|
if (refcount_cache_size > INT_MAX) {
|
|
|
|
error_setg(errp, "Refcount cache size too big");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2015-04-16 14:42:27 +03:00
|
|
|
/* alloc new L2 table/refcount block cache, flush old one */
|
|
|
|
if (s->l2_table_cache) {
|
|
|
|
ret = qcow2_cache_flush(bs, s->l2_table_cache);
|
|
|
|
if (ret) {
|
|
|
|
error_setg_errno(errp, -ret, "Failed to flush the L2 table cache");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (s->refcount_block_cache) {
|
|
|
|
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
|
|
|
|
if (ret) {
|
|
|
|
error_setg_errno(errp, -ret,
|
|
|
|
"Failed to flush the refcount block cache");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-07-10 19:12:54 +03:00
|
|
|
r->l2_slice_size = l2_cache_entry_size / l2_entry_size(s);
|
2018-02-05 17:33:36 +03:00
|
|
|
r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size,
|
|
|
|
l2_cache_entry_size);
|
|
|
|
r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size,
|
|
|
|
s->cluster_size);
|
2015-04-16 17:16:02 +03:00
|
|
|
if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
|
2015-04-16 12:44:26 +03:00
|
|
|
error_setg(errp, "Could not allocate metadata caches");
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* New interval for cache cleanup timer */
|
2015-04-16 17:16:02 +03:00
|
|
|
r->cache_clean_interval =
|
2015-04-16 14:42:27 +03:00
|
|
|
qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
|
2018-09-26 19:04:46 +03:00
|
|
|
DEFAULT_CACHE_CLEAN_INTERVAL);
|
2016-11-25 14:27:44 +03:00
|
|
|
#ifndef CONFIG_LINUX
|
|
|
|
if (r->cache_clean_interval != 0) {
|
|
|
|
error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL
|
|
|
|
" not supported on this host");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
#endif
|
2015-04-16 17:16:02 +03:00
|
|
|
if (r->cache_clean_interval > UINT_MAX) {
|
2015-04-16 12:44:26 +03:00
|
|
|
error_setg(errp, "Cache clean interval too big");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2015-04-16 14:42:27 +03:00
|
|
|
/* lazy-refcounts; flush if going from enabled to disabled */
|
2015-04-16 17:16:02 +03:00
|
|
|
r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
|
2015-04-16 12:29:27 +03:00
|
|
|
(s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
|
2015-04-16 17:16:02 +03:00
|
|
|
if (r->use_lazy_refcounts && s->qcow_version < 3) {
|
2015-04-16 14:11:39 +03:00
|
|
|
error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
|
|
|
|
"qemu 1.1 compatibility level");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
2015-04-16 12:29:27 +03:00
|
|
|
|
2015-04-16 14:42:27 +03:00
|
|
|
if (s->use_lazy_refcounts && !r->use_lazy_refcounts) {
|
|
|
|
ret = qcow2_mark_clean(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Failed to disable lazy refcounts");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-16 14:11:39 +03:00
|
|
|
/* Overlap check options */
|
2015-04-16 12:29:27 +03:00
|
|
|
opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
|
|
|
|
opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
|
|
|
|
if (opt_overlap_check_template && opt_overlap_check &&
|
|
|
|
strcmp(opt_overlap_check_template, opt_overlap_check))
|
|
|
|
{
|
|
|
|
error_setg(errp, "Conflicting values for qcow2 options '"
|
|
|
|
QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
|
|
|
|
"' ('%s')", opt_overlap_check, opt_overlap_check_template);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
if (!opt_overlap_check) {
|
|
|
|
opt_overlap_check = opt_overlap_check_template ?: "cached";
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!strcmp(opt_overlap_check, "none")) {
|
|
|
|
overlap_check_template = 0;
|
|
|
|
} else if (!strcmp(opt_overlap_check, "constant")) {
|
|
|
|
overlap_check_template = QCOW2_OL_CONSTANT;
|
|
|
|
} else if (!strcmp(opt_overlap_check, "cached")) {
|
|
|
|
overlap_check_template = QCOW2_OL_CACHED;
|
|
|
|
} else if (!strcmp(opt_overlap_check, "all")) {
|
|
|
|
overlap_check_template = QCOW2_OL_ALL;
|
|
|
|
} else {
|
|
|
|
error_setg(errp, "Unsupported value '%s' for qcow2 option "
|
|
|
|
"'overlap-check'. Allowed are any of the following: "
|
|
|
|
"none, constant, cached, all", opt_overlap_check);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2015-04-16 17:16:02 +03:00
|
|
|
r->overlap_check = 0;
|
2015-04-16 12:29:27 +03:00
|
|
|
for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
|
|
|
|
/* overlap-check defines a template bitmask, but every flag may be
|
|
|
|
* overwritten through the associated boolean option */
|
2015-04-16 17:16:02 +03:00
|
|
|
r->overlap_check |=
|
2015-04-16 12:29:27 +03:00
|
|
|
qemu_opt_get_bool(opts, overlap_bool_option_names[i],
|
|
|
|
overlap_check_template & (1 << i)) << i;
|
|
|
|
}
|
|
|
|
|
2015-04-16 17:16:02 +03:00
|
|
|
r->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
|
|
|
|
r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
|
|
|
|
r->discard_passthrough[QCOW2_DISCARD_REQUEST] =
|
2015-04-16 14:11:39 +03:00
|
|
|
qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
|
|
|
|
flags & BDRV_O_UNMAP);
|
2015-04-16 17:16:02 +03:00
|
|
|
r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
|
2015-04-16 14:11:39 +03:00
|
|
|
qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
|
2015-04-16 17:16:02 +03:00
|
|
|
r->discard_passthrough[QCOW2_DISCARD_OTHER] =
|
2015-04-16 14:11:39 +03:00
|
|
|
qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
|
|
|
|
|
2017-06-23 19:24:10 +03:00
|
|
|
switch (s->crypt_method_header) {
|
|
|
|
case QCOW_CRYPT_NONE:
|
|
|
|
if (encryptfmt) {
|
|
|
|
error_setg(errp, "No encryption in image header, but options "
|
|
|
|
"specified format '%s'", encryptfmt);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case QCOW_CRYPT_AES:
|
|
|
|
if (encryptfmt && !g_str_equal(encryptfmt, "aes")) {
|
|
|
|
error_setg(errp,
|
|
|
|
"Header reported 'aes' encryption format but "
|
|
|
|
"options specify '%s'", encryptfmt);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
2018-06-26 20:41:19 +03:00
|
|
|
qdict_put_str(encryptopts, "format", "qcow");
|
|
|
|
r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
|
2021-02-02 15:49:56 +03:00
|
|
|
if (!r->crypto_opts) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
2017-06-23 19:24:10 +03:00
|
|
|
break;
|
|
|
|
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
case QCOW_CRYPT_LUKS:
|
|
|
|
if (encryptfmt && !g_str_equal(encryptfmt, "luks")) {
|
|
|
|
error_setg(errp,
|
|
|
|
"Header reported 'luks' encryption format but "
|
|
|
|
"options specify '%s'", encryptfmt);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
2018-06-26 20:41:19 +03:00
|
|
|
qdict_put_str(encryptopts, "format", "luks");
|
|
|
|
r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
|
2021-02-02 15:49:56 +03:00
|
|
|
if (!r->crypto_opts) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
break;
|
|
|
|
|
2017-06-23 19:24:10 +03:00
|
|
|
default:
|
|
|
|
error_setg(errp, "Unsupported encryption method %d",
|
|
|
|
s->crypt_method_header);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2015-04-16 12:29:27 +03:00
|
|
|
ret = 0;
|
|
|
|
fail:
|
2018-04-19 18:01:43 +03:00
|
|
|
qobject_unref(encryptopts);
|
2015-04-16 12:44:26 +03:00
|
|
|
qemu_opts_del(opts);
|
|
|
|
opts = NULL;
|
2015-04-16 17:16:02 +03:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void qcow2_update_options_commit(BlockDriverState *bs,
|
|
|
|
Qcow2ReopenState *r)
|
|
|
|
{
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
int i;
|
|
|
|
|
2015-04-16 14:42:27 +03:00
|
|
|
if (s->l2_table_cache) {
|
2018-02-05 17:33:08 +03:00
|
|
|
qcow2_cache_destroy(s->l2_table_cache);
|
2015-04-16 14:42:27 +03:00
|
|
|
}
|
|
|
|
if (s->refcount_block_cache) {
|
2018-02-05 17:33:08 +03:00
|
|
|
qcow2_cache_destroy(s->refcount_block_cache);
|
2015-04-16 14:42:27 +03:00
|
|
|
}
|
2015-04-16 17:16:02 +03:00
|
|
|
s->l2_table_cache = r->l2_table_cache;
|
|
|
|
s->refcount_block_cache = r->refcount_block_cache;
|
2018-02-05 17:33:13 +03:00
|
|
|
s->l2_slice_size = r->l2_slice_size;
|
2015-04-16 17:16:02 +03:00
|
|
|
|
|
|
|
s->overlap_check = r->overlap_check;
|
|
|
|
s->use_lazy_refcounts = r->use_lazy_refcounts;
|
|
|
|
|
|
|
|
for (i = 0; i < QCOW2_DISCARD_MAX; i++) {
|
|
|
|
s->discard_passthrough[i] = r->discard_passthrough[i];
|
|
|
|
}
|
|
|
|
|
2015-04-16 14:42:27 +03:00
|
|
|
if (s->cache_clean_interval != r->cache_clean_interval) {
|
|
|
|
cache_clean_timer_del(bs);
|
|
|
|
s->cache_clean_interval = r->cache_clean_interval;
|
|
|
|
cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
|
|
|
|
}
|
2017-06-23 19:24:10 +03:00
|
|
|
|
|
|
|
qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
|
|
|
|
s->crypto_opts = r->crypto_opts;
|
2015-04-16 17:16:02 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static void qcow2_update_options_abort(BlockDriverState *bs,
|
|
|
|
Qcow2ReopenState *r)
|
|
|
|
{
|
|
|
|
if (r->l2_table_cache) {
|
2018-02-05 17:33:08 +03:00
|
|
|
qcow2_cache_destroy(r->l2_table_cache);
|
2015-04-16 17:16:02 +03:00
|
|
|
}
|
|
|
|
if (r->refcount_block_cache) {
|
2018-02-05 17:33:08 +03:00
|
|
|
qcow2_cache_destroy(r->refcount_block_cache);
|
2015-04-16 17:16:02 +03:00
|
|
|
}
|
2017-06-23 19:24:10 +03:00
|
|
|
qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
|
2015-04-16 17:16:02 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static int qcow2_update_options(BlockDriverState *bs, QDict *options,
|
|
|
|
int flags, Error **errp)
|
|
|
|
{
|
|
|
|
Qcow2ReopenState r = {};
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = qcow2_update_options_prepare(bs, &r, options, flags, errp);
|
|
|
|
if (ret >= 0) {
|
|
|
|
qcow2_update_options_commit(bs, &r);
|
|
|
|
} else {
|
|
|
|
qcow2_update_options_abort(bs, &r);
|
|
|
|
}
|
2015-04-16 12:44:26 +03:00
|
|
|
|
2015-04-16 12:29:27 +03:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
qcow2: introduce compression type feature
The patch adds some preparation parts for incompatible compression type
feature to qcow2 allowing the use different compression methods for
image clusters (de)compressing.
It is implied that the compression type is set on the image creation and
can be changed only later by image conversion, thus compression type
defines the only compression algorithm used for the image, and thus,
for all image clusters.
The goal of the feature is to add support of other compression methods
to qcow2. For example, ZSTD which is more effective on compression than ZLIB.
The default compression is ZLIB. Images created with ZLIB compression type
are backward compatible with older qemu versions.
Adding of the compression type breaks a number of tests because now the
compression type is reported on image creation and there are some changes
in the qcow2 header in size and offsets.
The tests are fixed in the following ways:
* filter out compression_type for many tests
* fix header size, feature table size and backing file offset
affected tests: 031, 036, 061, 080
header_size +=8: 1 byte compression type
7 bytes padding
feature_table += 48: incompatible feature compression type
backing_file_offset += 56 (8 + 48 -> header_change + feature_table_change)
* add "compression type" for test output matching when it isn't filtered
affected tests: 049, 060, 061, 065, 082, 085, 144, 182, 185, 198, 206,
242, 255, 274, 280
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
QAPI part:
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20200507082521.29210-2-dplotnikov@virtuozzo.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2020-05-07 11:25:18 +03:00
|
|
|
static int validate_compression_type(BDRVQcow2State *s, Error **errp)
|
|
|
|
{
|
|
|
|
switch (s->compression_type) {
|
|
|
|
case QCOW2_COMPRESSION_TYPE_ZLIB:
|
2020-05-07 11:25:20 +03:00
|
|
|
#ifdef CONFIG_ZSTD
|
|
|
|
case QCOW2_COMPRESSION_TYPE_ZSTD:
|
|
|
|
#endif
|
qcow2: introduce compression type feature
The patch adds some preparation parts for incompatible compression type
feature to qcow2 allowing the use different compression methods for
image clusters (de)compressing.
It is implied that the compression type is set on the image creation and
can be changed only later by image conversion, thus compression type
defines the only compression algorithm used for the image, and thus,
for all image clusters.
The goal of the feature is to add support of other compression methods
to qcow2. For example, ZSTD which is more effective on compression than ZLIB.
The default compression is ZLIB. Images created with ZLIB compression type
are backward compatible with older qemu versions.
Adding of the compression type breaks a number of tests because now the
compression type is reported on image creation and there are some changes
in the qcow2 header in size and offsets.
The tests are fixed in the following ways:
* filter out compression_type for many tests
* fix header size, feature table size and backing file offset
affected tests: 031, 036, 061, 080
header_size +=8: 1 byte compression type
7 bytes padding
feature_table += 48: incompatible feature compression type
backing_file_offset += 56 (8 + 48 -> header_change + feature_table_change)
* add "compression type" for test output matching when it isn't filtered
affected tests: 049, 060, 061, 065, 082, 085, 144, 182, 185, 198, 206,
242, 255, 274, 280
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
QAPI part:
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20200507082521.29210-2-dplotnikov@virtuozzo.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2020-05-07 11:25:18 +03:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
error_setg(errp, "qcow2: unknown compression type: %u",
|
|
|
|
s->compression_type);
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* if the compression type differs from QCOW2_COMPRESSION_TYPE_ZLIB
|
|
|
|
* the incompatible feature flag must be set
|
|
|
|
*/
|
|
|
|
if (s->compression_type == QCOW2_COMPRESSION_TYPE_ZLIB) {
|
|
|
|
if (s->incompatible_features & QCOW2_INCOMPAT_COMPRESSION) {
|
|
|
|
error_setg(errp, "qcow2: Compression type incompatible feature "
|
|
|
|
"bit must not be set");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (!(s->incompatible_features & QCOW2_INCOMPAT_COMPRESSION)) {
|
|
|
|
error_setg(errp, "qcow2: Compression type incompatible feature "
|
|
|
|
"bit must be set");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-03-01 19:36:16 +03:00
|
|
|
/* Called with s->lock held. */
|
|
|
|
static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
|
|
|
|
int flags, Error **errp)
|
2006-08-06 01:14:20 +04:00
|
|
|
{
|
2021-02-02 15:49:45 +03:00
|
|
|
ERRP_GUARD();
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2014-03-26 16:05:47 +04:00
|
|
|
unsigned int len, i;
|
|
|
|
int ret = 0;
|
2006-08-06 01:14:20 +04:00
|
|
|
QCowHeader header;
|
2009-03-28 20:55:06 +03:00
|
|
|
uint64_t ext_end;
|
2013-05-14 18:14:33 +04:00
|
|
|
uint64_t l1_vm_state_index;
|
2017-06-28 15:05:08 +03:00
|
|
|
bool update_header = false;
|
2006-08-06 01:14:20 +04:00
|
|
|
|
2016-06-20 19:24:02 +03:00
|
|
|
ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
|
2010-12-17 18:02:40 +03:00
|
|
|
if (ret < 0) {
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg_errno(errp, -ret, "Could not read qcow2 header");
|
2006-08-06 01:14:20 +04:00
|
|
|
goto fail;
|
2010-12-17 18:02:40 +03:00
|
|
|
}
|
2018-10-09 20:24:59 +03:00
|
|
|
header.magic = be32_to_cpu(header.magic);
|
|
|
|
header.version = be32_to_cpu(header.version);
|
|
|
|
header.backing_file_offset = be64_to_cpu(header.backing_file_offset);
|
|
|
|
header.backing_file_size = be32_to_cpu(header.backing_file_size);
|
|
|
|
header.size = be64_to_cpu(header.size);
|
|
|
|
header.cluster_bits = be32_to_cpu(header.cluster_bits);
|
|
|
|
header.crypt_method = be32_to_cpu(header.crypt_method);
|
|
|
|
header.l1_table_offset = be64_to_cpu(header.l1_table_offset);
|
|
|
|
header.l1_size = be32_to_cpu(header.l1_size);
|
|
|
|
header.refcount_table_offset = be64_to_cpu(header.refcount_table_offset);
|
|
|
|
header.refcount_table_clusters =
|
|
|
|
be32_to_cpu(header.refcount_table_clusters);
|
|
|
|
header.snapshots_offset = be64_to_cpu(header.snapshots_offset);
|
|
|
|
header.nb_snapshots = be32_to_cpu(header.nb_snapshots);
|
2007-09-17 12:09:54 +04:00
|
|
|
|
2011-02-09 13:11:07 +03:00
|
|
|
if (header.magic != QCOW_MAGIC) {
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg(errp, "Image is not in qcow2 format");
|
2014-02-17 17:44:06 +04:00
|
|
|
ret = -EINVAL;
|
2006-08-06 01:14:20 +04:00
|
|
|
goto fail;
|
2010-12-17 18:02:40 +03:00
|
|
|
}
|
2011-12-15 15:20:58 +04:00
|
|
|
if (header.version < 2 || header.version > 3) {
|
2016-03-16 21:54:33 +03:00
|
|
|
error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
|
2011-12-15 15:20:58 +04:00
|
|
|
ret = -ENOTSUP;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->qcow_version = header.version;
|
|
|
|
|
2014-03-26 16:05:41 +04:00
|
|
|
/* Initialise cluster size */
|
|
|
|
if (header.cluster_bits < MIN_CLUSTER_BITS ||
|
|
|
|
header.cluster_bits > MAX_CLUSTER_BITS) {
|
2014-04-29 21:03:12 +04:00
|
|
|
error_setg(errp, "Unsupported cluster size: 2^%" PRIu32,
|
|
|
|
header.cluster_bits);
|
2014-03-26 16:05:41 +04:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->cluster_bits = header.cluster_bits;
|
|
|
|
s->cluster_size = 1 << s->cluster_bits;
|
|
|
|
|
2011-12-15 15:20:58 +04:00
|
|
|
/* Initialise version 3 header fields */
|
|
|
|
if (header.version == 2) {
|
|
|
|
header.incompatible_features = 0;
|
|
|
|
header.compatible_features = 0;
|
|
|
|
header.autoclear_features = 0;
|
|
|
|
header.refcount_order = 4;
|
|
|
|
header.header_length = 72;
|
|
|
|
} else {
|
2018-10-09 20:24:59 +03:00
|
|
|
header.incompatible_features =
|
|
|
|
be64_to_cpu(header.incompatible_features);
|
|
|
|
header.compatible_features = be64_to_cpu(header.compatible_features);
|
|
|
|
header.autoclear_features = be64_to_cpu(header.autoclear_features);
|
|
|
|
header.refcount_order = be32_to_cpu(header.refcount_order);
|
|
|
|
header.header_length = be32_to_cpu(header.header_length);
|
2014-03-26 16:05:41 +04:00
|
|
|
|
|
|
|
if (header.header_length < 104) {
|
|
|
|
error_setg(errp, "qcow2 header too short");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (header.header_length > s->cluster_size) {
|
|
|
|
error_setg(errp, "qcow2 header exceeds cluster size");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
2011-12-15 15:20:58 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
if (header.header_length > sizeof(header)) {
|
|
|
|
s->unknown_header_fields_size = header.header_length - sizeof(header);
|
|
|
|
s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
|
2016-06-20 19:24:02 +03:00
|
|
|
ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
|
2011-12-15 15:20:58 +04:00
|
|
|
s->unknown_header_fields_size);
|
|
|
|
if (ret < 0) {
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
|
|
|
|
"fields");
|
2011-12-15 15:20:58 +04:00
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-26 16:05:42 +04:00
|
|
|
if (header.backing_file_offset > s->cluster_size) {
|
|
|
|
error_setg(errp, "Invalid backing file offset");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2012-04-12 17:20:27 +04:00
|
|
|
if (header.backing_file_offset) {
|
|
|
|
ext_end = header.backing_file_offset;
|
|
|
|
} else {
|
|
|
|
ext_end = 1 << header.cluster_bits;
|
|
|
|
}
|
|
|
|
|
2011-12-15 15:20:58 +04:00
|
|
|
/* Handle feature bits */
|
|
|
|
s->incompatible_features = header.incompatible_features;
|
|
|
|
s->compatible_features = header.compatible_features;
|
|
|
|
s->autoclear_features = header.autoclear_features;
|
|
|
|
|
qcow2: introduce compression type feature
The patch adds some preparation parts for incompatible compression type
feature to qcow2 allowing the use different compression methods for
image clusters (de)compressing.
It is implied that the compression type is set on the image creation and
can be changed only later by image conversion, thus compression type
defines the only compression algorithm used for the image, and thus,
for all image clusters.
The goal of the feature is to add support of other compression methods
to qcow2. For example, ZSTD which is more effective on compression than ZLIB.
The default compression is ZLIB. Images created with ZLIB compression type
are backward compatible with older qemu versions.
Adding of the compression type breaks a number of tests because now the
compression type is reported on image creation and there are some changes
in the qcow2 header in size and offsets.
The tests are fixed in the following ways:
* filter out compression_type for many tests
* fix header size, feature table size and backing file offset
affected tests: 031, 036, 061, 080
header_size +=8: 1 byte compression type
7 bytes padding
feature_table += 48: incompatible feature compression type
backing_file_offset += 56 (8 + 48 -> header_change + feature_table_change)
* add "compression type" for test output matching when it isn't filtered
affected tests: 049, 060, 061, 065, 082, 085, 144, 182, 185, 198, 206,
242, 255, 274, 280
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
QAPI part:
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20200507082521.29210-2-dplotnikov@virtuozzo.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2020-05-07 11:25:18 +03:00
|
|
|
/*
|
|
|
|
* Handle compression type
|
|
|
|
* Older qcow2 images don't contain the compression type header.
|
|
|
|
* Distinguish them by the header length and use
|
|
|
|
* the only valid (default) compression type in that case
|
|
|
|
*/
|
|
|
|
if (header.header_length > offsetof(QCowHeader, compression_type)) {
|
|
|
|
s->compression_type = header.compression_type;
|
|
|
|
} else {
|
|
|
|
s->compression_type = QCOW2_COMPRESSION_TYPE_ZLIB;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = validate_compression_type(s, errp);
|
|
|
|
if (ret) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2012-07-27 12:05:19 +04:00
|
|
|
if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
|
2012-04-12 17:20:27 +04:00
|
|
|
void *feature_table = NULL;
|
|
|
|
qcow2_read_extensions(bs, header.header_length, ext_end,
|
2017-06-28 15:05:08 +03:00
|
|
|
&feature_table, flags, NULL, NULL);
|
2016-03-16 21:54:33 +03:00
|
|
|
report_unsupported_feature(errp, feature_table,
|
2012-07-27 12:05:19 +04:00
|
|
|
s->incompatible_features &
|
|
|
|
~QCOW2_INCOMPAT_MASK);
|
2011-12-15 15:20:58 +04:00
|
|
|
ret = -ENOTSUP;
|
2014-03-28 21:38:58 +04:00
|
|
|
g_free(feature_table);
|
2011-12-15 15:20:58 +04:00
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2013-08-30 16:34:24 +04:00
|
|
|
if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
|
|
|
|
/* Corrupt images may not be written to unless they are being repaired
|
|
|
|
*/
|
|
|
|
if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg(errp, "qcow2: Image is corrupt; cannot be opened "
|
|
|
|
"read/write");
|
2013-08-30 16:34:24 +04:00
|
|
|
ret = -EACCES;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-07-10 19:12:51 +03:00
|
|
|
s->subclusters_per_cluster =
|
|
|
|
has_subclusters(s) ? QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER : 1;
|
|
|
|
s->subcluster_size = s->cluster_size / s->subclusters_per_cluster;
|
|
|
|
s->subcluster_bits = ctz32(s->subcluster_size);
|
|
|
|
|
2020-07-10 19:13:13 +03:00
|
|
|
if (s->subcluster_size < (1 << MIN_CLUSTER_BITS)) {
|
|
|
|
error_setg(errp, "Unsupported subcluster size: %d", s->subcluster_size);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2011-12-15 15:20:58 +04:00
|
|
|
/* Check support for various header values */
|
2015-02-10 23:28:52 +03:00
|
|
|
if (header.refcount_order > 6) {
|
|
|
|
error_setg(errp, "Reference count entry width too large; may not "
|
|
|
|
"exceed 64 bits");
|
|
|
|
ret = -EINVAL;
|
2011-02-09 13:11:07 +03:00
|
|
|
goto fail;
|
|
|
|
}
|
2013-09-03 12:09:53 +04:00
|
|
|
s->refcount_order = header.refcount_order;
|
2015-02-10 23:28:43 +03:00
|
|
|
s->refcount_bits = 1 << s->refcount_order;
|
|
|
|
s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
|
|
|
|
s->refcount_max += s->refcount_max - 1;
|
2011-12-15 15:20:58 +04:00
|
|
|
|
2006-08-06 01:14:20 +04:00
|
|
|
s->crypt_method_header = header.crypt_method;
|
2010-12-17 18:02:40 +03:00
|
|
|
if (s->crypt_method_header) {
|
2016-03-21 17:11:48 +03:00
|
|
|
if (bdrv_uses_whitelist() &&
|
|
|
|
s->crypt_method_header == QCOW_CRYPT_AES) {
|
block: drop support for using qcow[2] encryption with system emulators
Back in the 2.3.0 release we declared qcow[2] encryption as
deprecated, warning people that it would be removed in a future
release.
commit a1f688f4152e65260b94f37543521ceff8bfebe4
Author: Markus Armbruster <armbru@redhat.com>
Date: Fri Mar 13 21:09:40 2015 +0100
block: Deprecate QCOW/QCOW2 encryption
The code still exists today, but by a (happy?) accident we entirely
broke the ability to use qcow[2] encryption in the system emulators
in the 2.4.0 release due to
commit 8336aafae1451d54c81dd2b187b45f7c45d2428e
Author: Daniel P. Berrange <berrange@redhat.com>
Date: Tue May 12 17:09:18 2015 +0100
qcow2/qcow: protect against uninitialized encryption key
This commit was designed to prevent future coding bugs which
might cause QEMU to read/write data on an encrypted block
device in plain text mode before a decryption key is set.
It turns out this preventative measure was a little too good,
because we already had a long standing bug where QEMU read
encrypted data in plain text mode during system emulator
startup, in order to guess disk geometry:
Thread 10 (Thread 0x7fffd3fff700 (LWP 30373)):
#0 0x00007fffe90b1a28 in raise () at /lib64/libc.so.6
#1 0x00007fffe90b362a in abort () at /lib64/libc.so.6
#2 0x00007fffe90aa227 in __assert_fail_base () at /lib64/libc.so.6
#3 0x00007fffe90aa2d2 in () at /lib64/libc.so.6
#4 0x000055555587ae19 in qcow2_co_readv (bs=0x5555562accb0, sector_num=0, remaining_sectors=1, qiov=0x7fffffffd260) at block/qcow2.c:1229
#5 0x000055555589b60d in bdrv_aligned_preadv (bs=bs@entry=0x5555562accb0, req=req@entry=0x7fffd3ffea50, offset=offset@entry=0, bytes=bytes@entry=512, align=align@entry=512, qiov=qiov@entry=0x7fffffffd260, flags=0) at block/io.c:908
#6 0x000055555589b8bc in bdrv_co_do_preadv (bs=0x5555562accb0, offset=0, bytes=512, qiov=0x7fffffffd260, flags=<optimized out>) at block/io.c:999
#7 0x000055555589c375 in bdrv_rw_co_entry (opaque=0x7fffffffd210) at block/io.c:544
#8 0x000055555586933b in coroutine_thread (opaque=0x555557876310) at coroutine-gthread.c:134
#9 0x00007ffff64e1835 in g_thread_proxy (data=0x5555562b5590) at gthread.c:778
#10 0x00007ffff6bb760a in start_thread () at /lib64/libpthread.so.0
#11 0x00007fffe917f59d in clone () at /lib64/libc.so.6
Thread 1 (Thread 0x7ffff7ecab40 (LWP 30343)):
#0 0x00007fffe91797a9 in syscall () at /lib64/libc.so.6
#1 0x00007ffff64ff87f in g_cond_wait (cond=cond@entry=0x555555e085f0 <coroutine_cond>, mutex=mutex@entry=0x555555e08600 <coroutine_lock>) at gthread-posix.c:1397
#2 0x00005555558692c3 in qemu_coroutine_switch (co=<optimized out>) at coroutine-gthread.c:117
#3 0x00005555558692c3 in qemu_coroutine_switch (from_=0x5555562b5e30, to_=to_@entry=0x555557876310, action=action@entry=COROUTINE_ENTER) at coroutine-gthread.c:175
#4 0x0000555555868a90 in qemu_coroutine_enter (co=0x555557876310, opaque=0x0) at qemu-coroutine.c:116
#5 0x0000555555859b84 in thread_pool_completion_bh (opaque=0x7fffd40010e0) at thread-pool.c:187
#6 0x0000555555859514 in aio_bh_poll (ctx=ctx@entry=0x5555562953b0) at async.c:85
#7 0x0000555555864d10 in aio_dispatch (ctx=ctx@entry=0x5555562953b0) at aio-posix.c:135
#8 0x0000555555864f75 in aio_poll (ctx=ctx@entry=0x5555562953b0, blocking=blocking@entry=true) at aio-posix.c:291
#9 0x000055555589c40d in bdrv_prwv_co (bs=bs@entry=0x5555562accb0, offset=offset@entry=0, qiov=qiov@entry=0x7fffffffd260, is_write=is_write@entry=false, flags=flags@entry=(unknown: 0)) at block/io.c:591
#10 0x000055555589c503 in bdrv_rw_co (bs=bs@entry=0x5555562accb0, sector_num=sector_num@entry=0, buf=buf@entry=0x7fffffffd2e0 "\321,", nb_sectors=nb_sectors@entry=21845, is_write=is_write@entry=false, flags=flags@entry=(unknown: 0)) at block/io.c:614
#11 0x000055555589c562 in bdrv_read_unthrottled (nb_sectors=21845, buf=0x7fffffffd2e0 "\321,", sector_num=0, bs=0x5555562accb0) at block/io.c:622
#12 0x000055555589c562 in bdrv_read_unthrottled (bs=0x5555562accb0, sector_num=sector_num@entry=0, buf=buf@entry=0x7fffffffd2e0 "\321,", nb_sectors=nb_sectors@entry=21845) at block/io.c:634
nb_sectors@entry=1) at block/block-backend.c:504
#14 0x0000555555752e9f in guess_disk_lchs (blk=blk@entry=0x5555562a5290, pcylinders=pcylinders@entry=0x7fffffffd52c, pheads=pheads@entry=0x7fffffffd530, psectors=psectors@entry=0x7fffffffd534) at hw/block/hd-geometry.c:68
#15 0x0000555555752ff7 in hd_geometry_guess (blk=0x5555562a5290, pcyls=pcyls@entry=0x555557875d1c, pheads=pheads@entry=0x555557875d20, psecs=psecs@entry=0x555557875d24, ptrans=ptrans@entry=0x555557875d28) at hw/block/hd-geometry.c:133
#16 0x0000555555752b87 in blkconf_geometry (conf=conf@entry=0x555557875d00, ptrans=ptrans@entry=0x555557875d28, cyls_max=cyls_max@entry=65536, heads_max=heads_max@entry=16, secs_max=secs_max@entry=255, errp=errp@entry=0x7fffffffd5e0) at hw/block/block.c:71
#17 0x0000555555799bc4 in ide_dev_initfn (dev=0x555557875c80, kind=IDE_HD) at hw/ide/qdev.c:174
#18 0x0000555555768394 in device_realize (dev=0x555557875c80, errp=0x7fffffffd640) at hw/core/qdev.c:247
#19 0x0000555555769a81 in device_set_realized (obj=0x555557875c80, value=<optimized out>, errp=0x7fffffffd730) at hw/core/qdev.c:1058
#20 0x00005555558240ce in property_set_bool (obj=0x555557875c80, v=<optimized out>, opaque=0x555557875de0, name=<optimized out>, errp=0x7fffffffd730)
at qom/object.c:1514
#21 0x0000555555826c87 in object_property_set_qobject (obj=obj@entry=0x555557875c80, value=value@entry=0x55555784bcb0, name=name@entry=0x55555591cb3d "realized", errp=errp@entry=0x7fffffffd730) at qom/qom-qobject.c:24
#22 0x0000555555825760 in object_property_set_bool (obj=obj@entry=0x555557875c80, value=value@entry=true, name=name@entry=0x55555591cb3d "realized", errp=errp@entry=0x7fffffffd730) at qom/object.c:905
#23 0x000055555576897b in qdev_init_nofail (dev=dev@entry=0x555557875c80) at hw/core/qdev.c:380
#24 0x0000555555799ead in ide_create_drive (bus=bus@entry=0x555557629630, unit=unit@entry=0, drive=0x5555562b77e0) at hw/ide/qdev.c:122
#25 0x000055555579a746 in pci_ide_create_devs (dev=dev@entry=0x555557628db0, hd_table=hd_table@entry=0x7fffffffd830) at hw/ide/pci.c:440
#26 0x000055555579b165 in pci_piix3_ide_init (bus=<optimized out>, hd_table=0x7fffffffd830, devfn=<optimized out>) at hw/ide/piix.c:218
#27 0x000055555568ca55 in pc_init1 (machine=0x5555562960a0, pci_enabled=1, kvmclock_enabled=<optimized out>) at /home/berrange/src/virt/qemu/hw/i386/pc_piix.c:256
#28 0x0000555555603ab2 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4249
So the safety net is correctly preventing QEMU reading cipher
text as if it were plain text, during startup and aborting QEMU
to avoid bad usage of this data.
For added fun this bug only happens if the encrypted qcow2
file happens to have data written to the first cluster,
otherwise the cluster won't be allocated and so qcow2 would
not try the decryption routines at all, just return all 0's.
That no one even noticed, let alone reported, this bug that
has shipped in 2.4.0, 2.5.0 and 2.6.0 shows that the number
of actual users of encrypted qcow2 is approximately zero.
So rather than fix the crash, and backport it to stable
releases, just go ahead with what we have warned users about
and disable any use of qcow2 encryption in the system
emulators. qemu-img/qemu-io/qemu-nbd are still able to access
qcow2 encrypted images for the sake of data conversion.
In the future, qcow2 will gain support for the alternative
luks format, but when this happens it'll be using the
'-object secret' infrastructure for getting keys, which
avoids this problematic scenario entirely.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-06-13 14:30:09 +03:00
|
|
|
error_setg(errp,
|
|
|
|
"Use of AES-CBC encrypted qcow2 images is no longer "
|
|
|
|
"supported in system emulators");
|
|
|
|
error_append_hint(errp,
|
|
|
|
"You can use 'qemu-img convert' to convert your "
|
|
|
|
"image to an alternative supported format, such "
|
|
|
|
"as unencrypted qcow2, or raw with the LUKS "
|
|
|
|
"format instead.\n");
|
|
|
|
ret = -ENOSYS;
|
|
|
|
goto fail;
|
2016-03-21 17:11:48 +03:00
|
|
|
}
|
|
|
|
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
if (s->crypt_method_header == QCOW_CRYPT_AES) {
|
|
|
|
s->crypt_physical_offset = false;
|
|
|
|
} else {
|
|
|
|
/* Assuming LUKS and any future crypt methods we
|
|
|
|
* add will all use physical offsets, due to the
|
|
|
|
* fact that the alternative is insecure... */
|
|
|
|
s->crypt_physical_offset = true;
|
|
|
|
}
|
|
|
|
|
2016-06-24 01:37:26 +03:00
|
|
|
bs->encrypted = true;
|
2010-12-17 18:02:40 +03:00
|
|
|
}
|
2014-03-26 16:05:41 +04:00
|
|
|
|
2020-07-10 19:12:54 +03:00
|
|
|
s->l2_bits = s->cluster_bits - ctz32(l2_entry_size(s));
|
2006-08-06 01:14:20 +04:00
|
|
|
s->l2_size = 1 << s->l2_bits;
|
2014-10-22 16:09:28 +04:00
|
|
|
/* 2^(s->refcount_order - 3) is the refcount width in bytes */
|
|
|
|
s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3);
|
|
|
|
s->refcount_block_size = 1 << s->refcount_block_bits;
|
2018-09-26 19:04:47 +03:00
|
|
|
bs->total_sectors = header.size / BDRV_SECTOR_SIZE;
|
2006-08-06 01:14:20 +04:00
|
|
|
s->csize_shift = (62 - (s->cluster_bits - 8));
|
|
|
|
s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
|
|
|
|
s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
|
2014-03-26 16:05:43 +04:00
|
|
|
|
2006-08-06 01:14:20 +04:00
|
|
|
s->refcount_table_offset = header.refcount_table_offset;
|
2007-09-17 01:08:06 +04:00
|
|
|
s->refcount_table_size =
|
2006-08-06 01:14:20 +04:00
|
|
|
header.refcount_table_clusters << (s->cluster_bits - 3);
|
|
|
|
|
2017-11-03 17:18:53 +03:00
|
|
|
if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) {
|
|
|
|
error_setg(errp, "Image does not contain a reference count table");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2018-03-06 19:14:06 +03:00
|
|
|
ret = qcow2_validate_table(bs, s->refcount_table_offset,
|
|
|
|
header.refcount_table_clusters,
|
|
|
|
s->cluster_size, QCOW_MAX_REFTABLE_SIZE,
|
|
|
|
"Reference count table", errp);
|
2014-03-26 16:05:44 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2019-10-11 18:28:06 +03:00
|
|
|
if (!(flags & BDRV_O_CHECK)) {
|
|
|
|
/*
|
|
|
|
* The total size in bytes of the snapshot table is checked in
|
|
|
|
* qcow2_read_snapshots() because the size of each snapshot is
|
|
|
|
* variable and we don't know it yet.
|
|
|
|
* Here we only check the offset and number of snapshots.
|
|
|
|
*/
|
|
|
|
ret = qcow2_validate_table(bs, header.snapshots_offset,
|
|
|
|
header.nb_snapshots,
|
|
|
|
sizeof(QCowSnapshotHeader),
|
|
|
|
sizeof(QCowSnapshotHeader) *
|
|
|
|
QCOW_MAX_SNAPSHOTS,
|
|
|
|
"Snapshot table", errp);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
2014-03-26 16:05:45 +04:00
|
|
|
}
|
|
|
|
|
2006-08-06 01:14:20 +04:00
|
|
|
/* read the level 1 table */
|
2018-03-06 19:14:06 +03:00
|
|
|
ret = qcow2_validate_table(bs, header.l1_table_offset,
|
2020-08-28 14:08:28 +03:00
|
|
|
header.l1_size, L1E_SIZE,
|
2018-03-06 19:14:06 +03:00
|
|
|
QCOW_MAX_L1_SIZE, "Active L1 table", errp);
|
|
|
|
if (ret < 0) {
|
2014-03-26 16:05:46 +04:00
|
|
|
goto fail;
|
|
|
|
}
|
2006-08-06 01:14:20 +04:00
|
|
|
s->l1_size = header.l1_size;
|
2018-03-06 19:14:06 +03:00
|
|
|
s->l1_table_offset = header.l1_table_offset;
|
2013-05-14 18:14:33 +04:00
|
|
|
|
|
|
|
l1_vm_state_index = size_to_l1(s, header.size);
|
|
|
|
if (l1_vm_state_index > INT_MAX) {
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg(errp, "Image is too big");
|
2013-05-14 18:14:33 +04:00
|
|
|
ret = -EFBIG;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
s->l1_vm_state_index = l1_vm_state_index;
|
|
|
|
|
2006-08-06 01:14:20 +04:00
|
|
|
/* the L1 table must contain at least enough entries to put
|
|
|
|
header.size bytes */
|
2010-12-17 18:02:40 +03:00
|
|
|
if (s->l1_size < s->l1_vm_state_index) {
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg(errp, "L1 table is too small");
|
2010-12-17 18:02:40 +03:00
|
|
|
ret = -EINVAL;
|
2006-08-06 01:14:20 +04:00
|
|
|
goto fail;
|
2010-12-17 18:02:40 +03:00
|
|
|
}
|
2014-03-26 16:05:46 +04:00
|
|
|
|
2009-10-26 18:11:16 +03:00
|
|
|
if (s->l1_size > 0) {
|
2020-08-28 14:08:28 +03:00
|
|
|
s->l1_table = qemu_try_blockalign(bs->file->bs, s->l1_size * L1E_SIZE);
|
2014-05-20 19:12:47 +04:00
|
|
|
if (s->l1_table == NULL) {
|
|
|
|
error_setg(errp, "Could not allocate L1 table");
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto fail;
|
|
|
|
}
|
2016-06-20 19:24:02 +03:00
|
|
|
ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
|
2020-08-28 14:08:28 +03:00
|
|
|
s->l1_size * L1E_SIZE);
|
2010-12-17 18:02:40 +03:00
|
|
|
if (ret < 0) {
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg_errno(errp, -ret, "Could not read L1 table");
|
2009-10-26 18:11:16 +03:00
|
|
|
goto fail;
|
2010-12-17 18:02:40 +03:00
|
|
|
}
|
2009-10-26 18:11:16 +03:00
|
|
|
for(i = 0;i < s->l1_size; i++) {
|
2018-10-09 20:24:59 +03:00
|
|
|
s->l1_table[i] = be64_to_cpu(s->l1_table[i]);
|
2009-10-26 18:11:16 +03:00
|
|
|
}
|
2006-08-06 01:14:20 +04:00
|
|
|
}
|
2011-01-10 19:17:28 +03:00
|
|
|
|
2015-04-16 12:44:26 +03:00
|
|
|
/* Parse driver-specific options */
|
|
|
|
ret = qcow2_update_options(bs, options, flags, errp);
|
2015-04-16 12:36:10 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2011-11-15 01:09:46 +04:00
|
|
|
s->flags = flags;
|
2007-09-17 12:09:54 +04:00
|
|
|
|
2010-12-17 18:02:40 +03:00
|
|
|
ret = qcow2_refcount_init(bs);
|
|
|
|
if (ret != 0) {
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg_errno(errp, -ret, "Could not initialize refcount handling");
|
2006-08-06 01:14:20 +04:00
|
|
|
goto fail;
|
2010-12-17 18:02:40 +03:00
|
|
|
}
|
2006-08-06 01:14:20 +04:00
|
|
|
|
2009-09-12 11:36:22 +04:00
|
|
|
QLIST_INIT(&s->cluster_allocs);
|
2013-06-19 15:44:20 +04:00
|
|
|
QTAILQ_INIT(&s->discards);
|
2009-08-31 18:48:49 +04:00
|
|
|
|
2009-03-28 20:55:06 +03:00
|
|
|
/* read qcow2 extensions */
|
2013-09-05 11:40:43 +04:00
|
|
|
if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL,
|
2020-07-07 19:06:03 +03:00
|
|
|
flags, &update_header, errp)) {
|
2010-12-17 18:02:40 +03:00
|
|
|
ret = -EINVAL;
|
2009-03-28 20:55:06 +03:00
|
|
|
goto fail;
|
2010-12-17 18:02:40 +03:00
|
|
|
}
|
2009-03-28 20:55:06 +03:00
|
|
|
|
2019-01-29 19:13:57 +03:00
|
|
|
/* Open external data file */
|
2020-05-13 14:05:35 +03:00
|
|
|
s->data_file = bdrv_open_child(NULL, options, "data-file", bs,
|
|
|
|
&child_of_bds, BDRV_CHILD_DATA,
|
2021-02-02 15:49:45 +03:00
|
|
|
true, errp);
|
|
|
|
if (*errp) {
|
2019-01-29 19:13:57 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
|
2019-01-15 21:02:40 +03:00
|
|
|
if (!s->data_file && s->image_data_file) {
|
|
|
|
s->data_file = bdrv_open_child(s->image_data_file, options,
|
2020-05-13 14:05:35 +03:00
|
|
|
"data-file", bs, &child_of_bds,
|
|
|
|
BDRV_CHILD_DATA, false, errp);
|
2019-01-15 21:02:40 +03:00
|
|
|
if (!s->data_file) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
2019-01-29 19:13:57 +03:00
|
|
|
if (!s->data_file) {
|
|
|
|
error_setg(errp, "'data-file' is required for this image");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
2020-05-13 14:05:35 +03:00
|
|
|
|
|
|
|
/* No data here */
|
|
|
|
bs->file->role &= ~BDRV_CHILD_DATA;
|
|
|
|
|
|
|
|
/* Must succeed because we have given up permissions if anything */
|
|
|
|
bdrv_child_refresh_perms(bs, bs->file, &error_abort);
|
2019-01-29 19:13:57 +03:00
|
|
|
} else {
|
|
|
|
if (s->data_file) {
|
|
|
|
error_setg(errp, "'data-file' can only be set for images with an "
|
|
|
|
"external data file");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
2019-02-22 16:29:38 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
s->data_file = bs->file;
|
|
|
|
|
|
|
|
if (data_file_is_raw(bs)) {
|
|
|
|
error_setg(errp, "data-file-raw requires a data file");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
2019-01-29 19:13:57 +03:00
|
|
|
}
|
|
|
|
}
|
2019-01-14 18:48:25 +03:00
|
|
|
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
/* qcow2_read_extension may have set up the crypto context
|
|
|
|
* if the crypt method needs a header region, some methods
|
|
|
|
* don't need header extensions, so must check here
|
|
|
|
*/
|
|
|
|
if (s->crypt_method_header && !s->crypto) {
|
|
|
|
if (s->crypt_method_header == QCOW_CRYPT_AES) {
|
|
|
|
unsigned int cflags = 0;
|
|
|
|
if (flags & BDRV_O_NO_IO) {
|
|
|
|
cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
|
|
|
|
}
|
2017-06-23 19:24:17 +03:00
|
|
|
s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
|
2019-05-06 17:27:41 +03:00
|
|
|
NULL, NULL, cflags,
|
|
|
|
QCOW2_MAX_THREADS, errp);
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
if (!s->crypto) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
} else if (!(flags & BDRV_O_NO_IO)) {
|
|
|
|
error_setg(errp, "Missing CRYPTO header for crypt method %d",
|
|
|
|
s->crypt_method_header);
|
2017-06-23 19:24:10 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-08-06 01:14:20 +04:00
|
|
|
/* read the backing file name */
|
|
|
|
if (header.backing_file_offset != 0) {
|
|
|
|
len = header.backing_file_size;
|
2015-01-22 16:03:30 +03:00
|
|
|
if (len > MIN(1023, s->cluster_size - header.backing_file_offset) ||
|
2015-01-27 16:33:55 +03:00
|
|
|
len >= sizeof(bs->backing_file)) {
|
2014-03-26 16:05:47 +04:00
|
|
|
error_setg(errp, "Backing file name too long");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
2010-12-17 18:02:40 +03:00
|
|
|
}
|
2016-06-20 19:24:02 +03:00
|
|
|
ret = bdrv_pread(bs->file, header.backing_file_offset,
|
block: Add BDS.auto_backing_file
If the backing file is overridden, this most probably does change the
guest-visible data of a BDS. Therefore, we will need to consider this
in bdrv_refresh_filename().
To see whether it has been overridden, we might want to compare
bs->backing_file and bs->backing->bs->filename. However,
bs->backing_file is changed by bdrv_set_backing_hd() (which is just used
to change the backing child at runtime, without modifying the image
header), so bs->backing_file most of the time simply contains a copy of
bs->backing->bs->filename anyway, so it is useless for such a
comparison.
This patch adds an auto_backing_file BDS field which contains the
backing file path as indicated by the image header, which is not changed
by bdrv_set_backing_hd().
Because of bdrv_refresh_filename() magic, however, a BDS's filename may
differ from what has been specified during bdrv_open(). Then, the
comparison between bs->auto_backing_file and bs->backing->bs->filename
may fail even though bs->backing was opened from bs->auto_backing_file.
To mitigate this, we can copy the real BDS's filename (after the whole
bdrv_open() and bdrv_refresh_filename() process) into
bs->auto_backing_file, if we know the former has been opened based on
the latter. This is only possible if no options modifying the backing
file's behavior have been specified, though. To simplify things, this
patch only copies the filename from the backing file if no options have
been specified for it at all.
Furthermore, there are cases where an overlay is created by qemu which
already contains a BDS's filename (e.g. in blockdev-snapshot-sync). We
do not need to worry about updating the overlay's bs->auto_backing_file
there, because we actually wrote a post-bdrv_refresh_filename() filename
into the image header.
So all in all, there will be false negatives where (as of a future
patch) bdrv_refresh_filename() will assume that the backing file differs
from what was specified in the image header, even though it really does
not. However, these cases should be limited to where (1) the user
actually did override something in the backing chain (e.g. by specifying
options for the backing file), or (2) the user executed a QMP command to
change some node's backing file (e.g. change-backing-file or
block-commit with @backing-file given) where the given filename does not
happen to coincide with qemu's idea of the backing BDS's filename.
Then again, (1) really is limited to -drive. With -blockdev or
blockdev-add, you have to adhere to the schema, so a user cannot give
partial "unimportant" options (e.g. by just setting backing.node-name
and leaving the rest to the image header). Therefore, trying to fix
this would mean trying to fix something for -drive only.
To improve on (2), we would need a full infrastructure to "canonicalize"
an arbitrary filename (+ options), so it can be compared against
another. That seems a bit over the top, considering that filenames
nowadays are there mostly for the user's entertainment.
Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20190201192935.18394-5-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2019-02-01 22:29:08 +03:00
|
|
|
bs->auto_backing_file, len);
|
2010-12-17 18:02:40 +03:00
|
|
|
if (ret < 0) {
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg_errno(errp, -ret, "Could not read backing file name");
|
2006-08-06 01:14:20 +04:00
|
|
|
goto fail;
|
2010-12-17 18:02:40 +03:00
|
|
|
}
|
block: Add BDS.auto_backing_file
If the backing file is overridden, this most probably does change the
guest-visible data of a BDS. Therefore, we will need to consider this
in bdrv_refresh_filename().
To see whether it has been overridden, we might want to compare
bs->backing_file and bs->backing->bs->filename. However,
bs->backing_file is changed by bdrv_set_backing_hd() (which is just used
to change the backing child at runtime, without modifying the image
header), so bs->backing_file most of the time simply contains a copy of
bs->backing->bs->filename anyway, so it is useless for such a
comparison.
This patch adds an auto_backing_file BDS field which contains the
backing file path as indicated by the image header, which is not changed
by bdrv_set_backing_hd().
Because of bdrv_refresh_filename() magic, however, a BDS's filename may
differ from what has been specified during bdrv_open(). Then, the
comparison between bs->auto_backing_file and bs->backing->bs->filename
may fail even though bs->backing was opened from bs->auto_backing_file.
To mitigate this, we can copy the real BDS's filename (after the whole
bdrv_open() and bdrv_refresh_filename() process) into
bs->auto_backing_file, if we know the former has been opened based on
the latter. This is only possible if no options modifying the backing
file's behavior have been specified, though. To simplify things, this
patch only copies the filename from the backing file if no options have
been specified for it at all.
Furthermore, there are cases where an overlay is created by qemu which
already contains a BDS's filename (e.g. in blockdev-snapshot-sync). We
do not need to worry about updating the overlay's bs->auto_backing_file
there, because we actually wrote a post-bdrv_refresh_filename() filename
into the image header.
So all in all, there will be false negatives where (as of a future
patch) bdrv_refresh_filename() will assume that the backing file differs
from what was specified in the image header, even though it really does
not. However, these cases should be limited to where (1) the user
actually did override something in the backing chain (e.g. by specifying
options for the backing file), or (2) the user executed a QMP command to
change some node's backing file (e.g. change-backing-file or
block-commit with @backing-file given) where the given filename does not
happen to coincide with qemu's idea of the backing BDS's filename.
Then again, (1) really is limited to -drive. With -blockdev or
blockdev-add, you have to adhere to the schema, so a user cannot give
partial "unimportant" options (e.g. by just setting backing.node-name
and leaving the rest to the image header). Therefore, trying to fix
this would mean trying to fix something for -drive only.
To improve on (2), we would need a full infrastructure to "canonicalize"
an arbitrary filename (+ options), so it can be compared against
another. That seems a bit over the top, considering that filenames
nowadays are there mostly for the user's entertainment.
Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20190201192935.18394-5-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2019-02-01 22:29:08 +03:00
|
|
|
bs->auto_backing_file[len] = '\0';
|
|
|
|
pstrcpy(bs->backing_file, sizeof(bs->backing_file),
|
|
|
|
bs->auto_backing_file);
|
|
|
|
s->image_backing_file = g_strdup(bs->auto_backing_file);
|
2006-08-06 01:14:20 +04:00
|
|
|
}
|
2011-11-16 14:43:28 +04:00
|
|
|
|
2019-10-11 18:28:06 +03:00
|
|
|
/*
|
|
|
|
* Internal snapshots; skip reading them in check mode, because
|
|
|
|
* we do not need them then, and we do not want to abort because
|
|
|
|
* of a broken table.
|
|
|
|
*/
|
|
|
|
if (!(flags & BDRV_O_CHECK)) {
|
|
|
|
s->snapshots_offset = header.snapshots_offset;
|
|
|
|
s->nb_snapshots = header.nb_snapshots;
|
2014-03-26 16:06:04 +04:00
|
|
|
|
2019-10-11 18:28:06 +03:00
|
|
|
ret = qcow2_read_snapshots(bs, errp);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
2010-12-17 18:02:40 +03:00
|
|
|
}
|
2006-08-06 01:14:20 +04:00
|
|
|
|
2012-06-14 14:42:23 +04:00
|
|
|
/* Clear unknown autoclear feature bits */
|
2017-06-28 15:05:08 +03:00
|
|
|
update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK;
|
2021-05-27 18:40:54 +03:00
|
|
|
update_header = update_header && bdrv_is_writable(bs);
|
2017-06-28 15:05:11 +03:00
|
|
|
if (update_header) {
|
2017-06-28 15:05:08 +03:00
|
|
|
s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
|
2017-06-28 15:05:11 +03:00
|
|
|
}
|
|
|
|
|
dirty-bitmaps: clean-up bitmaps loading and migration logic
This patch aims to bring the following behavior:
1. We don't load bitmaps, when started in inactive mode. It's the case
of incoming migration. In this case we wait for bitmaps migration
through migration channel (if 'dirty-bitmaps' capability is enabled) or
for invalidation (to load bitmaps from the image).
2. We don't remove persistent bitmaps on inactivation. Instead, we only
remove bitmaps after storing. This is the only way to restore bitmaps,
if we decided to resume source after [failed] migration with
'dirty-bitmaps' capability enabled (which means, that bitmaps were not
stored).
3. We load bitmaps on open and any invalidation, it's ok for all cases:
- normal open
- migration target invalidation with dirty-bitmaps capability
(bitmaps are migrating through migration channel, the are not
stored, so they should have IN_USE flag set and will be skipped
when loading. However, it would fail if bitmaps are read-only[1])
- migration target invalidation without dirty-bitmaps capability
(normal load of the bitmaps, if migrated with shared storage)
- source invalidation with dirty-bitmaps capability
(skip because IN_USE)
- source invalidation without dirty-bitmaps capability
(bitmaps were dropped, reload them)
[1]: to accurately handle this, migration of read-only bitmaps is
explicitly forbidden in this patch.
New mechanism for not storing bitmaps when migrate with dirty-bitmaps
capability is introduced: migration filed in BdrvDirtyBitmap.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: John Snow <jsnow@redhat.com>
2018-10-29 23:23:17 +03:00
|
|
|
/* == Handle persistent dirty bitmaps ==
|
|
|
|
*
|
|
|
|
* We want load dirty bitmaps in three cases:
|
|
|
|
*
|
|
|
|
* 1. Normal open of the disk in active mode, not related to invalidation
|
|
|
|
* after migration.
|
|
|
|
*
|
|
|
|
* 2. Invalidation of the target vm after pre-copy phase of migration, if
|
|
|
|
* bitmaps are _not_ migrating through migration channel, i.e.
|
|
|
|
* 'dirty-bitmaps' capability is disabled.
|
|
|
|
*
|
|
|
|
* 3. Invalidation of source vm after failed or canceled migration.
|
|
|
|
* This is a very interesting case. There are two possible types of
|
|
|
|
* bitmaps:
|
|
|
|
*
|
|
|
|
* A. Stored on inactivation and removed. They should be loaded from the
|
|
|
|
* image.
|
|
|
|
*
|
|
|
|
* B. Not stored: not-persistent bitmaps and bitmaps, migrated through
|
|
|
|
* the migration channel (with dirty-bitmaps capability).
|
|
|
|
*
|
|
|
|
* On the other hand, there are two possible sub-cases:
|
|
|
|
*
|
|
|
|
* 3.1 disk was changed by somebody else while were inactive. In this
|
|
|
|
* case all in-RAM dirty bitmaps (both persistent and not) are
|
|
|
|
* definitely invalid. And we don't have any method to determine
|
|
|
|
* this.
|
|
|
|
*
|
|
|
|
* Simple and safe thing is to just drop all the bitmaps of type B on
|
|
|
|
* inactivation. But in this case we lose bitmaps in valid 4.2 case.
|
|
|
|
*
|
|
|
|
* On the other hand, resuming source vm, if disk was already changed
|
|
|
|
* is a bad thing anyway: not only bitmaps, the whole vm state is
|
|
|
|
* out of sync with disk.
|
|
|
|
*
|
|
|
|
* This means, that user or management tool, who for some reason
|
|
|
|
* decided to resume source vm, after disk was already changed by
|
|
|
|
* target vm, should at least drop all dirty bitmaps by hand.
|
|
|
|
*
|
|
|
|
* So, we can ignore this case for now, but TODO: "generation"
|
|
|
|
* extension for qcow2, to determine, that image was changed after
|
|
|
|
* last inactivation. And if it is changed, we will drop (or at least
|
|
|
|
* mark as 'invalid' all the bitmaps of type B, both persistent
|
|
|
|
* and not).
|
|
|
|
*
|
|
|
|
* 3.2 disk was _not_ changed while were inactive. Bitmaps may be saved
|
|
|
|
* to disk ('dirty-bitmaps' capability disabled), or not saved
|
|
|
|
* ('dirty-bitmaps' capability enabled), but we don't need to care
|
|
|
|
* of: let's load bitmaps as always: stored bitmaps will be loaded,
|
|
|
|
* and not stored has flag IN_USE=1 in the image and will be skipped
|
|
|
|
* on loading.
|
|
|
|
*
|
|
|
|
* One remaining possible case when we don't want load bitmaps:
|
|
|
|
*
|
|
|
|
* 4. Open disk in inactive mode in target vm (bitmaps are migrating or
|
|
|
|
* will be loaded on invalidation, no needs try loading them before)
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) {
|
|
|
|
/* It's case 1, 2 or 3.2. Or 3.1 which is BUG in management layer. */
|
2021-02-02 15:49:51 +03:00
|
|
|
bool header_updated;
|
|
|
|
if (!qcow2_load_dirty_bitmaps(bs, &header_updated, errp)) {
|
2019-12-18 14:53:35 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
dirty-bitmaps: clean-up bitmaps loading and migration logic
This patch aims to bring the following behavior:
1. We don't load bitmaps, when started in inactive mode. It's the case
of incoming migration. In this case we wait for bitmaps migration
through migration channel (if 'dirty-bitmaps' capability is enabled) or
for invalidation (to load bitmaps from the image).
2. We don't remove persistent bitmaps on inactivation. Instead, we only
remove bitmaps after storing. This is the only way to restore bitmaps,
if we decided to resume source after [failed] migration with
'dirty-bitmaps' capability enabled (which means, that bitmaps were not
stored).
3. We load bitmaps on open and any invalidation, it's ok for all cases:
- normal open
- migration target invalidation with dirty-bitmaps capability
(bitmaps are migrating through migration channel, the are not
stored, so they should have IN_USE flag set and will be skipped
when loading. However, it would fail if bitmaps are read-only[1])
- migration target invalidation without dirty-bitmaps capability
(normal load of the bitmaps, if migrated with shared storage)
- source invalidation with dirty-bitmaps capability
(skip because IN_USE)
- source invalidation without dirty-bitmaps capability
(bitmaps were dropped, reload them)
[1]: to accurately handle this, migration of read-only bitmaps is
explicitly forbidden in this patch.
New mechanism for not storing bitmaps when migrate with dirty-bitmaps
capability is introduced: migration filed in BdrvDirtyBitmap.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: John Snow <jsnow@redhat.com>
2018-10-29 23:23:17 +03:00
|
|
|
|
|
|
|
update_header = update_header && !header_updated;
|
2017-06-28 15:05:11 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (update_header) {
|
2012-06-14 14:42:23 +04:00
|
|
|
ret = qcow2_update_header(bs);
|
|
|
|
if (ret < 0) {
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg_errno(errp, -ret, "Could not update qcow2 header");
|
2012-06-14 14:42:23 +04:00
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-11-22 18:57:48 +03:00
|
|
|
bs->supported_zero_flags = header.version >= 3 ?
|
|
|
|
BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK : 0;
|
2020-04-24 15:54:42 +03:00
|
|
|
bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
|
2011-06-30 19:42:09 +04:00
|
|
|
|
2012-07-27 12:05:19 +04:00
|
|
|
/* Repair image if dirty */
|
2021-05-27 18:40:54 +03:00
|
|
|
if (!(flags & BDRV_O_CHECK) && bdrv_is_writable(bs) &&
|
2012-08-09 16:05:56 +04:00
|
|
|
(s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
|
2012-07-27 12:05:19 +04:00
|
|
|
BdrvCheckResult result = {0};
|
|
|
|
|
2018-03-01 19:36:19 +03:00
|
|
|
ret = qcow2_co_check_locked(bs, &result,
|
|
|
|
BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
|
2017-11-10 23:31:07 +03:00
|
|
|
if (ret < 0 || result.check_errors) {
|
|
|
|
if (ret >= 0) {
|
|
|
|
ret = -EIO;
|
|
|
|
}
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg_errno(errp, -ret, "Could not repair dirty image");
|
2012-07-27 12:05:19 +04:00
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-08-06 01:14:20 +04:00
|
|
|
#ifdef DEBUG_ALLOC
|
2011-08-04 21:22:10 +04:00
|
|
|
{
|
|
|
|
BdrvCheckResult result = {0};
|
2012-06-15 19:41:07 +04:00
|
|
|
qcow2_check_refcounts(bs, &result, 0);
|
2011-08-04 21:22:10 +04:00
|
|
|
}
|
2006-08-06 01:14:20 +04:00
|
|
|
#endif
|
2018-06-20 17:48:37 +03:00
|
|
|
|
2019-05-06 17:27:38 +03:00
|
|
|
qemu_co_queue_init(&s->thread_task_queue);
|
2018-06-20 17:48:37 +03:00
|
|
|
|
2010-12-17 18:02:40 +03:00
|
|
|
return ret;
|
2006-08-06 01:14:20 +04:00
|
|
|
|
|
|
|
fail:
|
2019-01-15 21:02:40 +03:00
|
|
|
g_free(s->image_data_file);
|
2019-01-29 19:13:57 +03:00
|
|
|
if (has_data_file(bs)) {
|
|
|
|
bdrv_unref_child(bs, s->data_file);
|
2020-03-16 09:06:31 +03:00
|
|
|
s->data_file = NULL;
|
2019-01-29 19:13:57 +03:00
|
|
|
}
|
2011-12-15 15:20:58 +04:00
|
|
|
g_free(s->unknown_header_fields);
|
2012-02-02 17:52:08 +04:00
|
|
|
cleanup_unknown_header_ext(bs);
|
2009-05-28 18:07:07 +04:00
|
|
|
qcow2_free_snapshots(bs);
|
|
|
|
qcow2_refcount_close(bs);
|
2014-05-20 19:12:47 +04:00
|
|
|
qemu_vfree(s->l1_table);
|
2013-08-30 16:34:26 +04:00
|
|
|
/* else pre-write overlap checks in cache_destroy may crash */
|
|
|
|
s->l1_table = NULL;
|
2015-08-04 15:14:40 +03:00
|
|
|
cache_clean_timer_del(bs);
|
2011-01-10 19:17:28 +03:00
|
|
|
if (s->l2_table_cache) {
|
2018-02-05 17:33:08 +03:00
|
|
|
qcow2_cache_destroy(s->l2_table_cache);
|
2011-01-10 19:17:28 +03:00
|
|
|
}
|
2014-03-28 21:38:58 +04:00
|
|
|
if (s->refcount_block_cache) {
|
2018-02-05 17:33:08 +03:00
|
|
|
qcow2_cache_destroy(s->refcount_block_cache);
|
2014-03-28 21:38:58 +04:00
|
|
|
}
|
2017-06-23 19:24:10 +03:00
|
|
|
qcrypto_block_free(s->crypto);
|
|
|
|
qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
|
2010-12-17 18:02:40 +03:00
|
|
|
return ret;
|
2006-08-06 01:14:20 +04:00
|
|
|
}
|
|
|
|
|
2018-03-01 19:36:16 +03:00
|
|
|
typedef struct QCow2OpenCo {
|
|
|
|
BlockDriverState *bs;
|
|
|
|
QDict *options;
|
|
|
|
int flags;
|
|
|
|
Error **errp;
|
|
|
|
int ret;
|
|
|
|
} QCow2OpenCo;
|
|
|
|
|
|
|
|
static void coroutine_fn qcow2_open_entry(void *opaque)
|
|
|
|
{
|
|
|
|
QCow2OpenCo *qoc = opaque;
|
|
|
|
BDRVQcow2State *s = qoc->bs->opaque;
|
|
|
|
|
|
|
|
qemu_co_mutex_lock(&s->lock);
|
|
|
|
qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, qoc->errp);
|
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
|
|
|
}
|
|
|
|
|
2016-12-16 20:52:37 +03:00
|
|
|
static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
|
|
|
|
Error **errp)
|
|
|
|
{
|
2018-03-01 19:36:16 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
QCow2OpenCo qoc = {
|
|
|
|
.bs = bs,
|
|
|
|
.options = options,
|
|
|
|
.flags = flags,
|
|
|
|
.errp = errp,
|
|
|
|
.ret = -EINPROGRESS
|
|
|
|
};
|
|
|
|
|
2020-05-13 14:05:35 +03:00
|
|
|
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
|
|
|
BDRV_CHILD_IMAGE, false, errp);
|
2016-12-16 20:52:37 +03:00
|
|
|
if (!bs->file) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-03-01 19:36:16 +03:00
|
|
|
/* Initialise locks */
|
|
|
|
qemu_co_mutex_init(&s->lock);
|
|
|
|
|
|
|
|
if (qemu_in_coroutine()) {
|
|
|
|
/* From bdrv_co_create. */
|
|
|
|
qcow2_open_entry(&qoc);
|
|
|
|
} else {
|
block: Fix hangs in synchronous APIs with iothreads
In the block layer, synchronous APIs are often implemented by creating a
coroutine that calls the asynchronous coroutine-based implementation and
then waiting for completion with BDRV_POLL_WHILE().
For this to work with iothreads (more specifically, when the synchronous
API is called in a thread that is not the home thread of the block
device, so that the coroutine will run in a different thread), we must
make sure to call aio_wait_kick() at the end of the operation. Many
places are missing this, so that BDRV_POLL_WHILE() keeps hanging even if
the condition has long become false.
Note that bdrv_dec_in_flight() involves an aio_wait_kick() call. This
corresponds to the BDRV_POLL_WHILE() in the drain functions, but it is
generally not enough for most other operations because they haven't set
the return value in the coroutine entry stub yet. To avoid race
conditions there, we need to kick after setting the return value.
The race window is small enough that the problem doesn't usually surface
in the common path. However, it does surface and causes easily
reproducible hangs if the operation can return early before even calling
bdrv_inc/dec_in_flight, which many of them do (trivial error or no-op
success paths).
The bug in bdrv_truncate(), bdrv_check() and bdrv_invalidate_cache() is
slightly different: These functions even neglected to schedule the
coroutine in the home thread of the node. This avoids the hang, but is
obviously wrong, too. Fix those to schedule the coroutine in the right
AioContext in addition to adding aio_wait_kick() calls.
Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2019-01-07 15:02:48 +03:00
|
|
|
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
|
2018-03-01 19:36:16 +03:00
|
|
|
qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc));
|
|
|
|
BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS);
|
|
|
|
}
|
|
|
|
return qoc.ret;
|
2016-12-16 20:52:37 +03:00
|
|
|
}
|
|
|
|
|
2014-07-16 19:48:16 +04:00
|
|
|
static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
|
2013-12-11 22:26:16 +04:00
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2013-12-11 22:26:16 +04:00
|
|
|
|
2016-06-24 01:37:15 +03:00
|
|
|
if (bs->encrypted) {
|
|
|
|
/* Encryption works on a sector granularity */
|
2018-10-11 13:58:02 +03:00
|
|
|
bs->bl.request_alignment = qcrypto_block_get_sector_size(s->crypto);
|
2016-06-24 01:37:15 +03:00
|
|
|
}
|
2020-07-10 19:13:10 +03:00
|
|
|
bs->bl.pwrite_zeroes_alignment = s->subcluster_size;
|
2016-11-17 23:13:55 +03:00
|
|
|
bs->bl.pdiscard_alignment = s->cluster_size;
|
2013-12-11 22:26:16 +04:00
|
|
|
}
|
|
|
|
|
2012-09-20 23:13:28 +04:00
|
|
|
static int qcow2_reopen_prepare(BDRVReopenState *state,
|
|
|
|
BlockReopenQueue *queue, Error **errp)
|
|
|
|
{
|
2021-07-08 14:47:04 +03:00
|
|
|
BDRVQcow2State *s = state->bs->opaque;
|
2015-04-16 14:42:27 +03:00
|
|
|
Qcow2ReopenState *r;
|
2014-04-03 15:47:50 +04:00
|
|
|
int ret;
|
|
|
|
|
2015-04-16 14:42:27 +03:00
|
|
|
r = g_new0(Qcow2ReopenState, 1);
|
|
|
|
state->opaque = r;
|
|
|
|
|
|
|
|
ret = qcow2_update_options_prepare(state->bs, r, state->options,
|
|
|
|
state->flags, errp);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We need to write out any unwritten data if we reopen read-only. */
|
2014-04-03 15:47:50 +04:00
|
|
|
if ((state->flags & BDRV_O_RDWR) == 0) {
|
2017-06-28 15:05:20 +03:00
|
|
|
ret = qcow2_reopen_bitmaps_ro(state->bs, errp);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2014-04-03 15:47:50 +04:00
|
|
|
ret = bdrv_flush(state->bs);
|
|
|
|
if (ret < 0) {
|
2015-04-16 14:42:27 +03:00
|
|
|
goto fail;
|
2014-04-03 15:47:50 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
ret = qcow2_mark_clean(state->bs);
|
|
|
|
if (ret < 0) {
|
2015-04-16 14:42:27 +03:00
|
|
|
goto fail;
|
2014-04-03 15:47:50 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-07-08 14:47:04 +03:00
|
|
|
/*
|
|
|
|
* Without an external data file, s->data_file points to the same BdrvChild
|
|
|
|
* as bs->file. It needs to be resynced after reopen because bs->file may
|
|
|
|
* be changed. We can't use it in the meantime.
|
|
|
|
*/
|
|
|
|
if (!has_data_file(state->bs)) {
|
|
|
|
assert(s->data_file == state->bs->file);
|
|
|
|
s->data_file = NULL;
|
|
|
|
}
|
|
|
|
|
2012-09-20 23:13:28 +04:00
|
|
|
return 0;
|
2015-04-16 14:42:27 +03:00
|
|
|
|
|
|
|
fail:
|
|
|
|
qcow2_update_options_abort(state->bs, r);
|
|
|
|
g_free(r);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void qcow2_reopen_commit(BDRVReopenState *state)
|
|
|
|
{
|
2021-07-08 14:47:04 +03:00
|
|
|
BDRVQcow2State *s = state->bs->opaque;
|
|
|
|
|
2015-04-16 14:42:27 +03:00
|
|
|
qcow2_update_options_commit(state->bs, state->opaque);
|
2021-07-08 14:47:04 +03:00
|
|
|
if (!s->data_file) {
|
|
|
|
/*
|
|
|
|
* If we don't have an external data file, s->data_file was cleared by
|
|
|
|
* qcow2_reopen_prepare() and needs to be updated.
|
|
|
|
*/
|
|
|
|
s->data_file = state->bs->file;
|
|
|
|
}
|
2020-02-28 15:44:47 +03:00
|
|
|
g_free(state->opaque);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void qcow2_reopen_commit_post(BDRVReopenState *state)
|
|
|
|
{
|
2019-09-27 15:23:55 +03:00
|
|
|
if (state->flags & BDRV_O_RDWR) {
|
|
|
|
Error *local_err = NULL;
|
|
|
|
|
|
|
|
if (qcow2_reopen_bitmaps_rw(state->bs, &local_err) < 0) {
|
|
|
|
/*
|
|
|
|
* This is not fatal, bitmaps just left read-only, so all following
|
|
|
|
* writes will fail. User can remove read-only bitmaps to unblock
|
|
|
|
* writes or retry reopen.
|
|
|
|
*/
|
|
|
|
error_reportf_err(local_err,
|
|
|
|
"%s: Failed to make dirty bitmaps writable: ",
|
|
|
|
bdrv_get_node_name(state->bs));
|
|
|
|
}
|
|
|
|
}
|
2015-04-16 14:42:27 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static void qcow2_reopen_abort(BDRVReopenState *state)
|
|
|
|
{
|
2021-07-08 14:47:04 +03:00
|
|
|
BDRVQcow2State *s = state->bs->opaque;
|
|
|
|
|
|
|
|
if (!s->data_file) {
|
|
|
|
/*
|
|
|
|
* If we don't have an external data file, s->data_file was cleared by
|
|
|
|
* qcow2_reopen_prepare() and needs to be restored.
|
|
|
|
*/
|
|
|
|
s->data_file = state->bs->file;
|
|
|
|
}
|
2015-04-16 14:42:27 +03:00
|
|
|
qcow2_update_options_abort(state->bs, state->opaque);
|
|
|
|
g_free(state->opaque);
|
2012-09-20 23:13:28 +04:00
|
|
|
}
|
|
|
|
|
2015-11-16 17:34:59 +03:00
|
|
|
static void qcow2_join_options(QDict *options, QDict *old_options)
|
|
|
|
{
|
|
|
|
bool has_new_overlap_template =
|
|
|
|
qdict_haskey(options, QCOW2_OPT_OVERLAP) ||
|
|
|
|
qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE);
|
|
|
|
bool has_new_total_cache_size =
|
|
|
|
qdict_haskey(options, QCOW2_OPT_CACHE_SIZE);
|
|
|
|
bool has_all_cache_options;
|
|
|
|
|
|
|
|
/* New overlap template overrides all old overlap options */
|
|
|
|
if (has_new_overlap_template) {
|
|
|
|
qdict_del(old_options, QCOW2_OPT_OVERLAP);
|
|
|
|
qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE);
|
|
|
|
qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER);
|
|
|
|
qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1);
|
|
|
|
qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2);
|
|
|
|
qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE);
|
|
|
|
qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK);
|
|
|
|
qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE);
|
|
|
|
qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1);
|
|
|
|
qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* New total cache size overrides all old options */
|
|
|
|
if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) {
|
|
|
|
qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE);
|
|
|
|
qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
|
|
|
|
}
|
|
|
|
|
|
|
|
qdict_join(options, old_options, false);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If after merging all cache size options are set, an old total size is
|
|
|
|
* overwritten. Do keep all options, however, if all three are new. The
|
|
|
|
* resulting error message is what we want to happen.
|
|
|
|
*/
|
|
|
|
has_all_cache_options =
|
|
|
|
qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) ||
|
|
|
|
qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) ||
|
|
|
|
qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
|
|
|
|
|
|
|
|
if (has_all_cache_options && !has_new_total_cache_size) {
|
|
|
|
qdict_del(options, QCOW2_OPT_CACHE_SIZE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-13 23:26:52 +03:00
|
|
|
static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
|
|
|
|
bool want_zero,
|
|
|
|
int64_t offset, int64_t count,
|
|
|
|
int64_t *pnum, int64_t *map,
|
|
|
|
BlockDriverState **file)
|
2006-08-06 01:14:20 +04:00
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2020-07-10 19:12:44 +03:00
|
|
|
uint64_t host_offset;
|
2016-05-31 17:13:07 +03:00
|
|
|
unsigned int bytes;
|
2020-07-10 19:13:00 +03:00
|
|
|
QCow2SubclusterType type;
|
2019-12-12 13:01:21 +03:00
|
|
|
int ret, status = 0;
|
2006-08-06 01:14:20 +04:00
|
|
|
|
2019-10-24 17:26:58 +03:00
|
|
|
qemu_co_mutex_lock(&s->lock);
|
|
|
|
|
block: avoid recursive block_status call if possible
drv_co_block_status digs bs->file for additional, more accurate search
for hole inside region, reported as DATA by bs since 5daa74a6ebc.
This accuracy is not free: assume we have qcow2 disk. Actually, qcow2
knows, where are holes and where is data. But every block_status
request calls lseek additionally. Assume a big disk, full of
data, in any iterative copying block job (or img convert) we'll call
lseek(HOLE) on every iteration, and each of these lseeks will have to
iterate through all metadata up to the end of file. It's obviously
ineffective behavior. And for many scenarios we don't need this lseek
at all.
However, lseek is needed when we have metadata-preallocated image.
So, let's detect metadata-preallocation case and don't dig qcow2's
protocol file in other cases.
The idea is to compare allocation size in POV of filesystem with
allocations size in POV of Qcow2 (by refcounts). If allocation in fs is
significantly lower, consider it as metadata-preallocation case.
102 iotest changed, as our detector can't detect shrinked file as
metadata-preallocation, which don't seem to be wrong, as with metadata
preallocation we always have valid file length.
Two other iotests have a slight change in their QMP output sequence:
Active 'block-commit' returns earlier because the job coroutine yields
earlier on a blocking operation. This operation is loading the refcount
blocks in qcow2_detect_metadata_preallocation().
Suggested-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2019-04-08 19:26:17 +03:00
|
|
|
if (!s->metadata_preallocation_checked) {
|
|
|
|
ret = qcow2_detect_metadata_preallocation(bs);
|
|
|
|
s->metadata_preallocation = (ret == 1);
|
|
|
|
s->metadata_preallocation_checked = true;
|
|
|
|
}
|
|
|
|
|
2018-02-13 23:26:52 +03:00
|
|
|
bytes = MIN(INT_MAX, count);
|
2020-07-10 19:12:59 +03:00
|
|
|
ret = qcow2_get_host_offset(bs, offset, &bytes, &host_offset, &type);
|
2011-11-14 16:44:21 +04:00
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
2010-05-21 19:59:36 +04:00
|
|
|
if (ret < 0) {
|
2013-09-04 21:00:25 +04:00
|
|
|
return ret;
|
2010-05-21 19:59:36 +04:00
|
|
|
}
|
2008-08-14 22:10:28 +04:00
|
|
|
|
2018-02-13 23:26:52 +03:00
|
|
|
*pnum = bytes;
|
2016-05-31 17:13:07 +03:00
|
|
|
|
2020-07-10 19:13:00 +03:00
|
|
|
if ((type == QCOW2_SUBCLUSTER_NORMAL ||
|
2020-07-10 19:13:01 +03:00
|
|
|
type == QCOW2_SUBCLUSTER_ZERO_ALLOC ||
|
|
|
|
type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC) && !s->crypto) {
|
2020-07-10 19:12:44 +03:00
|
|
|
*map = host_offset;
|
2019-02-27 15:22:56 +03:00
|
|
|
*file = s->data_file->bs;
|
2018-02-13 23:26:52 +03:00
|
|
|
status |= BDRV_BLOCK_OFFSET_VALID;
|
2013-09-04 21:00:30 +04:00
|
|
|
}
|
2020-07-10 19:13:00 +03:00
|
|
|
if (type == QCOW2_SUBCLUSTER_ZERO_PLAIN ||
|
|
|
|
type == QCOW2_SUBCLUSTER_ZERO_ALLOC) {
|
2013-09-04 21:00:30 +04:00
|
|
|
status |= BDRV_BLOCK_ZERO;
|
2020-07-10 19:13:01 +03:00
|
|
|
} else if (type != QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN &&
|
|
|
|
type != QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC) {
|
2013-09-04 21:00:30 +04:00
|
|
|
status |= BDRV_BLOCK_DATA;
|
|
|
|
}
|
block: avoid recursive block_status call if possible
drv_co_block_status digs bs->file for additional, more accurate search
for hole inside region, reported as DATA by bs since 5daa74a6ebc.
This accuracy is not free: assume we have qcow2 disk. Actually, qcow2
knows, where are holes and where is data. But every block_status
request calls lseek additionally. Assume a big disk, full of
data, in any iterative copying block job (or img convert) we'll call
lseek(HOLE) on every iteration, and each of these lseeks will have to
iterate through all metadata up to the end of file. It's obviously
ineffective behavior. And for many scenarios we don't need this lseek
at all.
However, lseek is needed when we have metadata-preallocated image.
So, let's detect metadata-preallocation case and don't dig qcow2's
protocol file in other cases.
The idea is to compare allocation size in POV of filesystem with
allocations size in POV of Qcow2 (by refcounts). If allocation in fs is
significantly lower, consider it as metadata-preallocation case.
102 iotest changed, as our detector can't detect shrinked file as
metadata-preallocation, which don't seem to be wrong, as with metadata
preallocation we always have valid file length.
Two other iotests have a slight change in their QMP output sequence:
Active 'block-commit' returns earlier because the job coroutine yields
earlier on a blocking operation. This operation is loading the refcount
blocks in qcow2_detect_metadata_preallocation().
Suggested-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2019-04-08 19:26:17 +03:00
|
|
|
if (s->metadata_preallocation && (status & BDRV_BLOCK_DATA) &&
|
|
|
|
(status & BDRV_BLOCK_OFFSET_VALID))
|
|
|
|
{
|
|
|
|
status |= BDRV_BLOCK_RECURSE;
|
|
|
|
}
|
2013-09-04 21:00:30 +04:00
|
|
|
return status;
|
2006-08-06 01:14:20 +04:00
|
|
|
}
|
|
|
|
|
2018-06-01 12:26:42 +03:00
|
|
|
static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs,
|
|
|
|
QCowL2Meta **pl2meta,
|
|
|
|
bool link_l2)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
QCowL2Meta *l2meta = *pl2meta;
|
|
|
|
|
|
|
|
while (l2meta != NULL) {
|
|
|
|
QCowL2Meta *next;
|
|
|
|
|
2018-06-27 06:57:51 +03:00
|
|
|
if (link_l2) {
|
2018-06-01 12:26:42 +03:00
|
|
|
ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
|
|
|
|
if (ret) {
|
|
|
|
goto out;
|
|
|
|
}
|
2018-06-28 18:05:45 +03:00
|
|
|
} else {
|
|
|
|
qcow2_alloc_cluster_abort(bs, l2meta);
|
2018-06-01 12:26:42 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Take the request off the list of running requests */
|
2020-09-03 19:37:48 +03:00
|
|
|
QLIST_REMOVE(l2meta, next_in_flight);
|
2018-06-01 12:26:42 +03:00
|
|
|
|
|
|
|
qemu_co_queue_restart_all(&l2meta->dependent_requests);
|
|
|
|
|
|
|
|
next = l2meta->next;
|
|
|
|
g_free(l2meta);
|
|
|
|
l2meta = next;
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
*pl2meta = l2meta;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2019-09-16 20:53:22 +03:00
|
|
|
static coroutine_fn int
|
|
|
|
qcow2_co_preadv_encrypted(BlockDriverState *bs,
|
2020-07-10 19:12:43 +03:00
|
|
|
uint64_t host_offset,
|
2019-09-16 20:53:22 +03:00
|
|
|
uint64_t offset,
|
|
|
|
uint64_t bytes,
|
|
|
|
QEMUIOVector *qiov,
|
|
|
|
uint64_t qiov_offset)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
uint8_t *buf;
|
|
|
|
|
|
|
|
assert(bs->encrypted && s->crypto);
|
|
|
|
assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For encrypted images, read everything into a temporary
|
|
|
|
* contiguous buffer on which the AES functions can work.
|
|
|
|
* Also, decryption in a separate buffer is better as it
|
|
|
|
* prevents the guest from learning information about the
|
|
|
|
* encrypted nature of the virtual disk.
|
|
|
|
*/
|
|
|
|
|
|
|
|
buf = qemu_try_blockalign(s->data_file->bs, bytes);
|
|
|
|
if (buf == NULL) {
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
|
2020-07-10 19:12:43 +03:00
|
|
|
ret = bdrv_co_pread(s->data_file, host_offset, bytes, buf, 0);
|
2019-09-16 20:53:22 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2020-07-10 19:12:43 +03:00
|
|
|
if (qcow2_co_decrypt(bs, host_offset, offset, buf, bytes) < 0)
|
2019-09-16 20:53:22 +03:00
|
|
|
{
|
|
|
|
ret = -EIO;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
qemu_iovec_from_buf(qiov, qiov_offset, buf, bytes);
|
|
|
|
|
|
|
|
fail:
|
|
|
|
qemu_vfree(buf);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2019-09-16 20:53:24 +03:00
|
|
|
typedef struct Qcow2AioTask {
|
|
|
|
AioTask task;
|
|
|
|
|
|
|
|
BlockDriverState *bs;
|
2020-07-10 19:13:00 +03:00
|
|
|
QCow2SubclusterType subcluster_type; /* only for read */
|
2021-09-14 15:24:46 +03:00
|
|
|
uint64_t host_offset; /* or l2_entry for compressed read */
|
2019-09-16 20:53:24 +03:00
|
|
|
uint64_t offset;
|
|
|
|
uint64_t bytes;
|
|
|
|
QEMUIOVector *qiov;
|
|
|
|
uint64_t qiov_offset;
|
|
|
|
QCowL2Meta *l2meta; /* only for write */
|
|
|
|
} Qcow2AioTask;
|
|
|
|
|
|
|
|
static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task);
|
|
|
|
static coroutine_fn int qcow2_add_task(BlockDriverState *bs,
|
|
|
|
AioTaskPool *pool,
|
|
|
|
AioTaskFunc func,
|
2020-07-10 19:13:00 +03:00
|
|
|
QCow2SubclusterType subcluster_type,
|
2020-07-10 19:12:43 +03:00
|
|
|
uint64_t host_offset,
|
2019-09-16 20:53:24 +03:00
|
|
|
uint64_t offset,
|
|
|
|
uint64_t bytes,
|
|
|
|
QEMUIOVector *qiov,
|
|
|
|
size_t qiov_offset,
|
|
|
|
QCowL2Meta *l2meta)
|
|
|
|
{
|
|
|
|
Qcow2AioTask local_task;
|
|
|
|
Qcow2AioTask *task = pool ? g_new(Qcow2AioTask, 1) : &local_task;
|
|
|
|
|
|
|
|
*task = (Qcow2AioTask) {
|
|
|
|
.task.func = func,
|
|
|
|
.bs = bs,
|
2020-07-10 19:13:00 +03:00
|
|
|
.subcluster_type = subcluster_type,
|
2019-09-16 20:53:24 +03:00
|
|
|
.qiov = qiov,
|
2020-07-10 19:12:43 +03:00
|
|
|
.host_offset = host_offset,
|
2019-09-16 20:53:24 +03:00
|
|
|
.offset = offset,
|
|
|
|
.bytes = bytes,
|
|
|
|
.qiov_offset = qiov_offset,
|
|
|
|
.l2meta = l2meta,
|
|
|
|
};
|
|
|
|
|
|
|
|
trace_qcow2_add_task(qemu_coroutine_self(), bs, pool,
|
|
|
|
func == qcow2_co_preadv_task_entry ? "read" : "write",
|
2020-07-10 19:13:00 +03:00
|
|
|
subcluster_type, host_offset, offset, bytes,
|
2019-09-16 20:53:24 +03:00
|
|
|
qiov, qiov_offset);
|
|
|
|
|
|
|
|
if (!pool) {
|
|
|
|
return func(&task->task);
|
|
|
|
}
|
|
|
|
|
|
|
|
aio_task_pool_start_task(pool, &task->task);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-09-16 20:53:22 +03:00
|
|
|
static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
|
2020-07-10 19:13:00 +03:00
|
|
|
QCow2SubclusterType subc_type,
|
2020-07-10 19:12:43 +03:00
|
|
|
uint64_t host_offset,
|
2019-09-16 20:53:22 +03:00
|
|
|
uint64_t offset, uint64_t bytes,
|
|
|
|
QEMUIOVector *qiov,
|
|
|
|
size_t qiov_offset)
|
|
|
|
{
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
|
2020-07-10 19:13:00 +03:00
|
|
|
switch (subc_type) {
|
|
|
|
case QCOW2_SUBCLUSTER_ZERO_PLAIN:
|
|
|
|
case QCOW2_SUBCLUSTER_ZERO_ALLOC:
|
2019-09-16 20:53:22 +03:00
|
|
|
/* Both zero types are handled in qcow2_co_preadv_part */
|
|
|
|
g_assert_not_reached();
|
|
|
|
|
2020-07-10 19:13:00 +03:00
|
|
|
case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
|
2020-07-10 19:13:01 +03:00
|
|
|
case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
|
2019-09-16 20:53:22 +03:00
|
|
|
assert(bs->backing); /* otherwise handled in qcow2_co_preadv_part */
|
|
|
|
|
|
|
|
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
|
|
|
|
return bdrv_co_preadv_part(bs->backing, offset, bytes,
|
|
|
|
qiov, qiov_offset, 0);
|
|
|
|
|
2020-07-10 19:13:00 +03:00
|
|
|
case QCOW2_SUBCLUSTER_COMPRESSED:
|
2020-07-10 19:12:43 +03:00
|
|
|
return qcow2_co_preadv_compressed(bs, host_offset,
|
2019-09-16 20:53:22 +03:00
|
|
|
offset, bytes, qiov, qiov_offset);
|
|
|
|
|
2020-07-10 19:13:00 +03:00
|
|
|
case QCOW2_SUBCLUSTER_NORMAL:
|
2019-09-16 20:53:22 +03:00
|
|
|
if (bs->encrypted) {
|
2020-07-10 19:12:43 +03:00
|
|
|
return qcow2_co_preadv_encrypted(bs, host_offset,
|
2019-09-16 20:53:22 +03:00
|
|
|
offset, bytes, qiov, qiov_offset);
|
|
|
|
}
|
|
|
|
|
|
|
|
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
|
2020-07-10 19:12:43 +03:00
|
|
|
return bdrv_co_preadv_part(s->data_file, host_offset,
|
2019-09-16 20:53:22 +03:00
|
|
|
bytes, qiov, qiov_offset, 0);
|
|
|
|
|
|
|
|
default:
|
|
|
|
g_assert_not_reached();
|
|
|
|
}
|
|
|
|
|
|
|
|
g_assert_not_reached();
|
|
|
|
}
|
|
|
|
|
2019-09-16 20:53:24 +03:00
|
|
|
static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task)
|
|
|
|
{
|
|
|
|
Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
|
|
|
|
|
|
|
|
assert(!t->l2meta);
|
|
|
|
|
2020-07-10 19:13:00 +03:00
|
|
|
return qcow2_co_preadv_task(t->bs, t->subcluster_type,
|
|
|
|
t->host_offset, t->offset, t->bytes,
|
|
|
|
t->qiov, t->qiov_offset);
|
2019-09-16 20:53:24 +03:00
|
|
|
}
|
|
|
|
|
2019-06-04 19:15:13 +03:00
|
|
|
static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
|
block: use int64_t instead of uint64_t in driver read handlers
We are generally moving to int64_t for both offset and bytes parameters
on all io paths.
Main motivation is realization of 64-bit write_zeroes operation for
fast zeroing large disk chunks, up to the whole disk.
We chose signed type, to be consistent with off_t (which is signed) and
with possibility for signed return type (where negative value means
error).
So, convert driver read handlers parameters which are already 64bit to
signed type.
While being here, convert also flags parameter to be BdrvRequestFlags.
Now let's consider all callers. Simple
git grep '\->bdrv_\(aio\|co\)_preadv\(_part\)\?'
shows that's there three callers of driver function:
bdrv_driver_preadv() in block/io.c, passes int64_t, checked by
bdrv_check_qiov_request() to be non-negative.
qcow2_load_vmstate() does bdrv_check_qiov_request().
do_perform_cow_read() has uint64_t argument. And a lot of things in
qcow2 driver are uint64_t, so converting it is big job. But we must
not work with requests that don't satisfy bdrv_check_qiov_request(),
so let's just assert it here.
Still, the functions may be called directly, not only by drv->...
Let's check:
git grep '\.bdrv_\(aio\|co\)_preadv\(_part\)\?\s*=' | \
awk '{print $4}' | sed 's/,//' | sed 's/&//' | sort | uniq | \
while read func; do git grep "$func(" | \
grep -v "$func(BlockDriverState"; done
The only one such caller:
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1);
...
ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0);
in tests/unit/test-bdrv-drain.c, and it's OK obviously.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20210903102807.27127-4-vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
[eblake: fix typos]
Signed-off-by: Eric Blake <eblake@redhat.com>
2021-09-03 13:27:59 +03:00
|
|
|
int64_t offset, int64_t bytes,
|
2019-06-04 19:15:13 +03:00
|
|
|
QEMUIOVector *qiov,
|
block: use int64_t instead of uint64_t in driver read handlers
We are generally moving to int64_t for both offset and bytes parameters
on all io paths.
Main motivation is realization of 64-bit write_zeroes operation for
fast zeroing large disk chunks, up to the whole disk.
We chose signed type, to be consistent with off_t (which is signed) and
with possibility for signed return type (where negative value means
error).
So, convert driver read handlers parameters which are already 64bit to
signed type.
While being here, convert also flags parameter to be BdrvRequestFlags.
Now let's consider all callers. Simple
git grep '\->bdrv_\(aio\|co\)_preadv\(_part\)\?'
shows that's there three callers of driver function:
bdrv_driver_preadv() in block/io.c, passes int64_t, checked by
bdrv_check_qiov_request() to be non-negative.
qcow2_load_vmstate() does bdrv_check_qiov_request().
do_perform_cow_read() has uint64_t argument. And a lot of things in
qcow2 driver are uint64_t, so converting it is big job. But we must
not work with requests that don't satisfy bdrv_check_qiov_request(),
so let's just assert it here.
Still, the functions may be called directly, not only by drv->...
Let's check:
git grep '\.bdrv_\(aio\|co\)_preadv\(_part\)\?\s*=' | \
awk '{print $4}' | sed 's/,//' | sed 's/&//' | sort | uniq | \
while read func; do git grep "$func(" | \
grep -v "$func(BlockDriverState"; done
The only one such caller:
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1);
...
ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0);
in tests/unit/test-bdrv-drain.c, and it's OK obviously.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20210903102807.27127-4-vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
[eblake: fix typos]
Signed-off-by: Eric Blake <eblake@redhat.com>
2021-09-03 13:27:59 +03:00
|
|
|
size_t qiov_offset,
|
|
|
|
BdrvRequestFlags flags)
|
2006-08-06 01:14:20 +04:00
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2019-09-16 20:53:24 +03:00
|
|
|
int ret = 0;
|
2016-05-31 17:13:07 +03:00
|
|
|
unsigned int cur_bytes; /* number of bytes in current iteration */
|
2020-07-10 19:12:44 +03:00
|
|
|
uint64_t host_offset = 0;
|
2020-07-10 19:13:00 +03:00
|
|
|
QCow2SubclusterType type;
|
2019-09-16 20:53:24 +03:00
|
|
|
AioTaskPool *aio = NULL;
|
2006-08-06 01:14:20 +04:00
|
|
|
|
2019-09-16 20:53:24 +03:00
|
|
|
while (bytes != 0 && aio_task_pool_status(aio) == 0) {
|
2011-08-23 17:21:18 +04:00
|
|
|
/* prepare next request */
|
2016-05-31 17:13:07 +03:00
|
|
|
cur_bytes = MIN(bytes, INT_MAX);
|
2017-06-23 19:24:10 +03:00
|
|
|
if (s->crypto) {
|
2016-05-31 17:13:07 +03:00
|
|
|
cur_bytes = MIN(cur_bytes,
|
|
|
|
QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
|
2006-08-06 01:14:20 +04:00
|
|
|
}
|
2011-08-23 17:21:18 +04:00
|
|
|
|
2019-05-06 17:27:39 +03:00
|
|
|
qemu_co_mutex_lock(&s->lock);
|
2020-07-10 19:12:59 +03:00
|
|
|
ret = qcow2_get_host_offset(bs, offset, &cur_bytes,
|
|
|
|
&host_offset, &type);
|
2019-05-06 17:27:39 +03:00
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
2011-02-09 12:26:06 +03:00
|
|
|
if (ret < 0) {
|
2019-09-16 20:53:24 +03:00
|
|
|
goto out;
|
2011-02-09 12:26:06 +03:00
|
|
|
}
|
2010-09-13 20:08:52 +04:00
|
|
|
|
2020-07-10 19:13:00 +03:00
|
|
|
if (type == QCOW2_SUBCLUSTER_ZERO_PLAIN ||
|
|
|
|
type == QCOW2_SUBCLUSTER_ZERO_ALLOC ||
|
2020-07-10 19:13:01 +03:00
|
|
|
(type == QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN && !bs->backing) ||
|
|
|
|
(type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC && !bs->backing))
|
2019-09-16 20:53:22 +03:00
|
|
|
{
|
2019-06-04 19:15:13 +03:00
|
|
|
qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
|
2019-09-16 20:53:22 +03:00
|
|
|
} else {
|
2019-09-16 20:53:24 +03:00
|
|
|
if (!aio && cur_bytes != bytes) {
|
|
|
|
aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
|
|
|
|
}
|
2020-07-10 19:12:59 +03:00
|
|
|
ret = qcow2_add_task(bs, aio, qcow2_co_preadv_task_entry, type,
|
2020-07-10 19:12:43 +03:00
|
|
|
host_offset, offset, cur_bytes,
|
2019-09-16 20:53:24 +03:00
|
|
|
qiov, qiov_offset, NULL);
|
2011-08-23 17:21:18 +04:00
|
|
|
if (ret < 0) {
|
2019-09-16 20:53:24 +03:00
|
|
|
goto out;
|
2011-08-23 17:21:18 +04:00
|
|
|
}
|
2011-08-23 17:21:14 +04:00
|
|
|
}
|
2009-04-07 22:43:24 +04:00
|
|
|
|
2016-05-31 17:13:07 +03:00
|
|
|
bytes -= cur_bytes;
|
|
|
|
offset += cur_bytes;
|
2019-06-04 19:15:13 +03:00
|
|
|
qiov_offset += cur_bytes;
|
2011-08-23 17:21:18 +04:00
|
|
|
}
|
2011-06-30 19:42:09 +04:00
|
|
|
|
2019-09-16 20:53:24 +03:00
|
|
|
out:
|
|
|
|
if (aio) {
|
|
|
|
aio_task_pool_wait_all(aio);
|
|
|
|
if (ret == 0) {
|
|
|
|
ret = aio_task_pool_status(aio);
|
|
|
|
}
|
|
|
|
g_free(aio);
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
2006-08-06 01:14:20 +04:00
|
|
|
}
|
|
|
|
|
2017-06-19 16:40:08 +03:00
|
|
|
/* Check if it's possible to merge a write request with the writing of
|
|
|
|
* the data from the COW regions */
|
|
|
|
static bool merge_cow(uint64_t offset, unsigned bytes,
|
2019-06-04 19:15:14 +03:00
|
|
|
QEMUIOVector *qiov, size_t qiov_offset,
|
|
|
|
QCowL2Meta *l2meta)
|
2017-06-19 16:40:08 +03:00
|
|
|
{
|
|
|
|
QCowL2Meta *m;
|
|
|
|
|
|
|
|
for (m = l2meta; m != NULL; m = m->next) {
|
|
|
|
/* If both COW regions are empty then there's nothing to merge */
|
|
|
|
if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2019-05-16 17:27:49 +03:00
|
|
|
/* If COW regions are handled already, skip this too */
|
|
|
|
if (m->skip_cow) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-10-07 19:13:23 +03:00
|
|
|
/*
|
|
|
|
* The write request should start immediately after the first
|
|
|
|
* COW region. This does not always happen because the area
|
|
|
|
* touched by the request can be larger than the one defined
|
|
|
|
* by @m (a single request can span an area consisting of a
|
|
|
|
* mix of previously unallocated and allocated clusters, that
|
|
|
|
* is why @l2meta is a list).
|
|
|
|
*/
|
2017-06-19 16:40:08 +03:00
|
|
|
if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
|
2020-10-07 19:13:23 +03:00
|
|
|
/* In this case the request starts before this region */
|
|
|
|
assert(offset < l2meta_cow_start(m));
|
|
|
|
assert(m->cow_start.nb_bytes == 0);
|
2017-06-19 16:40:08 +03:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-10-07 19:13:23 +03:00
|
|
|
/* The write request should end immediately before the second
|
|
|
|
* COW region (see above for why it does not always happen) */
|
2017-06-19 16:40:08 +03:00
|
|
|
if (m->offset + m->cow_end.offset != offset + bytes) {
|
2020-10-07 19:13:23 +03:00
|
|
|
assert(offset + bytes > m->offset + m->cow_end.offset);
|
|
|
|
assert(m->cow_end.nb_bytes == 0);
|
2017-06-19 16:40:08 +03:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Make sure that adding both COW regions to the QEMUIOVector
|
|
|
|
* does not exceed IOV_MAX */
|
2019-06-04 19:15:14 +03:00
|
|
|
if (qemu_iovec_subvec_niov(qiov, qiov_offset, bytes) > IOV_MAX - 2) {
|
2017-06-19 16:40:08 +03:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2019-06-04 19:15:14 +03:00
|
|
|
m->data_qiov = qiov;
|
|
|
|
m->data_qiov_offset = qiov_offset;
|
2017-06-19 16:40:08 +03:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-10-26 19:58:27 +03:00
|
|
|
/*
|
|
|
|
* Return 1 if the COW regions read as zeroes, 0 if not, < 0 on error.
|
|
|
|
* Note that returning 0 does not guarantee non-zero data.
|
|
|
|
*/
|
|
|
|
static int is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
|
2019-05-16 17:27:49 +03:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* This check is designed for optimization shortcut so it must be
|
|
|
|
* efficient.
|
2020-10-26 19:58:27 +03:00
|
|
|
* Instead of is_zero(), use bdrv_co_is_zero_fast() as it is
|
|
|
|
* faster (but not as accurate and can result in false negatives).
|
2019-05-16 17:27:49 +03:00
|
|
|
*/
|
2020-10-26 19:58:27 +03:00
|
|
|
int ret = bdrv_co_is_zero_fast(bs, m->offset + m->cow_start.offset,
|
|
|
|
m->cow_start.nb_bytes);
|
|
|
|
if (ret <= 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
return bdrv_co_is_zero_fast(bs, m->offset + m->cow_end.offset,
|
|
|
|
m->cow_end.nb_bytes);
|
2019-05-16 17:27:49 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
|
|
|
|
{
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
QCowL2Meta *m;
|
|
|
|
|
|
|
|
if (!(s->data_file->bs->supported_zero_flags & BDRV_REQ_NO_FALLBACK)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bs->encrypted) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (m = l2meta; m != NULL; m = m->next) {
|
|
|
|
int ret;
|
2020-07-10 19:13:09 +03:00
|
|
|
uint64_t start_offset = m->alloc_offset + m->cow_start.offset;
|
|
|
|
unsigned nb_bytes = m->cow_end.offset + m->cow_end.nb_bytes -
|
|
|
|
m->cow_start.offset;
|
2019-05-16 17:27:49 +03:00
|
|
|
|
|
|
|
if (!m->cow_start.nb_bytes && !m->cow_end.nb_bytes) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-10-26 19:58:27 +03:00
|
|
|
ret = is_zero_cow(bs, m);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
} else if (ret == 0) {
|
2019-05-16 17:27:49 +03:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* instead of writing zero COW buffers,
|
|
|
|
* efficiently zero out the whole clusters
|
|
|
|
*/
|
|
|
|
|
2020-07-10 19:13:09 +03:00
|
|
|
ret = qcow2_pre_write_overlap_check(bs, 0, start_offset, nb_bytes,
|
2019-05-16 17:27:49 +03:00
|
|
|
true);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE);
|
2020-07-10 19:13:09 +03:00
|
|
|
ret = bdrv_co_pwrite_zeroes(s->data_file, start_offset, nb_bytes,
|
2019-05-16 17:27:49 +03:00
|
|
|
BDRV_REQ_NO_FALLBACK);
|
|
|
|
if (ret < 0) {
|
|
|
|
if (ret != -ENOTSUP && ret != -EAGAIN) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
trace_qcow2_skip_cow(qemu_coroutine_self(), m->offset, m->nb_clusters);
|
|
|
|
m->skip_cow = true;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-09-16 20:53:23 +03:00
|
|
|
/*
|
|
|
|
* qcow2_co_pwritev_task
|
|
|
|
* Called with s->lock unlocked
|
|
|
|
* l2meta - if not NULL, qcow2_co_pwritev_task() will consume it. Caller must
|
|
|
|
* not use it somehow after qcow2_co_pwritev_task() call
|
|
|
|
*/
|
|
|
|
static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs,
|
2020-07-10 19:12:43 +03:00
|
|
|
uint64_t host_offset,
|
2019-09-16 20:53:23 +03:00
|
|
|
uint64_t offset, uint64_t bytes,
|
|
|
|
QEMUIOVector *qiov,
|
|
|
|
uint64_t qiov_offset,
|
|
|
|
QCowL2Meta *l2meta)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
void *crypt_buf = NULL;
|
|
|
|
QEMUIOVector encrypted_qiov;
|
|
|
|
|
|
|
|
if (bs->encrypted) {
|
|
|
|
assert(s->crypto);
|
|
|
|
assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
|
|
|
|
crypt_buf = qemu_try_blockalign(bs->file->bs, bytes);
|
|
|
|
if (crypt_buf == NULL) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto out_unlocked;
|
|
|
|
}
|
|
|
|
qemu_iovec_to_buf(qiov, qiov_offset, crypt_buf, bytes);
|
|
|
|
|
2020-07-10 19:12:43 +03:00
|
|
|
if (qcow2_co_encrypt(bs, host_offset, offset, crypt_buf, bytes) < 0) {
|
2019-09-16 20:53:23 +03:00
|
|
|
ret = -EIO;
|
|
|
|
goto out_unlocked;
|
|
|
|
}
|
|
|
|
|
|
|
|
qemu_iovec_init_buf(&encrypted_qiov, crypt_buf, bytes);
|
|
|
|
qiov = &encrypted_qiov;
|
|
|
|
qiov_offset = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Try to efficiently initialize the physical space with zeroes */
|
|
|
|
ret = handle_alloc_space(bs, l2meta);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto out_unlocked;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we need to do COW, check if it's possible to merge the
|
|
|
|
* writing of the guest data together with that of the COW regions.
|
|
|
|
* If it's not possible (or not necessary) then write the
|
|
|
|
* guest data now.
|
|
|
|
*/
|
|
|
|
if (!merge_cow(offset, bytes, qiov, qiov_offset, l2meta)) {
|
|
|
|
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
|
2020-07-10 19:12:43 +03:00
|
|
|
trace_qcow2_writev_data(qemu_coroutine_self(), host_offset);
|
|
|
|
ret = bdrv_co_pwritev_part(s->data_file, host_offset,
|
2019-09-16 20:53:23 +03:00
|
|
|
bytes, qiov, qiov_offset, 0);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto out_unlocked;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
qemu_co_mutex_lock(&s->lock);
|
|
|
|
|
|
|
|
ret = qcow2_handle_l2meta(bs, &l2meta, true);
|
|
|
|
goto out_locked;
|
|
|
|
|
|
|
|
out_unlocked:
|
|
|
|
qemu_co_mutex_lock(&s->lock);
|
|
|
|
|
|
|
|
out_locked:
|
|
|
|
qcow2_handle_l2meta(bs, &l2meta, false);
|
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
|
|
|
|
|
|
|
qemu_vfree(crypt_buf);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2019-09-16 20:53:24 +03:00
|
|
|
static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task)
|
|
|
|
{
|
|
|
|
Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
|
|
|
|
|
2020-07-10 19:13:00 +03:00
|
|
|
assert(!t->subcluster_type);
|
2019-09-16 20:53:24 +03:00
|
|
|
|
2020-07-10 19:12:43 +03:00
|
|
|
return qcow2_co_pwritev_task(t->bs, t->host_offset,
|
2019-09-16 20:53:24 +03:00
|
|
|
t->offset, t->bytes, t->qiov, t->qiov_offset,
|
|
|
|
t->l2meta);
|
|
|
|
}
|
|
|
|
|
2019-06-04 19:15:14 +03:00
|
|
|
static coroutine_fn int qcow2_co_pwritev_part(
|
block: use int64_t instead of uint64_t in driver write handlers
We are generally moving to int64_t for both offset and bytes parameters
on all io paths.
Main motivation is realization of 64-bit write_zeroes operation for
fast zeroing large disk chunks, up to the whole disk.
We chose signed type, to be consistent with off_t (which is signed) and
with possibility for signed return type (where negative value means
error).
So, convert driver write handlers parameters which are already 64bit to
signed type.
While being here, convert also flags parameter to be BdrvRequestFlags.
Now let's consider all callers. Simple
git grep '\->bdrv_\(aio\|co\)_pwritev\(_part\)\?'
shows that's there three callers of driver function:
bdrv_driver_pwritev() and bdrv_driver_pwritev_compressed() in
block/io.c, both pass int64_t, checked by bdrv_check_qiov_request() to
be non-negative.
qcow2_save_vmstate() does bdrv_check_qiov_request().
Still, the functions may be called directly, not only by drv->...
Let's check:
git grep '\.bdrv_\(aio\|co\)_pwritev\(_part\)\?\s*=' | \
awk '{print $4}' | sed 's/,//' | sed 's/&//' | sort | uniq | \
while read func; do git grep "$func(" | \
grep -v "$func(BlockDriverState"; done
shows several callers:
qcow2:
qcow2_co_truncate() write at most up to @offset, which is checked in
generic qcow2_co_truncate() by bdrv_check_request().
qcow2_co_pwritev_compressed_task() pass the request (or part of the
request) that already went through normal write path, so it should
be OK
qcow:
qcow_co_pwritev_compressed() pass int64_t, it's updated by this patch
quorum:
quorum_co_pwrite_zeroes() pass int64_t and int - OK
throttle:
throttle_co_pwritev_compressed() pass int64_t, it's updated by this
patch
vmdk:
vmdk_co_pwritev_compressed() pass int64_t, it's updated by this
patch
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20210903102807.27127-5-vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
2021-09-03 13:28:00 +03:00
|
|
|
BlockDriverState *bs, int64_t offset, int64_t bytes,
|
|
|
|
QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags)
|
2006-08-06 01:14:20 +04:00
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2016-06-01 17:55:05 +03:00
|
|
|
int offset_in_cluster;
|
2011-06-30 19:42:09 +04:00
|
|
|
int ret;
|
2016-06-01 17:55:05 +03:00
|
|
|
unsigned int cur_bytes; /* number of sectors in current iteration */
|
2020-09-11 17:09:42 +03:00
|
|
|
uint64_t host_offset;
|
2013-01-14 20:31:31 +04:00
|
|
|
QCowL2Meta *l2meta = NULL;
|
2019-09-16 20:53:24 +03:00
|
|
|
AioTaskPool *aio = NULL;
|
2011-08-23 17:21:15 +04:00
|
|
|
|
2016-06-01 17:55:05 +03:00
|
|
|
trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
|
2012-03-01 21:36:21 +04:00
|
|
|
|
2019-09-16 20:53:24 +03:00
|
|
|
while (bytes != 0 && aio_task_pool_status(aio) == 0) {
|
2011-08-23 17:21:19 +04:00
|
|
|
|
2012-12-07 21:08:46 +04:00
|
|
|
l2meta = NULL;
|
2012-12-07 21:08:44 +04:00
|
|
|
|
2012-03-01 21:36:21 +04:00
|
|
|
trace_qcow2_writev_start_part(qemu_coroutine_self());
|
2016-06-01 17:55:05 +03:00
|
|
|
offset_in_cluster = offset_into_cluster(s, offset);
|
|
|
|
cur_bytes = MIN(bytes, INT_MAX);
|
|
|
|
if (bs->encrypted) {
|
|
|
|
cur_bytes = MIN(cur_bytes,
|
|
|
|
QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
|
|
|
|
- offset_in_cluster);
|
2011-08-23 17:21:18 +04:00
|
|
|
}
|
2008-08-14 22:10:28 +04:00
|
|
|
|
2019-09-16 20:53:23 +03:00
|
|
|
qemu_co_mutex_lock(&s->lock);
|
|
|
|
|
2020-09-11 17:09:42 +03:00
|
|
|
ret = qcow2_alloc_host_offset(bs, offset, &cur_bytes,
|
|
|
|
&host_offset, &l2meta);
|
2011-08-23 17:21:18 +04:00
|
|
|
if (ret < 0) {
|
2019-05-06 17:27:40 +03:00
|
|
|
goto out_locked;
|
2011-08-23 17:21:18 +04:00
|
|
|
}
|
2010-01-20 17:03:01 +03:00
|
|
|
|
2020-09-11 17:09:42 +03:00
|
|
|
ret = qcow2_pre_write_overlap_check(bs, 0, host_offset,
|
2019-05-06 17:27:40 +03:00
|
|
|
cur_bytes, true);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto out_locked;
|
|
|
|
}
|
|
|
|
|
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
|
|
|
|
2019-09-16 20:53:24 +03:00
|
|
|
if (!aio && cur_bytes != bytes) {
|
|
|
|
aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
|
|
|
|
}
|
|
|
|
ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_task_entry, 0,
|
2020-09-11 17:09:42 +03:00
|
|
|
host_offset, offset,
|
2020-07-10 19:12:43 +03:00
|
|
|
cur_bytes, qiov, qiov_offset, l2meta);
|
2019-09-16 20:53:23 +03:00
|
|
|
l2meta = NULL; /* l2meta is consumed by qcow2_co_pwritev_task() */
|
2019-05-16 17:27:49 +03:00
|
|
|
if (ret < 0) {
|
2019-09-16 20:53:23 +03:00
|
|
|
goto fail_nometa;
|
2012-12-07 21:08:46 +04:00
|
|
|
}
|
2011-09-01 17:02:13 +04:00
|
|
|
|
2016-06-01 17:55:05 +03:00
|
|
|
bytes -= cur_bytes;
|
|
|
|
offset += cur_bytes;
|
2019-09-16 20:53:23 +03:00
|
|
|
qiov_offset += cur_bytes;
|
2016-06-01 17:55:05 +03:00
|
|
|
trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
|
2011-08-23 17:21:18 +04:00
|
|
|
}
|
2011-08-23 17:21:19 +04:00
|
|
|
ret = 0;
|
2011-08-23 17:21:14 +04:00
|
|
|
|
2019-05-06 17:27:40 +03:00
|
|
|
qemu_co_mutex_lock(&s->lock);
|
|
|
|
|
|
|
|
out_locked:
|
2018-06-01 12:26:42 +03:00
|
|
|
qcow2_handle_l2meta(bs, &l2meta, false);
|
2011-09-01 17:02:13 +04:00
|
|
|
|
2017-06-29 16:27:39 +03:00
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
|
|
|
|
2019-09-16 20:53:23 +03:00
|
|
|
fail_nometa:
|
2019-09-16 20:53:24 +03:00
|
|
|
if (aio) {
|
|
|
|
aio_task_pool_wait_all(aio);
|
|
|
|
if (ret == 0) {
|
|
|
|
ret = aio_task_pool_status(aio);
|
|
|
|
}
|
|
|
|
g_free(aio);
|
|
|
|
}
|
|
|
|
|
2012-03-01 21:36:21 +04:00
|
|
|
trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
|
2011-06-07 17:04:32 +04:00
|
|
|
|
2011-06-30 19:42:09 +04:00
|
|
|
return ret;
|
2006-08-06 01:14:20 +04:00
|
|
|
}
|
|
|
|
|
2015-12-22 18:04:57 +03:00
|
|
|
static int qcow2_inactivate(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
int ret, result = 0;
|
2017-06-28 15:05:19 +03:00
|
|
|
Error *local_err = NULL;
|
2015-12-22 18:04:57 +03:00
|
|
|
|
2019-09-27 15:23:52 +03:00
|
|
|
qcow2_store_persistent_dirty_bitmaps(bs, true, &local_err);
|
2017-09-04 13:18:00 +03:00
|
|
|
if (local_err != NULL) {
|
|
|
|
result = -EINVAL;
|
2018-10-29 23:23:15 +03:00
|
|
|
error_reportf_err(local_err, "Lost persistent bitmaps during "
|
|
|
|
"inactivation of node '%s': ",
|
|
|
|
bdrv_get_device_or_node_name(bs));
|
2017-09-04 13:18:00 +03:00
|
|
|
}
|
|
|
|
|
2015-12-22 18:04:57 +03:00
|
|
|
ret = qcow2_cache_flush(bs, s->l2_table_cache);
|
|
|
|
if (ret) {
|
|
|
|
result = ret;
|
|
|
|
error_report("Failed to flush the L2 table cache: %s",
|
|
|
|
strerror(-ret));
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
|
|
|
|
if (ret) {
|
|
|
|
result = ret;
|
|
|
|
error_report("Failed to flush the refcount block cache: %s",
|
|
|
|
strerror(-ret));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (result == 0) {
|
|
|
|
qcow2_mark_clean(bs);
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2010-12-17 18:02:39 +03:00
|
|
|
static void qcow2_close(BlockDriverState *bs)
|
2006-08-06 01:14:20 +04:00
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2014-05-20 19:12:47 +04:00
|
|
|
qemu_vfree(s->l1_table);
|
2013-08-30 16:34:26 +04:00
|
|
|
/* else pre-write overlap checks in cache_destroy may crash */
|
|
|
|
s->l1_table = NULL;
|
2011-01-10 19:17:28 +03:00
|
|
|
|
2015-12-22 18:10:32 +03:00
|
|
|
if (!(s->flags & BDRV_O_INACTIVE)) {
|
2015-12-22 18:04:57 +03:00
|
|
|
qcow2_inactivate(bs);
|
2014-03-11 18:15:03 +04:00
|
|
|
}
|
2012-07-27 12:05:19 +04:00
|
|
|
|
2015-08-04 15:14:40 +03:00
|
|
|
cache_clean_timer_del(bs);
|
2018-02-05 17:33:08 +03:00
|
|
|
qcow2_cache_destroy(s->l2_table_cache);
|
|
|
|
qcow2_cache_destroy(s->refcount_block_cache);
|
2011-01-10 19:17:28 +03:00
|
|
|
|
2017-06-23 19:24:10 +03:00
|
|
|
qcrypto_block_free(s->crypto);
|
|
|
|
s->crypto = NULL;
|
2020-02-27 04:29:49 +03:00
|
|
|
qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
|
2015-07-01 20:10:37 +03:00
|
|
|
|
2011-12-15 15:20:58 +04:00
|
|
|
g_free(s->unknown_header_fields);
|
2012-02-02 17:52:08 +04:00
|
|
|
cleanup_unknown_header_ext(bs);
|
2011-12-15 15:20:58 +04:00
|
|
|
|
2019-01-15 21:02:40 +03:00
|
|
|
g_free(s->image_data_file);
|
2015-04-07 16:03:16 +03:00
|
|
|
g_free(s->image_backing_file);
|
|
|
|
g_free(s->image_backing_format);
|
|
|
|
|
2019-01-29 19:13:57 +03:00
|
|
|
if (has_data_file(bs)) {
|
|
|
|
bdrv_unref_child(bs, s->data_file);
|
2020-03-16 09:06:31 +03:00
|
|
|
s->data_file = NULL;
|
2019-01-29 19:13:57 +03:00
|
|
|
}
|
|
|
|
|
2009-05-28 18:07:07 +04:00
|
|
|
qcow2_refcount_close(bs);
|
2011-12-07 13:25:48 +04:00
|
|
|
qcow2_free_snapshots(bs);
|
2006-08-06 01:14:20 +04:00
|
|
|
}
|
|
|
|
|
2018-03-01 19:36:18 +03:00
|
|
|
static void coroutine_fn qcow2_co_invalidate_cache(BlockDriverState *bs,
|
|
|
|
Error **errp)
|
2011-11-15 01:09:46 +04:00
|
|
|
{
|
2021-02-02 15:49:54 +03:00
|
|
|
ERRP_GUARD();
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2011-11-15 01:09:46 +04:00
|
|
|
int flags = s->flags;
|
2017-06-23 19:24:10 +03:00
|
|
|
QCryptoBlock *crypto = NULL;
|
2013-03-18 16:08:10 +04:00
|
|
|
QDict *options;
|
2014-03-12 18:59:16 +04:00
|
|
|
int ret;
|
2011-11-15 01:09:46 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Backing files are read-only which makes all of their metadata immutable,
|
|
|
|
* that means we don't have to worry about reopening them here.
|
|
|
|
*/
|
|
|
|
|
2017-06-23 19:24:10 +03:00
|
|
|
crypto = s->crypto;
|
|
|
|
s->crypto = NULL;
|
2011-11-15 01:09:46 +04:00
|
|
|
|
|
|
|
qcow2_close(bs);
|
|
|
|
|
2015-09-07 18:12:56 +03:00
|
|
|
memset(s, 0, sizeof(BDRVQcow2State));
|
2014-03-11 20:42:41 +04:00
|
|
|
options = qdict_clone_shallow(bs->options);
|
2014-03-12 18:59:16 +04:00
|
|
|
|
2015-12-22 18:10:32 +03:00
|
|
|
flags &= ~BDRV_O_INACTIVE;
|
2018-03-01 19:36:18 +03:00
|
|
|
qemu_co_mutex_lock(&s->lock);
|
2021-02-02 15:49:54 +03:00
|
|
|
ret = qcow2_do_open(bs, options, flags, errp);
|
2018-03-01 19:36:18 +03:00
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
2018-04-19 18:01:43 +03:00
|
|
|
qobject_unref(options);
|
2021-02-02 15:49:54 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
error_prepend(errp, "Could not reopen qcow2 layer: ");
|
2015-12-22 18:14:10 +03:00
|
|
|
bs->drv = NULL;
|
2014-03-12 18:59:16 +04:00
|
|
|
return;
|
|
|
|
}
|
2013-03-18 16:08:10 +04:00
|
|
|
|
2017-06-23 19:24:10 +03:00
|
|
|
s->crypto = crypto;
|
2011-11-15 01:09:46 +04:00
|
|
|
}
|
|
|
|
|
2012-02-02 15:32:31 +04:00
|
|
|
static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
|
|
|
|
size_t len, size_t buflen)
|
|
|
|
{
|
|
|
|
QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
|
|
|
|
size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
|
|
|
|
|
|
|
|
if (buflen < ext_len) {
|
|
|
|
return -ENOSPC;
|
|
|
|
}
|
|
|
|
|
|
|
|
*ext_backing_fmt = (QCowExtension) {
|
|
|
|
.magic = cpu_to_be32(magic),
|
|
|
|
.len = cpu_to_be32(len),
|
|
|
|
};
|
2016-09-13 11:56:27 +03:00
|
|
|
|
|
|
|
if (len) {
|
|
|
|
memcpy(buf + sizeof(QCowExtension), s, len);
|
|
|
|
}
|
2012-02-02 15:32:31 +04:00
|
|
|
|
|
|
|
return ext_len;
|
|
|
|
}
|
|
|
|
|
2010-01-12 14:55:17 +03:00
|
|
|
/*
|
2012-02-02 15:32:31 +04:00
|
|
|
* Updates the qcow2 header, including the variable length parts of it, i.e.
|
|
|
|
* the backing file name and all extensions. qcow2 was not designed to allow
|
|
|
|
* such changes, so if we run out of space (we can only use the first cluster)
|
|
|
|
* this function may fail.
|
2010-01-12 14:55:17 +03:00
|
|
|
*
|
|
|
|
* Returns 0 on success, -errno in error cases.
|
|
|
|
*/
|
2012-02-02 15:32:31 +04:00
|
|
|
int qcow2_update_header(BlockDriverState *bs)
|
2010-01-12 14:55:17 +03:00
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2012-02-02 15:32:31 +04:00
|
|
|
QCowHeader *header;
|
|
|
|
char *buf;
|
|
|
|
size_t buflen = s->cluster_size;
|
2010-01-12 14:55:17 +03:00
|
|
|
int ret;
|
2012-02-02 15:32:31 +04:00
|
|
|
uint64_t total_size;
|
|
|
|
uint32_t refcount_table_clusters;
|
2011-12-15 15:20:58 +04:00
|
|
|
size_t header_length;
|
2012-02-02 17:52:08 +04:00
|
|
|
Qcow2UnknownHeaderExtension *uext;
|
2010-01-12 14:55:17 +03:00
|
|
|
|
2012-02-02 15:32:31 +04:00
|
|
|
buf = qemu_blockalign(bs, buflen);
|
2010-01-12 14:55:17 +03:00
|
|
|
|
2012-02-02 15:32:31 +04:00
|
|
|
/* Header structure */
|
|
|
|
header = (QCowHeader*) buf;
|
2010-01-12 14:55:17 +03:00
|
|
|
|
2012-02-02 15:32:31 +04:00
|
|
|
if (buflen < sizeof(*header)) {
|
|
|
|
ret = -ENOSPC;
|
|
|
|
goto fail;
|
2010-01-12 14:55:17 +03:00
|
|
|
}
|
|
|
|
|
2011-12-15 15:20:58 +04:00
|
|
|
header_length = sizeof(*header) + s->unknown_header_fields_size;
|
2012-02-02 15:32:31 +04:00
|
|
|
total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
|
|
|
|
refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
|
|
|
|
|
qcow2: introduce compression type feature
The patch adds some preparation parts for incompatible compression type
feature to qcow2 allowing the use different compression methods for
image clusters (de)compressing.
It is implied that the compression type is set on the image creation and
can be changed only later by image conversion, thus compression type
defines the only compression algorithm used for the image, and thus,
for all image clusters.
The goal of the feature is to add support of other compression methods
to qcow2. For example, ZSTD which is more effective on compression than ZLIB.
The default compression is ZLIB. Images created with ZLIB compression type
are backward compatible with older qemu versions.
Adding of the compression type breaks a number of tests because now the
compression type is reported on image creation and there are some changes
in the qcow2 header in size and offsets.
The tests are fixed in the following ways:
* filter out compression_type for many tests
* fix header size, feature table size and backing file offset
affected tests: 031, 036, 061, 080
header_size +=8: 1 byte compression type
7 bytes padding
feature_table += 48: incompatible feature compression type
backing_file_offset += 56 (8 + 48 -> header_change + feature_table_change)
* add "compression type" for test output matching when it isn't filtered
affected tests: 049, 060, 061, 065, 082, 085, 144, 182, 185, 198, 206,
242, 255, 274, 280
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
QAPI part:
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20200507082521.29210-2-dplotnikov@virtuozzo.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2020-05-07 11:25:18 +03:00
|
|
|
ret = validate_compression_type(s, NULL);
|
|
|
|
if (ret) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2012-02-02 15:32:31 +04:00
|
|
|
*header = (QCowHeader) {
|
2011-12-15 15:20:58 +04:00
|
|
|
/* Version 2 fields */
|
2012-02-02 15:32:31 +04:00
|
|
|
.magic = cpu_to_be32(QCOW_MAGIC),
|
2011-12-15 15:20:58 +04:00
|
|
|
.version = cpu_to_be32(s->qcow_version),
|
2012-02-02 15:32:31 +04:00
|
|
|
.backing_file_offset = 0,
|
|
|
|
.backing_file_size = 0,
|
|
|
|
.cluster_bits = cpu_to_be32(s->cluster_bits),
|
|
|
|
.size = cpu_to_be64(total_size),
|
|
|
|
.crypt_method = cpu_to_be32(s->crypt_method_header),
|
|
|
|
.l1_size = cpu_to_be32(s->l1_size),
|
|
|
|
.l1_table_offset = cpu_to_be64(s->l1_table_offset),
|
|
|
|
.refcount_table_offset = cpu_to_be64(s->refcount_table_offset),
|
|
|
|
.refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
|
|
|
|
.nb_snapshots = cpu_to_be32(s->nb_snapshots),
|
|
|
|
.snapshots_offset = cpu_to_be64(s->snapshots_offset),
|
2011-12-15 15:20:58 +04:00
|
|
|
|
|
|
|
/* Version 3 fields */
|
|
|
|
.incompatible_features = cpu_to_be64(s->incompatible_features),
|
|
|
|
.compatible_features = cpu_to_be64(s->compatible_features),
|
|
|
|
.autoclear_features = cpu_to_be64(s->autoclear_features),
|
2013-09-03 12:09:53 +04:00
|
|
|
.refcount_order = cpu_to_be32(s->refcount_order),
|
2011-12-15 15:20:58 +04:00
|
|
|
.header_length = cpu_to_be32(header_length),
|
qcow2: introduce compression type feature
The patch adds some preparation parts for incompatible compression type
feature to qcow2 allowing the use different compression methods for
image clusters (de)compressing.
It is implied that the compression type is set on the image creation and
can be changed only later by image conversion, thus compression type
defines the only compression algorithm used for the image, and thus,
for all image clusters.
The goal of the feature is to add support of other compression methods
to qcow2. For example, ZSTD which is more effective on compression than ZLIB.
The default compression is ZLIB. Images created with ZLIB compression type
are backward compatible with older qemu versions.
Adding of the compression type breaks a number of tests because now the
compression type is reported on image creation and there are some changes
in the qcow2 header in size and offsets.
The tests are fixed in the following ways:
* filter out compression_type for many tests
* fix header size, feature table size and backing file offset
affected tests: 031, 036, 061, 080
header_size +=8: 1 byte compression type
7 bytes padding
feature_table += 48: incompatible feature compression type
backing_file_offset += 56 (8 + 48 -> header_change + feature_table_change)
* add "compression type" for test output matching when it isn't filtered
affected tests: 049, 060, 061, 065, 082, 085, 144, 182, 185, 198, 206,
242, 255, 274, 280
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
QAPI part:
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20200507082521.29210-2-dplotnikov@virtuozzo.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2020-05-07 11:25:18 +03:00
|
|
|
.compression_type = s->compression_type,
|
2012-02-02 15:32:31 +04:00
|
|
|
};
|
2010-01-12 14:55:17 +03:00
|
|
|
|
2011-12-15 15:20:58 +04:00
|
|
|
/* For older versions, write a shorter header */
|
|
|
|
switch (s->qcow_version) {
|
|
|
|
case 2:
|
|
|
|
ret = offsetof(QCowHeader, incompatible_features);
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
ret = sizeof(*header);
|
|
|
|
break;
|
|
|
|
default:
|
2012-05-21 15:06:54 +04:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
2011-12-15 15:20:58 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
buf += ret;
|
|
|
|
buflen -= ret;
|
|
|
|
memset(buf, 0, buflen);
|
|
|
|
|
|
|
|
/* Preserve any unknown field in the header */
|
|
|
|
if (s->unknown_header_fields_size) {
|
|
|
|
if (buflen < s->unknown_header_fields_size) {
|
|
|
|
ret = -ENOSPC;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
|
|
|
|
buf += s->unknown_header_fields_size;
|
|
|
|
buflen -= s->unknown_header_fields_size;
|
|
|
|
}
|
2010-01-12 14:55:17 +03:00
|
|
|
|
2012-02-02 15:32:31 +04:00
|
|
|
/* Backing file format header extension */
|
2015-04-07 16:03:16 +03:00
|
|
|
if (s->image_backing_format) {
|
2012-02-02 15:32:31 +04:00
|
|
|
ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
|
2015-04-07 16:03:16 +03:00
|
|
|
s->image_backing_format,
|
|
|
|
strlen(s->image_backing_format),
|
2012-02-02 15:32:31 +04:00
|
|
|
buflen);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
2010-01-12 14:55:17 +03:00
|
|
|
}
|
|
|
|
|
2012-02-02 15:32:31 +04:00
|
|
|
buf += ret;
|
|
|
|
buflen -= ret;
|
2010-01-12 14:55:17 +03:00
|
|
|
}
|
|
|
|
|
2019-01-15 21:02:40 +03:00
|
|
|
/* External data file header extension */
|
|
|
|
if (has_data_file(bs) && s->image_data_file) {
|
|
|
|
ret = header_ext_add(buf, QCOW2_EXT_MAGIC_DATA_FILE,
|
|
|
|
s->image_data_file, strlen(s->image_data_file),
|
|
|
|
buflen);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
buf += ret;
|
|
|
|
buflen -= ret;
|
|
|
|
}
|
|
|
|
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
/* Full disk encryption header pointer extension */
|
|
|
|
if (s->crypto_header.offset != 0) {
|
2018-10-09 20:24:59 +03:00
|
|
|
s->crypto_header.offset = cpu_to_be64(s->crypto_header.offset);
|
|
|
|
s->crypto_header.length = cpu_to_be64(s->crypto_header.length);
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER,
|
|
|
|
&s->crypto_header, sizeof(s->crypto_header),
|
|
|
|
buflen);
|
2018-10-09 20:24:59 +03:00
|
|
|
s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
|
|
|
|
s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
buf += ret;
|
|
|
|
buflen -= ret;
|
|
|
|
}
|
|
|
|
|
2020-03-24 20:42:32 +03:00
|
|
|
/*
|
|
|
|
* Feature table. A mere 8 feature names occupies 392 bytes, and
|
|
|
|
* when coupled with the v3 minimum header of 104 bytes plus the
|
|
|
|
* 8-byte end-of-extension marker, that would leave only 8 bytes
|
|
|
|
* for a backing file name in an image with 512-byte clusters.
|
|
|
|
* Thus, we choose to omit this header for cluster sizes 4k and
|
|
|
|
* smaller.
|
|
|
|
*/
|
|
|
|
if (s->qcow_version >= 3 && s->cluster_size > 4096) {
|
2020-03-24 20:42:31 +03:00
|
|
|
static const Qcow2Feature features[] = {
|
2015-12-02 21:11:04 +03:00
|
|
|
{
|
|
|
|
.type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
|
|
|
|
.bit = QCOW2_INCOMPAT_DIRTY_BITNR,
|
|
|
|
.name = "dirty bit",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
|
|
|
|
.bit = QCOW2_INCOMPAT_CORRUPT_BITNR,
|
|
|
|
.name = "corrupt bit",
|
|
|
|
},
|
2019-01-14 18:48:25 +03:00
|
|
|
{
|
|
|
|
.type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
|
|
|
|
.bit = QCOW2_INCOMPAT_DATA_FILE_BITNR,
|
|
|
|
.name = "external data file",
|
|
|
|
},
|
qcow2: introduce compression type feature
The patch adds some preparation parts for incompatible compression type
feature to qcow2 allowing the use different compression methods for
image clusters (de)compressing.
It is implied that the compression type is set on the image creation and
can be changed only later by image conversion, thus compression type
defines the only compression algorithm used for the image, and thus,
for all image clusters.
The goal of the feature is to add support of other compression methods
to qcow2. For example, ZSTD which is more effective on compression than ZLIB.
The default compression is ZLIB. Images created with ZLIB compression type
are backward compatible with older qemu versions.
Adding of the compression type breaks a number of tests because now the
compression type is reported on image creation and there are some changes
in the qcow2 header in size and offsets.
The tests are fixed in the following ways:
* filter out compression_type for many tests
* fix header size, feature table size and backing file offset
affected tests: 031, 036, 061, 080
header_size +=8: 1 byte compression type
7 bytes padding
feature_table += 48: incompatible feature compression type
backing_file_offset += 56 (8 + 48 -> header_change + feature_table_change)
* add "compression type" for test output matching when it isn't filtered
affected tests: 049, 060, 061, 065, 082, 085, 144, 182, 185, 198, 206,
242, 255, 274, 280
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
QAPI part:
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20200507082521.29210-2-dplotnikov@virtuozzo.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2020-05-07 11:25:18 +03:00
|
|
|
{
|
|
|
|
.type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
|
|
|
|
.bit = QCOW2_INCOMPAT_COMPRESSION_BITNR,
|
|
|
|
.name = "compression type",
|
|
|
|
},
|
2020-07-10 19:13:13 +03:00
|
|
|
{
|
|
|
|
.type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
|
|
|
|
.bit = QCOW2_INCOMPAT_EXTL2_BITNR,
|
|
|
|
.name = "extended L2 entries",
|
|
|
|
},
|
2015-12-02 21:11:04 +03:00
|
|
|
{
|
|
|
|
.type = QCOW2_FEAT_TYPE_COMPATIBLE,
|
|
|
|
.bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
|
|
|
|
.name = "lazy refcounts",
|
|
|
|
},
|
2020-03-24 20:42:31 +03:00
|
|
|
{
|
|
|
|
.type = QCOW2_FEAT_TYPE_AUTOCLEAR,
|
|
|
|
.bit = QCOW2_AUTOCLEAR_BITMAPS_BITNR,
|
|
|
|
.name = "bitmaps",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.type = QCOW2_FEAT_TYPE_AUTOCLEAR,
|
|
|
|
.bit = QCOW2_AUTOCLEAR_DATA_FILE_RAW_BITNR,
|
|
|
|
.name = "raw external data",
|
|
|
|
},
|
2015-12-02 21:11:04 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
|
|
|
|
features, sizeof(features), buflen);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
buf += ret;
|
|
|
|
buflen -= ret;
|
2012-04-12 17:20:27 +04:00
|
|
|
}
|
|
|
|
|
2017-06-28 15:05:08 +03:00
|
|
|
/* Bitmap extension */
|
|
|
|
if (s->nb_bitmaps > 0) {
|
|
|
|
Qcow2BitmapHeaderExt bitmaps_header = {
|
|
|
|
.nb_bitmaps = cpu_to_be32(s->nb_bitmaps),
|
|
|
|
.bitmap_directory_size =
|
|
|
|
cpu_to_be64(s->bitmap_directory_size),
|
|
|
|
.bitmap_directory_offset =
|
|
|
|
cpu_to_be64(s->bitmap_directory_offset)
|
|
|
|
};
|
|
|
|
ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS,
|
|
|
|
&bitmaps_header, sizeof(bitmaps_header),
|
|
|
|
buflen);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
buf += ret;
|
|
|
|
buflen -= ret;
|
|
|
|
}
|
|
|
|
|
2012-02-02 17:52:08 +04:00
|
|
|
/* Keep unknown header extensions */
|
|
|
|
QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
|
|
|
|
ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
buf += ret;
|
|
|
|
buflen -= ret;
|
|
|
|
}
|
|
|
|
|
2012-02-02 15:32:31 +04:00
|
|
|
/* End of header extensions */
|
|
|
|
ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
|
2010-01-12 14:55:17 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2012-02-02 15:32:31 +04:00
|
|
|
buf += ret;
|
|
|
|
buflen -= ret;
|
2010-01-12 14:55:17 +03:00
|
|
|
|
2012-02-02 15:32:31 +04:00
|
|
|
/* Backing file name */
|
2015-04-07 16:03:16 +03:00
|
|
|
if (s->image_backing_file) {
|
|
|
|
size_t backing_file_len = strlen(s->image_backing_file);
|
2012-02-02 15:32:31 +04:00
|
|
|
|
|
|
|
if (buflen < backing_file_len) {
|
|
|
|
ret = -ENOSPC;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2012-10-04 15:10:01 +04:00
|
|
|
/* Using strncpy is ok here, since buf is not NUL-terminated. */
|
2015-04-07 16:03:16 +03:00
|
|
|
strncpy(buf, s->image_backing_file, buflen);
|
2012-02-02 15:32:31 +04:00
|
|
|
|
|
|
|
header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
|
|
|
|
header->backing_file_size = cpu_to_be32(backing_file_len);
|
2010-01-12 14:55:17 +03:00
|
|
|
}
|
|
|
|
|
2012-02-02 15:32:31 +04:00
|
|
|
/* Write the new header */
|
2016-06-20 21:09:15 +03:00
|
|
|
ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
|
2010-01-12 14:55:17 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = 0;
|
|
|
|
fail:
|
2012-02-02 15:32:31 +04:00
|
|
|
qemu_vfree(header);
|
2010-01-12 14:55:17 +03:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int qcow2_change_backing_file(BlockDriverState *bs,
|
|
|
|
const char *backing_file, const char *backing_fmt)
|
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2015-04-07 16:03:16 +03:00
|
|
|
|
2019-02-22 16:29:38 +03:00
|
|
|
/* Adding a backing file means that the external data file alone won't be
|
|
|
|
* enough to make sense of the content */
|
|
|
|
if (backing_file && data_file_is_raw(bs)) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2016-04-06 19:32:48 +03:00
|
|
|
if (backing_file && strlen(backing_file) > 1023) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
block: Add BDS.auto_backing_file
If the backing file is overridden, this most probably does change the
guest-visible data of a BDS. Therefore, we will need to consider this
in bdrv_refresh_filename().
To see whether it has been overridden, we might want to compare
bs->backing_file and bs->backing->bs->filename. However,
bs->backing_file is changed by bdrv_set_backing_hd() (which is just used
to change the backing child at runtime, without modifying the image
header), so bs->backing_file most of the time simply contains a copy of
bs->backing->bs->filename anyway, so it is useless for such a
comparison.
This patch adds an auto_backing_file BDS field which contains the
backing file path as indicated by the image header, which is not changed
by bdrv_set_backing_hd().
Because of bdrv_refresh_filename() magic, however, a BDS's filename may
differ from what has been specified during bdrv_open(). Then, the
comparison between bs->auto_backing_file and bs->backing->bs->filename
may fail even though bs->backing was opened from bs->auto_backing_file.
To mitigate this, we can copy the real BDS's filename (after the whole
bdrv_open() and bdrv_refresh_filename() process) into
bs->auto_backing_file, if we know the former has been opened based on
the latter. This is only possible if no options modifying the backing
file's behavior have been specified, though. To simplify things, this
patch only copies the filename from the backing file if no options have
been specified for it at all.
Furthermore, there are cases where an overlay is created by qemu which
already contains a BDS's filename (e.g. in blockdev-snapshot-sync). We
do not need to worry about updating the overlay's bs->auto_backing_file
there, because we actually wrote a post-bdrv_refresh_filename() filename
into the image header.
So all in all, there will be false negatives where (as of a future
patch) bdrv_refresh_filename() will assume that the backing file differs
from what was specified in the image header, even though it really does
not. However, these cases should be limited to where (1) the user
actually did override something in the backing chain (e.g. by specifying
options for the backing file), or (2) the user executed a QMP command to
change some node's backing file (e.g. change-backing-file or
block-commit with @backing-file given) where the given filename does not
happen to coincide with qemu's idea of the backing BDS's filename.
Then again, (1) really is limited to -drive. With -blockdev or
blockdev-add, you have to adhere to the schema, so a user cannot give
partial "unimportant" options (e.g. by just setting backing.node-name
and leaving the rest to the image header). Therefore, trying to fix
this would mean trying to fix something for -drive only.
To improve on (2), we would need a full infrastructure to "canonicalize"
an arbitrary filename (+ options), so it can be compared against
another. That seems a bit over the top, considering that filenames
nowadays are there mostly for the user's entertainment.
Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20190201192935.18394-5-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2019-02-01 22:29:08 +03:00
|
|
|
pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
|
|
|
|
backing_file ?: "");
|
2012-02-02 15:32:31 +04:00
|
|
|
pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
|
|
|
|
pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
|
|
|
|
|
2015-04-07 16:03:16 +03:00
|
|
|
g_free(s->image_backing_file);
|
|
|
|
g_free(s->image_backing_format);
|
|
|
|
|
|
|
|
s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL;
|
|
|
|
s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL;
|
|
|
|
|
2012-02-02 15:32:31 +04:00
|
|
|
return qcow2_update_header(bs);
|
2010-01-12 14:55:17 +03:00
|
|
|
}
|
|
|
|
|
2018-01-10 19:55:16 +03:00
|
|
|
static int qcow2_set_up_encryption(BlockDriverState *bs,
|
|
|
|
QCryptoBlockCreateOptions *cryptoopts,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
QCryptoBlock *crypto = NULL;
|
|
|
|
int fmt, ret;
|
|
|
|
|
|
|
|
switch (cryptoopts->format) {
|
|
|
|
case Q_CRYPTO_BLOCK_FORMAT_LUKS:
|
|
|
|
fmt = QCOW_CRYPT_LUKS;
|
|
|
|
break;
|
|
|
|
case Q_CRYPTO_BLOCK_FORMAT_QCOW:
|
|
|
|
fmt = QCOW_CRYPT_AES;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
error_setg(errp, "Crypto format not supported in qcow2");
|
|
|
|
return -EINVAL;
|
2017-06-23 19:24:10 +03:00
|
|
|
}
|
2018-01-10 19:55:16 +03:00
|
|
|
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
s->crypt_method_header = fmt;
|
2017-06-23 19:24:10 +03:00
|
|
|
|
2017-06-23 19:24:17 +03:00
|
|
|
crypto = qcrypto_block_create(cryptoopts, "encrypt.",
|
qcow2: add support for LUKS encryption format
This adds support for using LUKS as an encryption format
with the qcow2 file, using the new encrypt.format parameter
to request "luks" format. e.g.
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encrypt.format=luks,encrypt.key-secret=sec0 \
test.qcow2 10G
The legacy "encryption=on" parameter still results in
creation of the old qcow2 AES format (and is equivalent
to the new 'encryption-format=aes'). e.g. the following are
equivalent:
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption=on,encrypt.key-secret=sec0 \
test.qcow2 10G
# qemu-img create --object secret,data=123456,id=sec0 \
-f qcow2 -o encryption-format=aes,encrypt.key-secret=sec0 \
test.qcow2 10G
With the LUKS format it is necessary to store the LUKS
partition header and key material in the QCow2 file. This
data can be many MB in size, so cannot go into the QCow2
header region directly. Thus the spec defines a FDE
(Full Disk Encryption) header extension that specifies
the offset of a set of clusters to hold the FDE headers,
as well as the length of that region. The LUKS header is
thus stored in these extra allocated clusters before the
main image payload.
Aside from all the cryptographic differences implied by
use of the LUKS format, there is one further key difference
between the use of legacy AES and LUKS encryption in qcow2.
For LUKS, the initialiazation vectors are generated using
the host physical sector as the input, rather than the
guest virtual sector. This guarantees unique initialization
vectors for all sectors when qcow2 internal snapshots are
used, thus giving stronger protection against watermarking
attacks.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170623162419.26068-14-berrange@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-06-23 19:24:12 +03:00
|
|
|
qcow2_crypto_hdr_init_func,
|
|
|
|
qcow2_crypto_hdr_write_func,
|
2017-06-23 19:24:10 +03:00
|
|
|
bs, errp);
|
|
|
|
if (!crypto) {
|
2018-01-10 19:55:16 +03:00
|
|
|
return -EINVAL;
|
2017-06-23 19:24:10 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
ret = qcow2_update_header(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Could not write encryption header");
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2018-01-10 19:55:16 +03:00
|
|
|
ret = 0;
|
2017-06-23 19:24:10 +03:00
|
|
|
out:
|
|
|
|
qcrypto_block_free(crypto);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2017-06-13 23:21:00 +03:00
|
|
|
/**
|
|
|
|
* Preallocates metadata structures for data clusters between @offset (in the
|
|
|
|
* guest disk) and @new_length (which is thus generally the new guest disk
|
|
|
|
* size).
|
|
|
|
*
|
|
|
|
* Returns: 0 on success, -errno on failure.
|
|
|
|
*/
|
2018-06-26 16:52:13 +03:00
|
|
|
static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset,
|
2019-04-15 17:34:30 +03:00
|
|
|
uint64_t new_length, PreallocMode mode,
|
|
|
|
Error **errp)
|
2009-08-17 17:50:10 +04:00
|
|
|
{
|
2019-04-15 18:54:50 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2016-06-01 17:55:05 +03:00
|
|
|
uint64_t bytes;
|
2012-12-07 21:08:45 +04:00
|
|
|
uint64_t host_offset = 0;
|
2019-04-15 17:34:30 +03:00
|
|
|
int64_t file_length;
|
2016-06-01 17:55:05 +03:00
|
|
|
unsigned int cur_bytes;
|
2010-01-20 17:03:01 +03:00
|
|
|
int ret;
|
2020-09-08 17:08:27 +03:00
|
|
|
QCowL2Meta *meta = NULL, *m;
|
2009-08-17 17:50:10 +04:00
|
|
|
|
2017-06-13 23:21:00 +03:00
|
|
|
assert(offset <= new_length);
|
|
|
|
bytes = new_length - offset;
|
2009-08-17 17:50:10 +04:00
|
|
|
|
2016-06-01 17:55:05 +03:00
|
|
|
while (bytes) {
|
2019-04-15 17:25:01 +03:00
|
|
|
cur_bytes = MIN(bytes, QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size));
|
2020-09-11 17:09:42 +03:00
|
|
|
ret = qcow2_alloc_host_offset(bs, offset, &cur_bytes,
|
|
|
|
&host_offset, &meta);
|
2010-01-20 17:03:01 +03:00
|
|
|
if (ret < 0) {
|
2019-04-15 17:56:07 +03:00
|
|
|
error_setg_errno(errp, -ret, "Allocating clusters failed");
|
2020-09-08 17:08:27 +03:00
|
|
|
goto out;
|
2009-08-17 17:50:10 +04:00
|
|
|
}
|
|
|
|
|
2020-09-08 17:08:27 +03:00
|
|
|
for (m = meta; m != NULL; m = m->next) {
|
|
|
|
m->prealloc = true;
|
|
|
|
}
|
2014-04-01 13:12:57 +04:00
|
|
|
|
2020-09-08 17:08:27 +03:00
|
|
|
ret = qcow2_handle_l2meta(bs, &meta, true);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Mapping clusters failed");
|
|
|
|
goto out;
|
2012-12-07 21:08:46 +04:00
|
|
|
}
|
2009-08-31 18:48:49 +04:00
|
|
|
|
2009-08-17 17:50:10 +04:00
|
|
|
/* TODO Preallocate data if requested */
|
|
|
|
|
2016-06-01 17:55:05 +03:00
|
|
|
bytes -= cur_bytes;
|
|
|
|
offset += cur_bytes;
|
2009-08-17 17:50:10 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* It is expected that the image file is large enough to actually contain
|
|
|
|
* all of the allocated clusters (otherwise we get failing reads after
|
|
|
|
* EOF). Extend the image to the last allocated sector.
|
|
|
|
*/
|
2019-04-15 17:34:30 +03:00
|
|
|
file_length = bdrv_getlength(s->data_file->bs);
|
|
|
|
if (file_length < 0) {
|
|
|
|
error_setg_errno(errp, -file_length, "Could not get file size");
|
2020-09-08 17:08:27 +03:00
|
|
|
ret = file_length;
|
|
|
|
goto out;
|
2019-04-15 17:34:30 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (host_offset + cur_bytes > file_length) {
|
|
|
|
if (mode == PREALLOC_MODE_METADATA) {
|
|
|
|
mode = PREALLOC_MODE_OFF;
|
|
|
|
}
|
2019-09-18 12:51:40 +03:00
|
|
|
ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false,
|
2020-04-24 15:54:40 +03:00
|
|
|
mode, 0, errp);
|
2010-06-22 18:59:46 +04:00
|
|
|
if (ret < 0) {
|
2020-09-08 17:08:27 +03:00
|
|
|
goto out;
|
2010-06-22 18:59:46 +04:00
|
|
|
}
|
2009-08-17 17:50:10 +04:00
|
|
|
}
|
|
|
|
|
2020-09-08 17:08:27 +03:00
|
|
|
ret = 0;
|
|
|
|
|
|
|
|
out:
|
|
|
|
qcow2_handle_l2meta(bs, &meta, false);
|
|
|
|
return ret;
|
2009-08-17 17:50:10 +04:00
|
|
|
}
|
|
|
|
|
2017-07-05 15:57:33 +03:00
|
|
|
/* qcow2_refcount_metadata_size:
|
|
|
|
* @clusters: number of clusters to refcount (including data and L1/L2 tables)
|
|
|
|
* @cluster_size: size of a cluster, in bytes
|
|
|
|
* @refcount_order: refcount bits power-of-2 exponent
|
2017-06-13 23:21:03 +03:00
|
|
|
* @generous_increase: allow for the refcount table to be 1.5x as large as it
|
|
|
|
* needs to be
|
2017-07-05 15:57:33 +03:00
|
|
|
*
|
|
|
|
* Returns: Number of bytes required for refcount blocks and table metadata.
|
|
|
|
*/
|
2017-06-13 23:21:03 +03:00
|
|
|
int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
|
|
|
|
int refcount_order, bool generous_increase,
|
|
|
|
uint64_t *refblock_count)
|
2017-07-05 15:57:33 +03:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Every host cluster is reference-counted, including metadata (even
|
|
|
|
* refcount metadata is recursively included).
|
|
|
|
*
|
|
|
|
* An accurate formula for the size of refcount metadata size is difficult
|
|
|
|
* to derive. An easier method of calculation is finding the fixed point
|
|
|
|
* where no further refcount blocks or table clusters are required to
|
|
|
|
* reference count every cluster.
|
|
|
|
*/
|
2020-08-28 14:08:28 +03:00
|
|
|
int64_t blocks_per_table_cluster = cluster_size / REFTABLE_ENTRY_SIZE;
|
2017-07-05 15:57:33 +03:00
|
|
|
int64_t refcounts_per_block = cluster_size * 8 / (1 << refcount_order);
|
|
|
|
int64_t table = 0; /* number of refcount table clusters */
|
|
|
|
int64_t blocks = 0; /* number of refcount block clusters */
|
|
|
|
int64_t last;
|
|
|
|
int64_t n = 0;
|
|
|
|
|
|
|
|
do {
|
|
|
|
last = n;
|
|
|
|
blocks = DIV_ROUND_UP(clusters + table + blocks, refcounts_per_block);
|
|
|
|
table = DIV_ROUND_UP(blocks, blocks_per_table_cluster);
|
|
|
|
n = clusters + blocks + table;
|
2017-06-13 23:21:03 +03:00
|
|
|
|
|
|
|
if (n == last && generous_increase) {
|
|
|
|
clusters += DIV_ROUND_UP(table, 2);
|
|
|
|
n = 0; /* force another loop */
|
|
|
|
generous_increase = false;
|
|
|
|
}
|
2017-07-05 15:57:33 +03:00
|
|
|
} while (n != last);
|
|
|
|
|
2017-06-13 23:21:03 +03:00
|
|
|
if (refblock_count) {
|
|
|
|
*refblock_count = blocks;
|
|
|
|
}
|
|
|
|
|
2017-07-05 15:57:33 +03:00
|
|
|
return (blocks + table) * cluster_size;
|
|
|
|
}
|
|
|
|
|
2017-07-05 15:57:32 +03:00
|
|
|
/**
|
|
|
|
* qcow2_calc_prealloc_size:
|
|
|
|
* @total_size: virtual disk size in bytes
|
|
|
|
* @cluster_size: cluster size in bytes
|
|
|
|
* @refcount_order: refcount bits power-of-2 exponent
|
2020-07-10 19:13:11 +03:00
|
|
|
* @extended_l2: true if the image has extended L2 entries
|
2017-07-05 15:57:32 +03:00
|
|
|
*
|
|
|
|
* Returns: Total number of bytes required for the fully allocated image
|
|
|
|
* (including metadata).
|
|
|
|
*/
|
|
|
|
static int64_t qcow2_calc_prealloc_size(int64_t total_size,
|
|
|
|
size_t cluster_size,
|
2020-07-10 19:13:11 +03:00
|
|
|
int refcount_order,
|
|
|
|
bool extended_l2)
|
2017-07-05 15:57:32 +03:00
|
|
|
{
|
|
|
|
int64_t meta_size = 0;
|
2017-07-05 15:57:33 +03:00
|
|
|
uint64_t nl1e, nl2e;
|
2018-02-15 16:10:08 +03:00
|
|
|
int64_t aligned_total_size = ROUND_UP(total_size, cluster_size);
|
2020-07-10 19:13:11 +03:00
|
|
|
size_t l2e_size = extended_l2 ? L2E_SIZE_EXTENDED : L2E_SIZE_NORMAL;
|
2017-07-05 15:57:32 +03:00
|
|
|
|
|
|
|
/* header: 1 cluster */
|
|
|
|
meta_size += cluster_size;
|
|
|
|
|
|
|
|
/* total size of L2 tables */
|
|
|
|
nl2e = aligned_total_size / cluster_size;
|
2020-07-10 19:13:11 +03:00
|
|
|
nl2e = ROUND_UP(nl2e, cluster_size / l2e_size);
|
|
|
|
meta_size += nl2e * l2e_size;
|
2017-07-05 15:57:32 +03:00
|
|
|
|
|
|
|
/* total size of L1 tables */
|
2020-07-10 19:13:11 +03:00
|
|
|
nl1e = nl2e * l2e_size / cluster_size;
|
2020-08-28 14:08:28 +03:00
|
|
|
nl1e = ROUND_UP(nl1e, cluster_size / L1E_SIZE);
|
|
|
|
meta_size += nl1e * L1E_SIZE;
|
2017-07-05 15:57:32 +03:00
|
|
|
|
2017-07-05 15:57:33 +03:00
|
|
|
/* total size of refcount table and blocks */
|
|
|
|
meta_size += qcow2_refcount_metadata_size(
|
|
|
|
(meta_size + aligned_total_size) / cluster_size,
|
2017-06-13 23:21:03 +03:00
|
|
|
cluster_size, refcount_order, false, NULL);
|
2017-07-05 15:57:32 +03:00
|
|
|
|
|
|
|
return meta_size + aligned_total_size;
|
|
|
|
}
|
|
|
|
|
2020-07-10 19:13:13 +03:00
|
|
|
static bool validate_cluster_size(size_t cluster_size, bool extended_l2,
|
|
|
|
Error **errp)
|
2010-06-11 23:37:37 +04:00
|
|
|
{
|
2018-01-09 21:44:33 +03:00
|
|
|
int cluster_bits = ctz32(cluster_size);
|
2010-06-11 23:37:37 +04:00
|
|
|
if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
|
|
|
|
(1 << cluster_bits) != cluster_size)
|
|
|
|
{
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg(errp, "Cluster size must be a power of two between %d and "
|
|
|
|
"%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
|
2018-01-09 21:44:33 +03:00
|
|
|
return false;
|
|
|
|
}
|
2020-07-10 19:13:13 +03:00
|
|
|
|
|
|
|
if (extended_l2) {
|
|
|
|
unsigned min_cluster_size =
|
|
|
|
(1 << MIN_CLUSTER_BITS) * QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER;
|
|
|
|
if (cluster_size < min_cluster_size) {
|
|
|
|
error_setg(errp, "Extended L2 entries are only supported with "
|
|
|
|
"cluster sizes of at least %u bytes", min_cluster_size);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-09 21:44:33 +03:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-07-10 19:13:13 +03:00
|
|
|
static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, bool extended_l2,
|
|
|
|
Error **errp)
|
2018-01-09 21:44:33 +03:00
|
|
|
{
|
|
|
|
size_t cluster_size;
|
|
|
|
|
|
|
|
cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
|
|
|
|
DEFAULT_CLUSTER_SIZE);
|
2020-07-10 19:13:13 +03:00
|
|
|
if (!validate_cluster_size(cluster_size, extended_l2, errp)) {
|
2017-07-05 15:57:34 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return cluster_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp)
|
|
|
|
{
|
|
|
|
char *buf;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL);
|
|
|
|
if (!buf) {
|
|
|
|
ret = 3; /* default */
|
|
|
|
} else if (!strcmp(buf, "0.10")) {
|
|
|
|
ret = 2;
|
|
|
|
} else if (!strcmp(buf, "1.1")) {
|
|
|
|
ret = 3;
|
|
|
|
} else {
|
|
|
|
error_setg(errp, "Invalid compatibility level: '%s'", buf);
|
|
|
|
ret = -EINVAL;
|
|
|
|
}
|
|
|
|
g_free(buf);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
uint64_t refcount_bits;
|
|
|
|
|
|
|
|
refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16);
|
|
|
|
if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) {
|
|
|
|
error_setg(errp, "Refcount width must be a power of two and may not "
|
|
|
|
"exceed 64 bits");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (version < 3 && refcount_bits != 16) {
|
|
|
|
error_setg(errp, "Different refcount widths than 16 bits require "
|
|
|
|
"compatibility level 1.1 or above (use compat=1.1 or "
|
|
|
|
"greater)");
|
|
|
|
return 0;
|
2010-06-11 23:37:37 +04:00
|
|
|
}
|
|
|
|
|
2017-07-05 15:57:34 +03:00
|
|
|
return refcount_bits;
|
|
|
|
}
|
|
|
|
|
2018-01-18 15:43:46 +03:00
|
|
|
static int coroutine_fn
|
2018-01-10 19:55:16 +03:00
|
|
|
qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
|
2017-07-05 15:57:34 +03:00
|
|
|
{
|
2018-01-09 21:44:33 +03:00
|
|
|
BlockdevCreateOptionsQcow2 *qcow2_opts;
|
2017-07-05 15:57:34 +03:00
|
|
|
QDict *options;
|
|
|
|
|
2010-06-11 23:37:37 +04:00
|
|
|
/*
|
|
|
|
* Open the image file and write a minimal qcow2 header.
|
|
|
|
*
|
|
|
|
* We keep things simple and start with a zero-sized image. We also
|
|
|
|
* do without refcount blocks or a L1 table for now. We'll fix the
|
|
|
|
* inconsistency later.
|
|
|
|
*
|
|
|
|
* We do need a refcount table because growing the refcount table means
|
2020-03-24 20:42:30 +03:00
|
|
|
* allocating two new refcount blocks - the second of which would be at
|
2010-06-11 23:37:37 +04:00
|
|
|
* 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
|
|
|
|
* size for any qcow2 image.
|
|
|
|
*/
|
2018-01-10 17:52:33 +03:00
|
|
|
BlockBackend *blk = NULL;
|
|
|
|
BlockDriverState *bs = NULL;
|
2019-01-14 18:57:27 +03:00
|
|
|
BlockDriverState *data_bs = NULL;
|
2013-12-04 14:06:36 +04:00
|
|
|
QCowHeader *header;
|
2018-01-09 21:44:33 +03:00
|
|
|
size_t cluster_size;
|
|
|
|
int version;
|
|
|
|
int refcount_order;
|
2020-10-30 06:35:12 +03:00
|
|
|
uint64_t *refcount_table;
|
2010-06-11 23:37:37 +04:00
|
|
|
int ret;
|
qcow2: introduce compression type feature
The patch adds some preparation parts for incompatible compression type
feature to qcow2 allowing the use different compression methods for
image clusters (de)compressing.
It is implied that the compression type is set on the image creation and
can be changed only later by image conversion, thus compression type
defines the only compression algorithm used for the image, and thus,
for all image clusters.
The goal of the feature is to add support of other compression methods
to qcow2. For example, ZSTD which is more effective on compression than ZLIB.
The default compression is ZLIB. Images created with ZLIB compression type
are backward compatible with older qemu versions.
Adding of the compression type breaks a number of tests because now the
compression type is reported on image creation and there are some changes
in the qcow2 header in size and offsets.
The tests are fixed in the following ways:
* filter out compression_type for many tests
* fix header size, feature table size and backing file offset
affected tests: 031, 036, 061, 080
header_size +=8: 1 byte compression type
7 bytes padding
feature_table += 48: incompatible feature compression type
backing_file_offset += 56 (8 + 48 -> header_change + feature_table_change)
* add "compression type" for test output matching when it isn't filtered
affected tests: 049, 060, 061, 065, 082, 085, 144, 182, 185, 198, 206,
242, 255, 274, 280
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
QAPI part:
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20200507082521.29210-2-dplotnikov@virtuozzo.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2020-05-07 11:25:18 +03:00
|
|
|
uint8_t compression_type = QCOW2_COMPRESSION_TYPE_ZLIB;
|
2010-06-11 23:37:37 +04:00
|
|
|
|
2018-01-09 21:44:33 +03:00
|
|
|
assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2);
|
|
|
|
qcow2_opts = &create_options->u.qcow2;
|
|
|
|
|
2018-01-10 17:52:33 +03:00
|
|
|
bs = bdrv_open_blockdev_ref(qcow2_opts->file, errp);
|
|
|
|
if (bs == NULL) {
|
|
|
|
return -EIO;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Validate options and set default values */
|
2018-01-09 21:44:33 +03:00
|
|
|
if (!QEMU_IS_ALIGNED(qcow2_opts->size, BDRV_SECTOR_SIZE)) {
|
2020-01-18 22:09:30 +03:00
|
|
|
error_setg(errp, "Image size must be a multiple of %u bytes",
|
|
|
|
(unsigned) BDRV_SECTOR_SIZE);
|
2018-01-09 21:44:33 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (qcow2_opts->has_version) {
|
|
|
|
switch (qcow2_opts->version) {
|
|
|
|
case BLOCKDEV_QCOW2_VERSION_V2:
|
|
|
|
version = 2;
|
|
|
|
break;
|
|
|
|
case BLOCKDEV_QCOW2_VERSION_V3:
|
|
|
|
version = 3;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
g_assert_not_reached();
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
version = 3;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (qcow2_opts->has_cluster_size) {
|
|
|
|
cluster_size = qcow2_opts->cluster_size;
|
|
|
|
} else {
|
|
|
|
cluster_size = DEFAULT_CLUSTER_SIZE;
|
|
|
|
}
|
|
|
|
|
2020-07-10 19:13:13 +03:00
|
|
|
if (!qcow2_opts->has_extended_l2) {
|
|
|
|
qcow2_opts->extended_l2 = false;
|
|
|
|
}
|
|
|
|
if (qcow2_opts->extended_l2) {
|
|
|
|
if (version < 3) {
|
|
|
|
error_setg(errp, "Extended L2 entries are only supported with "
|
|
|
|
"compatibility level 1.1 and above (use version=v3 or "
|
|
|
|
"greater)");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!validate_cluster_size(cluster_size, qcow2_opts->extended_l2, errp)) {
|
2018-01-10 17:52:33 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
2018-01-09 21:44:33 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!qcow2_opts->has_preallocation) {
|
|
|
|
qcow2_opts->preallocation = PREALLOC_MODE_OFF;
|
|
|
|
}
|
|
|
|
if (qcow2_opts->has_backing_file &&
|
2020-07-10 19:13:14 +03:00
|
|
|
qcow2_opts->preallocation != PREALLOC_MODE_OFF &&
|
|
|
|
!qcow2_opts->extended_l2)
|
2018-01-09 21:44:33 +03:00
|
|
|
{
|
2020-07-10 19:13:14 +03:00
|
|
|
error_setg(errp, "Backing file and preallocation can only be used at "
|
|
|
|
"the same time if extended_l2 is on");
|
2018-01-10 17:52:33 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
2018-01-09 21:44:33 +03:00
|
|
|
}
|
|
|
|
if (qcow2_opts->has_backing_fmt && !qcow2_opts->has_backing_file) {
|
|
|
|
error_setg(errp, "Backing format cannot be used without backing file");
|
2018-01-10 17:52:33 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
2018-01-09 21:44:33 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!qcow2_opts->has_lazy_refcounts) {
|
|
|
|
qcow2_opts->lazy_refcounts = false;
|
|
|
|
}
|
|
|
|
if (version < 3 && qcow2_opts->lazy_refcounts) {
|
|
|
|
error_setg(errp, "Lazy refcounts only supported with compatibility "
|
2018-01-11 18:18:08 +03:00
|
|
|
"level 1.1 and above (use version=v3 or greater)");
|
2018-01-10 17:52:33 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
2018-01-09 21:44:33 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!qcow2_opts->has_refcount_bits) {
|
|
|
|
qcow2_opts->refcount_bits = 16;
|
|
|
|
}
|
|
|
|
if (qcow2_opts->refcount_bits > 64 ||
|
|
|
|
!is_power_of_2(qcow2_opts->refcount_bits))
|
|
|
|
{
|
|
|
|
error_setg(errp, "Refcount width must be a power of two and may not "
|
|
|
|
"exceed 64 bits");
|
2018-01-10 17:52:33 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
2018-01-09 21:44:33 +03:00
|
|
|
}
|
|
|
|
if (version < 3 && qcow2_opts->refcount_bits != 16) {
|
|
|
|
error_setg(errp, "Different refcount widths than 16 bits require "
|
2018-01-11 18:18:08 +03:00
|
|
|
"compatibility level 1.1 or above (use version=v3 or "
|
2018-01-09 21:44:33 +03:00
|
|
|
"greater)");
|
2018-01-10 17:52:33 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
2018-01-09 21:44:33 +03:00
|
|
|
}
|
|
|
|
refcount_order = ctz32(qcow2_opts->refcount_bits);
|
|
|
|
|
2019-02-22 16:29:38 +03:00
|
|
|
if (qcow2_opts->data_file_raw && !qcow2_opts->data_file) {
|
|
|
|
error_setg(errp, "data-file-raw requires data-file");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (qcow2_opts->data_file_raw && qcow2_opts->has_backing_file) {
|
|
|
|
error_setg(errp, "Backing file and data-file-raw cannot be used at "
|
|
|
|
"the same time");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
2021-03-26 17:55:08 +03:00
|
|
|
if (qcow2_opts->data_file_raw &&
|
|
|
|
qcow2_opts->preallocation == PREALLOC_MODE_OFF)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* data-file-raw means that "the external data file can be
|
|
|
|
* read as a consistent standalone raw image without looking
|
|
|
|
* at the qcow2 metadata." It does not say that the metadata
|
|
|
|
* must be ignored, though (and the qcow2 driver in fact does
|
|
|
|
* not ignore it), so the L1/L2 tables must be present and
|
|
|
|
* give a 1:1 mapping, so you get the same result regardless
|
|
|
|
* of whether you look at the metadata or whether you ignore
|
|
|
|
* it.
|
|
|
|
*/
|
|
|
|
qcow2_opts->preallocation = PREALLOC_MODE_METADATA;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Cannot use preallocation with backing files, but giving a
|
|
|
|
* backing file when specifying data_file_raw is an error
|
|
|
|
* anyway.
|
|
|
|
*/
|
|
|
|
assert(!qcow2_opts->has_backing_file);
|
|
|
|
}
|
2019-02-22 16:29:38 +03:00
|
|
|
|
2019-01-14 18:57:27 +03:00
|
|
|
if (qcow2_opts->data_file) {
|
|
|
|
if (version < 3) {
|
|
|
|
error_setg(errp, "External data files are only supported with "
|
|
|
|
"compatibility level 1.1 and above (use version=v3 or "
|
|
|
|
"greater)");
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
data_bs = bdrv_open_blockdev_ref(qcow2_opts->data_file, errp);
|
2019-03-13 17:22:38 +03:00
|
|
|
if (data_bs == NULL) {
|
2019-01-14 18:57:27 +03:00
|
|
|
ret = -EIO;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
2018-01-09 21:44:33 +03:00
|
|
|
|
qcow2: introduce compression type feature
The patch adds some preparation parts for incompatible compression type
feature to qcow2 allowing the use different compression methods for
image clusters (de)compressing.
It is implied that the compression type is set on the image creation and
can be changed only later by image conversion, thus compression type
defines the only compression algorithm used for the image, and thus,
for all image clusters.
The goal of the feature is to add support of other compression methods
to qcow2. For example, ZSTD which is more effective on compression than ZLIB.
The default compression is ZLIB. Images created with ZLIB compression type
are backward compatible with older qemu versions.
Adding of the compression type breaks a number of tests because now the
compression type is reported on image creation and there are some changes
in the qcow2 header in size and offsets.
The tests are fixed in the following ways:
* filter out compression_type for many tests
* fix header size, feature table size and backing file offset
affected tests: 031, 036, 061, 080
header_size +=8: 1 byte compression type
7 bytes padding
feature_table += 48: incompatible feature compression type
backing_file_offset += 56 (8 + 48 -> header_change + feature_table_change)
* add "compression type" for test output matching when it isn't filtered
affected tests: 049, 060, 061, 065, 082, 085, 144, 182, 185, 198, 206,
242, 255, 274, 280
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
QAPI part:
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20200507082521.29210-2-dplotnikov@virtuozzo.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2020-05-07 11:25:18 +03:00
|
|
|
if (qcow2_opts->has_compression_type &&
|
|
|
|
qcow2_opts->compression_type != QCOW2_COMPRESSION_TYPE_ZLIB) {
|
|
|
|
|
|
|
|
ret = -EINVAL;
|
|
|
|
|
|
|
|
if (version < 3) {
|
|
|
|
error_setg(errp, "Non-zlib compression type is only supported with "
|
|
|
|
"compatibility level 1.1 and above (use version=v3 or "
|
|
|
|
"greater)");
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (qcow2_opts->compression_type) {
|
2020-05-07 11:25:20 +03:00
|
|
|
#ifdef CONFIG_ZSTD
|
|
|
|
case QCOW2_COMPRESSION_TYPE_ZSTD:
|
|
|
|
break;
|
|
|
|
#endif
|
qcow2: introduce compression type feature
The patch adds some preparation parts for incompatible compression type
feature to qcow2 allowing the use different compression methods for
image clusters (de)compressing.
It is implied that the compression type is set on the image creation and
can be changed only later by image conversion, thus compression type
defines the only compression algorithm used for the image, and thus,
for all image clusters.
The goal of the feature is to add support of other compression methods
to qcow2. For example, ZSTD which is more effective on compression than ZLIB.
The default compression is ZLIB. Images created with ZLIB compression type
are backward compatible with older qemu versions.
Adding of the compression type breaks a number of tests because now the
compression type is reported on image creation and there are some changes
in the qcow2 header in size and offsets.
The tests are fixed in the following ways:
* filter out compression_type for many tests
* fix header size, feature table size and backing file offset
affected tests: 031, 036, 061, 080
header_size +=8: 1 byte compression type
7 bytes padding
feature_table += 48: incompatible feature compression type
backing_file_offset += 56 (8 + 48 -> header_change + feature_table_change)
* add "compression type" for test output matching when it isn't filtered
affected tests: 049, 060, 061, 065, 082, 085, 144, 182, 185, 198, 206,
242, 255, 274, 280
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
QAPI part:
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20200507082521.29210-2-dplotnikov@virtuozzo.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2020-05-07 11:25:18 +03:00
|
|
|
default:
|
|
|
|
error_setg(errp, "Unknown compression type");
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
compression_type = qcow2_opts->compression_type;
|
|
|
|
}
|
|
|
|
|
2018-01-09 21:44:33 +03:00
|
|
|
/* Create BlockBackend to write to the image */
|
2020-04-28 22:26:46 +03:00
|
|
|
blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
|
|
|
|
errp);
|
|
|
|
if (!blk) {
|
|
|
|
ret = -EPERM;
|
2018-01-09 19:35:26 +03:00
|
|
|
goto out;
|
2010-06-11 23:37:37 +04:00
|
|
|
}
|
2016-03-08 17:57:05 +03:00
|
|
|
blk_set_allow_write_beyond_eof(blk, true);
|
|
|
|
|
2010-06-11 23:37:37 +04:00
|
|
|
/* Write the header */
|
2013-12-04 14:06:36 +04:00
|
|
|
QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
|
|
|
|
header = g_malloc0(cluster_size);
|
|
|
|
*header = (QCowHeader) {
|
|
|
|
.magic = cpu_to_be32(QCOW_MAGIC),
|
|
|
|
.version = cpu_to_be32(version),
|
2017-07-05 15:57:34 +03:00
|
|
|
.cluster_bits = cpu_to_be32(ctz32(cluster_size)),
|
2013-12-04 14:06:36 +04:00
|
|
|
.size = cpu_to_be64(0),
|
|
|
|
.l1_table_offset = cpu_to_be64(0),
|
|
|
|
.l1_size = cpu_to_be32(0),
|
|
|
|
.refcount_table_offset = cpu_to_be64(cluster_size),
|
|
|
|
.refcount_table_clusters = cpu_to_be32(1),
|
2015-02-19 01:40:46 +03:00
|
|
|
.refcount_order = cpu_to_be32(refcount_order),
|
qcow2: introduce compression type feature
The patch adds some preparation parts for incompatible compression type
feature to qcow2 allowing the use different compression methods for
image clusters (de)compressing.
It is implied that the compression type is set on the image creation and
can be changed only later by image conversion, thus compression type
defines the only compression algorithm used for the image, and thus,
for all image clusters.
The goal of the feature is to add support of other compression methods
to qcow2. For example, ZSTD which is more effective on compression than ZLIB.
The default compression is ZLIB. Images created with ZLIB compression type
are backward compatible with older qemu versions.
Adding of the compression type breaks a number of tests because now the
compression type is reported on image creation and there are some changes
in the qcow2 header in size and offsets.
The tests are fixed in the following ways:
* filter out compression_type for many tests
* fix header size, feature table size and backing file offset
affected tests: 031, 036, 061, 080
header_size +=8: 1 byte compression type
7 bytes padding
feature_table += 48: incompatible feature compression type
backing_file_offset += 56 (8 + 48 -> header_change + feature_table_change)
* add "compression type" for test output matching when it isn't filtered
affected tests: 049, 060, 061, 065, 082, 085, 144, 182, 185, 198, 206,
242, 255, 274, 280
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
QAPI part:
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20200507082521.29210-2-dplotnikov@virtuozzo.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2020-05-07 11:25:18 +03:00
|
|
|
/* don't deal with endianness since compression_type is 1 byte long */
|
|
|
|
.compression_type = compression_type,
|
2013-12-04 14:06:36 +04:00
|
|
|
.header_length = cpu_to_be32(sizeof(*header)),
|
|
|
|
};
|
2010-06-11 23:37:37 +04:00
|
|
|
|
2017-06-23 19:24:10 +03:00
|
|
|
/* We'll update this to correct value later */
|
|
|
|
header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
|
2010-06-11 23:37:37 +04:00
|
|
|
|
2018-01-09 21:44:33 +03:00
|
|
|
if (qcow2_opts->lazy_refcounts) {
|
2013-12-04 14:06:36 +04:00
|
|
|
header->compatible_features |=
|
2012-07-27 12:05:22 +04:00
|
|
|
cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
|
|
|
|
}
|
2019-01-14 18:57:27 +03:00
|
|
|
if (data_bs) {
|
|
|
|
header->incompatible_features |=
|
|
|
|
cpu_to_be64(QCOW2_INCOMPAT_DATA_FILE);
|
|
|
|
}
|
2019-02-22 16:29:38 +03:00
|
|
|
if (qcow2_opts->data_file_raw) {
|
|
|
|
header->autoclear_features |=
|
|
|
|
cpu_to_be64(QCOW2_AUTOCLEAR_DATA_FILE_RAW);
|
|
|
|
}
|
qcow2: introduce compression type feature
The patch adds some preparation parts for incompatible compression type
feature to qcow2 allowing the use different compression methods for
image clusters (de)compressing.
It is implied that the compression type is set on the image creation and
can be changed only later by image conversion, thus compression type
defines the only compression algorithm used for the image, and thus,
for all image clusters.
The goal of the feature is to add support of other compression methods
to qcow2. For example, ZSTD which is more effective on compression than ZLIB.
The default compression is ZLIB. Images created with ZLIB compression type
are backward compatible with older qemu versions.
Adding of the compression type breaks a number of tests because now the
compression type is reported on image creation and there are some changes
in the qcow2 header in size and offsets.
The tests are fixed in the following ways:
* filter out compression_type for many tests
* fix header size, feature table size and backing file offset
affected tests: 031, 036, 061, 080
header_size +=8: 1 byte compression type
7 bytes padding
feature_table += 48: incompatible feature compression type
backing_file_offset += 56 (8 + 48 -> header_change + feature_table_change)
* add "compression type" for test output matching when it isn't filtered
affected tests: 049, 060, 061, 065, 082, 085, 144, 182, 185, 198, 206,
242, 255, 274, 280
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
QAPI part:
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20200507082521.29210-2-dplotnikov@virtuozzo.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2020-05-07 11:25:18 +03:00
|
|
|
if (compression_type != QCOW2_COMPRESSION_TYPE_ZLIB) {
|
|
|
|
header->incompatible_features |=
|
|
|
|
cpu_to_be64(QCOW2_INCOMPAT_COMPRESSION);
|
|
|
|
}
|
2012-07-27 12:05:22 +04:00
|
|
|
|
2020-07-10 19:13:13 +03:00
|
|
|
if (qcow2_opts->extended_l2) {
|
|
|
|
header->incompatible_features |=
|
|
|
|
cpu_to_be64(QCOW2_INCOMPAT_EXTL2);
|
|
|
|
}
|
|
|
|
|
2016-05-06 19:26:27 +03:00
|
|
|
ret = blk_pwrite(blk, 0, header, cluster_size, 0);
|
2013-12-04 14:06:36 +04:00
|
|
|
g_free(header);
|
2010-06-11 23:37:37 +04:00
|
|
|
if (ret < 0) {
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg_errno(errp, -ret, "Could not write qcow2 header");
|
2010-06-11 23:37:37 +04:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2014-03-28 21:06:31 +04:00
|
|
|
/* Write a refcount table with one refcount block */
|
|
|
|
refcount_table = g_malloc0(2 * cluster_size);
|
|
|
|
refcount_table[0] = cpu_to_be64(2 * cluster_size);
|
2016-05-06 19:26:27 +03:00
|
|
|
ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0);
|
2011-08-21 07:09:37 +04:00
|
|
|
g_free(refcount_table);
|
2010-06-11 23:37:37 +04:00
|
|
|
|
|
|
|
if (ret < 0) {
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg_errno(errp, -ret, "Could not write refcount table");
|
2010-06-11 23:37:37 +04:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2016-03-08 17:57:05 +03:00
|
|
|
blk_unref(blk);
|
|
|
|
blk = NULL;
|
2010-06-11 23:37:37 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* And now open the image and make it consistent first (i.e. increase the
|
|
|
|
* refcount of the cluster that is occupied by the header and the refcount
|
|
|
|
* table)
|
|
|
|
*/
|
2015-08-26 20:47:48 +03:00
|
|
|
options = qdict_new();
|
2017-04-28 00:58:17 +03:00
|
|
|
qdict_put_str(options, "driver", "qcow2");
|
2018-01-09 19:35:26 +03:00
|
|
|
qdict_put_str(options, "file", bs->node_name);
|
2019-01-14 18:57:27 +03:00
|
|
|
if (data_bs) {
|
|
|
|
qdict_put_str(options, "data-file", data_bs->node_name);
|
|
|
|
}
|
2018-01-09 19:35:26 +03:00
|
|
|
blk = blk_new_open(NULL, NULL, options,
|
2017-02-17 17:07:38 +03:00
|
|
|
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
|
2020-07-07 19:06:03 +03:00
|
|
|
errp);
|
2016-03-08 17:57:05 +03:00
|
|
|
if (blk == NULL) {
|
|
|
|
ret = -EIO;
|
2010-06-11 23:37:37 +04:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2016-03-08 17:57:05 +03:00
|
|
|
ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
|
2010-06-11 23:37:37 +04:00
|
|
|
if (ret < 0) {
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
|
|
|
|
"header and refcount table");
|
2010-06-11 23:37:37 +04:00
|
|
|
goto out;
|
|
|
|
|
|
|
|
} else if (ret != 0) {
|
|
|
|
error_report("Huh, first cluster in empty image is already in use?");
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
2019-01-15 21:02:40 +03:00
|
|
|
/* Set the external data file if necessary */
|
|
|
|
if (data_bs) {
|
|
|
|
BDRVQcow2State *s = blk_bs(blk)->opaque;
|
|
|
|
s->image_data_file = g_strdup(data_bs->filename);
|
|
|
|
}
|
|
|
|
|
2015-12-02 20:34:39 +03:00
|
|
|
/* Create a full header (including things like feature table) */
|
2016-03-08 17:57:05 +03:00
|
|
|
ret = qcow2_update_header(blk_bs(blk));
|
2015-12-02 20:34:39 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Could not update qcow2 header");
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2010-06-11 23:37:37 +04:00
|
|
|
/* Okay, now that we have a valid image, let's give it the right size */
|
2019-09-18 12:51:40 +03:00
|
|
|
ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation,
|
2020-04-24 15:54:41 +03:00
|
|
|
0, errp);
|
2010-06-11 23:37:37 +04:00
|
|
|
if (ret < 0) {
|
2017-03-28 23:51:27 +03:00
|
|
|
error_prepend(errp, "Could not resize image: ");
|
2010-06-11 23:37:37 +04:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2020-03-24 20:42:30 +03:00
|
|
|
/* Want a backing file? There you go. */
|
2018-01-09 21:44:33 +03:00
|
|
|
if (qcow2_opts->has_backing_file) {
|
|
|
|
const char *backing_format = NULL;
|
|
|
|
|
|
|
|
if (qcow2_opts->has_backing_fmt) {
|
|
|
|
backing_format = BlockdevDriver_str(qcow2_opts->backing_fmt);
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = bdrv_change_backing_file(blk_bs(blk), qcow2_opts->backing_file,
|
2020-07-06 23:39:53 +03:00
|
|
|
backing_format, false);
|
2010-06-11 23:37:37 +04:00
|
|
|
if (ret < 0) {
|
2013-09-05 11:40:43 +04:00
|
|
|
error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
|
2018-01-09 21:44:33 +03:00
|
|
|
"with format '%s'", qcow2_opts->backing_file,
|
|
|
|
backing_format);
|
2010-06-11 23:37:37 +04:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-23 19:24:10 +03:00
|
|
|
/* Want encryption? There you go. */
|
2018-01-10 19:55:16 +03:00
|
|
|
if (qcow2_opts->has_encrypt) {
|
|
|
|
ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp);
|
2017-06-23 19:24:10 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-03-08 17:57:05 +03:00
|
|
|
blk_unref(blk);
|
|
|
|
blk = NULL;
|
2013-10-24 22:35:06 +04:00
|
|
|
|
2017-06-23 19:24:10 +03:00
|
|
|
/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning.
|
|
|
|
* Using BDRV_O_NO_IO, since encryption is now setup we don't want to
|
|
|
|
* have to setup decryption context. We're not doing any I/O on the top
|
|
|
|
* level BlockDriverState, only lower layers, where BDRV_O_NO_IO does
|
|
|
|
* not have effect.
|
|
|
|
*/
|
2015-08-26 20:47:48 +03:00
|
|
|
options = qdict_new();
|
2017-04-28 00:58:17 +03:00
|
|
|
qdict_put_str(options, "driver", "qcow2");
|
2018-01-09 19:35:26 +03:00
|
|
|
qdict_put_str(options, "file", bs->node_name);
|
2019-01-14 18:57:27 +03:00
|
|
|
if (data_bs) {
|
|
|
|
qdict_put_str(options, "data-file", data_bs->node_name);
|
|
|
|
}
|
2018-01-09 19:35:26 +03:00
|
|
|
blk = blk_new_open(NULL, NULL, options,
|
2017-06-23 19:24:10 +03:00
|
|
|
BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO,
|
2020-07-07 19:06:03 +03:00
|
|
|
errp);
|
2016-03-08 17:57:05 +03:00
|
|
|
if (blk == NULL) {
|
|
|
|
ret = -EIO;
|
2013-10-24 22:35:06 +04:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2010-06-11 23:37:37 +04:00
|
|
|
ret = 0;
|
|
|
|
out:
|
2018-01-10 17:52:33 +03:00
|
|
|
blk_unref(blk);
|
|
|
|
bdrv_unref(bs);
|
2019-01-14 18:57:27 +03:00
|
|
|
bdrv_unref(data_bs);
|
2010-06-11 23:37:37 +04:00
|
|
|
return ret;
|
|
|
|
}
|
2010-05-07 14:43:45 +04:00
|
|
|
|
2020-03-26 04:12:17 +03:00
|
|
|
static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv,
|
|
|
|
const char *filename,
|
|
|
|
QemuOpts *opts,
|
2018-01-18 15:43:45 +03:00
|
|
|
Error **errp)
|
2010-05-07 14:43:45 +04:00
|
|
|
{
|
2018-01-11 18:18:08 +03:00
|
|
|
BlockdevCreateOptions *create_options = NULL;
|
2018-06-14 22:14:32 +03:00
|
|
|
QDict *qdict;
|
2018-01-11 18:18:08 +03:00
|
|
|
Visitor *v;
|
2018-01-09 19:35:26 +03:00
|
|
|
BlockDriverState *bs = NULL;
|
2019-01-15 21:02:40 +03:00
|
|
|
BlockDriverState *data_bs = NULL;
|
2018-01-11 18:18:08 +03:00
|
|
|
const char *val;
|
2013-09-05 11:40:43 +04:00
|
|
|
int ret;
|
2010-05-07 14:43:45 +04:00
|
|
|
|
2018-01-11 18:18:08 +03:00
|
|
|
/* Only the keyval visitor supports the dotted syntax needed for
|
|
|
|
* encryption, so go through a QDict before getting a QAPI type. Ignore
|
|
|
|
* options meant for the protocol layer so that the visitor doesn't
|
|
|
|
* complain. */
|
|
|
|
qdict = qemu_opts_to_qdict_filtered(opts, NULL, bdrv_qcow2.create_opts,
|
|
|
|
true);
|
|
|
|
|
|
|
|
/* Handle encryption options */
|
|
|
|
val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT);
|
|
|
|
if (val && !strcmp(val, "on")) {
|
|
|
|
qdict_put_str(qdict, BLOCK_OPT_ENCRYPT, "qcow");
|
|
|
|
} else if (val && !strcmp(val, "off")) {
|
|
|
|
qdict_del(qdict, BLOCK_OPT_ENCRYPT);
|
|
|
|
}
|
|
|
|
|
|
|
|
val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT);
|
|
|
|
if (val && !strcmp(val, "aes")) {
|
|
|
|
qdict_put_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT, "qcow");
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Convert compat=0.10/1.1 into compat=v2/v3, to be renamed into
|
|
|
|
* version=v2/v3 below. */
|
|
|
|
val = qdict_get_try_str(qdict, BLOCK_OPT_COMPAT_LEVEL);
|
|
|
|
if (val && !strcmp(val, "0.10")) {
|
|
|
|
qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v2");
|
|
|
|
} else if (val && !strcmp(val, "1.1")) {
|
|
|
|
qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v3");
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Change legacy command line options into QMP ones */
|
|
|
|
static const QDictRenames opt_renames[] = {
|
|
|
|
{ BLOCK_OPT_BACKING_FILE, "backing-file" },
|
|
|
|
{ BLOCK_OPT_BACKING_FMT, "backing-fmt" },
|
|
|
|
{ BLOCK_OPT_CLUSTER_SIZE, "cluster-size" },
|
|
|
|
{ BLOCK_OPT_LAZY_REFCOUNTS, "lazy-refcounts" },
|
2020-07-10 19:13:13 +03:00
|
|
|
{ BLOCK_OPT_EXTL2, "extended-l2" },
|
2018-01-11 18:18:08 +03:00
|
|
|
{ BLOCK_OPT_REFCOUNT_BITS, "refcount-bits" },
|
|
|
|
{ BLOCK_OPT_ENCRYPT, BLOCK_OPT_ENCRYPT_FORMAT },
|
|
|
|
{ BLOCK_OPT_COMPAT_LEVEL, "version" },
|
2019-02-22 16:29:38 +03:00
|
|
|
{ BLOCK_OPT_DATA_FILE_RAW, "data-file-raw" },
|
qcow2: introduce compression type feature
The patch adds some preparation parts for incompatible compression type
feature to qcow2 allowing the use different compression methods for
image clusters (de)compressing.
It is implied that the compression type is set on the image creation and
can be changed only later by image conversion, thus compression type
defines the only compression algorithm used for the image, and thus,
for all image clusters.
The goal of the feature is to add support of other compression methods
to qcow2. For example, ZSTD which is more effective on compression than ZLIB.
The default compression is ZLIB. Images created with ZLIB compression type
are backward compatible with older qemu versions.
Adding of the compression type breaks a number of tests because now the
compression type is reported on image creation and there are some changes
in the qcow2 header in size and offsets.
The tests are fixed in the following ways:
* filter out compression_type for many tests
* fix header size, feature table size and backing file offset
affected tests: 031, 036, 061, 080
header_size +=8: 1 byte compression type
7 bytes padding
feature_table += 48: incompatible feature compression type
backing_file_offset += 56 (8 + 48 -> header_change + feature_table_change)
* add "compression type" for test output matching when it isn't filtered
affected tests: 049, 060, 061, 065, 082, 085, 144, 182, 185, 198, 206,
242, 255, 274, 280
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
QAPI part:
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20200507082521.29210-2-dplotnikov@virtuozzo.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2020-05-07 11:25:18 +03:00
|
|
|
{ BLOCK_OPT_COMPRESSION_TYPE, "compression-type" },
|
2018-01-11 18:18:08 +03:00
|
|
|
{ NULL, NULL },
|
|
|
|
};
|
|
|
|
|
|
|
|
if (!qdict_rename_keys(qdict, opt_renames, errp)) {
|
2018-01-09 21:44:33 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto finish;
|
|
|
|
}
|
2018-01-10 19:55:16 +03:00
|
|
|
|
2018-01-11 18:18:08 +03:00
|
|
|
/* Create and open the file (protocol layer) */
|
|
|
|
ret = bdrv_create_file(filename, opts, errp);
|
|
|
|
if (ret < 0) {
|
2017-07-05 15:57:34 +03:00
|
|
|
goto finish;
|
|
|
|
}
|
2018-01-11 18:18:08 +03:00
|
|
|
|
|
|
|
bs = bdrv_open(filename, NULL, NULL,
|
|
|
|
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
|
|
|
|
if (bs == NULL) {
|
|
|
|
ret = -EIO;
|
2014-06-05 13:20:59 +04:00
|
|
|
goto finish;
|
|
|
|
}
|
2017-07-05 15:57:34 +03:00
|
|
|
|
2019-01-15 21:02:40 +03:00
|
|
|
/* Create and open an external data file (protocol layer) */
|
|
|
|
val = qdict_get_try_str(qdict, BLOCK_OPT_DATA_FILE);
|
|
|
|
if (val) {
|
|
|
|
ret = bdrv_create_file(val, opts, errp);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto finish;
|
|
|
|
}
|
|
|
|
|
|
|
|
data_bs = bdrv_open(val, NULL, NULL,
|
|
|
|
BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
|
|
|
|
errp);
|
|
|
|
if (data_bs == NULL) {
|
|
|
|
ret = -EIO;
|
|
|
|
goto finish;
|
|
|
|
}
|
|
|
|
|
|
|
|
qdict_del(qdict, BLOCK_OPT_DATA_FILE);
|
|
|
|
qdict_put_str(qdict, "data-file", data_bs->node_name);
|
|
|
|
}
|
|
|
|
|
2018-01-11 18:18:08 +03:00
|
|
|
/* Set 'driver' and 'node' options */
|
|
|
|
qdict_put_str(qdict, "driver", "qcow2");
|
|
|
|
qdict_put_str(qdict, "file", bs->node_name);
|
|
|
|
|
|
|
|
/* Now get the QAPI type BlockdevCreateOptions */
|
2018-06-14 22:14:33 +03:00
|
|
|
v = qobject_input_visitor_new_flat_confused(qdict, errp);
|
|
|
|
if (!v) {
|
2014-06-05 13:20:59 +04:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto finish;
|
|
|
|
}
|
|
|
|
|
2020-07-07 19:06:07 +03:00
|
|
|
visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp);
|
2018-01-11 18:18:08 +03:00
|
|
|
visit_free(v);
|
2020-07-07 19:06:07 +03:00
|
|
|
if (!create_options) {
|
2015-02-19 01:40:46 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto finish;
|
|
|
|
}
|
|
|
|
|
2018-01-11 18:18:08 +03:00
|
|
|
/* Silently round up size */
|
|
|
|
create_options->u.qcow2.size = ROUND_UP(create_options->u.qcow2.size,
|
|
|
|
BDRV_SECTOR_SIZE);
|
2018-01-09 19:35:26 +03:00
|
|
|
|
|
|
|
/* Create the qcow2 image (format layer) */
|
2018-01-11 18:18:08 +03:00
|
|
|
ret = qcow2_co_create(create_options, errp);
|
2020-12-17 20:09:04 +03:00
|
|
|
finish:
|
2018-01-09 19:35:26 +03:00
|
|
|
if (ret < 0) {
|
2020-12-17 20:09:04 +03:00
|
|
|
bdrv_co_delete_file_noerr(bs);
|
|
|
|
bdrv_co_delete_file_noerr(data_bs);
|
|
|
|
} else {
|
|
|
|
ret = 0;
|
2018-01-09 19:35:26 +03:00
|
|
|
}
|
2014-06-05 13:20:59 +04:00
|
|
|
|
2018-04-19 18:01:43 +03:00
|
|
|
qobject_unref(qdict);
|
2018-01-09 19:35:26 +03:00
|
|
|
bdrv_unref(bs);
|
2019-01-15 21:02:40 +03:00
|
|
|
bdrv_unref(data_bs);
|
2018-01-11 18:18:08 +03:00
|
|
|
qapi_free_BlockdevCreateOptions(create_options);
|
2013-09-05 11:40:43 +04:00
|
|
|
return ret;
|
2010-05-07 14:43:45 +04:00
|
|
|
}
|
|
|
|
|
2016-05-11 10:00:14 +03:00
|
|
|
|
2017-10-12 06:47:00 +03:00
|
|
|
static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
|
2016-05-11 10:00:14 +03:00
|
|
|
{
|
block: Convert bdrv_get_block_status_above() to bytes
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the name of the function from bdrv_get_block_status_above()
to bdrv_block_status_above() ensures that the compiler enforces that
all callers are updated. Likewise, since it a byte interface allows
an offset mapping that might not be sector aligned, split the mapping
out of the return value and into a pass-by-reference parameter. For
now, the io.c layer still assert()s that all uses are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status in the drivers.
For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_block_status(), plus
updates for the new split return interface. But some code,
particularly bdrv_block_status(), gets a lot simpler because it no
longer has to mess with sectors. Likewise, mirror code no longer
computes s->granularity >> BDRV_SECTOR_BITS, and can therefore drop
an assertion about alignment because the loop no longer depends on
alignment (never mind that we don't really have a driver that
reports sub-sector alignments, so it's not really possible to test
the effect of sub-sector mirroring). Fix a neighboring assertion to
use is_power_of_2 while there.
For ease of review, bdrv_get_block_status() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-10-12 06:47:08 +03:00
|
|
|
int64_t nr;
|
|
|
|
int res;
|
2017-10-12 06:47:00 +03:00
|
|
|
|
|
|
|
/* Clamp to image length, before checking status of underlying sectors */
|
2017-10-12 06:47:19 +03:00
|
|
|
if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) {
|
|
|
|
bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset;
|
qcow2: Optimize write zero of unaligned tail cluster
We've already improved discards to operate efficiently on the tail
of an unaligned qcow2 image; it's time to make a similar improvement
to write zeroes. The special case is only valid at the tail
cluster of a file, where we must recognize that any sectors beyond
the image end would implicitly read as zero, and therefore should
not penalize our logic for widening a partial cluster into writing
the whole cluster as zero.
However, note that for now, the special case of end-of-file is only
recognized if there is no backing file, or if the backing file has
the same length; that's because when the backing file is shorter
than the active layer, we don't have code in place to recognize
that reads of a sector unallocated at the top and beyond the backing
end-of-file are implicitly zero. It's not much of a real loss,
because most people don't use images that aren't cluster-aligned,
or where the active layer is a different size than the backing
layer (especially where the difference falls within a single cluster).
Update test 154 to cover the new scenarios, using two images of
intentionally differing length.
While at it, fix the test to gracefully skip when run as
./check -qcow2 -o compat=0.10 154
since the older format lacks zero clusters already required earlier
in the test.
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170507000552.20847-11-eblake@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-05-07 03:05:50 +03:00
|
|
|
}
|
|
|
|
|
2017-10-12 06:47:00 +03:00
|
|
|
if (!bytes) {
|
2016-05-26 06:48:49 +03:00
|
|
|
return true;
|
|
|
|
}
|
2020-09-24 22:39:59 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* bdrv_block_status_above doesn't merge different types of zeros, for
|
|
|
|
* example, zeros which come from the region which is unallocated in
|
|
|
|
* the whole backing chain, and zeros which come because of a short
|
|
|
|
* backing file. So, we need a loop.
|
|
|
|
*/
|
|
|
|
do {
|
|
|
|
res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
|
|
|
|
offset += nr;
|
|
|
|
bytes -= nr;
|
|
|
|
} while (res >= 0 && (res & BDRV_BLOCK_ZERO) && nr && bytes);
|
|
|
|
|
|
|
|
return res >= 0 && (res & BDRV_BLOCK_ZERO) && bytes == 0;
|
2016-05-11 10:00:14 +03:00
|
|
|
}
|
|
|
|
|
2016-06-02 00:10:06 +03:00
|
|
|
static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
|
block: use int64_t instead of int in driver write_zeroes handlers
We are generally moving to int64_t for both offset and bytes parameters
on all io paths.
Main motivation is realization of 64-bit write_zeroes operation for
fast zeroing large disk chunks, up to the whole disk.
We chose signed type, to be consistent with off_t (which is signed) and
with possibility for signed return type (where negative value means
error).
So, convert driver write_zeroes handlers bytes parameter to int64_t.
The only caller of all updated function is bdrv_co_do_pwrite_zeroes().
bdrv_co_do_pwrite_zeroes() itself is of course OK with widening of
callee parameter type. Also, bdrv_co_do_pwrite_zeroes()'s
max_write_zeroes is limited to INT_MAX. So, updated functions all are
safe, they will not get "bytes" larger than before.
Still, let's look through all updated functions, and add assertions to
the ones which are actually unprepared to values larger than INT_MAX.
For these drivers also set explicit max_pwrite_zeroes limit.
Let's go:
blkdebug: calculations can't overflow, thanks to
bdrv_check_qiov_request() in generic layer. rule_check() and
bdrv_co_pwrite_zeroes() both have 64bit argument.
blklogwrites: pass to blk_log_writes_co_log() with 64bit argument.
blkreplay, copy-on-read, filter-compress: pass to
bdrv_co_pwrite_zeroes() which is OK
copy-before-write: Calls cbw_do_copy_before_write() and
bdrv_co_pwrite_zeroes, both have 64bit argument.
file-posix: both handler calls raw_do_pwrite_zeroes, which is updated.
In raw_do_pwrite_zeroes() calculations are OK due to
bdrv_check_qiov_request(), bytes go to RawPosixAIOData::aio_nbytes
which is uint64_t.
Check also where that uint64_t gets handed:
handle_aiocb_write_zeroes_block() passes a uint64_t[2] to
ioctl(BLKZEROOUT), handle_aiocb_write_zeroes() calls do_fallocate()
which takes off_t (and we compile to always have 64-bit off_t), as
does handle_aiocb_write_zeroes_unmap. All look safe.
gluster: bytes go to GlusterAIOCB::size which is int64_t and to
glfs_zerofill_async works with off_t.
iscsi: Aha, here we deal with iscsi_writesame16_task() that has
uint32_t num_blocks argument and iscsi_writesame16_task() has
uint16_t argument. Make comments, add assertions and clarify
max_pwrite_zeroes calculation.
iscsi_allocmap_() functions already has int64_t argument
is_byte_request_lun_aligned is simple to update, do it.
mirror_top: pass to bdrv_mirror_top_do_write which has uint64_t
argument
nbd: Aha, here we have protocol limitation, and NBDRequest::len is
uint32_t. max_pwrite_zeroes is cleanly set to 32bit value, so we are
OK for now.
nvme: Again, protocol limitation. And no inherent limit for
write-zeroes at all. But from code that calculates cdw12 it's obvious
that we do have limit and alignment. Let's clarify it. Also,
obviously the code is not prepared to handle bytes=0. Let's handle
this case too.
trace events already 64bit
preallocate: pass to handle_write() and bdrv_co_pwrite_zeroes(), both
64bit.
rbd: pass to qemu_rbd_start_co() which is 64bit.
qcow2: offset + bytes and alignment still works good (thanks to
bdrv_check_qiov_request()), so tail calculation is OK
qcow2_subcluster_zeroize() has 64bit argument, should be OK
trace events updated
qed: qed_co_request wants int nb_sectors. Also in code we have size_t
used for request length which may be 32bit. So, let's just keep
INT_MAX as a limit (aligning it down to pwrite_zeroes_alignment) and
don't care.
raw-format: Is OK. raw_adjust_offset and bdrv_co_pwrite_zeroes are both
64bit.
throttle: Both throttle_group_co_io_limits_intercept() and
bdrv_co_pwrite_zeroes() are 64bit.
vmdk: pass to vmdk_pwritev which is 64bit
quorum: pass to quorum_co_pwritev() which is 64bit
Hooray!
At this point all block drivers are prepared to support 64bit
write-zero requests, or have explicitly set max_pwrite_zeroes.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20210903102807.27127-8-vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
[eblake: use <= rather than < in assertions relying on max_pwrite_zeroes]
Signed-off-by: Eric Blake <eblake@redhat.com>
2021-09-03 13:28:03 +03:00
|
|
|
int64_t offset, int64_t bytes, BdrvRequestFlags flags)
|
2012-03-20 18:12:58 +04:00
|
|
|
{
|
|
|
|
int ret;
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2012-03-20 18:12:58 +04:00
|
|
|
|
2020-07-10 19:13:10 +03:00
|
|
|
uint32_t head = offset_into_subcluster(s, offset);
|
|
|
|
uint32_t tail = ROUND_UP(offset + bytes, s->subcluster_size) -
|
|
|
|
(offset + bytes);
|
2016-05-11 10:00:14 +03:00
|
|
|
|
2017-06-09 13:18:08 +03:00
|
|
|
trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes);
|
|
|
|
if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) {
|
qcow2: Optimize write zero of unaligned tail cluster
We've already improved discards to operate efficiently on the tail
of an unaligned qcow2 image; it's time to make a similar improvement
to write zeroes. The special case is only valid at the tail
cluster of a file, where we must recognize that any sectors beyond
the image end would implicitly read as zero, and therefore should
not penalize our logic for widening a partial cluster into writing
the whole cluster as zero.
However, note that for now, the special case of end-of-file is only
recognized if there is no backing file, or if the backing file has
the same length; that's because when the backing file is shorter
than the active layer, we don't have code in place to recognize
that reads of a sector unallocated at the top and beyond the backing
end-of-file are implicitly zero. It's not much of a real loss,
because most people don't use images that aren't cluster-aligned,
or where the active layer is a different size than the backing
layer (especially where the difference falls within a single cluster).
Update test 154 to cover the new scenarios, using two images of
intentionally differing length.
While at it, fix the test to gracefully skip when run as
./check -qcow2 -o compat=0.10 154
since the older format lacks zero clusters already required earlier
in the test.
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170507000552.20847-11-eblake@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-05-07 03:05:50 +03:00
|
|
|
tail = 0;
|
|
|
|
}
|
2016-05-26 06:48:47 +03:00
|
|
|
|
2016-05-26 06:48:49 +03:00
|
|
|
if (head || tail) {
|
|
|
|
uint64_t off;
|
2016-05-31 17:13:07 +03:00
|
|
|
unsigned int nr;
|
2020-07-10 19:13:00 +03:00
|
|
|
QCow2SubclusterType type;
|
2016-05-11 10:00:14 +03:00
|
|
|
|
2020-07-10 19:13:10 +03:00
|
|
|
assert(head + bytes + tail <= s->subcluster_size);
|
2016-05-11 10:00:14 +03:00
|
|
|
|
2016-05-26 06:48:49 +03:00
|
|
|
/* check whether remainder of cluster already reads as zero */
|
2017-10-12 06:47:00 +03:00
|
|
|
if (!(is_zero(bs, offset - head, head) &&
|
2020-07-10 19:13:10 +03:00
|
|
|
is_zero(bs, offset + bytes, tail))) {
|
2016-05-11 10:00:14 +03:00
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
|
|
|
qemu_co_mutex_lock(&s->lock);
|
|
|
|
/* We can have new write after previous check */
|
2020-07-10 19:13:10 +03:00
|
|
|
offset -= head;
|
|
|
|
bytes = s->subcluster_size;
|
|
|
|
nr = s->subcluster_size;
|
2020-07-10 19:12:59 +03:00
|
|
|
ret = qcow2_get_host_offset(bs, offset, &nr, &off, &type);
|
|
|
|
if (ret < 0 ||
|
2020-07-10 19:13:00 +03:00
|
|
|
(type != QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN &&
|
2020-07-10 19:13:01 +03:00
|
|
|
type != QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC &&
|
2020-07-10 19:13:00 +03:00
|
|
|
type != QCOW2_SUBCLUSTER_ZERO_PLAIN &&
|
|
|
|
type != QCOW2_SUBCLUSTER_ZERO_ALLOC)) {
|
2016-05-11 10:00:14 +03:00
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
2020-09-09 15:37:39 +03:00
|
|
|
return ret < 0 ? ret : -ENOTSUP;
|
2016-05-11 10:00:14 +03:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
qemu_co_mutex_lock(&s->lock);
|
2012-03-20 18:12:58 +04:00
|
|
|
}
|
|
|
|
|
2017-06-09 13:18:08 +03:00
|
|
|
trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes);
|
2016-05-26 06:48:47 +03:00
|
|
|
|
2020-07-10 19:13:10 +03:00
|
|
|
/* Whatever is left can use real zero subclusters */
|
|
|
|
ret = qcow2_subcluster_zeroize(bs, offset, bytes, flags);
|
2012-03-20 18:12:58 +04:00
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-07-16 02:23:03 +03:00
|
|
|
static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
|
block: use int64_t instead of int in driver discard handlers
We are generally moving to int64_t for both offset and bytes parameters
on all io paths.
Main motivation is realization of 64-bit write_zeroes operation for
fast zeroing large disk chunks, up to the whole disk.
We chose signed type, to be consistent with off_t (which is signed) and
with possibility for signed return type (where negative value means
error).
So, convert driver discard handlers bytes parameter to int64_t.
The only caller of all updated function is bdrv_co_pdiscard in
block/io.c. It is already prepared to work with 64bit requests, but
pass at most max(bs->bl.max_pdiscard, INT_MAX) to the driver.
Let's look at all updated functions:
blkdebug: all calculations are still OK, thanks to
bdrv_check_qiov_request().
both rule_check and bdrv_co_pdiscard are 64bit
blklogwrites: pass to blk_loc_writes_co_log which is 64bit
blkreplay, copy-on-read, filter-compress: pass to bdrv_co_pdiscard, OK
copy-before-write: pass to bdrv_co_pdiscard which is 64bit and to
cbw_do_copy_before_write which is 64bit
file-posix: one handler calls raw_account_discard() is 64bit and both
handlers calls raw_do_pdiscard(). Update raw_do_pdiscard, which pass
to RawPosixAIOData::aio_nbytes, which is 64bit (and calls
raw_account_discard())
gluster: somehow, third argument of glfs_discard_async is size_t.
Let's set max_pdiscard accordingly.
iscsi: iscsi_allocmap_set_invalid is 64bit,
!is_byte_request_lun_aligned is 64bit.
list.num is uint32_t. Let's clarify max_pdiscard and
pdiscard_alignment.
mirror_top: pass to bdrv_mirror_top_do_write() which is
64bit
nbd: protocol limitation. max_pdiscard is alredy set strict enough,
keep it as is for now.
nvme: buf.nlb is uint32_t and we do shift. So, add corresponding limits
to nvme_refresh_limits().
preallocate: pass to bdrv_co_pdiscard() which is 64bit.
rbd: pass to qemu_rbd_start_co() which is 64bit.
qcow2: calculations are still OK, thanks to bdrv_check_qiov_request(),
qcow2_cluster_discard() is 64bit.
raw-format: raw_adjust_offset() is 64bit, bdrv_co_pdiscard too.
throttle: pass to bdrv_co_pdiscard() which is 64bit and to
throttle_group_co_io_limits_intercept() which is 64bit as well.
test-block-iothread: bytes argument is unused
Great! Now all drivers are prepared to handle 64bit discard requests,
or else have explicit max_pdiscard limits.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20210903102807.27127-11-vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
2021-09-03 13:28:06 +03:00
|
|
|
int64_t offset, int64_t bytes)
|
2011-01-26 18:56:48 +03:00
|
|
|
{
|
2011-10-20 15:16:25 +04:00
|
|
|
int ret;
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2011-10-20 15:16:25 +04:00
|
|
|
|
2020-03-31 14:43:45 +03:00
|
|
|
/* If the image does not support QCOW_OFLAG_ZERO then discarding
|
|
|
|
* clusters could expose stale data from the backing file. */
|
|
|
|
if (s->qcow_version < 3 && bs->backing) {
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
2017-06-09 13:18:08 +03:00
|
|
|
if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) {
|
|
|
|
assert(bytes < s->cluster_size);
|
2017-04-07 04:37:09 +03:00
|
|
|
/* Ignore partial clusters, except for the special case of the
|
|
|
|
* complete partial cluster at the end of an unaligned file */
|
|
|
|
if (!QEMU_IS_ALIGNED(offset, s->cluster_size) ||
|
2017-06-09 13:18:08 +03:00
|
|
|
offset + bytes != bs->total_sectors * BDRV_SECTOR_SIZE) {
|
2017-04-07 04:37:09 +03:00
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
2016-11-17 23:13:57 +03:00
|
|
|
}
|
|
|
|
|
2011-10-20 15:16:25 +04:00
|
|
|
qemu_co_mutex_lock(&s->lock);
|
2017-06-09 13:18:08 +03:00
|
|
|
ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST,
|
qcow2: Discard/zero clusters by byte count
Passing a byte offset, but sector count, when we ultimately
want to operate on cluster granularity, is madness. Clean up
the external interfaces to take both offset and count as bytes,
while still keeping the assertion added previously that the
caller must align the values to a cluster. Then rename things
to make sure backports don't get confused by changed units:
instead of qcow2_discard_clusters() and qcow2_zero_clusters(),
we now have qcow2_cluster_discard() and qcow2_cluster_zeroize().
The internal functions still operate on clusters at a time, and
return an int for number of cleared clusters; but on an image
with 2M clusters, a single L2 table holds 256k entries that each
represent a 2M cluster, totalling well over INT_MAX bytes if we
ever had a request for that many bytes at once. All our callers
currently limit themselves to 32-bit bytes (and therefore fewer
clusters), but by making this function 64-bit clean, we have one
less place to clean up if we later improve the block layer to
support 64-bit bytes through all operations (with the block layer
auto-fragmenting on behalf of more-limited drivers), rather than
the current state where some interfaces are artificially limited
to INT_MAX at a time.
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170507000552.20847-13-eblake@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-05-07 03:05:52 +03:00
|
|
|
false);
|
2011-10-20 15:16:25 +04:00
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
|
|
|
return ret;
|
2011-01-26 18:56:48 +03:00
|
|
|
}
|
|
|
|
|
2018-06-01 12:26:42 +03:00
|
|
|
static int coroutine_fn
|
|
|
|
qcow2_co_copy_range_from(BlockDriverState *bs,
|
2021-09-03 13:28:01 +03:00
|
|
|
BdrvChild *src, int64_t src_offset,
|
|
|
|
BdrvChild *dst, int64_t dst_offset,
|
|
|
|
int64_t bytes, BdrvRequestFlags read_flags,
|
2018-07-09 19:37:17 +03:00
|
|
|
BdrvRequestFlags write_flags)
|
2018-06-01 12:26:42 +03:00
|
|
|
{
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
int ret;
|
|
|
|
unsigned int cur_bytes; /* number of bytes in current iteration */
|
|
|
|
BdrvChild *child = NULL;
|
2018-07-09 19:37:17 +03:00
|
|
|
BdrvRequestFlags cur_write_flags;
|
2018-06-01 12:26:42 +03:00
|
|
|
|
|
|
|
assert(!bs->encrypted);
|
|
|
|
qemu_co_mutex_lock(&s->lock);
|
|
|
|
|
|
|
|
while (bytes != 0) {
|
|
|
|
uint64_t copy_offset = 0;
|
2020-07-10 19:13:00 +03:00
|
|
|
QCow2SubclusterType type;
|
2018-06-01 12:26:42 +03:00
|
|
|
/* prepare next request */
|
|
|
|
cur_bytes = MIN(bytes, INT_MAX);
|
2018-07-09 19:37:17 +03:00
|
|
|
cur_write_flags = write_flags;
|
2018-06-01 12:26:42 +03:00
|
|
|
|
2020-07-10 19:12:59 +03:00
|
|
|
ret = qcow2_get_host_offset(bs, src_offset, &cur_bytes,
|
|
|
|
©_offset, &type);
|
2018-06-01 12:26:42 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2020-07-10 19:12:59 +03:00
|
|
|
switch (type) {
|
2020-07-10 19:13:00 +03:00
|
|
|
case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
|
2020-07-10 19:13:01 +03:00
|
|
|
case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
|
2018-06-01 12:26:42 +03:00
|
|
|
if (bs->backing && bs->backing->bs) {
|
|
|
|
int64_t backing_length = bdrv_getlength(bs->backing->bs);
|
|
|
|
if (src_offset >= backing_length) {
|
2018-07-09 19:37:17 +03:00
|
|
|
cur_write_flags |= BDRV_REQ_ZERO_WRITE;
|
2018-06-01 12:26:42 +03:00
|
|
|
} else {
|
|
|
|
child = bs->backing;
|
|
|
|
cur_bytes = MIN(cur_bytes, backing_length - src_offset);
|
|
|
|
copy_offset = src_offset;
|
|
|
|
}
|
|
|
|
} else {
|
2018-07-09 19:37:17 +03:00
|
|
|
cur_write_flags |= BDRV_REQ_ZERO_WRITE;
|
2018-06-01 12:26:42 +03:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2020-07-10 19:13:00 +03:00
|
|
|
case QCOW2_SUBCLUSTER_ZERO_PLAIN:
|
|
|
|
case QCOW2_SUBCLUSTER_ZERO_ALLOC:
|
2018-07-09 19:37:17 +03:00
|
|
|
cur_write_flags |= BDRV_REQ_ZERO_WRITE;
|
2018-06-01 12:26:42 +03:00
|
|
|
break;
|
|
|
|
|
2020-07-10 19:13:00 +03:00
|
|
|
case QCOW2_SUBCLUSTER_COMPRESSED:
|
2018-06-01 12:26:42 +03:00
|
|
|
ret = -ENOTSUP;
|
|
|
|
goto out;
|
|
|
|
|
2020-07-10 19:13:00 +03:00
|
|
|
case QCOW2_SUBCLUSTER_NORMAL:
|
2019-01-15 22:39:06 +03:00
|
|
|
child = s->data_file;
|
2018-06-01 12:26:42 +03:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
|
|
|
ret = bdrv_co_copy_range_from(child,
|
|
|
|
copy_offset,
|
|
|
|
dst, dst_offset,
|
2018-07-09 19:37:17 +03:00
|
|
|
cur_bytes, read_flags, cur_write_flags);
|
2018-06-01 12:26:42 +03:00
|
|
|
qemu_co_mutex_lock(&s->lock);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
bytes -= cur_bytes;
|
|
|
|
src_offset += cur_bytes;
|
|
|
|
dst_offset += cur_bytes;
|
|
|
|
}
|
|
|
|
ret = 0;
|
|
|
|
|
|
|
|
out:
|
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int coroutine_fn
|
|
|
|
qcow2_co_copy_range_to(BlockDriverState *bs,
|
2021-09-03 13:28:01 +03:00
|
|
|
BdrvChild *src, int64_t src_offset,
|
|
|
|
BdrvChild *dst, int64_t dst_offset,
|
|
|
|
int64_t bytes, BdrvRequestFlags read_flags,
|
2018-07-09 19:37:17 +03:00
|
|
|
BdrvRequestFlags write_flags)
|
2018-06-01 12:26:42 +03:00
|
|
|
{
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
int ret;
|
|
|
|
unsigned int cur_bytes; /* number of sectors in current iteration */
|
2020-09-11 17:09:42 +03:00
|
|
|
uint64_t host_offset;
|
2018-06-01 12:26:42 +03:00
|
|
|
QCowL2Meta *l2meta = NULL;
|
|
|
|
|
|
|
|
assert(!bs->encrypted);
|
|
|
|
|
|
|
|
qemu_co_mutex_lock(&s->lock);
|
|
|
|
|
|
|
|
while (bytes != 0) {
|
|
|
|
|
|
|
|
l2meta = NULL;
|
|
|
|
|
|
|
|
cur_bytes = MIN(bytes, INT_MAX);
|
|
|
|
|
|
|
|
/* TODO:
|
|
|
|
* If src->bs == dst->bs, we could simply copy by incrementing
|
|
|
|
* the refcnt, without copying user data.
|
|
|
|
* Or if src->bs == dst->bs->backing->bs, we could copy by discarding. */
|
2020-09-11 17:09:42 +03:00
|
|
|
ret = qcow2_alloc_host_offset(bs, dst_offset, &cur_bytes,
|
|
|
|
&host_offset, &l2meta);
|
2018-06-01 12:26:42 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2020-09-11 17:09:42 +03:00
|
|
|
ret = qcow2_pre_write_overlap_check(bs, 0, host_offset, cur_bytes,
|
|
|
|
true);
|
2018-06-01 12:26:42 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
2020-09-11 17:09:42 +03:00
|
|
|
ret = bdrv_co_copy_range_to(src, src_offset, s->data_file, host_offset,
|
2018-07-09 19:37:17 +03:00
|
|
|
cur_bytes, read_flags, write_flags);
|
2018-06-01 12:26:42 +03:00
|
|
|
qemu_co_mutex_lock(&s->lock);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = qcow2_handle_l2meta(bs, &l2meta, true);
|
|
|
|
if (ret) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
bytes -= cur_bytes;
|
2018-06-29 09:03:26 +03:00
|
|
|
src_offset += cur_bytes;
|
2018-06-01 12:26:42 +03:00
|
|
|
dst_offset += cur_bytes;
|
|
|
|
}
|
|
|
|
ret = 0;
|
|
|
|
|
|
|
|
fail:
|
|
|
|
qcow2_handle_l2meta(bs, &l2meta, false);
|
|
|
|
|
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
|
|
|
|
|
|
|
trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
|
2019-09-18 12:51:40 +03:00
|
|
|
bool exact, PreallocMode prealloc,
|
2020-04-24 15:54:39 +03:00
|
|
|
BdrvRequestFlags flags, Error **errp)
|
2010-04-28 14:36:11 +04:00
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2017-06-13 23:21:02 +03:00
|
|
|
uint64_t old_length;
|
2013-05-14 18:14:33 +04:00
|
|
|
int64_t new_l1_size;
|
|
|
|
int ret;
|
2018-09-26 19:04:45 +03:00
|
|
|
QDict *options;
|
2010-04-28 14:36:11 +04:00
|
|
|
|
2017-06-13 23:21:05 +03:00
|
|
|
if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA &&
|
|
|
|
prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL)
|
|
|
|
{
|
2017-06-13 23:20:52 +03:00
|
|
|
error_setg(errp, "Unsupported preallocation mode '%s'",
|
2017-08-24 11:46:08 +03:00
|
|
|
PreallocMode_str(prealloc));
|
2017-06-13 23:20:52 +03:00
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
2020-01-18 22:09:30 +03:00
|
|
|
if (!QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)) {
|
|
|
|
error_setg(errp, "The new size must be a multiple of %u",
|
|
|
|
(unsigned) BDRV_SECTOR_SIZE);
|
2010-04-28 14:36:11 +04:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
qemu_co_mutex_lock(&s->lock);
|
|
|
|
|
qcow2: Allow resize of images with internal snapshots
We originally refused to allow resize of images with internal
snapshots because the v2 image format did not require the tracking of
snapshot size, making it impossible to safely revert to a snapshot
with a different size than the current view of the image. But the
snapshot size tracking was rectified in v3, and our recent fixes to
qemu-img amend (see 0a85af35) guarantee that we always have a valid
snapshot size. Thus, we no longer need to artificially limit image
resizes, but it does become one more thing that would prevent a
downgrade back to v2. And now that we support different-sized
snapshots, it's also easy to fix reverting to a snapshot to apply the
new size.
Upgrade iotest 61 to cover this (we previously had NO coverage of
refusal to resize while snapshots exist). Note that the amend process
can fail but still have effects: in particular, since we break things
into upgrade, resize, downgrade, a failure during resize does not roll
back changes made during upgrade, nor does failure in downgrade roll
back a resize. But this situation is pre-existing even without this
patch; and without journaling, the best we could do is minimize the
chance of partial failure by collecting all changes prior to doing any
writes - which adds a lot of complexity but could still fail with EIO.
On the other hand, we are careful that even if we have partial
modification but then fail, the image is left viable (that is, we are
careful to sequence things so that after each successful cluster
write, there may be transient leaked clusters but no corrupt
metadata). And complicating the code to make it more transaction-like
is not worth the effort: a user can always request multiple 'qemu-img
amend' changing one thing each, if they need finer-grained control
over detecting the first failure than what they get by letting qemu
decide how to sequence multiple changes.
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20200428192648.749066-3-eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2020-04-28 22:26:47 +03:00
|
|
|
/*
|
|
|
|
* Even though we store snapshot size for all images, it was not
|
|
|
|
* required until v3, so it is not safe to proceed for v2.
|
|
|
|
*/
|
|
|
|
if (s->nb_snapshots && s->qcow_version < 3) {
|
|
|
|
error_setg(errp, "Can't resize a v2 image which has snapshots");
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
ret = -ENOTSUP;
|
|
|
|
goto fail;
|
2010-04-28 14:36:11 +04:00
|
|
|
}
|
|
|
|
|
2020-04-28 22:26:48 +03:00
|
|
|
/* See qcow2-bitmap.c for which bitmap scenarios prevent a resize. */
|
2019-03-11 21:51:46 +03:00
|
|
|
if (qcow2_truncate_bitmaps_check(bs, errp)) {
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
ret = -ENOTSUP;
|
|
|
|
goto fail;
|
2017-06-28 15:05:08 +03:00
|
|
|
}
|
|
|
|
|
2018-09-26 19:04:47 +03:00
|
|
|
old_length = bs->total_sectors * BDRV_SECTOR_SIZE;
|
2017-09-18 15:42:29 +03:00
|
|
|
new_l1_size = size_to_l1(s, offset);
|
2017-06-13 23:21:02 +03:00
|
|
|
|
|
|
|
if (offset < old_length) {
|
2017-09-29 15:16:13 +03:00
|
|
|
int64_t last_cluster, old_file_size;
|
2017-09-18 15:42:29 +03:00
|
|
|
if (prealloc != PREALLOC_MODE_OFF) {
|
|
|
|
error_setg(errp,
|
|
|
|
"Preallocation can't be used for shrinking an image");
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
2017-09-18 15:42:29 +03:00
|
|
|
}
|
2010-04-28 14:36:11 +04:00
|
|
|
|
2017-09-18 15:42:29 +03:00
|
|
|
ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size),
|
|
|
|
old_length - ROUND_UP(offset,
|
|
|
|
s->cluster_size),
|
|
|
|
QCOW2_DISCARD_ALWAYS, true);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Failed to discard cropped clusters");
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
goto fail;
|
2017-09-18 15:42:29 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
ret = qcow2_shrink_l1_table(bs, new_l1_size);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret,
|
|
|
|
"Failed to reduce the number of L2 tables");
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
goto fail;
|
2017-09-18 15:42:29 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
ret = qcow2_shrink_reftable(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret,
|
|
|
|
"Failed to discard unused refblocks");
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
goto fail;
|
2017-09-18 15:42:29 +03:00
|
|
|
}
|
2017-09-29 15:16:13 +03:00
|
|
|
|
|
|
|
old_file_size = bdrv_getlength(bs->file->bs);
|
|
|
|
if (old_file_size < 0) {
|
|
|
|
error_setg_errno(errp, -old_file_size,
|
|
|
|
"Failed to inquire current file length");
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
ret = old_file_size;
|
|
|
|
goto fail;
|
2017-09-29 15:16:13 +03:00
|
|
|
}
|
|
|
|
last_cluster = qcow2_get_last_cluster(bs, old_file_size);
|
|
|
|
if (last_cluster < 0) {
|
|
|
|
error_setg_errno(errp, -last_cluster,
|
|
|
|
"Failed to find the last cluster");
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
ret = last_cluster;
|
|
|
|
goto fail;
|
2017-09-29 15:16:13 +03:00
|
|
|
}
|
|
|
|
if ((last_cluster + 1) * s->cluster_size < old_file_size) {
|
2017-10-09 18:54:31 +03:00
|
|
|
Error *local_err = NULL;
|
|
|
|
|
2019-09-18 12:51:42 +03:00
|
|
|
/*
|
|
|
|
* Do not pass @exact here: It will not help the user if
|
|
|
|
* we get an error here just because they wanted to shrink
|
|
|
|
* their qcow2 image (on a block device) with qemu-img.
|
|
|
|
* (And on the qcow2 layer, the @exact requirement is
|
|
|
|
* always fulfilled, so there is no need to pass it on.)
|
|
|
|
*/
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
|
2020-04-24 15:54:40 +03:00
|
|
|
false, PREALLOC_MODE_OFF, 0, &local_err);
|
2017-10-09 18:54:31 +03:00
|
|
|
if (local_err) {
|
|
|
|
warn_reportf_err(local_err,
|
|
|
|
"Failed to truncate the tail of the image: ");
|
2017-09-29 15:16:13 +03:00
|
|
|
}
|
|
|
|
}
|
2017-09-18 15:42:29 +03:00
|
|
|
} else {
|
|
|
|
ret = qcow2_grow_l1_table(bs, new_l1_size, true);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Failed to grow the L1 table");
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
goto fail;
|
2017-09-18 15:42:29 +03:00
|
|
|
}
|
2021-03-26 17:55:08 +03:00
|
|
|
|
|
|
|
if (data_file_is_raw(bs) && prealloc == PREALLOC_MODE_OFF) {
|
|
|
|
/*
|
|
|
|
* When creating a qcow2 image with data-file-raw, we enforce
|
|
|
|
* at least prealloc=metadata, so that the L1/L2 tables are
|
|
|
|
* fully allocated and reading from the data file will return
|
|
|
|
* the same data as reading from the qcow2 image. When the
|
|
|
|
* image is grown, we must consequently preallocate the
|
|
|
|
* metadata structures to cover the added area.
|
|
|
|
*/
|
|
|
|
prealloc = PREALLOC_MODE_METADATA;
|
|
|
|
}
|
2010-04-28 14:36:11 +04:00
|
|
|
}
|
|
|
|
|
2017-06-13 23:21:02 +03:00
|
|
|
switch (prealloc) {
|
|
|
|
case PREALLOC_MODE_OFF:
|
2019-04-15 17:34:30 +03:00
|
|
|
if (has_data_file(bs)) {
|
2019-09-18 12:51:42 +03:00
|
|
|
/*
|
|
|
|
* If the caller wants an exact resize, the external data
|
|
|
|
* file should be resized to the exact target size, too,
|
|
|
|
* so we pass @exact here.
|
|
|
|
*/
|
2020-04-24 15:54:40 +03:00
|
|
|
ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, 0,
|
|
|
|
errp);
|
2019-04-15 17:34:30 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
2017-06-13 23:21:02 +03:00
|
|
|
break;
|
|
|
|
|
|
|
|
case PREALLOC_MODE_METADATA:
|
2019-04-15 17:34:30 +03:00
|
|
|
ret = preallocate_co(bs, old_length, offset, prealloc, errp);
|
2017-06-13 23:21:02 +03:00
|
|
|
if (ret < 0) {
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
goto fail;
|
2017-06-13 23:21:02 +03:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2017-06-13 23:21:05 +03:00
|
|
|
case PREALLOC_MODE_FALLOC:
|
|
|
|
case PREALLOC_MODE_FULL:
|
|
|
|
{
|
|
|
|
int64_t allocation_start, host_offset, guest_offset;
|
|
|
|
int64_t clusters_allocated;
|
qcow2: Fix preallocation on block devices
Calling bdrv_getlength() to get the pre-truncate file size will not
really work on block devices, because they have always the same length,
and trying to write beyond it will fail with a rather cryptic error
message.
Instead, we should use qcow2_get_last_cluster() and bdrv_getlength()
only as a fallback.
Before this patch:
$ truncate -s 1G test.img
$ sudo losetup -f --show test.img
/dev/loop0
$ sudo qemu-img create -f qcow2 -o preallocation=full /dev/loop0 64M
Formatting '/dev/loop0', fmt=qcow2 size=67108864 cluster_size=65536
preallocation=full lazy_refcounts=off refcount_bits=16
qemu-img: /dev/loop0: Could not resize image: Failed to resize refcount
structures: No space left on device
With this patch:
$ sudo qemu-img create -f qcow2 -o preallocation=full /dev/loop0 64M
Formatting '/dev/loop0', fmt=qcow2 size=67108864 cluster_size=65536
preallocation=full lazy_refcounts=off refcount_bits=16
qemu-img: /dev/loop0: Could not resize image: Failed to resize
underlying file: Preallocation mode 'full' unsupported for this
non-regular file
So as you can see, it still fails, but now the problem is missing
support on the block device level, so we at least get a better error
message.
Note that we cannot preallocate block devices on truncate by design,
because we do not know what area to preallocate. Their length is always
the same, the truncate operation does not change it.
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20200505141801.1096763-1-mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2020-05-05 17:18:01 +03:00
|
|
|
int64_t old_file_size, last_cluster, new_file_size;
|
2017-06-13 23:21:05 +03:00
|
|
|
uint64_t nb_new_data_clusters, nb_new_l2_tables;
|
2020-07-10 19:13:12 +03:00
|
|
|
bool subclusters_need_allocation = false;
|
2017-06-13 23:21:05 +03:00
|
|
|
|
2019-01-15 22:39:06 +03:00
|
|
|
/* With a data file, preallocation means just allocating the metadata
|
|
|
|
* and forwarding the truncate request to the data file */
|
|
|
|
if (has_data_file(bs)) {
|
2019-04-15 17:34:30 +03:00
|
|
|
ret = preallocate_co(bs, old_length, offset, prealloc, errp);
|
2019-01-15 22:39:06 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2017-06-13 23:21:05 +03:00
|
|
|
old_file_size = bdrv_getlength(bs->file->bs);
|
|
|
|
if (old_file_size < 0) {
|
|
|
|
error_setg_errno(errp, -old_file_size,
|
|
|
|
"Failed to inquire current file length");
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
ret = old_file_size;
|
|
|
|
goto fail;
|
2017-06-13 23:21:05 +03:00
|
|
|
}
|
qcow2: Fix preallocation on block devices
Calling bdrv_getlength() to get the pre-truncate file size will not
really work on block devices, because they have always the same length,
and trying to write beyond it will fail with a rather cryptic error
message.
Instead, we should use qcow2_get_last_cluster() and bdrv_getlength()
only as a fallback.
Before this patch:
$ truncate -s 1G test.img
$ sudo losetup -f --show test.img
/dev/loop0
$ sudo qemu-img create -f qcow2 -o preallocation=full /dev/loop0 64M
Formatting '/dev/loop0', fmt=qcow2 size=67108864 cluster_size=65536
preallocation=full lazy_refcounts=off refcount_bits=16
qemu-img: /dev/loop0: Could not resize image: Failed to resize refcount
structures: No space left on device
With this patch:
$ sudo qemu-img create -f qcow2 -o preallocation=full /dev/loop0 64M
Formatting '/dev/loop0', fmt=qcow2 size=67108864 cluster_size=65536
preallocation=full lazy_refcounts=off refcount_bits=16
qemu-img: /dev/loop0: Could not resize image: Failed to resize
underlying file: Preallocation mode 'full' unsupported for this
non-regular file
So as you can see, it still fails, but now the problem is missing
support on the block device level, so we at least get a better error
message.
Note that we cannot preallocate block devices on truncate by design,
because we do not know what area to preallocate. Their length is always
the same, the truncate operation does not change it.
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20200505141801.1096763-1-mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2020-05-05 17:18:01 +03:00
|
|
|
|
|
|
|
last_cluster = qcow2_get_last_cluster(bs, old_file_size);
|
|
|
|
if (last_cluster >= 0) {
|
|
|
|
old_file_size = (last_cluster + 1) * s->cluster_size;
|
|
|
|
} else {
|
|
|
|
old_file_size = ROUND_UP(old_file_size, s->cluster_size);
|
|
|
|
}
|
2017-06-13 23:21:05 +03:00
|
|
|
|
2020-06-17 17:00:36 +03:00
|
|
|
nb_new_data_clusters = (ROUND_UP(offset, s->cluster_size) -
|
|
|
|
start_of_cluster(s, old_length)) >> s->cluster_bits;
|
2017-06-13 23:21:05 +03:00
|
|
|
|
|
|
|
/* This is an overestimation; we will not actually allocate space for
|
|
|
|
* these in the file but just make sure the new refcount structures are
|
|
|
|
* able to cover them so we will not have to allocate new refblocks
|
|
|
|
* while entering the data blocks in the potentially new L2 tables.
|
|
|
|
* (We do not actually care where the L2 tables are placed. Maybe they
|
|
|
|
* are already allocated or they can be placed somewhere before
|
|
|
|
* @old_file_size. It does not matter because they will be fully
|
|
|
|
* allocated automatically, so they do not need to be covered by the
|
|
|
|
* preallocation. All that matters is that we will not have to allocate
|
|
|
|
* new refcount structures for them.) */
|
|
|
|
nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters,
|
2020-07-10 19:12:54 +03:00
|
|
|
s->cluster_size / l2_entry_size(s));
|
2017-06-13 23:21:05 +03:00
|
|
|
/* The cluster range may not be aligned to L2 boundaries, so add one L2
|
|
|
|
* table for a potential head/tail */
|
|
|
|
nb_new_l2_tables++;
|
|
|
|
|
|
|
|
allocation_start = qcow2_refcount_area(bs, old_file_size,
|
|
|
|
nb_new_data_clusters +
|
|
|
|
nb_new_l2_tables,
|
|
|
|
true, 0, 0);
|
|
|
|
if (allocation_start < 0) {
|
|
|
|
error_setg_errno(errp, -allocation_start,
|
|
|
|
"Failed to resize refcount structures");
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
ret = allocation_start;
|
|
|
|
goto fail;
|
2017-06-13 23:21:05 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start,
|
|
|
|
nb_new_data_clusters);
|
|
|
|
if (clusters_allocated < 0) {
|
|
|
|
error_setg_errno(errp, -clusters_allocated,
|
|
|
|
"Failed to allocate data clusters");
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
ret = clusters_allocated;
|
|
|
|
goto fail;
|
2017-06-13 23:21:05 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
assert(clusters_allocated == nb_new_data_clusters);
|
|
|
|
|
|
|
|
/* Allocate the data area */
|
|
|
|
new_file_size = allocation_start +
|
|
|
|
nb_new_data_clusters * s->cluster_size;
|
2020-04-24 17:27:01 +03:00
|
|
|
/*
|
|
|
|
* Image file grows, so @exact does not matter.
|
|
|
|
*
|
|
|
|
* If we need to zero out the new area, try first whether the protocol
|
|
|
|
* driver can already take care of this.
|
|
|
|
*/
|
|
|
|
if (flags & BDRV_REQ_ZERO_WRITE) {
|
|
|
|
ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc,
|
|
|
|
BDRV_REQ_ZERO_WRITE, NULL);
|
|
|
|
if (ret >= 0) {
|
|
|
|
flags &= ~BDRV_REQ_ZERO_WRITE;
|
2020-07-10 19:13:12 +03:00
|
|
|
/* Ensure that we read zeroes and not backing file data */
|
|
|
|
subclusters_need_allocation = true;
|
2020-04-24 17:27:01 +03:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
ret = -1;
|
|
|
|
}
|
|
|
|
if (ret < 0) {
|
|
|
|
ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0,
|
|
|
|
errp);
|
|
|
|
}
|
2017-06-13 23:21:05 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
error_prepend(errp, "Failed to resize underlying file: ");
|
|
|
|
qcow2_free_clusters(bs, allocation_start,
|
|
|
|
nb_new_data_clusters * s->cluster_size,
|
|
|
|
QCOW2_DISCARD_OTHER);
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
goto fail;
|
2017-06-13 23:21:05 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Create the necessary L2 entries */
|
|
|
|
host_offset = allocation_start;
|
|
|
|
guest_offset = old_length;
|
|
|
|
while (nb_new_data_clusters) {
|
2018-02-05 17:33:31 +03:00
|
|
|
int64_t nb_clusters = MIN(
|
|
|
|
nb_new_data_clusters,
|
|
|
|
s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset));
|
2020-06-17 17:00:36 +03:00
|
|
|
unsigned cow_start_length = offset_into_cluster(s, guest_offset);
|
|
|
|
QCowL2Meta allocation;
|
|
|
|
guest_offset = start_of_cluster(s, guest_offset);
|
|
|
|
allocation = (QCowL2Meta) {
|
2017-06-13 23:21:05 +03:00
|
|
|
.offset = guest_offset,
|
|
|
|
.alloc_offset = host_offset,
|
|
|
|
.nb_clusters = nb_clusters,
|
2020-06-17 17:00:36 +03:00
|
|
|
.cow_start = {
|
|
|
|
.offset = 0,
|
|
|
|
.nb_bytes = cow_start_length,
|
|
|
|
},
|
|
|
|
.cow_end = {
|
|
|
|
.offset = nb_clusters << s->cluster_bits,
|
|
|
|
.nb_bytes = 0,
|
|
|
|
},
|
2020-07-10 19:13:12 +03:00
|
|
|
.prealloc = !subclusters_need_allocation,
|
2017-06-13 23:21:05 +03:00
|
|
|
};
|
|
|
|
qemu_co_queue_init(&allocation.dependent_requests);
|
|
|
|
|
|
|
|
ret = qcow2_alloc_cluster_link_l2(bs, &allocation);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Failed to update L2 tables");
|
|
|
|
qcow2_free_clusters(bs, host_offset,
|
|
|
|
nb_new_data_clusters * s->cluster_size,
|
|
|
|
QCOW2_DISCARD_OTHER);
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
goto fail;
|
2017-06-13 23:21:05 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
guest_offset += nb_clusters * s->cluster_size;
|
|
|
|
host_offset += nb_clusters * s->cluster_size;
|
|
|
|
nb_new_data_clusters -= nb_clusters;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2017-06-13 23:21:02 +03:00
|
|
|
default:
|
|
|
|
g_assert_not_reached();
|
|
|
|
}
|
|
|
|
|
2020-04-24 15:54:42 +03:00
|
|
|
if ((flags & BDRV_REQ_ZERO_WRITE) && offset > old_length) {
|
2020-07-10 19:13:10 +03:00
|
|
|
uint64_t zero_start = QEMU_ALIGN_UP(old_length, s->subcluster_size);
|
2020-04-24 15:54:42 +03:00
|
|
|
|
|
|
|
/*
|
2020-07-10 19:13:10 +03:00
|
|
|
* Use zero clusters as much as we can. qcow2_subcluster_zeroize()
|
|
|
|
* requires a subcluster-aligned start. The end may be unaligned if
|
|
|
|
* it is at the end of the image (which it is here).
|
2020-04-24 15:54:42 +03:00
|
|
|
*/
|
2020-05-04 18:52:17 +03:00
|
|
|
if (offset > zero_start) {
|
2020-07-10 19:13:10 +03:00
|
|
|
ret = qcow2_subcluster_zeroize(bs, zero_start, offset - zero_start,
|
|
|
|
0);
|
2020-05-04 18:52:17 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Failed to zero out new clusters");
|
|
|
|
goto fail;
|
|
|
|
}
|
2020-04-24 15:54:42 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Write explicit zeros for the unaligned head */
|
|
|
|
if (zero_start > old_length) {
|
2020-05-04 18:52:17 +03:00
|
|
|
uint64_t len = MIN(zero_start, offset) - old_length;
|
2020-04-24 15:54:42 +03:00
|
|
|
uint8_t *buf = qemu_blockalign0(bs, len);
|
|
|
|
QEMUIOVector qiov;
|
|
|
|
qemu_iovec_init_buf(&qiov, buf, len);
|
|
|
|
|
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
|
|
|
ret = qcow2_co_pwritev_part(bs, old_length, len, &qiov, 0, 0);
|
|
|
|
qemu_co_mutex_lock(&s->lock);
|
|
|
|
|
|
|
|
qemu_vfree(buf);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Failed to zero out the new area");
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-13 23:21:02 +03:00
|
|
|
if (prealloc != PREALLOC_MODE_OFF) {
|
|
|
|
/* Flush metadata before actually changing the image size */
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
ret = qcow2_write_caches(bs);
|
2017-06-13 23:21:02 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret,
|
|
|
|
"Failed to flush the preallocated area to disk");
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
goto fail;
|
2017-06-13 23:21:02 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-26 19:04:45 +03:00
|
|
|
bs->total_sectors = offset / BDRV_SECTOR_SIZE;
|
|
|
|
|
2010-04-28 14:36:11 +04:00
|
|
|
/* write updated header.size */
|
|
|
|
offset = cpu_to_be64(offset);
|
2016-06-20 21:09:15 +03:00
|
|
|
ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
|
2020-08-28 14:08:28 +03:00
|
|
|
&offset, sizeof(offset));
|
2010-04-28 14:36:11 +04:00
|
|
|
if (ret < 0) {
|
2017-03-28 23:51:29 +03:00
|
|
|
error_setg_errno(errp, -ret, "Failed to update the image size");
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
goto fail;
|
2010-04-28 14:36:11 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
s->l1_vm_state_index = new_l1_size;
|
2018-09-26 19:04:45 +03:00
|
|
|
|
|
|
|
/* Update cache sizes */
|
|
|
|
options = qdict_clone_shallow(bs->options);
|
|
|
|
ret = qcow2_update_options(bs, options, s->flags, errp);
|
|
|
|
qobject_unref(options);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
ret = 0;
|
|
|
|
fail:
|
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
|
|
|
return ret;
|
2010-04-28 14:36:11 +04:00
|
|
|
}
|
|
|
|
|
2016-07-22 11:17:43 +03:00
|
|
|
static coroutine_fn int
|
2019-12-02 15:15:05 +03:00
|
|
|
qcow2_co_pwritev_compressed_task(BlockDriverState *bs,
|
2019-06-04 19:15:14 +03:00
|
|
|
uint64_t offset, uint64_t bytes,
|
|
|
|
QEMUIOVector *qiov, size_t qiov_offset)
|
2010-04-24 00:19:47 +04:00
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2018-06-20 17:48:36 +03:00
|
|
|
int ret;
|
2019-04-30 13:08:02 +03:00
|
|
|
ssize_t out_len;
|
2016-07-22 11:17:43 +03:00
|
|
|
uint8_t *buf, *out_buf;
|
2019-02-27 12:26:24 +03:00
|
|
|
uint64_t cluster_offset;
|
2010-04-24 00:19:47 +04:00
|
|
|
|
2019-12-02 15:15:05 +03:00
|
|
|
assert(bytes == s->cluster_size || (bytes < s->cluster_size &&
|
|
|
|
(offset + bytes == bs->total_sectors << BDRV_SECTOR_BITS)));
|
2017-11-14 13:16:49 +03:00
|
|
|
|
2016-07-22 11:17:44 +03:00
|
|
|
buf = qemu_blockalign(bs, s->cluster_size);
|
2019-12-02 15:15:05 +03:00
|
|
|
if (bytes < s->cluster_size) {
|
2016-07-22 11:17:44 +03:00
|
|
|
/* Zero-pad last write if image size is not cluster aligned */
|
|
|
|
memset(buf + bytes, 0, s->cluster_size - bytes);
|
2013-04-15 19:17:31 +04:00
|
|
|
}
|
2019-06-04 19:15:14 +03:00
|
|
|
qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes);
|
2010-04-24 00:19:47 +04:00
|
|
|
|
2016-07-14 19:59:25 +03:00
|
|
|
out_buf = g_malloc(s->cluster_size);
|
2010-04-24 00:19:47 +04:00
|
|
|
|
2018-11-01 21:27:33 +03:00
|
|
|
out_len = qcow2_co_compress(bs, out_buf, s->cluster_size - 1,
|
|
|
|
buf, s->cluster_size);
|
2019-04-30 13:08:02 +03:00
|
|
|
if (out_len == -ENOMEM) {
|
2010-04-24 00:19:47 +04:00
|
|
|
/* could not compress: write normal cluster */
|
2019-06-04 19:15:14 +03:00
|
|
|
ret = qcow2_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, 0);
|
2011-10-18 19:12:44 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
2016-07-22 11:17:43 +03:00
|
|
|
goto success;
|
2019-04-30 13:08:02 +03:00
|
|
|
} else if (out_len < 0) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
2016-07-22 11:17:43 +03:00
|
|
|
}
|
2013-08-30 16:34:26 +04:00
|
|
|
|
2016-07-22 11:17:43 +03:00
|
|
|
qemu_co_mutex_lock(&s->lock);
|
2019-02-27 12:26:24 +03:00
|
|
|
ret = qcow2_alloc_compressed_cluster_offset(bs, offset, out_len,
|
|
|
|
&cluster_offset);
|
|
|
|
if (ret < 0) {
|
2016-07-22 11:17:43 +03:00
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
|
|
|
goto fail;
|
|
|
|
}
|
2013-08-30 16:34:26 +04:00
|
|
|
|
2019-01-15 22:39:06 +03:00
|
|
|
ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len, true);
|
2016-07-22 11:17:43 +03:00
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
2010-04-24 00:19:47 +04:00
|
|
|
}
|
|
|
|
|
2019-01-15 22:39:06 +03:00
|
|
|
BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
|
2019-04-22 17:58:31 +03:00
|
|
|
ret = bdrv_co_pwrite(s->data_file, cluster_offset, out_len, out_buf, 0);
|
2016-07-22 11:17:43 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
success:
|
2011-10-18 19:12:44 +04:00
|
|
|
ret = 0;
|
|
|
|
fail:
|
2016-07-22 11:17:43 +03:00
|
|
|
qemu_vfree(buf);
|
2011-08-21 07:09:37 +04:00
|
|
|
g_free(out_buf);
|
2011-10-18 19:12:44 +04:00
|
|
|
return ret;
|
2010-04-24 00:19:47 +04:00
|
|
|
}
|
|
|
|
|
2019-12-02 15:15:05 +03:00
|
|
|
static coroutine_fn int qcow2_co_pwritev_compressed_task_entry(AioTask *task)
|
|
|
|
{
|
|
|
|
Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
|
|
|
|
|
2020-07-10 19:13:00 +03:00
|
|
|
assert(!t->subcluster_type && !t->l2meta);
|
2019-12-02 15:15:05 +03:00
|
|
|
|
|
|
|
return qcow2_co_pwritev_compressed_task(t->bs, t->offset, t->bytes, t->qiov,
|
|
|
|
t->qiov_offset);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* XXX: put compressed sectors first, then all the cluster aligned
|
|
|
|
* tables to avoid losing bytes in alignment
|
|
|
|
*/
|
|
|
|
static coroutine_fn int
|
|
|
|
qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
|
block: use int64_t instead of uint64_t in driver write handlers
We are generally moving to int64_t for both offset and bytes parameters
on all io paths.
Main motivation is realization of 64-bit write_zeroes operation for
fast zeroing large disk chunks, up to the whole disk.
We chose signed type, to be consistent with off_t (which is signed) and
with possibility for signed return type (where negative value means
error).
So, convert driver write handlers parameters which are already 64bit to
signed type.
While being here, convert also flags parameter to be BdrvRequestFlags.
Now let's consider all callers. Simple
git grep '\->bdrv_\(aio\|co\)_pwritev\(_part\)\?'
shows that's there three callers of driver function:
bdrv_driver_pwritev() and bdrv_driver_pwritev_compressed() in
block/io.c, both pass int64_t, checked by bdrv_check_qiov_request() to
be non-negative.
qcow2_save_vmstate() does bdrv_check_qiov_request().
Still, the functions may be called directly, not only by drv->...
Let's check:
git grep '\.bdrv_\(aio\|co\)_pwritev\(_part\)\?\s*=' | \
awk '{print $4}' | sed 's/,//' | sed 's/&//' | sort | uniq | \
while read func; do git grep "$func(" | \
grep -v "$func(BlockDriverState"; done
shows several callers:
qcow2:
qcow2_co_truncate() write at most up to @offset, which is checked in
generic qcow2_co_truncate() by bdrv_check_request().
qcow2_co_pwritev_compressed_task() pass the request (or part of the
request) that already went through normal write path, so it should
be OK
qcow:
qcow_co_pwritev_compressed() pass int64_t, it's updated by this patch
quorum:
quorum_co_pwrite_zeroes() pass int64_t and int - OK
throttle:
throttle_co_pwritev_compressed() pass int64_t, it's updated by this
patch
vmdk:
vmdk_co_pwritev_compressed() pass int64_t, it's updated by this
patch
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20210903102807.27127-5-vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
2021-09-03 13:28:00 +03:00
|
|
|
int64_t offset, int64_t bytes,
|
2019-12-02 15:15:05 +03:00
|
|
|
QEMUIOVector *qiov, size_t qiov_offset)
|
|
|
|
{
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
AioTaskPool *aio = NULL;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
if (has_data_file(bs)) {
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bytes == 0) {
|
|
|
|
/*
|
|
|
|
* align end of file to a sector boundary to ease reading with
|
|
|
|
* sector based I/Os
|
|
|
|
*/
|
|
|
|
int64_t len = bdrv_getlength(bs->file->bs);
|
|
|
|
if (len < 0) {
|
|
|
|
return len;
|
|
|
|
}
|
2020-04-24 15:54:40 +03:00
|
|
|
return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, 0,
|
|
|
|
NULL);
|
2019-12-02 15:15:05 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (offset_into_cluster(s, offset)) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2020-04-06 17:34:01 +03:00
|
|
|
if (offset_into_cluster(s, bytes) &&
|
|
|
|
(offset + bytes) != (bs->total_sectors << BDRV_SECTOR_BITS)) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2019-12-02 15:15:05 +03:00
|
|
|
while (bytes && aio_task_pool_status(aio) == 0) {
|
|
|
|
uint64_t chunk_size = MIN(bytes, s->cluster_size);
|
|
|
|
|
|
|
|
if (!aio && chunk_size != bytes) {
|
|
|
|
aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_compressed_task_entry,
|
|
|
|
0, 0, offset, chunk_size, qiov, qiov_offset, NULL);
|
|
|
|
if (ret < 0) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
qiov_offset += chunk_size;
|
|
|
|
offset += chunk_size;
|
|
|
|
bytes -= chunk_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (aio) {
|
|
|
|
aio_task_pool_wait_all(aio);
|
|
|
|
if (ret == 0) {
|
|
|
|
ret = aio_task_pool_status(aio);
|
|
|
|
}
|
|
|
|
g_free(aio);
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2018-11-01 21:27:37 +03:00
|
|
|
static int coroutine_fn
|
|
|
|
qcow2_co_preadv_compressed(BlockDriverState *bs,
|
2021-09-14 15:24:46 +03:00
|
|
|
uint64_t l2_entry,
|
2018-11-01 21:27:37 +03:00
|
|
|
uint64_t offset,
|
|
|
|
uint64_t bytes,
|
2019-06-04 19:15:13 +03:00
|
|
|
QEMUIOVector *qiov,
|
|
|
|
size_t qiov_offset)
|
2018-11-01 21:27:34 +03:00
|
|
|
{
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2021-09-14 15:24:47 +03:00
|
|
|
int ret = 0, csize;
|
2018-11-01 21:27:34 +03:00
|
|
|
uint64_t coffset;
|
2018-11-01 21:27:37 +03:00
|
|
|
uint8_t *buf, *out_buf;
|
|
|
|
int offset_in_cluster = offset_into_cluster(s, offset);
|
2018-11-01 21:27:34 +03:00
|
|
|
|
2021-09-14 15:24:47 +03:00
|
|
|
qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
|
2018-11-01 21:27:34 +03:00
|
|
|
|
2018-11-01 21:27:37 +03:00
|
|
|
buf = g_try_malloc(csize);
|
|
|
|
if (!buf) {
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
2018-11-01 21:27:34 +03:00
|
|
|
|
2018-11-01 21:27:37 +03:00
|
|
|
out_buf = qemu_blockalign(bs, s->cluster_size);
|
2018-11-01 21:27:36 +03:00
|
|
|
|
2018-11-01 21:27:37 +03:00
|
|
|
BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
|
2019-04-22 17:58:31 +03:00
|
|
|
ret = bdrv_co_pread(bs->file, coffset, csize, buf, 0);
|
2018-11-01 21:27:37 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
2018-11-01 21:27:34 +03:00
|
|
|
}
|
2018-11-01 21:27:37 +03:00
|
|
|
|
2018-11-01 21:27:38 +03:00
|
|
|
if (qcow2_co_decompress(bs, out_buf, s->cluster_size, buf, csize) < 0) {
|
2018-11-01 21:27:37 +03:00
|
|
|
ret = -EIO;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2019-06-04 19:15:13 +03:00
|
|
|
qemu_iovec_from_buf(qiov, qiov_offset, out_buf + offset_in_cluster, bytes);
|
2018-11-01 21:27:37 +03:00
|
|
|
|
|
|
|
fail:
|
|
|
|
qemu_vfree(out_buf);
|
|
|
|
g_free(buf);
|
|
|
|
|
|
|
|
return ret;
|
2018-11-01 21:27:34 +03:00
|
|
|
}
|
|
|
|
|
2014-10-24 17:57:32 +04:00
|
|
|
static int make_completely_empty(BlockDriverState *bs)
|
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2017-03-28 23:51:27 +03:00
|
|
|
Error *local_err = NULL;
|
2014-10-24 17:57:32 +04:00
|
|
|
int ret, l1_clusters;
|
|
|
|
int64_t offset;
|
|
|
|
uint64_t *new_reftable = NULL;
|
|
|
|
uint64_t rt_entry, l1_size2;
|
|
|
|
struct {
|
|
|
|
uint64_t l1_offset;
|
|
|
|
uint64_t reftable_offset;
|
|
|
|
uint32_t reftable_clusters;
|
|
|
|
} QEMU_PACKED l1_ofs_rt_ofs_cls;
|
|
|
|
|
|
|
|
ret = qcow2_cache_empty(bs, s->l2_table_cache);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = qcow2_cache_empty(bs, s->refcount_block_cache);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Refcounts will be broken utterly */
|
|
|
|
ret = qcow2_mark_dirty(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
|
|
|
|
|
2020-08-28 14:08:28 +03:00
|
|
|
l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / L1E_SIZE);
|
|
|
|
l1_size2 = (uint64_t)s->l1_size * L1E_SIZE;
|
2014-10-24 17:57:32 +04:00
|
|
|
|
|
|
|
/* After this call, neither the in-memory nor the on-disk refcount
|
|
|
|
* information accurately describe the actual references */
|
|
|
|
|
2016-06-16 16:13:15 +03:00
|
|
|
ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset,
|
2016-06-02 00:10:04 +03:00
|
|
|
l1_clusters * s->cluster_size, 0);
|
2014-10-24 17:57:32 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
goto fail_broken_refcounts;
|
|
|
|
}
|
|
|
|
memset(s->l1_table, 0, l1_size2);
|
|
|
|
|
|
|
|
BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE);
|
|
|
|
|
|
|
|
/* Overwrite enough clusters at the beginning of the sectors to place
|
|
|
|
* the refcount table, a refcount block and the L1 table in; this may
|
|
|
|
* overwrite parts of the existing refcount and L1 table, which is not
|
|
|
|
* an issue because the dirty flag is set, complete data loss is in fact
|
|
|
|
* desired and partial data loss is consequently fine as well */
|
2016-06-16 16:13:15 +03:00
|
|
|
ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size,
|
2016-06-02 00:10:04 +03:00
|
|
|
(2 + l1_clusters) * s->cluster_size, 0);
|
2014-10-24 17:57:32 +04:00
|
|
|
/* This call (even if it failed overall) may have overwritten on-disk
|
|
|
|
* refcount structures; in that case, the in-memory refcount information
|
|
|
|
* will probably differ from the on-disk information which makes the BDS
|
|
|
|
* unusable */
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail_broken_refcounts;
|
|
|
|
}
|
|
|
|
|
|
|
|
BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
|
|
|
|
BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);
|
|
|
|
|
|
|
|
/* "Create" an empty reftable (one cluster) directly after the image
|
|
|
|
* header and an empty L1 table three clusters after the image header;
|
|
|
|
* the cluster between those two will be used as the first refblock */
|
2016-06-16 19:06:17 +03:00
|
|
|
l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size);
|
|
|
|
l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size);
|
|
|
|
l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1);
|
2016-06-20 21:09:15 +03:00
|
|
|
ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
|
2014-10-24 17:57:32 +04:00
|
|
|
&l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail_broken_refcounts;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->l1_table_offset = 3 * s->cluster_size;
|
|
|
|
|
2020-08-28 14:08:28 +03:00
|
|
|
new_reftable = g_try_new0(uint64_t, s->cluster_size / REFTABLE_ENTRY_SIZE);
|
2014-10-24 17:57:32 +04:00
|
|
|
if (!new_reftable) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto fail_broken_refcounts;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->refcount_table_offset = s->cluster_size;
|
2020-08-28 14:08:28 +03:00
|
|
|
s->refcount_table_size = s->cluster_size / REFTABLE_ENTRY_SIZE;
|
2017-02-01 15:38:28 +03:00
|
|
|
s->max_refcount_table_index = 0;
|
2014-10-24 17:57:32 +04:00
|
|
|
|
|
|
|
g_free(s->refcount_table);
|
|
|
|
s->refcount_table = new_reftable;
|
|
|
|
new_reftable = NULL;
|
|
|
|
|
|
|
|
/* Now the in-memory refcount information again corresponds to the on-disk
|
|
|
|
* information (reftable is empty and no refblocks (the refblock cache is
|
|
|
|
* empty)); however, this means some clusters (e.g. the image header) are
|
|
|
|
* referenced, but not refcounted, but the normal qcow2 code assumes that
|
|
|
|
* the in-memory information is always correct */
|
|
|
|
|
|
|
|
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
|
|
|
|
|
|
|
|
/* Enter the first refblock into the reftable */
|
|
|
|
rt_entry = cpu_to_be64(2 * s->cluster_size);
|
2016-06-20 21:09:15 +03:00
|
|
|
ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
|
2014-10-24 17:57:32 +04:00
|
|
|
&rt_entry, sizeof(rt_entry));
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail_broken_refcounts;
|
|
|
|
}
|
|
|
|
s->refcount_table[0] = 2 * s->cluster_size;
|
|
|
|
|
|
|
|
s->free_cluster_index = 0;
|
|
|
|
assert(3 + l1_clusters <= s->refcount_block_size);
|
|
|
|
offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2);
|
|
|
|
if (offset < 0) {
|
|
|
|
ret = offset;
|
|
|
|
goto fail_broken_refcounts;
|
|
|
|
} else if (offset > 0) {
|
|
|
|
error_report("First cluster in emptied image is in use");
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now finally the in-memory information corresponds to the on-disk
|
|
|
|
* structures and is correct */
|
|
|
|
ret = qcow2_mark_clean(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2019-09-18 12:51:40 +03:00
|
|
|
ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false,
|
2020-04-24 15:54:40 +03:00
|
|
|
PREALLOC_MODE_OFF, 0, &local_err);
|
2014-10-24 17:57:32 +04:00
|
|
|
if (ret < 0) {
|
2017-03-28 23:51:27 +03:00
|
|
|
error_report_err(local_err);
|
2014-10-24 17:57:32 +04:00
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
fail_broken_refcounts:
|
|
|
|
/* The BDS is unusable at this point. If we wanted to make it usable, we
|
|
|
|
* would have to call qcow2_refcount_close(), qcow2_refcount_init(),
|
|
|
|
* qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init()
|
|
|
|
* again. However, because the functions which could have caused this error
|
|
|
|
* path to be taken are used by those functions as well, it's very likely
|
|
|
|
* that that sequence will fail as well. Therefore, just eject the BDS. */
|
|
|
|
bs->drv = NULL;
|
|
|
|
|
|
|
|
fail:
|
|
|
|
g_free(new_reftable);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-10-24 17:57:31 +04:00
|
|
|
static int qcow2_make_empty(BlockDriverState *bs)
|
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
qcow2: Discard/zero clusters by byte count
Passing a byte offset, but sector count, when we ultimately
want to operate on cluster granularity, is madness. Clean up
the external interfaces to take both offset and count as bytes,
while still keeping the assertion added previously that the
caller must align the values to a cluster. Then rename things
to make sure backports don't get confused by changed units:
instead of qcow2_discard_clusters() and qcow2_zero_clusters(),
we now have qcow2_cluster_discard() and qcow2_cluster_zeroize().
The internal functions still operate on clusters at a time, and
return an int for number of cleared clusters; but on an image
with 2M clusters, a single L2 table holds 256k entries that each
represent a 2M cluster, totalling well over INT_MAX bytes if we
ever had a request for that many bytes at once. All our callers
currently limit themselves to 32-bit bytes (and therefore fewer
clusters), but by making this function 64-bit clean, we have one
less place to clean up if we later improve the block layer to
support 64-bit bytes through all operations (with the block layer
auto-fragmenting on behalf of more-limited drivers), rather than
the current state where some interfaces are artificially limited
to INT_MAX at a time.
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170507000552.20847-13-eblake@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-05-07 03:05:52 +03:00
|
|
|
uint64_t offset, end_offset;
|
|
|
|
int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size);
|
2014-10-24 17:57:32 +04:00
|
|
|
int l1_clusters, ret = 0;
|
|
|
|
|
2020-08-28 14:08:28 +03:00
|
|
|
l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / L1E_SIZE);
|
2014-10-24 17:57:32 +04:00
|
|
|
|
2017-11-17 19:47:47 +03:00
|
|
|
if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps &&
|
2017-11-17 14:29:13 +03:00
|
|
|
3 + l1_clusters <= s->refcount_block_size &&
|
2019-04-29 13:52:21 +03:00
|
|
|
s->crypt_method_header != QCOW_CRYPT_LUKS &&
|
|
|
|
!has_data_file(bs)) {
|
2017-11-17 19:47:47 +03:00
|
|
|
/* The following function only works for qcow2 v3 images (it
|
|
|
|
* requires the dirty flag) and only as long as there are no
|
|
|
|
* features that reserve extra clusters (such as snapshots,
|
|
|
|
* LUKS header, or persistent bitmaps), because it completely
|
|
|
|
* empties the image. Furthermore, the L1 table and three
|
|
|
|
* additional clusters (image header, refcount table, one
|
2019-04-29 13:52:21 +03:00
|
|
|
* refcount block) have to fit inside one refcount block. It
|
|
|
|
* only resets the image file, i.e. does not work with an
|
|
|
|
* external data file. */
|
2014-10-24 17:57:32 +04:00
|
|
|
return make_completely_empty(bs);
|
|
|
|
}
|
2014-10-24 17:57:31 +04:00
|
|
|
|
2014-10-24 17:57:32 +04:00
|
|
|
/* This fallback code simply discards every active cluster; this is slow,
|
|
|
|
* but works in all cases */
|
qcow2: Discard/zero clusters by byte count
Passing a byte offset, but sector count, when we ultimately
want to operate on cluster granularity, is madness. Clean up
the external interfaces to take both offset and count as bytes,
while still keeping the assertion added previously that the
caller must align the values to a cluster. Then rename things
to make sure backports don't get confused by changed units:
instead of qcow2_discard_clusters() and qcow2_zero_clusters(),
we now have qcow2_cluster_discard() and qcow2_cluster_zeroize().
The internal functions still operate on clusters at a time, and
return an int for number of cleared clusters; but on an image
with 2M clusters, a single L2 table holds 256k entries that each
represent a 2M cluster, totalling well over INT_MAX bytes if we
ever had a request for that many bytes at once. All our callers
currently limit themselves to 32-bit bytes (and therefore fewer
clusters), but by making this function 64-bit clean, we have one
less place to clean up if we later improve the block layer to
support 64-bit bytes through all operations (with the block layer
auto-fragmenting on behalf of more-limited drivers), rather than
the current state where some interfaces are artificially limited
to INT_MAX at a time.
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170507000552.20847-13-eblake@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-05-07 03:05:52 +03:00
|
|
|
end_offset = bs->total_sectors * BDRV_SECTOR_SIZE;
|
|
|
|
for (offset = 0; offset < end_offset; offset += step) {
|
2014-10-24 17:57:31 +04:00
|
|
|
/* As this function is generally used after committing an external
|
|
|
|
* snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the
|
|
|
|
* default action for this kind of discard is to pass the discard,
|
|
|
|
* which will ideally result in an actually smaller image file, as
|
|
|
|
* is probably desired. */
|
qcow2: Discard/zero clusters by byte count
Passing a byte offset, but sector count, when we ultimately
want to operate on cluster granularity, is madness. Clean up
the external interfaces to take both offset and count as bytes,
while still keeping the assertion added previously that the
caller must align the values to a cluster. Then rename things
to make sure backports don't get confused by changed units:
instead of qcow2_discard_clusters() and qcow2_zero_clusters(),
we now have qcow2_cluster_discard() and qcow2_cluster_zeroize().
The internal functions still operate on clusters at a time, and
return an int for number of cleared clusters; but on an image
with 2M clusters, a single L2 table holds 256k entries that each
represent a 2M cluster, totalling well over INT_MAX bytes if we
ever had a request for that many bytes at once. All our callers
currently limit themselves to 32-bit bytes (and therefore fewer
clusters), but by making this function 64-bit clean, we have one
less place to clean up if we later improve the block layer to
support 64-bit bytes through all operations (with the block layer
auto-fragmenting on behalf of more-limited drivers), rather than
the current state where some interfaces are artificially limited
to INT_MAX at a time.
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170507000552.20847-13-eblake@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-05-07 03:05:52 +03:00
|
|
|
ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset),
|
|
|
|
QCOW2_DISCARD_SNAPSHOT, true);
|
2014-10-24 17:57:31 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2011-11-10 12:23:22 +04:00
|
|
|
static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
|
2010-04-24 00:19:47 +04:00
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2011-01-10 19:17:28 +03:00
|
|
|
int ret;
|
|
|
|
|
2011-10-20 15:16:24 +04:00
|
|
|
qemu_co_mutex_lock(&s->lock);
|
2018-03-01 19:36:14 +03:00
|
|
|
ret = qcow2_write_caches(bs);
|
2011-10-20 15:16:24 +04:00
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
2011-01-10 19:17:28 +03:00
|
|
|
|
2018-03-01 19:36:14 +03:00
|
|
|
return ret;
|
2011-11-10 21:10:11 +04:00
|
|
|
}
|
|
|
|
|
2017-07-05 15:57:35 +03:00
|
|
|
static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
Error *local_err = NULL;
|
|
|
|
BlockMeasureInfo *info;
|
|
|
|
uint64_t required = 0; /* bytes that contribute to required size */
|
|
|
|
uint64_t virtual_size; /* disk size as seen by guest */
|
|
|
|
uint64_t refcount_bits;
|
|
|
|
uint64_t l2_tables;
|
2019-02-18 13:45:24 +03:00
|
|
|
uint64_t luks_payload_size = 0;
|
2017-07-05 15:57:35 +03:00
|
|
|
size_t cluster_size;
|
|
|
|
int version;
|
|
|
|
char *optstr;
|
|
|
|
PreallocMode prealloc;
|
|
|
|
bool has_backing_file;
|
2019-02-18 13:45:24 +03:00
|
|
|
bool has_luks;
|
2020-07-10 19:13:13 +03:00
|
|
|
bool extended_l2;
|
2020-07-10 19:13:11 +03:00
|
|
|
size_t l2e_size;
|
2017-07-05 15:57:35 +03:00
|
|
|
|
|
|
|
/* Parse image creation options */
|
2020-07-10 19:13:13 +03:00
|
|
|
extended_l2 = qemu_opt_get_bool_del(opts, BLOCK_OPT_EXTL2, false);
|
|
|
|
|
|
|
|
cluster_size = qcow2_opt_get_cluster_size_del(opts, extended_l2,
|
|
|
|
&local_err);
|
2017-07-05 15:57:35 +03:00
|
|
|
if (local_err) {
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
version = qcow2_opt_get_version_del(opts, &local_err);
|
|
|
|
if (local_err) {
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err);
|
|
|
|
if (local_err) {
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
|
2017-08-24 11:46:10 +03:00
|
|
|
prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr,
|
2017-08-24 11:45:57 +03:00
|
|
|
PREALLOC_MODE_OFF, &local_err);
|
2017-07-05 15:57:35 +03:00
|
|
|
g_free(optstr);
|
|
|
|
if (local_err) {
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
|
|
|
|
has_backing_file = !!optstr;
|
|
|
|
g_free(optstr);
|
|
|
|
|
2019-02-18 13:45:24 +03:00
|
|
|
optstr = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT);
|
|
|
|
has_luks = optstr && strcmp(optstr, "luks") == 0;
|
|
|
|
g_free(optstr);
|
|
|
|
|
|
|
|
if (has_luks) {
|
2020-02-21 14:25:19 +03:00
|
|
|
g_autoptr(QCryptoBlockCreateOptions) create_opts = NULL;
|
2020-06-25 15:55:43 +03:00
|
|
|
QDict *cryptoopts = qcow2_extract_crypto_opts(opts, "luks", errp);
|
2019-02-18 13:45:24 +03:00
|
|
|
size_t headerlen;
|
|
|
|
|
2020-02-21 14:25:19 +03:00
|
|
|
create_opts = block_crypto_create_opts_init(cryptoopts, errp);
|
|
|
|
qobject_unref(cryptoopts);
|
|
|
|
if (!create_opts) {
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!qcrypto_block_calculate_payload_offset(create_opts,
|
|
|
|
"encrypt.",
|
|
|
|
&headerlen,
|
|
|
|
&local_err)) {
|
2019-02-18 13:45:24 +03:00
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
luks_payload_size = ROUND_UP(headerlen, cluster_size);
|
|
|
|
}
|
|
|
|
|
2018-02-15 16:10:08 +03:00
|
|
|
virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
|
|
|
|
virtual_size = ROUND_UP(virtual_size, cluster_size);
|
2017-07-05 15:57:35 +03:00
|
|
|
|
|
|
|
/* Check that virtual disk size is valid */
|
2020-07-10 19:13:11 +03:00
|
|
|
l2e_size = extended_l2 ? L2E_SIZE_EXTENDED : L2E_SIZE_NORMAL;
|
2017-07-05 15:57:35 +03:00
|
|
|
l2_tables = DIV_ROUND_UP(virtual_size / cluster_size,
|
2020-07-10 19:13:11 +03:00
|
|
|
cluster_size / l2e_size);
|
2020-08-28 14:08:28 +03:00
|
|
|
if (l2_tables * L1E_SIZE > QCOW_MAX_L1_SIZE) {
|
2017-07-05 15:57:35 +03:00
|
|
|
error_setg(&local_err, "The image size is too large "
|
|
|
|
"(try using a larger cluster size)");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Account for input image */
|
|
|
|
if (in_bs) {
|
|
|
|
int64_t ssize = bdrv_getlength(in_bs);
|
|
|
|
if (ssize < 0) {
|
|
|
|
error_setg_errno(&local_err, -ssize,
|
|
|
|
"Unable to get image virtual_size");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2018-02-15 16:10:08 +03:00
|
|
|
virtual_size = ROUND_UP(ssize, cluster_size);
|
2017-07-05 15:57:35 +03:00
|
|
|
|
|
|
|
if (has_backing_file) {
|
|
|
|
/* We don't how much of the backing chain is shared by the input
|
|
|
|
* image and the new image file. In the worst case the new image's
|
|
|
|
* backing file has nothing in common with the input image. Be
|
|
|
|
* conservative and assume all clusters need to be written.
|
|
|
|
*/
|
|
|
|
required = virtual_size;
|
|
|
|
} else {
|
2017-09-25 17:55:22 +03:00
|
|
|
int64_t offset;
|
block: Convert bdrv_get_block_status_above() to bytes
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the name of the function from bdrv_get_block_status_above()
to bdrv_block_status_above() ensures that the compiler enforces that
all callers are updated. Likewise, since it a byte interface allows
an offset mapping that might not be sector aligned, split the mapping
out of the return value and into a pass-by-reference parameter. For
now, the io.c layer still assert()s that all uses are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status in the drivers.
For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_block_status(), plus
updates for the new split return interface. But some code,
particularly bdrv_block_status(), gets a lot simpler because it no
longer has to mess with sectors. Likewise, mirror code no longer
computes s->granularity >> BDRV_SECTOR_BITS, and can therefore drop
an assertion about alignment because the loop no longer depends on
alignment (never mind that we don't really have a driver that
reports sub-sector alignments, so it's not really possible to test
the effect of sub-sector mirroring). Fix a neighboring assertion to
use is_power_of_2 while there.
For ease of review, bdrv_get_block_status() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-10-12 06:47:08 +03:00
|
|
|
int64_t pnum = 0;
|
2017-07-05 15:57:35 +03:00
|
|
|
|
block: Convert bdrv_get_block_status_above() to bytes
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the name of the function from bdrv_get_block_status_above()
to bdrv_block_status_above() ensures that the compiler enforces that
all callers are updated. Likewise, since it a byte interface allows
an offset mapping that might not be sector aligned, split the mapping
out of the return value and into a pass-by-reference parameter. For
now, the io.c layer still assert()s that all uses are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status in the drivers.
For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_block_status(), plus
updates for the new split return interface. But some code,
particularly bdrv_block_status(), gets a lot simpler because it no
longer has to mess with sectors. Likewise, mirror code no longer
computes s->granularity >> BDRV_SECTOR_BITS, and can therefore drop
an assertion about alignment because the loop no longer depends on
alignment (never mind that we don't really have a driver that
reports sub-sector alignments, so it's not really possible to test
the effect of sub-sector mirroring). Fix a neighboring assertion to
use is_power_of_2 while there.
For ease of review, bdrv_get_block_status() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-10-12 06:47:08 +03:00
|
|
|
for (offset = 0; offset < ssize; offset += pnum) {
|
|
|
|
int ret;
|
2017-07-05 15:57:35 +03:00
|
|
|
|
block: Convert bdrv_get_block_status_above() to bytes
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the name of the function from bdrv_get_block_status_above()
to bdrv_block_status_above() ensures that the compiler enforces that
all callers are updated. Likewise, since it a byte interface allows
an offset mapping that might not be sector aligned, split the mapping
out of the return value and into a pass-by-reference parameter. For
now, the io.c layer still assert()s that all uses are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status in the drivers.
For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_block_status(), plus
updates for the new split return interface. But some code,
particularly bdrv_block_status(), gets a lot simpler because it no
longer has to mess with sectors. Likewise, mirror code no longer
computes s->granularity >> BDRV_SECTOR_BITS, and can therefore drop
an assertion about alignment because the loop no longer depends on
alignment (never mind that we don't really have a driver that
reports sub-sector alignments, so it's not really possible to test
the effect of sub-sector mirroring). Fix a neighboring assertion to
use is_power_of_2 while there.
For ease of review, bdrv_get_block_status() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-10-12 06:47:08 +03:00
|
|
|
ret = bdrv_block_status_above(in_bs, NULL, offset,
|
|
|
|
ssize - offset, &pnum, NULL,
|
|
|
|
NULL);
|
2017-07-05 15:57:35 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(&local_err, -ret,
|
|
|
|
"Unable to get block status");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ret & BDRV_BLOCK_ZERO) {
|
|
|
|
/* Skip zero regions (safe with no backing file) */
|
|
|
|
} else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
|
|
|
|
(BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) {
|
|
|
|
/* Extend pnum to end of cluster for next iteration */
|
block: Convert bdrv_get_block_status_above() to bytes
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the name of the function from bdrv_get_block_status_above()
to bdrv_block_status_above() ensures that the compiler enforces that
all callers are updated. Likewise, since it a byte interface allows
an offset mapping that might not be sector aligned, split the mapping
out of the return value and into a pass-by-reference parameter. For
now, the io.c layer still assert()s that all uses are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status in the drivers.
For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_block_status(), plus
updates for the new split return interface. But some code,
particularly bdrv_block_status(), gets a lot simpler because it no
longer has to mess with sectors. Likewise, mirror code no longer
computes s->granularity >> BDRV_SECTOR_BITS, and can therefore drop
an assertion about alignment because the loop no longer depends on
alignment (never mind that we don't really have a driver that
reports sub-sector alignments, so it's not really possible to test
the effect of sub-sector mirroring). Fix a neighboring assertion to
use is_power_of_2 while there.
For ease of review, bdrv_get_block_status() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-10-12 06:47:08 +03:00
|
|
|
pnum = ROUND_UP(offset + pnum, cluster_size) - offset;
|
2017-07-05 15:57:35 +03:00
|
|
|
|
|
|
|
/* Count clusters we've seen */
|
block: Convert bdrv_get_block_status_above() to bytes
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the name of the function from bdrv_get_block_status_above()
to bdrv_block_status_above() ensures that the compiler enforces that
all callers are updated. Likewise, since it a byte interface allows
an offset mapping that might not be sector aligned, split the mapping
out of the return value and into a pass-by-reference parameter. For
now, the io.c layer still assert()s that all uses are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status in the drivers.
For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_block_status(), plus
updates for the new split return interface. But some code,
particularly bdrv_block_status(), gets a lot simpler because it no
longer has to mess with sectors. Likewise, mirror code no longer
computes s->granularity >> BDRV_SECTOR_BITS, and can therefore drop
an assertion about alignment because the loop no longer depends on
alignment (never mind that we don't really have a driver that
reports sub-sector alignments, so it's not really possible to test
the effect of sub-sector mirroring). Fix a neighboring assertion to
use is_power_of_2 while there.
For ease of review, bdrv_get_block_status() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-10-12 06:47:08 +03:00
|
|
|
required += offset % cluster_size + pnum;
|
2017-07-05 15:57:35 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Take into account preallocation. Nothing special is needed for
|
|
|
|
* PREALLOC_MODE_METADATA since metadata is always counted.
|
|
|
|
*/
|
|
|
|
if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) {
|
|
|
|
required = virtual_size;
|
|
|
|
}
|
|
|
|
|
qcow2: Expose bitmaps' size during measure
It's useful to know how much space can be occupied by qcow2 persistent
bitmaps, even though such metadata is unrelated to the guest-visible
data. Report this value as an additional QMP field, present when
measuring an existing image and output format that both support
bitmaps. Update iotest 178 and 190 to updated output, as well as new
coverage in 190 demonstrating non-zero values made possible with the
recently-added qemu-img bitmap command (see 3b51ab4b).
The new 'bitmaps size:' field is displayed automatically as part of
'qemu-img measure' any time it is present in QMP (that is, any time
both the source image being measured and destination format support
bitmaps, even if the measurement is 0 because there are no bitmaps
present). If the field is absent, it means that no bitmaps can be
copied (source, destination, or both lack bitmaps, including when
measuring based on size rather than on a source image). This behavior
is compatible with an upcoming patch adding 'qemu-img convert
--bitmaps': that command will fail in the same situations where this
patch omits the field.
The addition of a new field demonstrates why we should always
zero-initialize qapi C structs; while the qcow2 driver still fully
populates all fields, the raw and crypto drivers had to be tweaked to
avoid uninitialized data.
Consideration was also given towards having a 'qemu-img measure
--bitmaps' which errors out when bitmaps are not possible, and
otherwise sums the bitmaps into the existing allocation totals rather
than displaying as a separate field, as a potential convenience
factor. But this was ultimately decided to be more complexity than
necessary when the QMP interface was sufficient enough with bitmaps
remaining a separate field.
See also: https://bugzilla.redhat.com/1779904
Reported-by: Nir Soffer <nsoffer@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20200521192137.1120211-3-eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2020-05-21 22:21:34 +03:00
|
|
|
info = g_new0(BlockMeasureInfo, 1);
|
2020-07-10 19:13:11 +03:00
|
|
|
info->fully_allocated = luks_payload_size +
|
2017-07-05 15:57:35 +03:00
|
|
|
qcow2_calc_prealloc_size(virtual_size, cluster_size,
|
2020-07-10 19:13:11 +03:00
|
|
|
ctz32(refcount_bits), extended_l2);
|
2017-07-05 15:57:35 +03:00
|
|
|
|
qcow2: Expose bitmaps' size during measure
It's useful to know how much space can be occupied by qcow2 persistent
bitmaps, even though such metadata is unrelated to the guest-visible
data. Report this value as an additional QMP field, present when
measuring an existing image and output format that both support
bitmaps. Update iotest 178 and 190 to updated output, as well as new
coverage in 190 demonstrating non-zero values made possible with the
recently-added qemu-img bitmap command (see 3b51ab4b).
The new 'bitmaps size:' field is displayed automatically as part of
'qemu-img measure' any time it is present in QMP (that is, any time
both the source image being measured and destination format support
bitmaps, even if the measurement is 0 because there are no bitmaps
present). If the field is absent, it means that no bitmaps can be
copied (source, destination, or both lack bitmaps, including when
measuring based on size rather than on a source image). This behavior
is compatible with an upcoming patch adding 'qemu-img convert
--bitmaps': that command will fail in the same situations where this
patch omits the field.
The addition of a new field demonstrates why we should always
zero-initialize qapi C structs; while the qcow2 driver still fully
populates all fields, the raw and crypto drivers had to be tweaked to
avoid uninitialized data.
Consideration was also given towards having a 'qemu-img measure
--bitmaps' which errors out when bitmaps are not possible, and
otherwise sums the bitmaps into the existing allocation totals rather
than displaying as a separate field, as a potential convenience
factor. But this was ultimately decided to be more complexity than
necessary when the QMP interface was sufficient enough with bitmaps
remaining a separate field.
See also: https://bugzilla.redhat.com/1779904
Reported-by: Nir Soffer <nsoffer@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20200521192137.1120211-3-eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2020-05-21 22:21:34 +03:00
|
|
|
/*
|
|
|
|
* Remove data clusters that are not required. This overestimates the
|
2017-07-05 15:57:35 +03:00
|
|
|
* required size because metadata needed for the fully allocated file is
|
qcow2: Expose bitmaps' size during measure
It's useful to know how much space can be occupied by qcow2 persistent
bitmaps, even though such metadata is unrelated to the guest-visible
data. Report this value as an additional QMP field, present when
measuring an existing image and output format that both support
bitmaps. Update iotest 178 and 190 to updated output, as well as new
coverage in 190 demonstrating non-zero values made possible with the
recently-added qemu-img bitmap command (see 3b51ab4b).
The new 'bitmaps size:' field is displayed automatically as part of
'qemu-img measure' any time it is present in QMP (that is, any time
both the source image being measured and destination format support
bitmaps, even if the measurement is 0 because there are no bitmaps
present). If the field is absent, it means that no bitmaps can be
copied (source, destination, or both lack bitmaps, including when
measuring based on size rather than on a source image). This behavior
is compatible with an upcoming patch adding 'qemu-img convert
--bitmaps': that command will fail in the same situations where this
patch omits the field.
The addition of a new field demonstrates why we should always
zero-initialize qapi C structs; while the qcow2 driver still fully
populates all fields, the raw and crypto drivers had to be tweaked to
avoid uninitialized data.
Consideration was also given towards having a 'qemu-img measure
--bitmaps' which errors out when bitmaps are not possible, and
otherwise sums the bitmaps into the existing allocation totals rather
than displaying as a separate field, as a potential convenience
factor. But this was ultimately decided to be more complexity than
necessary when the QMP interface was sufficient enough with bitmaps
remaining a separate field.
See also: https://bugzilla.redhat.com/1779904
Reported-by: Nir Soffer <nsoffer@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20200521192137.1120211-3-eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2020-05-21 22:21:34 +03:00
|
|
|
* still counted. Show bitmaps only if both source and destination
|
|
|
|
* would support them.
|
2017-07-05 15:57:35 +03:00
|
|
|
*/
|
|
|
|
info->required = info->fully_allocated - virtual_size + required;
|
qcow2: Expose bitmaps' size during measure
It's useful to know how much space can be occupied by qcow2 persistent
bitmaps, even though such metadata is unrelated to the guest-visible
data. Report this value as an additional QMP field, present when
measuring an existing image and output format that both support
bitmaps. Update iotest 178 and 190 to updated output, as well as new
coverage in 190 demonstrating non-zero values made possible with the
recently-added qemu-img bitmap command (see 3b51ab4b).
The new 'bitmaps size:' field is displayed automatically as part of
'qemu-img measure' any time it is present in QMP (that is, any time
both the source image being measured and destination format support
bitmaps, even if the measurement is 0 because there are no bitmaps
present). If the field is absent, it means that no bitmaps can be
copied (source, destination, or both lack bitmaps, including when
measuring based on size rather than on a source image). This behavior
is compatible with an upcoming patch adding 'qemu-img convert
--bitmaps': that command will fail in the same situations where this
patch omits the field.
The addition of a new field demonstrates why we should always
zero-initialize qapi C structs; while the qcow2 driver still fully
populates all fields, the raw and crypto drivers had to be tweaked to
avoid uninitialized data.
Consideration was also given towards having a 'qemu-img measure
--bitmaps' which errors out when bitmaps are not possible, and
otherwise sums the bitmaps into the existing allocation totals rather
than displaying as a separate field, as a potential convenience
factor. But this was ultimately decided to be more complexity than
necessary when the QMP interface was sufficient enough with bitmaps
remaining a separate field.
See also: https://bugzilla.redhat.com/1779904
Reported-by: Nir Soffer <nsoffer@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20200521192137.1120211-3-eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2020-05-21 22:21:34 +03:00
|
|
|
info->has_bitmaps = version >= 3 && in_bs &&
|
|
|
|
bdrv_supports_persistent_dirty_bitmap(in_bs);
|
|
|
|
if (info->has_bitmaps) {
|
|
|
|
info->bitmaps = qcow2_get_persistent_dirty_bitmap_size(in_bs,
|
|
|
|
cluster_size);
|
|
|
|
}
|
2017-07-05 15:57:35 +03:00
|
|
|
return info;
|
|
|
|
|
|
|
|
err:
|
|
|
|
error_propagate(errp, local_err);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2010-12-17 18:02:39 +03:00
|
|
|
static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
|
2010-04-24 00:19:47 +04:00
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2010-04-24 00:19:47 +04:00
|
|
|
bdi->cluster_size = s->cluster_size;
|
2010-12-17 18:02:39 +03:00
|
|
|
bdi->vm_state_offset = qcow2_vm_state_offset(s);
|
2021-05-04 19:06:56 +03:00
|
|
|
bdi->is_dirty = s->incompatible_features & QCOW2_INCOMPAT_DIRTY;
|
2010-04-24 00:19:47 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-02-08 18:06:06 +03:00
|
|
|
static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs,
|
|
|
|
Error **errp)
|
2013-10-09 12:46:18 +04:00
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2017-06-23 19:24:18 +03:00
|
|
|
ImageInfoSpecific *spec_info;
|
|
|
|
QCryptoBlockInfo *encrypt_info = NULL;
|
2013-10-09 12:46:18 +04:00
|
|
|
|
2017-06-23 19:24:18 +03:00
|
|
|
if (s->crypto != NULL) {
|
2021-02-02 15:49:50 +03:00
|
|
|
encrypt_info = qcrypto_block_get_info(s->crypto, errp);
|
|
|
|
if (!encrypt_info) {
|
2019-02-08 18:06:06 +03:00
|
|
|
return NULL;
|
|
|
|
}
|
2017-06-23 19:24:18 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
spec_info = g_new(ImageInfoSpecific, 1);
|
2013-10-09 12:46:18 +04:00
|
|
|
*spec_info = (ImageInfoSpecific){
|
2015-10-27 01:34:54 +03:00
|
|
|
.type = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
|
2019-02-08 18:06:07 +03:00
|
|
|
.u.qcow2.data = g_new0(ImageInfoSpecificQCow2, 1),
|
2013-10-09 12:46:18 +04:00
|
|
|
};
|
|
|
|
if (s->qcow_version == 2) {
|
qapi: Don't special-case simple union wrappers
Simple unions were carrying a special case that hid their 'data'
QMP member from the resulting C struct, via the hack method
QAPISchemaObjectTypeVariant.simple_union_type(). But by using
the work we started by unboxing flat union and alternate
branches, coupled with the ability to visit the members of an
implicit type, we can now expose the simple union's implicit
type in qapi-types.h:
| struct q_obj_ImageInfoSpecificQCow2_wrapper {
| ImageInfoSpecificQCow2 *data;
| };
|
| struct q_obj_ImageInfoSpecificVmdk_wrapper {
| ImageInfoSpecificVmdk *data;
| };
...
| struct ImageInfoSpecific {
| ImageInfoSpecificKind type;
| union { /* union tag is @type */
| void *data;
|- ImageInfoSpecificQCow2 *qcow2;
|- ImageInfoSpecificVmdk *vmdk;
|+ q_obj_ImageInfoSpecificQCow2_wrapper qcow2;
|+ q_obj_ImageInfoSpecificVmdk_wrapper vmdk;
| } u;
| };
Doing this removes asymmetry between QAPI's QMP side and its
C side (both sides now expose 'data'), and means that the
treatment of a simple union as sugar for a flat union is now
equivalent in both languages (previously the two approaches used
a different layer of dereferencing, where the simple union could
be converted to a flat union with equivalent C layout but
different {} on the wire, or to an equivalent QMP wire form
but with different C representation). Using the implicit type
also lets us get rid of the simple_union_type() hack.
Of course, now all clients of simple unions have to adjust from
using su->u.member to using su->u.member.data; while this touches
a number of files in the tree, some earlier cleanup patches
helped minimize the change to the initialization of a temporary
variable rather than every single member access. The generated
qapi-visit.c code is also affected by the layout change:
|@@ -7393,10 +7393,10 @@ void visit_type_ImageInfoSpecific_member
| }
| switch (obj->type) {
| case IMAGE_INFO_SPECIFIC_KIND_QCOW2:
|- visit_type_ImageInfoSpecificQCow2(v, "data", &obj->u.qcow2, &err);
|+ visit_type_q_obj_ImageInfoSpecificQCow2_wrapper_members(v, &obj->u.qcow2, &err);
| break;
| case IMAGE_INFO_SPECIFIC_KIND_VMDK:
|- visit_type_ImageInfoSpecificVmdk(v, "data", &obj->u.vmdk, &err);
|+ visit_type_q_obj_ImageInfoSpecificVmdk_wrapper_members(v, &obj->u.vmdk, &err);
| break;
| default:
| abort();
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1458254921-17042-13-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-03-18 01:48:37 +03:00
|
|
|
*spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
|
2015-02-10 23:28:44 +03:00
|
|
|
.compat = g_strdup("0.10"),
|
|
|
|
.refcount_bits = s->refcount_bits,
|
2013-10-09 12:46:18 +04:00
|
|
|
};
|
|
|
|
} else if (s->qcow_version == 3) {
|
2019-02-08 18:06:07 +03:00
|
|
|
Qcow2BitmapInfoList *bitmaps;
|
2021-02-02 15:49:50 +03:00
|
|
|
if (!qcow2_get_bitmap_info_list(bs, &bitmaps, errp)) {
|
2019-02-08 18:06:07 +03:00
|
|
|
qapi_free_ImageInfoSpecific(spec_info);
|
2020-03-20 21:36:20 +03:00
|
|
|
qapi_free_QCryptoBlockInfo(encrypt_info);
|
2019-02-08 18:06:07 +03:00
|
|
|
return NULL;
|
|
|
|
}
|
qapi: Don't special-case simple union wrappers
Simple unions were carrying a special case that hid their 'data'
QMP member from the resulting C struct, via the hack method
QAPISchemaObjectTypeVariant.simple_union_type(). But by using
the work we started by unboxing flat union and alternate
branches, coupled with the ability to visit the members of an
implicit type, we can now expose the simple union's implicit
type in qapi-types.h:
| struct q_obj_ImageInfoSpecificQCow2_wrapper {
| ImageInfoSpecificQCow2 *data;
| };
|
| struct q_obj_ImageInfoSpecificVmdk_wrapper {
| ImageInfoSpecificVmdk *data;
| };
...
| struct ImageInfoSpecific {
| ImageInfoSpecificKind type;
| union { /* union tag is @type */
| void *data;
|- ImageInfoSpecificQCow2 *qcow2;
|- ImageInfoSpecificVmdk *vmdk;
|+ q_obj_ImageInfoSpecificQCow2_wrapper qcow2;
|+ q_obj_ImageInfoSpecificVmdk_wrapper vmdk;
| } u;
| };
Doing this removes asymmetry between QAPI's QMP side and its
C side (both sides now expose 'data'), and means that the
treatment of a simple union as sugar for a flat union is now
equivalent in both languages (previously the two approaches used
a different layer of dereferencing, where the simple union could
be converted to a flat union with equivalent C layout but
different {} on the wire, or to an equivalent QMP wire form
but with different C representation). Using the implicit type
also lets us get rid of the simple_union_type() hack.
Of course, now all clients of simple unions have to adjust from
using su->u.member to using su->u.member.data; while this touches
a number of files in the tree, some earlier cleanup patches
helped minimize the change to the initialization of a temporary
variable rather than every single member access. The generated
qapi-visit.c code is also affected by the layout change:
|@@ -7393,10 +7393,10 @@ void visit_type_ImageInfoSpecific_member
| }
| switch (obj->type) {
| case IMAGE_INFO_SPECIFIC_KIND_QCOW2:
|- visit_type_ImageInfoSpecificQCow2(v, "data", &obj->u.qcow2, &err);
|+ visit_type_q_obj_ImageInfoSpecificQCow2_wrapper_members(v, &obj->u.qcow2, &err);
| break;
| case IMAGE_INFO_SPECIFIC_KIND_VMDK:
|- visit_type_ImageInfoSpecificVmdk(v, "data", &obj->u.vmdk, &err);
|+ visit_type_q_obj_ImageInfoSpecificVmdk_wrapper_members(v, &obj->u.vmdk, &err);
| break;
| default:
| abort();
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1458254921-17042-13-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-03-18 01:48:37 +03:00
|
|
|
*spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
|
2013-10-09 12:46:18 +04:00
|
|
|
.compat = g_strdup("1.1"),
|
|
|
|
.lazy_refcounts = s->compatible_features &
|
|
|
|
QCOW2_COMPAT_LAZY_REFCOUNTS,
|
|
|
|
.has_lazy_refcounts = true,
|
2014-09-30 23:31:28 +04:00
|
|
|
.corrupt = s->incompatible_features &
|
|
|
|
QCOW2_INCOMPAT_CORRUPT,
|
|
|
|
.has_corrupt = true,
|
2020-07-10 19:13:13 +03:00
|
|
|
.has_extended_l2 = true,
|
|
|
|
.extended_l2 = has_subclusters(s),
|
2015-02-10 23:28:44 +03:00
|
|
|
.refcount_bits = s->refcount_bits,
|
2019-02-08 18:06:07 +03:00
|
|
|
.has_bitmaps = !!bitmaps,
|
|
|
|
.bitmaps = bitmaps,
|
2019-01-15 21:02:40 +03:00
|
|
|
.has_data_file = !!s->image_data_file,
|
|
|
|
.data_file = g_strdup(s->image_data_file),
|
2019-02-22 16:29:38 +03:00
|
|
|
.has_data_file_raw = has_data_file(bs),
|
|
|
|
.data_file_raw = data_file_is_raw(bs),
|
qcow2: introduce compression type feature
The patch adds some preparation parts for incompatible compression type
feature to qcow2 allowing the use different compression methods for
image clusters (de)compressing.
It is implied that the compression type is set on the image creation and
can be changed only later by image conversion, thus compression type
defines the only compression algorithm used for the image, and thus,
for all image clusters.
The goal of the feature is to add support of other compression methods
to qcow2. For example, ZSTD which is more effective on compression than ZLIB.
The default compression is ZLIB. Images created with ZLIB compression type
are backward compatible with older qemu versions.
Adding of the compression type breaks a number of tests because now the
compression type is reported on image creation and there are some changes
in the qcow2 header in size and offsets.
The tests are fixed in the following ways:
* filter out compression_type for many tests
* fix header size, feature table size and backing file offset
affected tests: 031, 036, 061, 080
header_size +=8: 1 byte compression type
7 bytes padding
feature_table += 48: incompatible feature compression type
backing_file_offset += 56 (8 + 48 -> header_change + feature_table_change)
* add "compression type" for test output matching when it isn't filtered
affected tests: 049, 060, 061, 065, 082, 085, 144, 182, 185, 198, 206,
242, 255, 274, 280
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
QAPI part:
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20200507082521.29210-2-dplotnikov@virtuozzo.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2020-05-07 11:25:18 +03:00
|
|
|
.compression_type = s->compression_type,
|
2013-10-09 12:46:18 +04:00
|
|
|
};
|
2015-12-10 12:55:48 +03:00
|
|
|
} else {
|
|
|
|
/* if this assertion fails, this probably means a new version was
|
|
|
|
* added without having it covered here */
|
|
|
|
assert(false);
|
2013-10-09 12:46:18 +04:00
|
|
|
}
|
|
|
|
|
2017-06-23 19:24:18 +03:00
|
|
|
if (encrypt_info) {
|
|
|
|
ImageInfoSpecificQCow2Encryption *qencrypt =
|
|
|
|
g_new(ImageInfoSpecificQCow2Encryption, 1);
|
|
|
|
switch (encrypt_info->format) {
|
|
|
|
case Q_CRYPTO_BLOCK_FORMAT_QCOW:
|
|
|
|
qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES;
|
|
|
|
break;
|
|
|
|
case Q_CRYPTO_BLOCK_FORMAT_LUKS:
|
|
|
|
qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS;
|
|
|
|
qencrypt->u.luks = encrypt_info->u.luks;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
/* Since we did shallow copy above, erase any pointers
|
|
|
|
* in the original info */
|
|
|
|
memset(&encrypt_info->u, 0, sizeof(encrypt_info->u));
|
|
|
|
qapi_free_QCryptoBlockInfo(encrypt_info);
|
|
|
|
|
|
|
|
spec_info->u.qcow2.data->has_encrypt = true;
|
|
|
|
spec_info->u.qcow2.data->encrypt = qencrypt;
|
|
|
|
}
|
|
|
|
|
2013-10-09 12:46:18 +04:00
|
|
|
return spec_info;
|
|
|
|
}
|
|
|
|
|
2019-07-24 20:12:34 +03:00
|
|
|
static int qcow2_has_zero_init(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
bool preallocated;
|
|
|
|
|
|
|
|
if (qemu_in_coroutine()) {
|
|
|
|
qemu_co_mutex_lock(&s->lock);
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Check preallocation status: Preallocated images have all L2
|
|
|
|
* tables allocated, nonpreallocated images have none. It is
|
|
|
|
* therefore enough to check the first one.
|
|
|
|
*/
|
|
|
|
preallocated = s->l1_size > 0 && s->l1_table[0] != 0;
|
|
|
|
if (qemu_in_coroutine()) {
|
|
|
|
qemu_co_mutex_unlock(&s->lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!preallocated) {
|
|
|
|
return 1;
|
|
|
|
} else if (bs->encrypted) {
|
|
|
|
return 0;
|
|
|
|
} else {
|
|
|
|
return bdrv_has_zero_init(s->data_file->bs);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-09-03 13:27:58 +03:00
|
|
|
/*
|
|
|
|
* Check the request to vmstate. On success return
|
|
|
|
* qcow2_vm_state_offset(bs) + @pos
|
|
|
|
*/
|
|
|
|
static int64_t qcow2_check_vmstate_request(BlockDriverState *bs,
|
|
|
|
QEMUIOVector *qiov, int64_t pos)
|
|
|
|
{
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
int64_t vmstate_offset = qcow2_vm_state_offset(s);
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* Incoming requests must be OK */
|
|
|
|
bdrv_check_qiov_request(pos, qiov->size, qiov, 0, &error_abort);
|
|
|
|
|
|
|
|
if (INT64_MAX - pos < vmstate_offset) {
|
|
|
|
return -EIO;
|
|
|
|
}
|
|
|
|
|
|
|
|
pos += vmstate_offset;
|
|
|
|
ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
return pos;
|
|
|
|
}
|
|
|
|
|
2013-04-05 23:27:53 +04:00
|
|
|
static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
|
|
|
|
int64_t pos)
|
2010-04-24 00:19:47 +04:00
|
|
|
{
|
2021-09-03 13:27:58 +03:00
|
|
|
int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos);
|
|
|
|
if (offset < 0) {
|
|
|
|
return offset;
|
|
|
|
}
|
2010-04-24 00:19:47 +04:00
|
|
|
|
2010-04-14 16:17:38 +04:00
|
|
|
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
|
2021-09-03 13:27:58 +03:00
|
|
|
return bs->drv->bdrv_co_pwritev_part(bs, offset, qiov->size, qiov, 0, 0);
|
2010-04-24 00:19:47 +04:00
|
|
|
}
|
|
|
|
|
2016-06-09 17:50:16 +03:00
|
|
|
static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
|
|
|
|
int64_t pos)
|
2010-04-24 00:19:47 +04:00
|
|
|
{
|
2021-09-03 13:27:58 +03:00
|
|
|
int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos);
|
|
|
|
if (offset < 0) {
|
|
|
|
return offset;
|
|
|
|
}
|
2010-04-24 00:19:47 +04:00
|
|
|
|
2010-04-14 16:17:38 +04:00
|
|
|
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
|
2021-09-03 13:27:58 +03:00
|
|
|
return bs->drv->bdrv_co_preadv_part(bs, offset, qiov->size, qiov, 0, 0);
|
2010-04-24 00:19:47 +04:00
|
|
|
}
|
|
|
|
|
2013-09-03 12:09:54 +04:00
|
|
|
/*
|
|
|
|
* Downgrades an image's version. To achieve this, any incompatible features
|
|
|
|
* have to be removed.
|
|
|
|
*/
|
2014-10-27 13:12:53 +03:00
|
|
|
static int qcow2_downgrade(BlockDriverState *bs, int target_version,
|
2018-05-10 00:00:18 +03:00
|
|
|
BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
|
|
|
|
Error **errp)
|
2013-09-03 12:09:54 +04:00
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2013-09-03 12:09:54 +04:00
|
|
|
int current_version = s->qcow_version;
|
|
|
|
int ret;
|
qcow2: Allow resize of images with internal snapshots
We originally refused to allow resize of images with internal
snapshots because the v2 image format did not require the tracking of
snapshot size, making it impossible to safely revert to a snapshot
with a different size than the current view of the image. But the
snapshot size tracking was rectified in v3, and our recent fixes to
qemu-img amend (see 0a85af35) guarantee that we always have a valid
snapshot size. Thus, we no longer need to artificially limit image
resizes, but it does become one more thing that would prevent a
downgrade back to v2. And now that we support different-sized
snapshots, it's also easy to fix reverting to a snapshot to apply the
new size.
Upgrade iotest 61 to cover this (we previously had NO coverage of
refusal to resize while snapshots exist). Note that the amend process
can fail but still have effects: in particular, since we break things
into upgrade, resize, downgrade, a failure during resize does not roll
back changes made during upgrade, nor does failure in downgrade roll
back a resize. But this situation is pre-existing even without this
patch; and without journaling, the best we could do is minimize the
chance of partial failure by collecting all changes prior to doing any
writes - which adds a lot of complexity but could still fail with EIO.
On the other hand, we are careful that even if we have partial
modification but then fail, the image is left viable (that is, we are
careful to sequence things so that after each successful cluster
write, there may be transient leaked clusters but no corrupt
metadata). And complicating the code to make it more transaction-like
is not worth the effort: a user can always request multiple 'qemu-img
amend' changing one thing each, if they need finer-grained control
over detecting the first failure than what they get by letting qemu
decide how to sequence multiple changes.
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20200428192648.749066-3-eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2020-04-28 22:26:47 +03:00
|
|
|
int i;
|
2013-09-03 12:09:54 +04:00
|
|
|
|
2018-05-10 00:00:18 +03:00
|
|
|
/* This is qcow2_downgrade(), not qcow2_upgrade() */
|
|
|
|
assert(target_version < current_version);
|
|
|
|
|
|
|
|
/* There are no other versions (now) that you can downgrade to */
|
|
|
|
assert(target_version == 2);
|
2013-09-03 12:09:54 +04:00
|
|
|
|
|
|
|
if (s->refcount_order != 4) {
|
2018-05-10 00:00:18 +03:00
|
|
|
error_setg(errp, "compat=0.10 requires refcount_bits=16");
|
2013-09-03 12:09:54 +04:00
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
2019-01-15 22:39:06 +03:00
|
|
|
if (has_data_file(bs)) {
|
|
|
|
error_setg(errp, "Cannot downgrade an image with a data file");
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
qcow2: Allow resize of images with internal snapshots
We originally refused to allow resize of images with internal
snapshots because the v2 image format did not require the tracking of
snapshot size, making it impossible to safely revert to a snapshot
with a different size than the current view of the image. But the
snapshot size tracking was rectified in v3, and our recent fixes to
qemu-img amend (see 0a85af35) guarantee that we always have a valid
snapshot size. Thus, we no longer need to artificially limit image
resizes, but it does become one more thing that would prevent a
downgrade back to v2. And now that we support different-sized
snapshots, it's also easy to fix reverting to a snapshot to apply the
new size.
Upgrade iotest 61 to cover this (we previously had NO coverage of
refusal to resize while snapshots exist). Note that the amend process
can fail but still have effects: in particular, since we break things
into upgrade, resize, downgrade, a failure during resize does not roll
back changes made during upgrade, nor does failure in downgrade roll
back a resize. But this situation is pre-existing even without this
patch; and without journaling, the best we could do is minimize the
chance of partial failure by collecting all changes prior to doing any
writes - which adds a lot of complexity but could still fail with EIO.
On the other hand, we are careful that even if we have partial
modification but then fail, the image is left viable (that is, we are
careful to sequence things so that after each successful cluster
write, there may be transient leaked clusters but no corrupt
metadata). And complicating the code to make it more transaction-like
is not worth the effort: a user can always request multiple 'qemu-img
amend' changing one thing each, if they need finer-grained control
over detecting the first failure than what they get by letting qemu
decide how to sequence multiple changes.
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20200428192648.749066-3-eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2020-04-28 22:26:47 +03:00
|
|
|
/*
|
|
|
|
* If any internal snapshot has a different size than the current
|
|
|
|
* image size, or VM state size that exceeds 32 bits, downgrading
|
|
|
|
* is unsafe. Even though we would still use v3-compliant output
|
|
|
|
* to preserve that data, other v2 programs might not realize
|
|
|
|
* those optional fields are important.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < s->nb_snapshots; i++) {
|
|
|
|
if (s->snapshots[i].vm_state_size > UINT32_MAX ||
|
|
|
|
s->snapshots[i].disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
|
|
|
|
error_setg(errp, "Internal snapshots prevent downgrade of image");
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-09-03 12:09:54 +04:00
|
|
|
/* clear incompatible features */
|
|
|
|
if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
|
|
|
|
ret = qcow2_mark_clean(bs);
|
|
|
|
if (ret < 0) {
|
2018-05-10 00:00:18 +03:00
|
|
|
error_setg_errno(errp, -ret, "Failed to make the image clean");
|
2013-09-03 12:09:54 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* with QCOW2_INCOMPAT_CORRUPT, it is pretty much impossible to get here in
|
|
|
|
* the first place; if that happens nonetheless, returning -ENOTSUP is the
|
|
|
|
* best thing to do anyway */
|
|
|
|
|
|
|
|
if (s->incompatible_features) {
|
2018-05-10 00:00:18 +03:00
|
|
|
error_setg(errp, "Cannot downgrade an image with incompatible features "
|
|
|
|
"%#" PRIx64 " set", s->incompatible_features);
|
2013-09-03 12:09:54 +04:00
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* since we can ignore compatible features, we can set them to 0 as well */
|
|
|
|
s->compatible_features = 0;
|
|
|
|
/* if lazy refcounts have been used, they have already been fixed through
|
|
|
|
* clearing the dirty flag */
|
|
|
|
|
|
|
|
/* clearing autoclear features is trivial */
|
|
|
|
s->autoclear_features = 0;
|
|
|
|
|
2015-07-27 18:51:32 +03:00
|
|
|
ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque);
|
2013-09-03 12:09:54 +04:00
|
|
|
if (ret < 0) {
|
2018-05-10 00:00:18 +03:00
|
|
|
error_setg_errno(errp, -ret, "Failed to turn zero into data clusters");
|
2013-09-03 12:09:54 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->qcow_version = target_version;
|
|
|
|
ret = qcow2_update_header(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
s->qcow_version = current_version;
|
2018-05-10 00:00:18 +03:00
|
|
|
error_setg_errno(errp, -ret, "Failed to update the image header");
|
2013-09-03 12:09:54 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-10-11 18:28:04 +03:00
|
|
|
/*
|
|
|
|
* Upgrades an image's version. While newer versions encompass all
|
|
|
|
* features of older versions, some things may have to be presented
|
|
|
|
* differently.
|
|
|
|
*/
|
|
|
|
static int qcow2_upgrade(BlockDriverState *bs, int target_version,
|
|
|
|
BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2019-10-11 18:28:05 +03:00
|
|
|
bool need_snapshot_update;
|
2019-10-11 18:28:04 +03:00
|
|
|
int current_version = s->qcow_version;
|
2019-10-11 18:28:05 +03:00
|
|
|
int i;
|
2019-10-11 18:28:04 +03:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* This is qcow2_upgrade(), not qcow2_downgrade() */
|
|
|
|
assert(target_version > current_version);
|
|
|
|
|
|
|
|
/* There are no other versions (yet) that you can upgrade to */
|
|
|
|
assert(target_version == 3);
|
|
|
|
|
2019-10-11 18:28:05 +03:00
|
|
|
status_cb(bs, 0, 2, cb_opaque);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In v2, snapshots do not need to have extra data. v3 requires
|
|
|
|
* the 64-bit VM state size and the virtual disk size to be
|
|
|
|
* present.
|
|
|
|
* qcow2_write_snapshots() will always write the list in the
|
|
|
|
* v3-compliant format.
|
|
|
|
*/
|
|
|
|
need_snapshot_update = false;
|
|
|
|
for (i = 0; i < s->nb_snapshots; i++) {
|
|
|
|
if (s->snapshots[i].extra_data_size <
|
|
|
|
sizeof_field(QCowSnapshotExtraData, vm_state_size_large) +
|
|
|
|
sizeof_field(QCowSnapshotExtraData, disk_size))
|
|
|
|
{
|
|
|
|
need_snapshot_update = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (need_snapshot_update) {
|
|
|
|
ret = qcow2_write_snapshots(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Failed to update the snapshot table");
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
status_cb(bs, 1, 2, cb_opaque);
|
2019-10-11 18:28:04 +03:00
|
|
|
|
|
|
|
s->qcow_version = target_version;
|
|
|
|
ret = qcow2_update_header(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
s->qcow_version = current_version;
|
|
|
|
error_setg_errno(errp, -ret, "Failed to update the image header");
|
|
|
|
return ret;
|
|
|
|
}
|
2019-10-11 18:28:05 +03:00
|
|
|
status_cb(bs, 2, 2, cb_opaque);
|
2019-10-11 18:28:04 +03:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-07-27 18:51:36 +03:00
|
|
|
typedef enum Qcow2AmendOperation {
|
|
|
|
/* This is the value Qcow2AmendHelperCBInfo::last_operation will be
|
|
|
|
* statically initialized to so that the helper CB can discern the first
|
|
|
|
* invocation from an operation change */
|
|
|
|
QCOW2_NO_OPERATION = 0,
|
|
|
|
|
2019-10-11 18:28:04 +03:00
|
|
|
QCOW2_UPGRADING,
|
2020-06-25 15:55:43 +03:00
|
|
|
QCOW2_UPDATING_ENCRYPTION,
|
2015-07-27 18:51:38 +03:00
|
|
|
QCOW2_CHANGING_REFCOUNT_ORDER,
|
2015-07-27 18:51:36 +03:00
|
|
|
QCOW2_DOWNGRADING,
|
|
|
|
} Qcow2AmendOperation;
|
|
|
|
|
|
|
|
typedef struct Qcow2AmendHelperCBInfo {
|
|
|
|
/* The code coordinating the amend operations should only modify
|
|
|
|
* these four fields; the rest will be managed by the CB */
|
|
|
|
BlockDriverAmendStatusCB *original_status_cb;
|
|
|
|
void *original_cb_opaque;
|
|
|
|
|
|
|
|
Qcow2AmendOperation current_operation;
|
|
|
|
|
|
|
|
/* Total number of operations to perform (only set once) */
|
|
|
|
int total_operations;
|
|
|
|
|
|
|
|
/* The following fields are managed by the CB */
|
|
|
|
|
|
|
|
/* Number of operations completed */
|
|
|
|
int operations_completed;
|
|
|
|
|
|
|
|
/* Cumulative offset of all completed operations */
|
|
|
|
int64_t offset_completed;
|
|
|
|
|
|
|
|
Qcow2AmendOperation last_operation;
|
|
|
|
int64_t last_work_size;
|
|
|
|
} Qcow2AmendHelperCBInfo;
|
|
|
|
|
|
|
|
static void qcow2_amend_helper_cb(BlockDriverState *bs,
|
|
|
|
int64_t operation_offset,
|
|
|
|
int64_t operation_work_size, void *opaque)
|
|
|
|
{
|
|
|
|
Qcow2AmendHelperCBInfo *info = opaque;
|
|
|
|
int64_t current_work_size;
|
|
|
|
int64_t projected_work_size;
|
|
|
|
|
|
|
|
if (info->current_operation != info->last_operation) {
|
|
|
|
if (info->last_operation != QCOW2_NO_OPERATION) {
|
|
|
|
info->offset_completed += info->last_work_size;
|
|
|
|
info->operations_completed++;
|
|
|
|
}
|
|
|
|
|
|
|
|
info->last_operation = info->current_operation;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(info->total_operations > 0);
|
|
|
|
assert(info->operations_completed < info->total_operations);
|
|
|
|
|
|
|
|
info->last_work_size = operation_work_size;
|
|
|
|
|
|
|
|
current_work_size = info->offset_completed + operation_work_size;
|
|
|
|
|
|
|
|
/* current_work_size is the total work size for (operations_completed + 1)
|
|
|
|
* operations (which includes this one), so multiply it by the number of
|
|
|
|
* operations not covered and divide it by the number of operations
|
|
|
|
* covered to get a projection for the operations not covered */
|
|
|
|
projected_work_size = current_work_size * (info->total_operations -
|
|
|
|
info->operations_completed - 1)
|
|
|
|
/ (info->operations_completed + 1);
|
|
|
|
|
|
|
|
info->original_status_cb(bs, info->offset_completed + operation_offset,
|
|
|
|
current_work_size + projected_work_size,
|
|
|
|
info->original_cb_opaque);
|
|
|
|
}
|
|
|
|
|
2014-10-27 13:12:50 +03:00
|
|
|
static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
|
2015-07-27 18:51:32 +03:00
|
|
|
BlockDriverAmendStatusCB *status_cb,
|
2018-05-10 00:00:18 +03:00
|
|
|
void *cb_opaque,
|
2020-06-25 15:55:38 +03:00
|
|
|
bool force,
|
2018-05-10 00:00:18 +03:00
|
|
|
Error **errp)
|
2013-09-03 12:09:54 +04:00
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2013-09-03 12:09:54 +04:00
|
|
|
int old_version = s->qcow_version, new_version = old_version;
|
|
|
|
uint64_t new_size = 0;
|
2019-01-15 21:02:40 +03:00
|
|
|
const char *backing_file = NULL, *backing_format = NULL, *data_file = NULL;
|
2013-09-03 12:09:54 +04:00
|
|
|
bool lazy_refcounts = s->use_lazy_refcounts;
|
2019-02-22 16:29:38 +03:00
|
|
|
bool data_file_raw = data_file_is_raw(bs);
|
2014-06-05 13:20:59 +04:00
|
|
|
const char *compat = NULL;
|
2015-07-27 18:51:38 +03:00
|
|
|
int refcount_bits = s->refcount_bits;
|
2013-09-03 12:09:54 +04:00
|
|
|
int ret;
|
2014-06-05 13:20:59 +04:00
|
|
|
QemuOptDesc *desc = opts->list->desc;
|
2015-07-27 18:51:36 +03:00
|
|
|
Qcow2AmendHelperCBInfo helper_cb_info;
|
2020-06-25 15:55:43 +03:00
|
|
|
bool encryption_update = false;
|
2013-09-03 12:09:54 +04:00
|
|
|
|
2014-06-05 13:20:59 +04:00
|
|
|
while (desc && desc->name) {
|
|
|
|
if (!qemu_opt_find(opts, desc->name)) {
|
2013-09-03 12:09:54 +04:00
|
|
|
/* only change explicitly defined options */
|
2014-06-05 13:20:59 +04:00
|
|
|
desc++;
|
2013-09-03 12:09:54 +04:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2015-02-19 01:40:47 +03:00
|
|
|
if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) {
|
|
|
|
compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL);
|
2014-06-05 13:20:59 +04:00
|
|
|
if (!compat) {
|
2013-09-03 12:09:54 +04:00
|
|
|
/* preserve default */
|
2019-07-05 18:28:12 +03:00
|
|
|
} else if (!strcmp(compat, "0.10") || !strcmp(compat, "v2")) {
|
2013-09-03 12:09:54 +04:00
|
|
|
new_version = 2;
|
2019-07-05 18:28:12 +03:00
|
|
|
} else if (!strcmp(compat, "1.1") || !strcmp(compat, "v3")) {
|
2013-09-03 12:09:54 +04:00
|
|
|
new_version = 3;
|
|
|
|
} else {
|
2018-05-10 00:00:18 +03:00
|
|
|
error_setg(errp, "Unknown compatibility level %s", compat);
|
2013-09-03 12:09:54 +04:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2015-02-19 01:40:47 +03:00
|
|
|
} else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) {
|
|
|
|
new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
|
|
|
|
} else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) {
|
|
|
|
backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
|
|
|
|
} else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) {
|
|
|
|
backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
|
2020-06-25 15:55:43 +03:00
|
|
|
} else if (g_str_has_prefix(desc->name, "encrypt.")) {
|
|
|
|
if (!s->crypto) {
|
|
|
|
error_setg(errp,
|
|
|
|
"Can't amend encryption options - encryption not present");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
|
|
|
|
error_setg(errp,
|
|
|
|
"Only LUKS encryption options can be amended");
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
encryption_update = true;
|
2015-02-19 01:40:47 +03:00
|
|
|
} else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
|
|
|
|
lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS,
|
2014-06-05 13:20:59 +04:00
|
|
|
lazy_refcounts);
|
2015-02-19 01:40:49 +03:00
|
|
|
} else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) {
|
2015-07-27 18:51:38 +03:00
|
|
|
refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS,
|
|
|
|
refcount_bits);
|
|
|
|
|
|
|
|
if (refcount_bits <= 0 || refcount_bits > 64 ||
|
|
|
|
!is_power_of_2(refcount_bits))
|
|
|
|
{
|
2018-05-10 00:00:18 +03:00
|
|
|
error_setg(errp, "Refcount width must be a power of two and "
|
|
|
|
"may not exceed 64 bits");
|
2015-07-27 18:51:38 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2019-01-15 21:02:40 +03:00
|
|
|
} else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE)) {
|
|
|
|
data_file = qemu_opt_get(opts, BLOCK_OPT_DATA_FILE);
|
|
|
|
if (data_file && !has_data_file(bs)) {
|
|
|
|
error_setg(errp, "data-file can only be set for images that "
|
|
|
|
"use an external data file");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2019-02-22 16:29:38 +03:00
|
|
|
} else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE_RAW)) {
|
|
|
|
data_file_raw = qemu_opt_get_bool(opts, BLOCK_OPT_DATA_FILE_RAW,
|
|
|
|
data_file_raw);
|
|
|
|
if (data_file_raw && !data_file_is_raw(bs)) {
|
|
|
|
error_setg(errp, "data-file-raw cannot be set on existing "
|
|
|
|
"images");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2013-09-03 12:09:54 +04:00
|
|
|
} else {
|
2015-07-27 18:51:34 +03:00
|
|
|
/* if this point is reached, this probably means a new option was
|
2013-09-03 12:09:54 +04:00
|
|
|
* added without having it covered here */
|
2015-07-27 18:51:34 +03:00
|
|
|
abort();
|
2013-09-03 12:09:54 +04:00
|
|
|
}
|
2014-06-05 13:20:59 +04:00
|
|
|
|
|
|
|
desc++;
|
2013-09-03 12:09:54 +04:00
|
|
|
}
|
|
|
|
|
2015-07-27 18:51:36 +03:00
|
|
|
helper_cb_info = (Qcow2AmendHelperCBInfo){
|
|
|
|
.original_status_cb = status_cb,
|
|
|
|
.original_cb_opaque = cb_opaque,
|
2019-10-11 18:28:04 +03:00
|
|
|
.total_operations = (new_version != old_version)
|
2020-06-25 15:55:43 +03:00
|
|
|
+ (s->refcount_bits != refcount_bits) +
|
|
|
|
(encryption_update == true)
|
2015-07-27 18:51:36 +03:00
|
|
|
};
|
|
|
|
|
2015-07-27 18:51:35 +03:00
|
|
|
/* Upgrade first (some features may require compat=1.1) */
|
|
|
|
if (new_version > old_version) {
|
2019-10-11 18:28:04 +03:00
|
|
|
helper_cb_info.current_operation = QCOW2_UPGRADING;
|
|
|
|
ret = qcow2_upgrade(bs, new_version, &qcow2_amend_helper_cb,
|
|
|
|
&helper_cb_info, errp);
|
2015-07-27 18:51:35 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
2013-09-03 12:09:54 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-25 15:55:43 +03:00
|
|
|
if (encryption_update) {
|
|
|
|
QDict *amend_opts_dict;
|
|
|
|
QCryptoBlockAmendOptions *amend_opts;
|
|
|
|
|
|
|
|
helper_cb_info.current_operation = QCOW2_UPDATING_ENCRYPTION;
|
|
|
|
amend_opts_dict = qcow2_extract_crypto_opts(opts, "luks", errp);
|
|
|
|
if (!amend_opts_dict) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
amend_opts = block_crypto_amend_opts_init(amend_opts_dict, errp);
|
|
|
|
qobject_unref(amend_opts_dict);
|
|
|
|
if (!amend_opts) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
ret = qcrypto_block_amend_options(s->crypto,
|
|
|
|
qcow2_crypto_hdr_read_func,
|
|
|
|
qcow2_crypto_hdr_write_func,
|
|
|
|
bs,
|
|
|
|
amend_opts,
|
|
|
|
force,
|
|
|
|
errp);
|
|
|
|
qapi_free_QCryptoBlockAmendOptions(amend_opts);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-27 18:51:38 +03:00
|
|
|
if (s->refcount_bits != refcount_bits) {
|
|
|
|
int refcount_order = ctz32(refcount_bits);
|
|
|
|
|
|
|
|
if (new_version < 3 && refcount_bits != 16) {
|
2018-05-10 00:00:18 +03:00
|
|
|
error_setg(errp, "Refcount widths other than 16 bits require "
|
|
|
|
"compatibility level 1.1 or above (use compat=1.1 or "
|
|
|
|
"greater)");
|
2015-07-27 18:51:38 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER;
|
|
|
|
ret = qcow2_change_refcount_order(bs, refcount_order,
|
|
|
|
&qcow2_amend_helper_cb,
|
2018-05-10 00:00:18 +03:00
|
|
|
&helper_cb_info, errp);
|
2015-07-27 18:51:38 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-22 16:29:38 +03:00
|
|
|
/* data-file-raw blocks backing files, so clear it first if requested */
|
|
|
|
if (data_file_raw) {
|
|
|
|
s->autoclear_features |= QCOW2_AUTOCLEAR_DATA_FILE_RAW;
|
|
|
|
} else {
|
|
|
|
s->autoclear_features &= ~QCOW2_AUTOCLEAR_DATA_FILE_RAW;
|
|
|
|
}
|
|
|
|
|
2019-01-15 21:02:40 +03:00
|
|
|
if (data_file) {
|
|
|
|
g_free(s->image_data_file);
|
|
|
|
s->image_data_file = *data_file ? g_strdup(data_file) : NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = qcow2_update_header(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Failed to update the image header");
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2013-09-03 12:09:54 +04:00
|
|
|
if (backing_file || backing_format) {
|
qcow2: Deprecate use of qemu-img amend to change backing file
The use of 'qemu-img amend' to change qcow2 backing files is not
tested very well. In particular, our implementation has a bug where
if a new backing file is provided without a format, then the prior
format is blindly reused, even if this results in data corruption, but
this is not caught by iotests.
There are also situations where amending other options needs access to
the original backing file (for example, on a downgrade to a v2 image,
knowing whether a v3 zero cluster must be allocated or may be left
unallocated depends on knowing whether the backing file already reads
as zero), but the command line does not have a nice way to tell us
both the backing file to use for opening the image as well as the
backing file to install after the operation is complete.
Even if we do allow changing the backing file, it is redundant with
the existing ability to change backing files via 'qemu-img rebase -u'.
It is time to deprecate this support (leaving the existing behavior
intact, even if it is buggy), and at a point in the future, require
the use of only 'qemu-img rebase' for adjusting backing chain
relations, saving 'qemu-img amend' for changes unrelated to the
backing chain.
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20200706203954.341758-8-eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2020-07-06 23:39:51 +03:00
|
|
|
if (g_strcmp0(backing_file, s->image_backing_file) ||
|
|
|
|
g_strcmp0(backing_format, s->image_backing_format)) {
|
2021-05-04 00:35:59 +03:00
|
|
|
error_setg(errp, "Cannot amend the backing file");
|
|
|
|
error_append_hint(errp,
|
|
|
|
"You can use 'qemu-img rebase' instead.\n");
|
|
|
|
return -EINVAL;
|
2013-09-03 12:09:54 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (s->use_lazy_refcounts != lazy_refcounts) {
|
|
|
|
if (lazy_refcounts) {
|
2015-07-27 18:51:35 +03:00
|
|
|
if (new_version < 3) {
|
2018-05-10 00:00:18 +03:00
|
|
|
error_setg(errp, "Lazy refcounts only supported with "
|
|
|
|
"compatibility level 1.1 and above (use compat=1.1 "
|
|
|
|
"or greater)");
|
2013-09-03 12:09:54 +04:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
|
|
|
|
ret = qcow2_update_header(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
|
2018-05-10 00:00:18 +03:00
|
|
|
error_setg_errno(errp, -ret, "Failed to update the image header");
|
2013-09-03 12:09:54 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
s->use_lazy_refcounts = true;
|
|
|
|
} else {
|
|
|
|
/* make image clean first */
|
|
|
|
ret = qcow2_mark_clean(bs);
|
|
|
|
if (ret < 0) {
|
2018-05-10 00:00:18 +03:00
|
|
|
error_setg_errno(errp, -ret, "Failed to make the image clean");
|
2013-09-03 12:09:54 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
/* now disallow lazy refcounts */
|
|
|
|
s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
|
|
|
|
ret = qcow2_update_header(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
|
2018-05-10 00:00:18 +03:00
|
|
|
error_setg_errno(errp, -ret, "Failed to update the image header");
|
2013-09-03 12:09:54 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
s->use_lazy_refcounts = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (new_size) {
|
2020-04-28 22:26:46 +03:00
|
|
|
BlockBackend *blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL,
|
|
|
|
errp);
|
|
|
|
if (!blk) {
|
|
|
|
return -EPERM;
|
2017-01-13 21:02:32 +03:00
|
|
|
}
|
|
|
|
|
2019-09-18 12:51:43 +03:00
|
|
|
/*
|
|
|
|
* Amending image options should ensure that the image has
|
|
|
|
* exactly the given new values, so pass exact=true here.
|
|
|
|
*/
|
2020-04-24 15:54:41 +03:00
|
|
|
ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, 0, errp);
|
2017-02-17 12:58:25 +03:00
|
|
|
blk_unref(blk);
|
2013-09-03 12:09:54 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-27 18:51:35 +03:00
|
|
|
/* Downgrade last (so unsupported features can be removed before) */
|
|
|
|
if (new_version < old_version) {
|
2015-07-27 18:51:36 +03:00
|
|
|
helper_cb_info.current_operation = QCOW2_DOWNGRADING;
|
|
|
|
ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb,
|
2018-05-10 00:00:18 +03:00
|
|
|
&helper_cb_info, errp);
|
2015-07-27 18:51:35 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-09-03 12:09:54 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-06-25 15:55:47 +03:00
|
|
|
static int coroutine_fn qcow2_co_amend(BlockDriverState *bs,
|
|
|
|
BlockdevAmendOptions *opts,
|
|
|
|
bool force,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
BlockdevAmendOptionsQcow2 *qopts = &opts->u.qcow2;
|
|
|
|
BDRVQcow2State *s = bs->opaque;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
if (qopts->has_encrypt) {
|
|
|
|
if (!s->crypto) {
|
|
|
|
error_setg(errp, "image is not encrypted, can't amend");
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (qopts->encrypt->format != Q_CRYPTO_BLOCK_FORMAT_LUKS) {
|
|
|
|
error_setg(errp,
|
|
|
|
"Amend can't be used to change the qcow2 encryption format");
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
|
|
|
|
error_setg(errp,
|
|
|
|
"Only LUKS encryption options can be amended for qcow2 with blockdev-amend");
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = qcrypto_block_amend_options(s->crypto,
|
|
|
|
qcow2_crypto_hdr_read_func,
|
|
|
|
qcow2_crypto_hdr_write_func,
|
|
|
|
bs,
|
|
|
|
qopts->encrypt,
|
|
|
|
force,
|
|
|
|
errp);
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-09-05 18:07:16 +04:00
|
|
|
/*
|
|
|
|
* If offset or size are negative, respectively, they will not be included in
|
|
|
|
* the BLOCK_IMAGE_CORRUPTED event emitted.
|
|
|
|
* fatal will be ignored for read-only BDS; corruptions found there will always
|
|
|
|
* be considered non-fatal.
|
|
|
|
*/
|
|
|
|
void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
|
|
|
|
int64_t size, const char *message_format, ...)
|
|
|
|
{
|
2015-09-07 18:12:56 +03:00
|
|
|
BDRVQcow2State *s = bs->opaque;
|
2015-04-08 12:29:20 +03:00
|
|
|
const char *node_name;
|
2014-09-05 18:07:16 +04:00
|
|
|
char *message;
|
|
|
|
va_list ap;
|
|
|
|
|
2018-06-06 22:37:01 +03:00
|
|
|
fatal = fatal && bdrv_is_writable(bs);
|
2014-09-05 18:07:16 +04:00
|
|
|
|
|
|
|
if (s->signaled_corruption &&
|
|
|
|
(!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT)))
|
|
|
|
{
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
va_start(ap, message_format);
|
|
|
|
message = g_strdup_vprintf(message_format, ap);
|
|
|
|
va_end(ap);
|
|
|
|
|
|
|
|
if (fatal) {
|
|
|
|
fprintf(stderr, "qcow2: Marking image as corrupt: %s; further "
|
|
|
|
"corruption events will be suppressed\n", message);
|
|
|
|
} else {
|
|
|
|
fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal "
|
|
|
|
"corruption events will be suppressed\n", message);
|
|
|
|
}
|
|
|
|
|
2015-04-08 12:29:20 +03:00
|
|
|
node_name = bdrv_get_node_name(bs);
|
|
|
|
qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
|
|
|
|
*node_name != '\0', node_name,
|
|
|
|
message, offset >= 0, offset,
|
|
|
|
size >= 0, size,
|
2018-08-15 16:37:37 +03:00
|
|
|
fatal);
|
2014-09-05 18:07:16 +04:00
|
|
|
g_free(message);
|
|
|
|
|
|
|
|
if (fatal) {
|
|
|
|
qcow2_mark_corrupt(bs);
|
|
|
|
bs->drv = NULL; /* make BDS unusable */
|
|
|
|
}
|
|
|
|
|
|
|
|
s->signaled_corruption = true;
|
|
|
|
}
|
|
|
|
|
2020-06-25 15:55:39 +03:00
|
|
|
#define QCOW_COMMON_OPTIONS \
|
|
|
|
{ \
|
|
|
|
.name = BLOCK_OPT_SIZE, \
|
|
|
|
.type = QEMU_OPT_SIZE, \
|
|
|
|
.help = "Virtual disk size" \
|
|
|
|
}, \
|
|
|
|
{ \
|
|
|
|
.name = BLOCK_OPT_COMPAT_LEVEL, \
|
|
|
|
.type = QEMU_OPT_STRING, \
|
|
|
|
.help = "Compatibility level (v2 [0.10] or v3 [1.1])" \
|
|
|
|
}, \
|
|
|
|
{ \
|
|
|
|
.name = BLOCK_OPT_BACKING_FILE, \
|
|
|
|
.type = QEMU_OPT_STRING, \
|
|
|
|
.help = "File name of a base image" \
|
|
|
|
}, \
|
|
|
|
{ \
|
|
|
|
.name = BLOCK_OPT_BACKING_FMT, \
|
|
|
|
.type = QEMU_OPT_STRING, \
|
|
|
|
.help = "Image format of the base image" \
|
|
|
|
}, \
|
|
|
|
{ \
|
|
|
|
.name = BLOCK_OPT_DATA_FILE, \
|
|
|
|
.type = QEMU_OPT_STRING, \
|
|
|
|
.help = "File name of an external data file" \
|
|
|
|
}, \
|
|
|
|
{ \
|
|
|
|
.name = BLOCK_OPT_DATA_FILE_RAW, \
|
|
|
|
.type = QEMU_OPT_BOOL, \
|
|
|
|
.help = "The external data file must stay valid " \
|
|
|
|
"as a raw image" \
|
|
|
|
}, \
|
|
|
|
{ \
|
|
|
|
.name = BLOCK_OPT_LAZY_REFCOUNTS, \
|
|
|
|
.type = QEMU_OPT_BOOL, \
|
|
|
|
.help = "Postpone refcount updates", \
|
|
|
|
.def_value_str = "off" \
|
|
|
|
}, \
|
|
|
|
{ \
|
|
|
|
.name = BLOCK_OPT_REFCOUNT_BITS, \
|
|
|
|
.type = QEMU_OPT_NUMBER, \
|
|
|
|
.help = "Width of a reference count entry in bits", \
|
|
|
|
.def_value_str = "16" \
|
|
|
|
}
|
|
|
|
|
2014-06-05 13:20:59 +04:00
|
|
|
static QemuOptsList qcow2_create_opts = {
|
|
|
|
.name = "qcow2-create-opts",
|
|
|
|
.head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head),
|
|
|
|
.desc = {
|
2020-06-25 15:55:40 +03:00
|
|
|
{ \
|
|
|
|
.name = BLOCK_OPT_ENCRYPT, \
|
|
|
|
.type = QEMU_OPT_BOOL, \
|
|
|
|
.help = "Encrypt the image with format 'aes'. (Deprecated " \
|
|
|
|
"in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)", \
|
|
|
|
}, \
|
|
|
|
{ \
|
|
|
|
.name = BLOCK_OPT_ENCRYPT_FORMAT, \
|
|
|
|
.type = QEMU_OPT_STRING, \
|
|
|
|
.help = "Encrypt the image, format choices: 'aes', 'luks'", \
|
|
|
|
}, \
|
|
|
|
BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.", \
|
|
|
|
"ID of secret providing qcow AES key or LUKS passphrase"), \
|
|
|
|
BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."), \
|
|
|
|
BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."), \
|
|
|
|
BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."), \
|
|
|
|
BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."), \
|
|
|
|
BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."), \
|
|
|
|
BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."), \
|
|
|
|
{ \
|
|
|
|
.name = BLOCK_OPT_CLUSTER_SIZE, \
|
|
|
|
.type = QEMU_OPT_SIZE, \
|
|
|
|
.help = "qcow2 cluster size", \
|
|
|
|
.def_value_str = stringify(DEFAULT_CLUSTER_SIZE) \
|
|
|
|
}, \
|
2020-07-10 19:13:13 +03:00
|
|
|
{ \
|
|
|
|
.name = BLOCK_OPT_EXTL2, \
|
|
|
|
.type = QEMU_OPT_BOOL, \
|
|
|
|
.help = "Extended L2 tables", \
|
|
|
|
.def_value_str = "off" \
|
|
|
|
}, \
|
2020-06-25 15:55:40 +03:00
|
|
|
{ \
|
|
|
|
.name = BLOCK_OPT_PREALLOC, \
|
|
|
|
.type = QEMU_OPT_STRING, \
|
|
|
|
.help = "Preallocation mode (allowed values: off, " \
|
|
|
|
"metadata, falloc, full)" \
|
|
|
|
}, \
|
|
|
|
{ \
|
|
|
|
.name = BLOCK_OPT_COMPRESSION_TYPE, \
|
|
|
|
.type = QEMU_OPT_STRING, \
|
|
|
|
.help = "Compression method used for image cluster " \
|
|
|
|
"compression", \
|
|
|
|
.def_value_str = "zlib" \
|
|
|
|
},
|
2020-06-25 15:55:39 +03:00
|
|
|
QCOW_COMMON_OPTIONS,
|
|
|
|
{ /* end of list */ }
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
static QemuOptsList qcow2_amend_opts = {
|
|
|
|
.name = "qcow2-amend-opts",
|
|
|
|
.head = QTAILQ_HEAD_INITIALIZER(qcow2_amend_opts.head),
|
|
|
|
.desc = {
|
2020-06-25 15:55:43 +03:00
|
|
|
BLOCK_CRYPTO_OPT_DEF_LUKS_STATE("encrypt."),
|
|
|
|
BLOCK_CRYPTO_OPT_DEF_LUKS_KEYSLOT("encrypt."),
|
|
|
|
BLOCK_CRYPTO_OPT_DEF_LUKS_OLD_SECRET("encrypt."),
|
|
|
|
BLOCK_CRYPTO_OPT_DEF_LUKS_NEW_SECRET("encrypt."),
|
|
|
|
BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."),
|
2020-06-25 15:55:39 +03:00
|
|
|
QCOW_COMMON_OPTIONS,
|
2014-06-05 13:20:59 +04:00
|
|
|
{ /* end of list */ }
|
|
|
|
}
|
2010-04-24 00:19:47 +04:00
|
|
|
};
|
|
|
|
|
2019-02-01 22:29:25 +03:00
|
|
|
static const char *const qcow2_strong_runtime_opts[] = {
|
|
|
|
"encrypt." BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET,
|
|
|
|
|
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
2014-12-02 20:32:41 +03:00
|
|
|
BlockDriver bdrv_qcow2 = {
|
2010-12-17 18:02:39 +03:00
|
|
|
.format_name = "qcow2",
|
2015-09-07 18:12:56 +03:00
|
|
|
.instance_size = sizeof(BDRVQcow2State),
|
2010-12-17 18:02:39 +03:00
|
|
|
.bdrv_probe = qcow2_probe,
|
|
|
|
.bdrv_open = qcow2_open,
|
|
|
|
.bdrv_close = qcow2_close,
|
2012-09-20 23:13:28 +04:00
|
|
|
.bdrv_reopen_prepare = qcow2_reopen_prepare,
|
2015-04-16 14:42:27 +03:00
|
|
|
.bdrv_reopen_commit = qcow2_reopen_commit,
|
2020-02-28 15:44:47 +03:00
|
|
|
.bdrv_reopen_commit_post = qcow2_reopen_commit_post,
|
2015-04-16 14:42:27 +03:00
|
|
|
.bdrv_reopen_abort = qcow2_reopen_abort,
|
2015-11-16 17:34:59 +03:00
|
|
|
.bdrv_join_options = qcow2_join_options,
|
2020-05-13 14:05:39 +03:00
|
|
|
.bdrv_child_perm = bdrv_default_perms,
|
2018-01-18 15:43:45 +03:00
|
|
|
.bdrv_co_create_opts = qcow2_co_create_opts,
|
2018-01-09 18:50:57 +03:00
|
|
|
.bdrv_co_create = qcow2_co_create,
|
2019-07-24 20:12:34 +03:00
|
|
|
.bdrv_has_zero_init = qcow2_has_zero_init,
|
2018-02-13 23:26:52 +03:00
|
|
|
.bdrv_co_block_status = qcow2_co_block_status,
|
2010-12-17 18:02:39 +03:00
|
|
|
|
2019-06-04 19:15:13 +03:00
|
|
|
.bdrv_co_preadv_part = qcow2_co_preadv_part,
|
2019-06-04 19:15:14 +03:00
|
|
|
.bdrv_co_pwritev_part = qcow2_co_pwritev_part,
|
2011-11-10 21:10:11 +04:00
|
|
|
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
|
2010-04-28 14:36:11 +04:00
|
|
|
|
2016-06-02 00:10:06 +03:00
|
|
|
.bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
|
2016-07-16 02:23:03 +03:00
|
|
|
.bdrv_co_pdiscard = qcow2_co_pdiscard,
|
2018-06-01 12:26:42 +03:00
|
|
|
.bdrv_co_copy_range_from = qcow2_co_copy_range_from,
|
|
|
|
.bdrv_co_copy_range_to = qcow2_co_copy_range_to,
|
block: Convert .bdrv_truncate callback to coroutine_fn
bdrv_truncate() is an operation that can block (even for a quite long
time, depending on the PreallocMode) in I/O paths that shouldn't block.
Convert it to a coroutine_fn so that we have the infrastructure for
drivers to make their .bdrv_co_truncate implementation asynchronous.
This change could potentially introduce new race conditions because
bdrv_truncate() isn't necessarily executed atomically any more. Whether
this is a problem needs to be evaluated for each block driver that
supports truncate:
* file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The
protocol drivers are trivially safe because they don't actually yield
yet, so there is no change in behaviour.
* copy-on-read, crypto, raw-format: Essentially just filter drivers that
pass the request to a child node, no problem.
* qcow2: The implementation modifies metadata, so it needs to hold
s->lock to be safe with concurrent I/O requests. In order to avoid
double locking, this requires pulling the locking out into
preallocate_co() and using qcow2_write_caches() instead of
bdrv_flush().
* qed: Does a single header update, this is fine without locking.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2018-06-21 18:54:35 +03:00
|
|
|
.bdrv_co_truncate = qcow2_co_truncate,
|
2019-06-04 19:15:14 +03:00
|
|
|
.bdrv_co_pwritev_compressed_part = qcow2_co_pwritev_compressed_part,
|
2014-10-24 17:57:31 +04:00
|
|
|
.bdrv_make_empty = qcow2_make_empty,
|
2010-04-24 00:19:47 +04:00
|
|
|
|
|
|
|
.bdrv_snapshot_create = qcow2_snapshot_create,
|
|
|
|
.bdrv_snapshot_goto = qcow2_snapshot_goto,
|
|
|
|
.bdrv_snapshot_delete = qcow2_snapshot_delete,
|
|
|
|
.bdrv_snapshot_list = qcow2_snapshot_list,
|
2014-06-05 13:20:59 +04:00
|
|
|
.bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
|
2017-07-05 15:57:35 +03:00
|
|
|
.bdrv_measure = qcow2_measure,
|
2014-06-05 13:20:59 +04:00
|
|
|
.bdrv_get_info = qcow2_get_info,
|
2013-10-09 12:46:18 +04:00
|
|
|
.bdrv_get_specific_info = qcow2_get_specific_info,
|
2010-04-24 00:19:47 +04:00
|
|
|
|
2010-12-17 18:02:39 +03:00
|
|
|
.bdrv_save_vmstate = qcow2_save_vmstate,
|
|
|
|
.bdrv_load_vmstate = qcow2_load_vmstate,
|
2010-04-24 00:19:47 +04:00
|
|
|
|
2020-05-13 14:05:12 +03:00
|
|
|
.is_format = true,
|
2014-06-04 17:09:35 +04:00
|
|
|
.supports_backing = true,
|
2010-04-24 00:19:47 +04:00
|
|
|
.bdrv_change_backing_file = qcow2_change_backing_file,
|
|
|
|
|
2013-12-11 22:26:16 +04:00
|
|
|
.bdrv_refresh_limits = qcow2_refresh_limits,
|
2018-03-01 19:36:18 +03:00
|
|
|
.bdrv_co_invalidate_cache = qcow2_co_invalidate_cache,
|
2015-12-22 18:04:57 +03:00
|
|
|
.bdrv_inactivate = qcow2_inactivate,
|
2011-11-15 01:09:46 +04:00
|
|
|
|
2014-06-05 13:20:59 +04:00
|
|
|
.create_opts = &qcow2_create_opts,
|
2020-06-25 15:55:39 +03:00
|
|
|
.amend_opts = &qcow2_amend_opts,
|
2019-02-01 22:29:25 +03:00
|
|
|
.strong_runtime_opts = qcow2_strong_runtime_opts,
|
2019-03-12 19:48:48 +03:00
|
|
|
.mutable_opts = mutable_opts,
|
2018-03-01 19:36:19 +03:00
|
|
|
.bdrv_co_check = qcow2_co_check,
|
2014-06-05 13:21:11 +04:00
|
|
|
.bdrv_amend_options = qcow2_amend_options,
|
2020-06-25 15:55:47 +03:00
|
|
|
.bdrv_co_amend = qcow2_co_amend,
|
2015-08-04 15:14:40 +03:00
|
|
|
|
|
|
|
.bdrv_detach_aio_context = qcow2_detach_aio_context,
|
|
|
|
.bdrv_attach_aio_context = qcow2_attach_aio_context,
|
2017-06-28 15:05:14 +03:00
|
|
|
|
block: Make it easier to learn which BDS support bitmaps
Upcoming patches will enhance bitmap support in qemu-img, but in doing
so, it turns out to be nice to suppress output when persistent bitmaps
make no sense (such as on a qcow2 v2 image). Add a hook to make this
easier to query.
This patch adds a new callback .bdrv_supports_persistent_dirty_bitmap,
rather than trying to shoehorn the answer in via existing callbacks.
In particular, while it might have been possible to overload
.bdrv_co_can_store_new_dirty_bitmap to special-case a NULL input to
answer whether any persistent bitmaps are supported, that is at odds
with whether a particular bitmap can be stored (for example, even on
an image that supports persistent bitmaps but has currently filled up
the maximum number of bitmaps, attempts to store another one should
fail); and the new functionality doesn't require coroutine safety.
Similarly, we could have added one more piece of information to
.bdrv_get_info, but then again, most callers to that function tend to
already discard extraneous information, and making it a catch-all
rather than a series of dedicated scalar queries hasn't really
simplified life.
In the future, when we improve the ability to look up bitmaps through
a filter, we will probably also want to teach the block layer to
automatically let filters pass this request on through.
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20200513011648.166876-4-eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2020-05-13 04:16:42 +03:00
|
|
|
.bdrv_supports_persistent_dirty_bitmap =
|
|
|
|
qcow2_supports_persistent_dirty_bitmap,
|
2019-09-20 11:25:43 +03:00
|
|
|
.bdrv_co_can_store_new_dirty_bitmap = qcow2_co_can_store_new_dirty_bitmap,
|
|
|
|
.bdrv_co_remove_persistent_dirty_bitmap =
|
|
|
|
qcow2_co_remove_persistent_dirty_bitmap,
|
2010-04-24 00:19:47 +04:00
|
|
|
};
|
|
|
|
|
2009-05-10 02:03:42 +04:00
|
|
|
static void bdrv_qcow2_init(void)
|
|
|
|
{
|
|
|
|
bdrv_register(&bdrv_qcow2);
|
|
|
|
}
|
|
|
|
|
|
|
|
block_init(bdrv_qcow2_init);
|