qcow2: Optimize bdrv_make_empty()
bdrv_make_empty() is currently only called if the current image represents an external snapshot that has been committed to its base image; it is therefore unlikely to have internal snapshots. In this case, bdrv_make_empty() can be greatly sped up by emptying the L1 and refcount table (while having the dirty flag set, which only works for compat=1.1) and creating a trivial refcount structure. If there are snapshots or for compat=0.10, fall back to the simple implementation (discard all clusters). [Applied s/clusters/cluster/ typo fix suggested by Eric Blake --Stefan] Signed-off-by: Max Reitz <mreitz@redhat.com> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Message-id: 1414159063-25977-4-git-send-email-mreitz@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
parent
491d27e2af
commit
94054183da
@ -195,6 +195,8 @@ static const char *event_names[BLKDBG_EVENT_MAX] = {
|
|||||||
[BLKDBG_PWRITEV] = "pwritev",
|
[BLKDBG_PWRITEV] = "pwritev",
|
||||||
[BLKDBG_PWRITEV_ZERO] = "pwritev_zero",
|
[BLKDBG_PWRITEV_ZERO] = "pwritev_zero",
|
||||||
[BLKDBG_PWRITEV_DONE] = "pwritev_done",
|
[BLKDBG_PWRITEV_DONE] = "pwritev_done",
|
||||||
|
|
||||||
|
[BLKDBG_EMPTY_IMAGE_PREPARE] = "empty_image_prepare",
|
||||||
};
|
};
|
||||||
|
|
||||||
static int get_event_by_name(const char *name, BlkDebugEvent *event)
|
static int get_event_by_name(const char *name, BlkDebugEvent *event)
|
||||||
|
165
block/qcow2.c
165
block/qcow2.c
@ -2230,12 +2230,175 @@ fail:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int make_completely_empty(BlockDriverState *bs)
|
||||||
|
{
|
||||||
|
BDRVQcowState *s = bs->opaque;
|
||||||
|
int ret, l1_clusters;
|
||||||
|
int64_t offset;
|
||||||
|
uint64_t *new_reftable = NULL;
|
||||||
|
uint64_t rt_entry, l1_size2;
|
||||||
|
struct {
|
||||||
|
uint64_t l1_offset;
|
||||||
|
uint64_t reftable_offset;
|
||||||
|
uint32_t reftable_clusters;
|
||||||
|
} QEMU_PACKED l1_ofs_rt_ofs_cls;
|
||||||
|
|
||||||
|
ret = qcow2_cache_empty(bs, s->l2_table_cache);
|
||||||
|
if (ret < 0) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = qcow2_cache_empty(bs, s->refcount_block_cache);
|
||||||
|
if (ret < 0) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Refcounts will be broken utterly */
|
||||||
|
ret = qcow2_mark_dirty(bs);
|
||||||
|
if (ret < 0) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
|
||||||
|
|
||||||
|
l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
|
||||||
|
l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t);
|
||||||
|
|
||||||
|
/* After this call, neither the in-memory nor the on-disk refcount
|
||||||
|
* information accurately describe the actual references */
|
||||||
|
|
||||||
|
ret = bdrv_write_zeroes(bs->file, s->l1_table_offset / BDRV_SECTOR_SIZE,
|
||||||
|
l1_clusters * s->cluster_sectors, 0);
|
||||||
|
if (ret < 0) {
|
||||||
|
goto fail_broken_refcounts;
|
||||||
|
}
|
||||||
|
memset(s->l1_table, 0, l1_size2);
|
||||||
|
|
||||||
|
BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE);
|
||||||
|
|
||||||
|
/* Overwrite enough clusters at the beginning of the sectors to place
|
||||||
|
* the refcount table, a refcount block and the L1 table in; this may
|
||||||
|
* overwrite parts of the existing refcount and L1 table, which is not
|
||||||
|
* an issue because the dirty flag is set, complete data loss is in fact
|
||||||
|
* desired and partial data loss is consequently fine as well */
|
||||||
|
ret = bdrv_write_zeroes(bs->file, s->cluster_size / BDRV_SECTOR_SIZE,
|
||||||
|
(2 + l1_clusters) * s->cluster_size /
|
||||||
|
BDRV_SECTOR_SIZE, 0);
|
||||||
|
/* This call (even if it failed overall) may have overwritten on-disk
|
||||||
|
* refcount structures; in that case, the in-memory refcount information
|
||||||
|
* will probably differ from the on-disk information which makes the BDS
|
||||||
|
* unusable */
|
||||||
|
if (ret < 0) {
|
||||||
|
goto fail_broken_refcounts;
|
||||||
|
}
|
||||||
|
|
||||||
|
BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
|
||||||
|
BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);
|
||||||
|
|
||||||
|
/* "Create" an empty reftable (one cluster) directly after the image
|
||||||
|
* header and an empty L1 table three clusters after the image header;
|
||||||
|
* the cluster between those two will be used as the first refblock */
|
||||||
|
cpu_to_be64w(&l1_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size);
|
||||||
|
cpu_to_be64w(&l1_ofs_rt_ofs_cls.reftable_offset, s->cluster_size);
|
||||||
|
cpu_to_be32w(&l1_ofs_rt_ofs_cls.reftable_clusters, 1);
|
||||||
|
ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
|
||||||
|
&l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
|
||||||
|
if (ret < 0) {
|
||||||
|
goto fail_broken_refcounts;
|
||||||
|
}
|
||||||
|
|
||||||
|
s->l1_table_offset = 3 * s->cluster_size;
|
||||||
|
|
||||||
|
new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t));
|
||||||
|
if (!new_reftable) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto fail_broken_refcounts;
|
||||||
|
}
|
||||||
|
|
||||||
|
s->refcount_table_offset = s->cluster_size;
|
||||||
|
s->refcount_table_size = s->cluster_size / sizeof(uint64_t);
|
||||||
|
|
||||||
|
g_free(s->refcount_table);
|
||||||
|
s->refcount_table = new_reftable;
|
||||||
|
new_reftable = NULL;
|
||||||
|
|
||||||
|
/* Now the in-memory refcount information again corresponds to the on-disk
|
||||||
|
* information (reftable is empty and no refblocks (the refblock cache is
|
||||||
|
* empty)); however, this means some clusters (e.g. the image header) are
|
||||||
|
* referenced, but not refcounted, but the normal qcow2 code assumes that
|
||||||
|
* the in-memory information is always correct */
|
||||||
|
|
||||||
|
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
|
||||||
|
|
||||||
|
/* Enter the first refblock into the reftable */
|
||||||
|
rt_entry = cpu_to_be64(2 * s->cluster_size);
|
||||||
|
ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
|
||||||
|
&rt_entry, sizeof(rt_entry));
|
||||||
|
if (ret < 0) {
|
||||||
|
goto fail_broken_refcounts;
|
||||||
|
}
|
||||||
|
s->refcount_table[0] = 2 * s->cluster_size;
|
||||||
|
|
||||||
|
s->free_cluster_index = 0;
|
||||||
|
assert(3 + l1_clusters <= s->refcount_block_size);
|
||||||
|
offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2);
|
||||||
|
if (offset < 0) {
|
||||||
|
ret = offset;
|
||||||
|
goto fail_broken_refcounts;
|
||||||
|
} else if (offset > 0) {
|
||||||
|
error_report("First cluster in emptied image is in use");
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now finally the in-memory information corresponds to the on-disk
|
||||||
|
* structures and is correct */
|
||||||
|
ret = qcow2_mark_clean(bs);
|
||||||
|
if (ret < 0) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size);
|
||||||
|
if (ret < 0) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
fail_broken_refcounts:
|
||||||
|
/* The BDS is unusable at this point. If we wanted to make it usable, we
|
||||||
|
* would have to call qcow2_refcount_close(), qcow2_refcount_init(),
|
||||||
|
* qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init()
|
||||||
|
* again. However, because the functions which could have caused this error
|
||||||
|
* path to be taken are used by those functions as well, it's very likely
|
||||||
|
* that that sequence will fail as well. Therefore, just eject the BDS. */
|
||||||
|
bs->drv = NULL;
|
||||||
|
|
||||||
|
fail:
|
||||||
|
g_free(new_reftable);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static int qcow2_make_empty(BlockDriverState *bs)
|
static int qcow2_make_empty(BlockDriverState *bs)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
BDRVQcowState *s = bs->opaque;
|
||||||
uint64_t start_sector;
|
uint64_t start_sector;
|
||||||
int sector_step = INT_MAX / BDRV_SECTOR_SIZE;
|
int sector_step = INT_MAX / BDRV_SECTOR_SIZE;
|
||||||
|
int l1_clusters, ret = 0;
|
||||||
|
|
||||||
|
l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
|
||||||
|
|
||||||
|
if (s->qcow_version >= 3 && !s->snapshots &&
|
||||||
|
3 + l1_clusters <= s->refcount_block_size) {
|
||||||
|
/* The following function only works for qcow2 v3 images (it requires
|
||||||
|
* the dirty flag) and only as long as there are no snapshots (because
|
||||||
|
* it completely empties the image). Furthermore, the L1 table and three
|
||||||
|
* additional clusters (image header, refcount table, one refcount
|
||||||
|
* block) have to fit inside one refcount block. */
|
||||||
|
return make_completely_empty(bs);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This fallback code simply discards every active cluster; this is slow,
|
||||||
|
* but works in all cases */
|
||||||
for (start_sector = 0; start_sector < bs->total_sectors;
|
for (start_sector = 0; start_sector < bs->total_sectors;
|
||||||
start_sector += sector_step)
|
start_sector += sector_step)
|
||||||
{
|
{
|
||||||
|
@ -498,6 +498,8 @@ typedef enum {
|
|||||||
BLKDBG_PWRITEV_ZERO,
|
BLKDBG_PWRITEV_ZERO,
|
||||||
BLKDBG_PWRITEV_DONE,
|
BLKDBG_PWRITEV_DONE,
|
||||||
|
|
||||||
|
BLKDBG_EMPTY_IMAGE_PREPARE,
|
||||||
|
|
||||||
BLKDBG_EVENT_MAX,
|
BLKDBG_EVENT_MAX,
|
||||||
} BlkDebugEvent;
|
} BlkDebugEvent;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user