qcow2: add shrink image support
This patch adds support for shrinking qcow2 image files. This allows us to reduce the virtual image size and free up space on the disk without copying the image. The image may be fragmented, so shrinking is done by punching holes in the image file. Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Reviewed-by: John Snow <jsnow@redhat.com> Message-id: 20170918124230.8152-4-pbutsykin@virtuozzo.com Signed-off-by: Max Reitz <mreitz@redhat.com>
This commit is contained in:
parent
f71c08ea8e
commit
46b732cdf3
@ -32,6 +32,56 @@
|
|||||||
#include "qemu/bswap.h"
|
#include "qemu/bswap.h"
|
||||||
#include "trace.h"
|
#include "trace.h"
|
||||||
|
|
||||||
|
/*
 * Drop every L1 entry at index exact_size and above.
 *
 * The on-disk entries are zeroed and flushed first; only afterwards are the
 * L2 table clusters they pointed to freed and the matching in-memory entries
 * cleared.  s->l1_size itself is not modified here.
 *
 * Returns 0 on success, and also when exact_size is not smaller than the
 * current table size (nothing is done in that case).  On a failed write or
 * flush the negative return value of that call is passed through.
 */
int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t exact_size)
{
    BDRVQcow2State *s = bs->opaque;
    int new_l1_size, i, ret;
    size_t dropped_bytes;

    if (exact_size >= s->l1_size) {
        return 0;
    }

    new_l1_size = exact_size;
    dropped_bytes = (s->l1_size - new_l1_size) * sizeof(uint64_t);

#ifdef DEBUG_ALLOC2
    fprintf(stderr, "shrink l1_table from %d to %d\n", s->l1_size, new_l1_size);
#endif

    BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_WRITE_TABLE);
    ret = bdrv_pwrite_zeroes(bs->file,
                             s->l1_table_offset +
                             new_l1_size * sizeof(uint64_t),
                             dropped_bytes, 0);
    if (ret >= 0) {
        /* the zeroed entries must be flushed before any L2 table is freed */
        ret = bdrv_flush(bs->file->bs);
    }
    if (ret < 0) {
        /*
         * The L1 table in the image may now be partially overwritten.
         * Clear the dropped entries in memory as well, which is safer than
         * continuing to use entries that might no longer match the image.
         */
        memset(s->l1_table + new_l1_size, 0, dropped_bytes);
        return ret;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_FREE_L2_CLUSTERS);
    for (i = s->l1_size - 1; i >= new_l1_size; i--) {
        uint64_t l2_offset = s->l1_table[i] & L1E_OFFSET_MASK;

        /* entries without an L2 table offset are left untouched */
        if (l2_offset != 0) {
            qcow2_free_clusters(bs, l2_offset, s->cluster_size,
                                QCOW2_DISCARD_ALWAYS);
            s->l1_table[i] = 0;
        }
    }

    return 0;
}
|
||||||
|
|
||||||
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
|
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
|
||||||
bool exact_size)
|
bool exact_size)
|
||||||
{
|
{
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
#include "block/qcow2.h"
|
#include "block/qcow2.h"
|
||||||
#include "qemu/range.h"
|
#include "qemu/range.h"
|
||||||
#include "qemu/bswap.h"
|
#include "qemu/bswap.h"
|
||||||
|
#include "qemu/cutils.h"
|
||||||
|
|
||||||
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size);
|
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size);
|
||||||
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
|
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
|
||||||
@ -3061,3 +3062,122 @@ done:
|
|||||||
qemu_vfree(new_refblock);
|
qemu_vfree(new_refblock);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Release the refcount block located at discard_block_offs.
 *
 * The refcount of that cluster (stored in the refblock that covers
 * discard_block_offs) is expected to be exactly 1; it is set to 0, the
 * covering refblock is marked dirty, s->free_cluster_index is lowered if
 * needed, the discarded refblock is dropped from the refcount block cache
 * if it is cached there, and a discard for the cluster is queued through
 * update_refcount_discard().
 *
 * Returns 0 on success.  Returns -EIO (after signalling corruption) when no
 * refblock covers discard_block_offs, -EINVAL (after signalling corruption)
 * when the stored refcount is not 1, or the negative value returned by
 * qcow2_cache_get() when the covering refblock cannot be loaded.
 */
static int qcow2_discard_refcount_block(BlockDriverState *bs,
                                        uint64_t discard_block_offs)
{
    BDRVQcow2State *s = bs->opaque;
    uint32_t reftable_index = offset_to_reftable_index(s, discard_block_offs);
    uint64_t refblock_offs = 0;
    uint64_t cluster_index = discard_block_offs >> s->cluster_bits;
    uint32_t block_index = cluster_index & (s->refcount_block_size - 1);
    void *refblock;
    int ret;

    assert(discard_block_offs != 0);

    /*
     * Only consult the reftable when the index is inside it; a refblock
     * placed beyond the area described by the reftable would otherwise
     * cause an out-of-bounds read.
     */
    if (reftable_index < s->refcount_table_size) {
        refblock_offs = get_refblock_offset(s, discard_block_offs);
    }
    if (refblock_offs == 0) {
        /*
         * No refblock covers this cluster.  Do not fall through to
         * qcow2_cache_get() with offset 0, which would treat the first
         * cluster of the image as a refcount block.
         */
        qcow2_signal_corruption(bs, true, -1, -1, "Refblock at %#" PRIx64
                                " is not covered by the refcount structures",
                                discard_block_offs);
        return -EIO;
    }

    ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs,
                          &refblock);
    if (ret < 0) {
        return ret;
    }

    if (s->get_refcount(refblock, block_index) != 1) {
        qcow2_signal_corruption(bs, true, -1, -1, "Invalid refcount:"
                                " refblock offset %#" PRIx64
                                ", reftable index %u"
                                ", block offset %#" PRIx64
                                ", refcount %#" PRIx64,
                                refblock_offs,
                                offset_to_reftable_index(s, discard_block_offs),
                                discard_block_offs,
                                s->get_refcount(refblock, block_index));
        qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
        return -EINVAL;
    }
    s->set_refcount(refblock, block_index, 0);

    qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, refblock);

    qcow2_cache_put(bs, s->refcount_block_cache, &refblock);

    /* the freed cluster may now be the lowest free one */
    if (cluster_index < s->free_cluster_index) {
        s->free_cluster_index = cluster_index;
    }

    refblock = qcow2_cache_is_table_offset(bs, s->refcount_block_cache,
                                           discard_block_offs);
    if (refblock) {
        /* discard refblock from the cache if refblock is cached */
        qcow2_cache_discard(bs, s->refcount_block_cache, refblock);
    }
    update_refcount_discard(bs, discard_block_offs, s->cluster_size);

    return 0;
}
|
||||||
|
|
||||||
|
/*
 * Remove refcount blocks that no longer describe any allocated cluster.
 *
 * A copy of the reftable is built in which the entry of every refblock that
 * is all zeroes (ignoring the refblock's reference to itself, if it has one)
 * is cleared.  The copy is written to the image; afterwards each cleared
 * entry is also cleared in memory and, if the write succeeded, its refblock
 * is discarded.
 *
 * Returns 0 on success or the negative value of the first failing step.
 */
int qcow2_shrink_reftable(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t *reftable_tmp =
        g_malloc(s->refcount_table_size * sizeof(uint64_t));
    int i, ret;

    for (i = 0; i < s->refcount_table_size; i++) {
        int64_t refblock_offs = s->refcount_table[i] & REFT_OFFSET_MASK;
        uint64_t block_index = 0;
        uint64_t saved_refcount = 0;
        bool self_referencing;
        bool unused_block;
        void *refblock;

        if (refblock_offs == 0) {
            reftable_tmp[i] = 0;
            continue;
        }

        ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs,
                              &refblock);
        if (ret < 0) {
            goto out;
        }

        /*
         * A refblock that holds its own refcount is never all zeroes, so
         * that one entry is temporarily cleared for the emptiness check and
         * restored right after it.
         */
        self_referencing = (i == offset_to_reftable_index(s, refblock_offs));
        if (self_referencing) {
            block_index = (refblock_offs >> s->cluster_bits) &
                          (s->refcount_block_size - 1);
            saved_refcount = s->get_refcount(refblock, block_index);
            s->set_refcount(refblock, block_index, 0);
        }

        unused_block = buffer_is_zero(refblock, s->cluster_size);

        if (self_referencing) {
            s->set_refcount(refblock, block_index, saved_refcount);
        }
        qcow2_cache_put(bs, s->refcount_block_cache, &refblock);

        if (unused_block) {
            reftable_tmp[i] = 0;
        } else {
            reftable_tmp[i] = cpu_to_be64(s->refcount_table[i]);
        }
    }

    ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset, reftable_tmp,
                           s->refcount_table_size * sizeof(uint64_t));

    /*
     * The in-memory entries are cleared even when the write failed: the
     * reftable in the image may then be partially overwritten, and dropping
     * the entries in memory is the safer choice against image corruption.
     * The refblocks themselves are only discarded while no error occurred.
     */
    for (i = 0; i < s->refcount_table_size; i++) {
        if (!s->refcount_table[i] || reftable_tmp[i]) {
            continue;
        }
        if (ret == 0) {
            ret = qcow2_discard_refcount_block(bs, s->refcount_table[i] &
                                                   REFT_OFFSET_MASK);
        }
        s->refcount_table[i] = 0;
    }

    if (!s->cache_discards) {
        qcow2_process_discards(bs, ret);
    }

out:
    g_free(reftable_tmp);
    return ret;
}
|
||||||
|
@ -3104,18 +3104,43 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
|
|||||||
}
|
}
|
||||||
|
|
||||||
old_length = bs->total_sectors * 512;
|
old_length = bs->total_sectors * 512;
|
||||||
|
|
||||||
/* shrinking is currently not supported */
|
|
||||||
if (offset < old_length) {
|
|
||||||
error_setg(errp, "qcow2 doesn't support shrinking images yet");
|
|
||||||
return -ENOTSUP;
|
|
||||||
}
|
|
||||||
|
|
||||||
new_l1_size = size_to_l1(s, offset);
|
new_l1_size = size_to_l1(s, offset);
|
||||||
ret = qcow2_grow_l1_table(bs, new_l1_size, true);
|
|
||||||
if (ret < 0) {
|
if (offset < old_length) {
|
||||||
error_setg_errno(errp, -ret, "Failed to grow the L1 table");
|
if (prealloc != PREALLOC_MODE_OFF) {
|
||||||
return ret;
|
error_setg(errp,
|
||||||
|
"Preallocation can't be used for shrinking an image");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size),
|
||||||
|
old_length - ROUND_UP(offset,
|
||||||
|
s->cluster_size),
|
||||||
|
QCOW2_DISCARD_ALWAYS, true);
|
||||||
|
if (ret < 0) {
|
||||||
|
error_setg_errno(errp, -ret, "Failed to discard cropped clusters");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = qcow2_shrink_l1_table(bs, new_l1_size);
|
||||||
|
if (ret < 0) {
|
||||||
|
error_setg_errno(errp, -ret,
|
||||||
|
"Failed to reduce the number of L2 tables");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = qcow2_shrink_reftable(bs);
|
||||||
|
if (ret < 0) {
|
||||||
|
error_setg_errno(errp, -ret,
|
||||||
|
"Failed to discard unused refblocks");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ret = qcow2_grow_l1_table(bs, new_l1_size, true);
|
||||||
|
if (ret < 0) {
|
||||||
|
error_setg_errno(errp, -ret, "Failed to grow the L1 table");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (prealloc) {
|
switch (prealloc) {
|
||||||
|
@ -521,6 +521,18 @@ static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2)
|
|||||||
return r1 > r2 ? r1 - r2 : r2 - r1;
|
return r1 > r2 ? r1 - r2 : r2 - r1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline
|
||||||
|
uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset)
|
||||||
|
{
|
||||||
|
return offset >> (s->refcount_block_bits + s->cluster_bits);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Return the offset of the refcount block that covers the given byte
 * offset, taken from the in-memory refcount table with the non-offset bits
 * masked out.
 *
 * Returns 0 when the covering reftable entry is zero, and also when the
 * offset maps to an index beyond the end of the refcount table (so the
 * table is never read out of bounds).
 */
static inline uint64_t get_refblock_offset(BDRVQcow2State *s, uint64_t offset)
{
    uint32_t index = offset_to_reftable_index(s, offset);

    /* offsets past the area described by the reftable have no refblock */
    if (index >= s->refcount_table_size) {
        return 0;
    }

    return s->refcount_table[index] & REFT_OFFSET_MASK;
}
|
||||||
|
|
||||||
/* qcow2.c functions */
|
/* qcow2.c functions */
|
||||||
int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
|
int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
|
||||||
int64_t sector_num, int nb_sectors);
|
int64_t sector_num, int nb_sectors);
|
||||||
@ -584,10 +596,12 @@ int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
|
|||||||
int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
|
int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
|
||||||
BlockDriverAmendStatusCB *status_cb,
|
BlockDriverAmendStatusCB *status_cb,
|
||||||
void *cb_opaque, Error **errp);
|
void *cb_opaque, Error **errp);
|
||||||
|
int qcow2_shrink_reftable(BlockDriverState *bs);
|
||||||
|
|
||||||
/* qcow2-cluster.c functions */
|
/* qcow2-cluster.c functions */
|
||||||
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
|
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
|
||||||
bool exact_size);
|
bool exact_size);
|
||||||
|
int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size);
|
||||||
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
|
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
|
||||||
int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
|
int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
|
||||||
int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
|
int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
|
||||||
|
@ -2533,6 +2533,11 @@
|
|||||||
#
|
#
|
||||||
# Trigger events supported by blkdebug.
|
# Trigger events supported by blkdebug.
|
||||||
#
|
#
|
||||||
|
# @l1_shrink_write_table: write zeros to the l1 table to shrink image.
|
||||||
|
# (since 2.11)
|
||||||
|
#
|
||||||
|
# @l1_shrink_free_l2_clusters: discard the l2 tables. (since 2.11)
|
||||||
|
#
|
||||||
# Since: 2.9
|
# Since: 2.9
|
||||||
##
|
##
|
||||||
{ 'enum': 'BlkdebugEvent', 'prefix': 'BLKDBG',
|
{ 'enum': 'BlkdebugEvent', 'prefix': 'BLKDBG',
|
||||||
@ -2549,7 +2554,8 @@
|
|||||||
'cluster_alloc_bytes', 'cluster_free', 'flush_to_os',
|
'cluster_alloc_bytes', 'cluster_free', 'flush_to_os',
|
||||||
'flush_to_disk', 'pwritev_rmw_head', 'pwritev_rmw_after_head',
|
'flush_to_disk', 'pwritev_rmw_head', 'pwritev_rmw_after_head',
|
||||||
'pwritev_rmw_tail', 'pwritev_rmw_after_tail', 'pwritev',
|
'pwritev_rmw_tail', 'pwritev_rmw_after_tail', 'pwritev',
|
||||||
'pwritev_zero', 'pwritev_done', 'empty_image_prepare' ] }
|
'pwritev_zero', 'pwritev_done', 'empty_image_prepare',
|
||||||
|
'l1_shrink_write_table', 'l1_shrink_free_l2_clusters' ] }
|
||||||
|
|
||||||
##
|
##
|
||||||
# @BlkdebugInjectErrorOptions:
|
# @BlkdebugInjectErrorOptions:
|
||||||
|
Loading…
Reference in New Issue
Block a user