mirror: allow customizing the granularity
The desired granularity may be very different depending on the kind of
operation (e.g. continuous replication vs. collapse-to-raw) and whether
the VM is expected to perform lots of I/O while mirroring is in progress.

Allow the user to customize it, while providing a sane default so that
in general there will be no extra allocated space in the target compared
to the source.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
This commit is contained in:
parent
50717e941b
commit
eee13dfe30
@ -17,9 +17,6 @@
|
|||||||
#include "qemu/ratelimit.h"
|
#include "qemu/ratelimit.h"
|
||||||
#include "qemu/bitmap.h"
|
#include "qemu/bitmap.h"
|
||||||
|
|
||||||
#define BLOCK_SIZE (1 << 20)
|
|
||||||
#define BDRV_SECTORS_PER_DIRTY_CHUNK (BLOCK_SIZE >> BDRV_SECTOR_BITS)
|
|
||||||
|
|
||||||
#define SLICE_TIME 100000000ULL /* ns */
|
#define SLICE_TIME 100000000ULL /* ns */
|
||||||
|
|
||||||
typedef struct MirrorBlockJob {
|
typedef struct MirrorBlockJob {
|
||||||
@ -31,6 +28,7 @@ typedef struct MirrorBlockJob {
|
|||||||
bool synced;
|
bool synced;
|
||||||
bool should_complete;
|
bool should_complete;
|
||||||
int64_t sector_num;
|
int64_t sector_num;
|
||||||
|
int64_t granularity;
|
||||||
size_t buf_size;
|
size_t buf_size;
|
||||||
unsigned long *cow_bitmap;
|
unsigned long *cow_bitmap;
|
||||||
HBitmapIter hbi;
|
HBitmapIter hbi;
|
||||||
@ -56,7 +54,7 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
|
|||||||
BlockDriverState *source = s->common.bs;
|
BlockDriverState *source = s->common.bs;
|
||||||
BlockDriverState *target = s->target;
|
BlockDriverState *target = s->target;
|
||||||
QEMUIOVector qiov;
|
QEMUIOVector qiov;
|
||||||
int ret, nb_sectors;
|
int ret, nb_sectors, sectors_per_chunk;
|
||||||
int64_t end, sector_num, chunk_num;
|
int64_t end, sector_num, chunk_num;
|
||||||
struct iovec iov;
|
struct iovec iov;
|
||||||
|
|
||||||
@ -72,16 +70,16 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
|
|||||||
* is very large, we need to do COW ourselves. The first time a cluster is
|
* is very large, we need to do COW ourselves. The first time a cluster is
|
||||||
* copied, copy it entirely.
|
* copied, copy it entirely.
|
||||||
*
|
*
|
||||||
* Because both BDRV_SECTORS_PER_DIRTY_CHUNK and the cluster size are
|
* Because both the granularity and the cluster size are powers of two, the
|
||||||
* powers of two, the number of sectors to copy cannot exceed one cluster.
|
* number of sectors to copy cannot exceed one cluster.
|
||||||
*/
|
*/
|
||||||
sector_num = s->sector_num;
|
sector_num = s->sector_num;
|
||||||
nb_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
|
sectors_per_chunk = nb_sectors = s->granularity >> BDRV_SECTOR_BITS;
|
||||||
chunk_num = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
|
chunk_num = sector_num / sectors_per_chunk;
|
||||||
if (s->cow_bitmap && !test_bit(chunk_num, s->cow_bitmap)) {
|
if (s->cow_bitmap && !test_bit(chunk_num, s->cow_bitmap)) {
|
||||||
trace_mirror_cow(s, sector_num);
|
trace_mirror_cow(s, sector_num);
|
||||||
bdrv_round_to_clusters(s->target,
|
bdrv_round_to_clusters(s->target,
|
||||||
sector_num, BDRV_SECTORS_PER_DIRTY_CHUNK,
|
sector_num, sectors_per_chunk,
|
||||||
&sector_num, &nb_sectors);
|
&sector_num, &nb_sectors);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -107,8 +105,8 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
|
|||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
if (s->cow_bitmap) {
|
if (s->cow_bitmap) {
|
||||||
bitmap_set(s->cow_bitmap, sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK,
|
bitmap_set(s->cow_bitmap, sector_num / sectors_per_chunk,
|
||||||
nb_sectors / BDRV_SECTORS_PER_DIRTY_CHUNK);
|
nb_sectors / sectors_per_chunk);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
@ -122,7 +120,7 @@ static void coroutine_fn mirror_run(void *opaque)
|
|||||||
{
|
{
|
||||||
MirrorBlockJob *s = opaque;
|
MirrorBlockJob *s = opaque;
|
||||||
BlockDriverState *bs = s->common.bs;
|
BlockDriverState *bs = s->common.bs;
|
||||||
int64_t sector_num, end, length;
|
int64_t sector_num, end, sectors_per_chunk, length;
|
||||||
BlockDriverInfo bdi;
|
BlockDriverInfo bdi;
|
||||||
char backing_filename[1024];
|
char backing_filename[1024];
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
@ -146,22 +144,23 @@ static void coroutine_fn mirror_run(void *opaque)
|
|||||||
sizeof(backing_filename));
|
sizeof(backing_filename));
|
||||||
if (backing_filename[0] && !s->target->backing_hd) {
|
if (backing_filename[0] && !s->target->backing_hd) {
|
||||||
bdrv_get_info(s->target, &bdi);
|
bdrv_get_info(s->target, &bdi);
|
||||||
if (s->buf_size < bdi.cluster_size) {
|
if (s->granularity < bdi.cluster_size) {
|
||||||
s->buf_size = bdi.cluster_size;
|
s->buf_size = bdi.cluster_size;
|
||||||
length = (bdrv_getlength(bs) + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
|
||||||
s->cow_bitmap = bitmap_new(length);
|
s->cow_bitmap = bitmap_new(length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
end = s->common.len >> BDRV_SECTOR_BITS;
|
end = s->common.len >> BDRV_SECTOR_BITS;
|
||||||
s->buf = qemu_blockalign(bs, s->buf_size);
|
s->buf = qemu_blockalign(bs, s->buf_size);
|
||||||
|
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
|
||||||
|
|
||||||
if (s->mode != MIRROR_SYNC_MODE_NONE) {
|
if (s->mode != MIRROR_SYNC_MODE_NONE) {
|
||||||
/* First part, loop on the sectors and initialize the dirty bitmap. */
|
/* First part, loop on the sectors and initialize the dirty bitmap. */
|
||||||
BlockDriverState *base;
|
BlockDriverState *base;
|
||||||
base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
|
base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
|
||||||
for (sector_num = 0; sector_num < end; ) {
|
for (sector_num = 0; sector_num < end; ) {
|
||||||
int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
|
int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
|
||||||
ret = bdrv_co_is_allocated_above(bs, base,
|
ret = bdrv_co_is_allocated_above(bs, base,
|
||||||
sector_num, next - sector_num, &n);
|
sector_num, next - sector_num, &n);
|
||||||
|
|
||||||
@ -242,7 +241,7 @@ static void coroutine_fn mirror_run(void *opaque)
|
|||||||
s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;
|
s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;
|
||||||
|
|
||||||
if (s->common.speed) {
|
if (s->common.speed) {
|
||||||
delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK);
|
delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk);
|
||||||
} else {
|
} else {
|
||||||
delay_ns = 0;
|
delay_ns = 0;
|
||||||
}
|
}
|
||||||
@ -332,7 +331,7 @@ static BlockJobType mirror_job_type = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
|
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
|
||||||
int64_t speed, MirrorSyncMode mode,
|
int64_t speed, int64_t granularity, MirrorSyncMode mode,
|
||||||
BlockdevOnError on_source_error,
|
BlockdevOnError on_source_error,
|
||||||
BlockdevOnError on_target_error,
|
BlockdevOnError on_target_error,
|
||||||
BlockDriverCompletionFunc *cb,
|
BlockDriverCompletionFunc *cb,
|
||||||
@ -340,6 +339,20 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
|
|||||||
{
|
{
|
||||||
MirrorBlockJob *s;
|
MirrorBlockJob *s;
|
||||||
|
|
||||||
|
if (granularity == 0) {
|
||||||
|
/* Choose the default granularity based on the target file's cluster
|
||||||
|
* size, clamped between 4k and 64k. */
|
||||||
|
BlockDriverInfo bdi;
|
||||||
|
if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size != 0) {
|
||||||
|
granularity = MAX(4096, bdi.cluster_size);
|
||||||
|
granularity = MIN(65536, granularity);
|
||||||
|
} else {
|
||||||
|
granularity = 65536;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert ((granularity & (granularity - 1)) == 0);
|
||||||
|
|
||||||
if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
|
if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
|
||||||
on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
|
on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
|
||||||
!bdrv_iostatus_is_enabled(bs)) {
|
!bdrv_iostatus_is_enabled(bs)) {
|
||||||
@ -356,9 +369,10 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
|
|||||||
s->on_target_error = on_target_error;
|
s->on_target_error = on_target_error;
|
||||||
s->target = target;
|
s->target = target;
|
||||||
s->mode = mode;
|
s->mode = mode;
|
||||||
s->buf_size = BLOCK_SIZE;
|
s->granularity = granularity;
|
||||||
|
s->buf_size = granularity;
|
||||||
|
|
||||||
bdrv_set_dirty_tracking(bs, BLOCK_SIZE);
|
bdrv_set_dirty_tracking(bs, granularity);
|
||||||
bdrv_set_enable_write_cache(s->target, true);
|
bdrv_set_enable_write_cache(s->target, true);
|
||||||
bdrv_set_on_error(s->target, on_target_error, on_target_error);
|
bdrv_set_on_error(s->target, on_target_error, on_target_error);
|
||||||
bdrv_iostatus_enable(s->target);
|
bdrv_iostatus_enable(s->target);
|
||||||
|
15
blockdev.c
15
blockdev.c
@ -1193,6 +1193,7 @@ void qmp_drive_mirror(const char *device, const char *target,
|
|||||||
enum MirrorSyncMode sync,
|
enum MirrorSyncMode sync,
|
||||||
bool has_mode, enum NewImageMode mode,
|
bool has_mode, enum NewImageMode mode,
|
||||||
bool has_speed, int64_t speed,
|
bool has_speed, int64_t speed,
|
||||||
|
bool has_granularity, uint32_t granularity,
|
||||||
bool has_on_source_error, BlockdevOnError on_source_error,
|
bool has_on_source_error, BlockdevOnError on_source_error,
|
||||||
bool has_on_target_error, BlockdevOnError on_target_error,
|
bool has_on_target_error, BlockdevOnError on_target_error,
|
||||||
Error **errp)
|
Error **errp)
|
||||||
@ -1218,6 +1219,17 @@ void qmp_drive_mirror(const char *device, const char *target,
|
|||||||
if (!has_mode) {
|
if (!has_mode) {
|
||||||
mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
|
mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
|
||||||
}
|
}
|
||||||
|
if (!has_granularity) {
|
||||||
|
granularity = 0;
|
||||||
|
}
|
||||||
|
if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
|
||||||
|
error_set(errp, QERR_INVALID_PARAMETER, device);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (granularity & (granularity - 1)) {
|
||||||
|
error_set(errp, QERR_INVALID_PARAMETER, device);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
bs = bdrv_find(device);
|
bs = bdrv_find(device);
|
||||||
if (!bs) {
|
if (!bs) {
|
||||||
@ -1299,7 +1311,8 @@ void qmp_drive_mirror(const char *device, const char *target,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
|
mirror_start(bs, target_bs, speed, granularity, sync,
|
||||||
|
on_source_error, on_target_error,
|
||||||
block_job_cb, bs, &local_err);
|
block_job_cb, bs, &local_err);
|
||||||
if (local_err != NULL) {
|
if (local_err != NULL) {
|
||||||
bdrv_delete(target_bs);
|
bdrv_delete(target_bs);
|
||||||
|
2
hmp.c
2
hmp.c
@ -796,7 +796,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
|
|||||||
|
|
||||||
qmp_drive_mirror(device, filename, !!format, format,
|
qmp_drive_mirror(device, filename, !!format, format,
|
||||||
full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
|
full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
|
||||||
true, mode, false, 0,
|
true, mode, false, 0, false, 0,
|
||||||
false, 0, false, 0, &errp);
|
false, 0, false, 0, &errp);
|
||||||
hmp_handle_error(mon, &errp);
|
hmp_handle_error(mon, &errp);
|
||||||
}
|
}
|
||||||
|
@ -344,6 +344,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
|
|||||||
* @bs: Block device to operate on.
|
* @bs: Block device to operate on.
|
||||||
* @target: Block device to write to.
|
* @target: Block device to write to.
|
||||||
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
|
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
|
||||||
|
* @granularity: The chosen granularity for the dirty bitmap, in bytes. Must be a power of two; 0 selects a default derived from the target's cluster size.
|
||||||
* @mode: Whether to collapse all images in the chain to the target.
|
* @mode: Whether to collapse all images in the chain to the target.
|
||||||
* @on_source_error: The action to take upon error reading from the source.
|
* @on_source_error: The action to take upon error reading from the source.
|
||||||
* @on_target_error: The action to take upon error writing to the target.
|
* @on_target_error: The action to take upon error writing to the target.
|
||||||
@ -357,7 +358,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
|
|||||||
* @bs will be switched to read from @target.
|
* @bs will be switched to read from @target.
|
||||||
*/
|
*/
|
||||||
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
|
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
|
||||||
int64_t speed, MirrorSyncMode mode,
|
int64_t speed, int64_t granularity, MirrorSyncMode mode,
|
||||||
BlockdevOnError on_source_error,
|
BlockdevOnError on_source_error,
|
||||||
BlockdevOnError on_target_error,
|
BlockdevOnError on_target_error,
|
||||||
BlockDriverCompletionFunc *cb,
|
BlockDriverCompletionFunc *cb,
|
||||||
|
@ -1636,6 +1636,11 @@
|
|||||||
# (all the disk, only the sectors allocated in the topmost image, or
|
# (all the disk, only the sectors allocated in the topmost image, or
|
||||||
# only new I/O).
|
# only new I/O).
|
||||||
#
|
#
|
||||||
|
# @granularity: #optional granularity of the dirty bitmap, in bytes. The
|
||||||
|
# default is 64K if the image format doesn't have clusters;
|
||||||
|
# otherwise it is the cluster size clamped between 4K and
|
||||||
|
# 64K. Must be a power of 2 between 512 and 64M (since 1.4).
|
||||||
|
#
|
||||||
# @on-source-error: #optional the action to take on an error on the source,
|
# @on-source-error: #optional the action to take on an error on the source,
|
||||||
# default 'report'. 'stop' and 'enospc' can only be used
|
# default 'report'. 'stop' and 'enospc' can only be used
|
||||||
# if the block device supports io-status (see BlockInfo).
|
# if the block device supports io-status (see BlockInfo).
|
||||||
@ -1652,7 +1657,8 @@
|
|||||||
{ 'command': 'drive-mirror',
|
{ 'command': 'drive-mirror',
|
||||||
'data': { 'device': 'str', 'target': 'str', '*format': 'str',
|
'data': { 'device': 'str', 'target': 'str', '*format': 'str',
|
||||||
'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
|
'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
|
||||||
'*speed': 'int', '*on-source-error': 'BlockdevOnError',
|
'*speed': 'int', '*granularity': 'uint32',
|
||||||
|
'*on-source-error': 'BlockdevOnError',
|
||||||
'*on-target-error': 'BlockdevOnError' } }
|
'*on-target-error': 'BlockdevOnError' } }
|
||||||
|
|
||||||
##
|
##
|
||||||
|
@ -938,7 +938,8 @@ EQMP
|
|||||||
{
|
{
|
||||||
.name = "drive-mirror",
|
.name = "drive-mirror",
|
||||||
.args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
|
.args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
|
||||||
"on-source-error:s?,on-target-error:s?",
|
"on-source-error:s?,on-target-error:s?,"
|
||||||
|
"granularity:i?",
|
||||||
.mhandler.cmd_new = qmp_marshal_input_drive_mirror,
|
.mhandler.cmd_new = qmp_marshal_input_drive_mirror,
|
||||||
},
|
},
|
||||||
|
|
||||||
@ -962,6 +963,7 @@ Arguments:
|
|||||||
file/device (NewImageMode, optional, default 'absolute-paths')
|
file/device (NewImageMode, optional, default 'absolute-paths')
|
||||||
- "speed": maximum speed of the streaming job, in bytes per second
|
- "speed": maximum speed of the streaming job, in bytes per second
|
||||||
(json-int)
|
(json-int)
|
||||||
|
- "granularity": granularity of the dirty bitmap, in bytes (json-int, optional)
|
||||||
- "sync": what parts of the disk image should be copied to the destination;
|
- "sync": what parts of the disk image should be copied to the destination;
|
||||||
possibilities include "full" for all the disk, "top" for only the sectors
|
possibilities include "full" for all the disk, "top" for only the sectors
|
||||||
allocated in the topmost image, or "none" to only replicate new I/O
|
allocated in the topmost image, or "none" to only replicate new I/O
|
||||||
@ -971,6 +973,10 @@ Arguments:
|
|||||||
- "on-target-error": the action to take on an error on the target
|
- "on-target-error": the action to take on an error on the target
|
||||||
(BlockdevOnError, default 'report')
|
(BlockdevOnError, default 'report')
|
||||||
|
|
||||||
|
The default value of the granularity is the image cluster size clamped
|
||||||
|
between 4096 and 65536, if the image format defines one. If the format
|
||||||
|
does not define a cluster size, the default value of the granularity
|
||||||
|
is 65536.
|
||||||
|
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
Loading…
Reference in New Issue
Block a user