00e30f05de
Drop write notifiers and use filter node instead. = Changes = 1. Add filter-node-name argument for backup qmp api. We have to do it in this commit, as 257 needs to be fixed. 2. There are no more write notifiers here, so is_write_notifier parameter is dropped from block-copy paths. 3. To sync with in-flight requests at job finish we now have drained removing of the filter, we don't need rw-lock. 4. Block-copy is now using BdrvChildren instead of BlockBackends 5. As backup-top owns these children, we also move block-copy state into backup-top's ownership. = Iotest changes = 56: op-blocker doesn't shoot now, as we set it on source, but then check on filter, when trying to start second backup. To keep the test we instead can catch another collision: both jobs will get 'drive0' job-id, as job-id parameter is unspecified. To prevent interleaving with file-posix locks (as they are dependent on config) let's use another target for second backup. Also, it's obvious now that we'd like to drop this op-blocker at all and add a test-case for two backups from one node (to different destinations) actually works. But not in these series. 141: Output changed: prepatch, "Node is in use" comes from bdrv_has_blk check inside qmp_blockdev_del. But we've dropped block-copy blk objects, so no more blk objects on source bs (job blk is on backup-top filter bs). New message is from op-blocker, which is the next check in qmp_blockdev_add. 257: The test wants to emulate guest write during backup. They should go to filter node, not to original source node, of course. Therefore we need to specify filter node name and use it. Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Message-id: 20191001131409.14202-6-vsementsov@virtuozzo.com Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Max Reitz <mreitz@redhat.com>
346 lines
10 KiB
C
346 lines
10 KiB
C
/*
|
|
* block_copy API
|
|
*
|
|
* Copyright (C) 2013 Proxmox Server Solutions
|
|
* Copyright (c) 2019 Virtuozzo International GmbH.
|
|
*
|
|
* Authors:
|
|
* Dietmar Maurer (dietmar@proxmox.com)
|
|
* Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
|
*
|
|
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
|
* See the COPYING file in the top-level directory.
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
|
|
#include "trace.h"
|
|
#include "qapi/error.h"
|
|
#include "block/block-copy.h"
|
|
#include "sysemu/block-backend.h"
|
|
|
|
static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
|
|
int64_t start,
|
|
int64_t end)
|
|
{
|
|
BlockCopyInFlightReq *req;
|
|
bool waited;
|
|
|
|
do {
|
|
waited = false;
|
|
QLIST_FOREACH(req, &s->inflight_reqs, list) {
|
|
if (end > req->start_byte && start < req->end_byte) {
|
|
qemu_co_queue_wait(&req->wait_queue, NULL);
|
|
waited = true;
|
|
break;
|
|
}
|
|
}
|
|
} while (waited);
|
|
}
|
|
|
|
static void block_copy_inflight_req_begin(BlockCopyState *s,
|
|
BlockCopyInFlightReq *req,
|
|
int64_t start, int64_t end)
|
|
{
|
|
req->start_byte = start;
|
|
req->end_byte = end;
|
|
qemu_co_queue_init(&req->wait_queue);
|
|
QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
|
|
}
|
|
|
|
static void coroutine_fn block_copy_inflight_req_end(BlockCopyInFlightReq *req)
|
|
{
|
|
QLIST_REMOVE(req, list);
|
|
qemu_co_queue_restart_all(&req->wait_queue);
|
|
}
|
|
|
|
void block_copy_state_free(BlockCopyState *s)
|
|
{
|
|
if (!s) {
|
|
return;
|
|
}
|
|
|
|
bdrv_release_dirty_bitmap(s->source->bs, s->copy_bitmap);
|
|
g_free(s);
|
|
}
|
|
|
|
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
|
|
int64_t cluster_size,
|
|
BdrvRequestFlags write_flags, Error **errp)
|
|
{
|
|
BlockCopyState *s;
|
|
BdrvDirtyBitmap *copy_bitmap;
|
|
uint32_t max_transfer =
|
|
MIN_NON_ZERO(INT_MAX, MIN_NON_ZERO(source->bs->bl.max_transfer,
|
|
target->bs->bl.max_transfer));
|
|
|
|
copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
|
|
errp);
|
|
if (!copy_bitmap) {
|
|
return NULL;
|
|
}
|
|
bdrv_disable_dirty_bitmap(copy_bitmap);
|
|
|
|
s = g_new(BlockCopyState, 1);
|
|
*s = (BlockCopyState) {
|
|
.source = source,
|
|
.target = target,
|
|
.copy_bitmap = copy_bitmap,
|
|
.cluster_size = cluster_size,
|
|
.len = bdrv_dirty_bitmap_size(copy_bitmap),
|
|
.write_flags = write_flags,
|
|
};
|
|
|
|
s->copy_range_size = QEMU_ALIGN_DOWN(max_transfer, cluster_size),
|
|
/*
|
|
* Set use_copy_range, consider the following:
|
|
* 1. Compression is not supported for copy_range.
|
|
* 2. copy_range does not respect max_transfer (it's a TODO), so we factor
|
|
* that in here. If max_transfer is smaller than the job->cluster_size,
|
|
* we do not use copy_range (in that case it's zero after aligning down
|
|
* above).
|
|
*/
|
|
s->use_copy_range =
|
|
!(write_flags & BDRV_REQ_WRITE_COMPRESSED) && s->copy_range_size > 0;
|
|
|
|
QLIST_INIT(&s->inflight_reqs);
|
|
|
|
return s;
|
|
}
|
|
|
|
void block_copy_set_callbacks(
|
|
BlockCopyState *s,
|
|
ProgressBytesCallbackFunc progress_bytes_callback,
|
|
ProgressResetCallbackFunc progress_reset_callback,
|
|
void *progress_opaque)
|
|
{
|
|
s->progress_bytes_callback = progress_bytes_callback;
|
|
s->progress_reset_callback = progress_reset_callback;
|
|
s->progress_opaque = progress_opaque;
|
|
}
|
|
|
|
/*
|
|
* Copy range to target with a bounce buffer and return the bytes copied. If
|
|
* error occurred, return a negative error number
|
|
*/
|
|
static int coroutine_fn block_copy_with_bounce_buffer(BlockCopyState *s,
|
|
int64_t start,
|
|
int64_t end,
|
|
bool *error_is_read,
|
|
void **bounce_buffer)
|
|
{
|
|
int ret;
|
|
int nbytes;
|
|
|
|
assert(QEMU_IS_ALIGNED(start, s->cluster_size));
|
|
bdrv_reset_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
|
|
nbytes = MIN(s->cluster_size, s->len - start);
|
|
if (!*bounce_buffer) {
|
|
*bounce_buffer = qemu_blockalign(s->source->bs, s->cluster_size);
|
|
}
|
|
|
|
ret = bdrv_co_pread(s->source, start, nbytes, *bounce_buffer, 0);
|
|
if (ret < 0) {
|
|
trace_block_copy_with_bounce_buffer_read_fail(s, start, ret);
|
|
if (error_is_read) {
|
|
*error_is_read = true;
|
|
}
|
|
goto fail;
|
|
}
|
|
|
|
ret = bdrv_co_pwrite(s->target, start, nbytes, *bounce_buffer,
|
|
s->write_flags);
|
|
if (ret < 0) {
|
|
trace_block_copy_with_bounce_buffer_write_fail(s, start, ret);
|
|
if (error_is_read) {
|
|
*error_is_read = false;
|
|
}
|
|
goto fail;
|
|
}
|
|
|
|
return nbytes;
|
|
fail:
|
|
bdrv_set_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
|
|
return ret;
|
|
|
|
}
|
|
|
|
/*
|
|
* Copy range to target and return the bytes copied. If error occurred, return a
|
|
* negative error number.
|
|
*/
|
|
static int coroutine_fn block_copy_with_offload(BlockCopyState *s,
|
|
int64_t start,
|
|
int64_t end)
|
|
{
|
|
int ret;
|
|
int nr_clusters;
|
|
int nbytes;
|
|
|
|
assert(QEMU_IS_ALIGNED(s->copy_range_size, s->cluster_size));
|
|
assert(QEMU_IS_ALIGNED(start, s->cluster_size));
|
|
nbytes = MIN(s->copy_range_size, MIN(end, s->len) - start);
|
|
nr_clusters = DIV_ROUND_UP(nbytes, s->cluster_size);
|
|
bdrv_reset_dirty_bitmap(s->copy_bitmap, start,
|
|
s->cluster_size * nr_clusters);
|
|
ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
|
|
0, s->write_flags);
|
|
if (ret < 0) {
|
|
trace_block_copy_with_offload_fail(s, start, ret);
|
|
bdrv_set_dirty_bitmap(s->copy_bitmap, start,
|
|
s->cluster_size * nr_clusters);
|
|
return ret;
|
|
}
|
|
|
|
return nbytes;
|
|
}
|
|
|
|
/*
|
|
* Check if the cluster starting at offset is allocated or not.
|
|
* return via pnum the number of contiguous clusters sharing this allocation.
|
|
*/
|
|
static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
|
|
int64_t *pnum)
|
|
{
|
|
BlockDriverState *bs = s->source->bs;
|
|
int64_t count, total_count = 0;
|
|
int64_t bytes = s->len - offset;
|
|
int ret;
|
|
|
|
assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
|
|
|
|
while (true) {
|
|
ret = bdrv_is_allocated(bs, offset, bytes, &count);
|
|
if (ret < 0) {
|
|
return ret;
|
|
}
|
|
|
|
total_count += count;
|
|
|
|
if (ret || count == 0) {
|
|
/*
|
|
* ret: partial segment(s) are considered allocated.
|
|
* otherwise: unallocated tail is treated as an entire segment.
|
|
*/
|
|
*pnum = DIV_ROUND_UP(total_count, s->cluster_size);
|
|
return ret;
|
|
}
|
|
|
|
/* Unallocated segment(s) with uncertain following segment(s) */
|
|
if (total_count >= s->cluster_size) {
|
|
*pnum = total_count / s->cluster_size;
|
|
return 0;
|
|
}
|
|
|
|
offset += count;
|
|
bytes -= count;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Reset bits in copy_bitmap starting at offset if they represent unallocated
|
|
* data in the image. May reset subsequent contiguous bits.
|
|
* @return 0 when the cluster at @offset was unallocated,
|
|
* 1 otherwise, and -ret on error.
|
|
*/
|
|
int64_t block_copy_reset_unallocated(BlockCopyState *s,
|
|
int64_t offset, int64_t *count)
|
|
{
|
|
int ret;
|
|
int64_t clusters, bytes;
|
|
|
|
ret = block_copy_is_cluster_allocated(s, offset, &clusters);
|
|
if (ret < 0) {
|
|
return ret;
|
|
}
|
|
|
|
bytes = clusters * s->cluster_size;
|
|
|
|
if (!ret) {
|
|
bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
|
|
s->progress_reset_callback(s->progress_opaque);
|
|
}
|
|
|
|
*count = bytes;
|
|
return ret;
|
|
}
|
|
|
|
int coroutine_fn block_copy(BlockCopyState *s,
|
|
int64_t start, uint64_t bytes,
|
|
bool *error_is_read)
|
|
{
|
|
int ret = 0;
|
|
int64_t end = bytes + start; /* bytes */
|
|
void *bounce_buffer = NULL;
|
|
int64_t status_bytes;
|
|
BlockCopyInFlightReq req;
|
|
|
|
/*
|
|
* block_copy() user is responsible for keeping source and target in same
|
|
* aio context
|
|
*/
|
|
assert(bdrv_get_aio_context(s->source->bs) ==
|
|
bdrv_get_aio_context(s->target->bs));
|
|
|
|
assert(QEMU_IS_ALIGNED(start, s->cluster_size));
|
|
assert(QEMU_IS_ALIGNED(end, s->cluster_size));
|
|
|
|
block_copy_wait_inflight_reqs(s, start, bytes);
|
|
block_copy_inflight_req_begin(s, &req, start, end);
|
|
|
|
while (start < end) {
|
|
int64_t dirty_end;
|
|
|
|
if (!bdrv_dirty_bitmap_get(s->copy_bitmap, start)) {
|
|
trace_block_copy_skip(s, start);
|
|
start += s->cluster_size;
|
|
continue; /* already copied */
|
|
}
|
|
|
|
dirty_end = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start,
|
|
(end - start));
|
|
if (dirty_end < 0) {
|
|
dirty_end = end;
|
|
}
|
|
|
|
if (s->skip_unallocated) {
|
|
ret = block_copy_reset_unallocated(s, start, &status_bytes);
|
|
if (ret == 0) {
|
|
trace_block_copy_skip_range(s, start, status_bytes);
|
|
start += status_bytes;
|
|
continue;
|
|
}
|
|
/* Clamp to known allocated region */
|
|
dirty_end = MIN(dirty_end, start + status_bytes);
|
|
}
|
|
|
|
trace_block_copy_process(s, start);
|
|
|
|
if (s->use_copy_range) {
|
|
ret = block_copy_with_offload(s, start, dirty_end);
|
|
if (ret < 0) {
|
|
s->use_copy_range = false;
|
|
}
|
|
}
|
|
if (!s->use_copy_range) {
|
|
ret = block_copy_with_bounce_buffer(s, start, dirty_end,
|
|
error_is_read, &bounce_buffer);
|
|
}
|
|
if (ret < 0) {
|
|
break;
|
|
}
|
|
|
|
start += ret;
|
|
s->progress_bytes_callback(ret, s->progress_opaque);
|
|
ret = 0;
|
|
}
|
|
|
|
if (bounce_buffer) {
|
|
qemu_vfree(bounce_buffer);
|
|
}
|
|
|
|
block_copy_inflight_req_end(&req);
|
|
|
|
return ret;
|
|
}
|