Block patches for 4.1-rc0:
- The stream job no longer relies on a fixed base node - The rbd block driver can now accommodate growing formats like qcow2 -----BEGIN PGP SIGNATURE----- iQFGBAABCAAwFiEEkb62CjDbPohX0Rgp9AfbAGHVz0AFAl0aubYSHG1yZWl0ekBy ZWRoYXQuY29tAAoJEPQH2wBh1c9AglcH/1EoMnmibLg4BumWh+kPMKIXthoMSN0Z cVK8/OhpR66tAaT0OatX+CKYtfefAdX5Km6Oipa4ygALF21aON6+N2lss+K//ieK cg71CxncmgYlAS358wdJBgzCCzE06+fCY2JYLa/vRgVoKFJjXwVvlmYymuOA3RN/ htJB64tXZheyEX89B7fqBdlDAf2SEomXVVPgKp9QMPKaP+JchGdZ26VxHFXEEJLo NZuMZpUUVNSkiDkpEh6ED/6jDFFpTjLzdoNdVnRJmHpeKNEPrLQVxqyjD1bagyt/ InjEtMGQ9i1v4B1IODdYhyLas/bQsituG7Dj149le02307qcvZbLi7s= =RY+3 -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2019-07-02' into staging Block patches for 4.1-rc0: - The stream job no longer relies on a fixed base node - The rbd block driver can now accommodate growing formats like qcow2 # gpg: Signature made Tue 02 Jul 2019 02:56:06 BST # gpg: using RSA key 91BEB60A30DB3E8857D11829F407DB0061D5CF40 # gpg: issuer "mreitz@redhat.com" # gpg: Good signature from "Max Reitz <mreitz@redhat.com>" [full] # Primary key fingerprint: 91BE B60A 30DB 3E88 57D1 1829 F407 DB00 61D5 CF40 * remotes/maxreitz/tags/pull-block-2019-07-02: block/stream: introduce a bottom node block/stream: refactor stream_run: drop goto block: include base when checking image chain for block allocation block/rbd: increase dynamically the image size Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
bf1b9edeb0
@ -174,7 +174,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
|
||||
break;
|
||||
}
|
||||
/* Copy if allocated above the base */
|
||||
ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base),
|
||||
ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base), false,
|
||||
offset, COMMIT_BUFFER_SIZE, &n);
|
||||
copy = (ret == 1);
|
||||
trace_commit_one_iteration(s, offset, n, ret);
|
||||
|
21
block/io.c
21
block/io.c
@ -2295,10 +2295,11 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
|
||||
/*
|
||||
* Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
|
||||
*
|
||||
* Return true if (a prefix of) the given range is allocated in any image
|
||||
* between BASE and TOP (inclusive). BASE can be NULL to check if the given
|
||||
* offset is allocated in any image of the chain. Return false otherwise,
|
||||
* or negative errno on failure.
|
||||
* Return 1 if (a prefix of) the given range is allocated in any image
|
||||
* between BASE and TOP (BASE is only included if include_base is set).
|
||||
* BASE can be NULL to check if the given offset is allocated in any
|
||||
* image of the chain. Return 0 otherwise, or negative errno on
|
||||
* failure.
|
||||
*
|
||||
* 'pnum' is set to the number of bytes (including and immediately
|
||||
* following the specified offset) that are known to be in the same
|
||||
@ -2310,17 +2311,21 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
|
||||
*/
|
||||
int bdrv_is_allocated_above(BlockDriverState *top,
|
||||
BlockDriverState *base,
|
||||
int64_t offset, int64_t bytes, int64_t *pnum)
|
||||
bool include_base, int64_t offset,
|
||||
int64_t bytes, int64_t *pnum)
|
||||
{
|
||||
BlockDriverState *intermediate;
|
||||
int ret;
|
||||
int64_t n = bytes;
|
||||
|
||||
assert(base || !include_base);
|
||||
|
||||
intermediate = top;
|
||||
while (intermediate && intermediate != base) {
|
||||
while (include_base || intermediate != base) {
|
||||
int64_t pnum_inter;
|
||||
int64_t size_inter;
|
||||
|
||||
assert(intermediate);
|
||||
ret = bdrv_is_allocated(intermediate, offset, bytes, &pnum_inter);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
@ -2339,6 +2344,10 @@ int bdrv_is_allocated_above(BlockDriverState *top,
|
||||
n = pnum_inter;
|
||||
}
|
||||
|
||||
if (intermediate == base) {
|
||||
break;
|
||||
}
|
||||
|
||||
intermediate = backing_bs(intermediate);
|
||||
}
|
||||
|
||||
|
@ -808,7 +808,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = bdrv_is_allocated_above(bs, base, offset, bytes, &count);
|
||||
ret = bdrv_is_allocated_above(bs, base, false, offset, bytes, &count);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
|
@ -2148,7 +2148,8 @@ static bool is_unallocated(BlockDriverState *bs, int64_t offset, int64_t bytes)
|
||||
{
|
||||
int64_t nr;
|
||||
return !bytes ||
|
||||
(!bdrv_is_allocated_above(bs, NULL, offset, bytes, &nr) && nr == bytes);
|
||||
(!bdrv_is_allocated_above(bs, NULL, false, offset, bytes, &nr) &&
|
||||
nr == bytes);
|
||||
}
|
||||
|
||||
static bool is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
|
||||
|
42
block/rbd.c
42
block/rbd.c
@ -103,6 +103,7 @@ typedef struct BDRVRBDState {
|
||||
rbd_image_t image;
|
||||
char *image_name;
|
||||
char *snap;
|
||||
uint64_t image_size;
|
||||
} BDRVRBDState;
|
||||
|
||||
static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
|
||||
@ -778,6 +779,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
goto failed_open;
|
||||
}
|
||||
|
||||
r = rbd_get_size(s->image, &s->image_size);
|
||||
if (r < 0) {
|
||||
error_setg_errno(errp, -r, "error getting image size from %s",
|
||||
s->image_name);
|
||||
rbd_close(s->image);
|
||||
goto failed_open;
|
||||
}
|
||||
|
||||
/* If we are using an rbd snapshot, we must be r/o, otherwise
|
||||
* leave as-is */
|
||||
if (s->snap != NULL) {
|
||||
@ -834,6 +843,22 @@ static void qemu_rbd_close(BlockDriverState *bs)
|
||||
rados_shutdown(s->cluster);
|
||||
}
|
||||
|
||||
/* Resize the RBD image and update the 'image_size' with the current size */
|
||||
static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size)
|
||||
{
|
||||
BDRVRBDState *s = bs->opaque;
|
||||
int r;
|
||||
|
||||
r = rbd_resize(s->image, size);
|
||||
if (r < 0) {
|
||||
return r;
|
||||
}
|
||||
|
||||
s->image_size = size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const AIOCBInfo rbd_aiocb_info = {
|
||||
.aiocb_size = sizeof(RBDAIOCB),
|
||||
};
|
||||
@ -935,13 +960,25 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
|
||||
}
|
||||
|
||||
switch (cmd) {
|
||||
case RBD_AIO_WRITE:
|
||||
case RBD_AIO_WRITE: {
|
||||
/*
|
||||
* RBD APIs don't allow us to write more than actual size, so in order
|
||||
* to support growing images, we resize the image before write
|
||||
* operations that exceed the current size.
|
||||
*/
|
||||
if (off + size > s->image_size) {
|
||||
r = qemu_rbd_resize(bs, off + size);
|
||||
if (r < 0) {
|
||||
goto failed_completion;
|
||||
}
|
||||
}
|
||||
#ifdef LIBRBD_SUPPORTS_IOVEC
|
||||
r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
|
||||
#else
|
||||
r = rbd_aio_write(s->image, off, size, rcb->buf, c);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case RBD_AIO_READ:
|
||||
#ifdef LIBRBD_SUPPORTS_IOVEC
|
||||
r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
|
||||
@ -1052,7 +1089,6 @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs,
|
||||
PreallocMode prealloc,
|
||||
Error **errp)
|
||||
{
|
||||
BDRVRBDState *s = bs->opaque;
|
||||
int r;
|
||||
|
||||
if (prealloc != PREALLOC_MODE_OFF) {
|
||||
@ -1061,7 +1097,7 @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs,
|
||||
return -ENOTSUP;
|
||||
}
|
||||
|
||||
r = rbd_resize(s->image, offset);
|
||||
r = qemu_rbd_resize(bs, offset);
|
||||
if (r < 0) {
|
||||
error_setg_errno(errp, -r, "Failed to resize file");
|
||||
return r;
|
||||
|
@ -275,7 +275,7 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,
|
||||
while (remaining_sectors > 0) {
|
||||
int64_t count;
|
||||
|
||||
ret = bdrv_is_allocated_above(top->bs, base->bs,
|
||||
ret = bdrv_is_allocated_above(top->bs, base->bs, false,
|
||||
sector_num * BDRV_SECTOR_SIZE,
|
||||
remaining_sectors * BDRV_SECTOR_SIZE,
|
||||
&count);
|
||||
|
@ -31,7 +31,7 @@ enum {
|
||||
|
||||
typedef struct StreamBlockJob {
|
||||
BlockJob common;
|
||||
BlockDriverState *base;
|
||||
BlockDriverState *bottom;
|
||||
BlockdevOnError on_error;
|
||||
char *backing_file_str;
|
||||
bool bs_read_only;
|
||||
@ -54,7 +54,7 @@ static void stream_abort(Job *job)
|
||||
|
||||
if (s->chain_frozen) {
|
||||
BlockJob *bjob = &s->common;
|
||||
bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->base);
|
||||
bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->bottom);
|
||||
}
|
||||
}
|
||||
|
||||
@ -63,11 +63,11 @@ static int stream_prepare(Job *job)
|
||||
StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
|
||||
BlockJob *bjob = &s->common;
|
||||
BlockDriverState *bs = blk_bs(bjob->blk);
|
||||
BlockDriverState *base = s->base;
|
||||
BlockDriverState *base = backing_bs(s->bottom);
|
||||
Error *local_err = NULL;
|
||||
int ret = 0;
|
||||
|
||||
bdrv_unfreeze_backing_chain(bs, base);
|
||||
bdrv_unfreeze_backing_chain(bs, s->bottom);
|
||||
s->chain_frozen = false;
|
||||
|
||||
if (bs->backing) {
|
||||
@ -110,7 +110,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
|
||||
StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
|
||||
BlockBackend *blk = s->common.blk;
|
||||
BlockDriverState *bs = blk_bs(blk);
|
||||
BlockDriverState *base = s->base;
|
||||
bool enable_cor = !backing_bs(s->bottom);
|
||||
int64_t len;
|
||||
int64_t offset = 0;
|
||||
uint64_t delay_ns = 0;
|
||||
@ -119,14 +119,14 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
|
||||
int64_t n = 0; /* bytes */
|
||||
void *buf;
|
||||
|
||||
if (!bs->backing) {
|
||||
goto out;
|
||||
if (bs == s->bottom) {
|
||||
/* Nothing to stream */
|
||||
return 0;
|
||||
}
|
||||
|
||||
len = bdrv_getlength(bs);
|
||||
if (len < 0) {
|
||||
ret = len;
|
||||
goto out;
|
||||
return len;
|
||||
}
|
||||
job_progress_set_remaining(&s->common.job, len);
|
||||
|
||||
@ -137,7 +137,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
|
||||
* backing chain since the copy-on-read operation does not take base into
|
||||
* account.
|
||||
*/
|
||||
if (!base) {
|
||||
if (enable_cor) {
|
||||
bdrv_enable_copy_on_read(bs);
|
||||
}
|
||||
|
||||
@ -160,9 +160,8 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
|
||||
} else if (ret >= 0) {
|
||||
/* Copy if allocated in the intermediate images. Limit to the
|
||||
* known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE). */
|
||||
ret = bdrv_is_allocated_above(backing_bs(bs), base,
|
||||
ret = bdrv_is_allocated_above(backing_bs(bs), s->bottom, true,
|
||||
offset, n, &n);
|
||||
|
||||
/* Finish early if end of backing file has been reached */
|
||||
if (ret == 0 && n == 0) {
|
||||
n = len - offset;
|
||||
@ -199,18 +198,14 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
|
||||
}
|
||||
}
|
||||
|
||||
if (!base) {
|
||||
if (enable_cor) {
|
||||
bdrv_disable_copy_on_read(bs);
|
||||
}
|
||||
|
||||
/* Do not remove the backing file if an error was there but ignored. */
|
||||
ret = error;
|
||||
|
||||
qemu_vfree(buf);
|
||||
|
||||
out:
|
||||
/* Modify backing chain and close BDSes in main loop */
|
||||
return ret;
|
||||
/* Do not remove the backing file if an error was there but ignored. */
|
||||
return error;
|
||||
}
|
||||
|
||||
static const BlockJobDriver stream_job_driver = {
|
||||
@ -235,8 +230,10 @@ void stream_start(const char *job_id, BlockDriverState *bs,
|
||||
StreamBlockJob *s;
|
||||
BlockDriverState *iter;
|
||||
bool bs_read_only;
|
||||
int basic_flags = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
|
||||
BlockDriverState *bottom = bdrv_find_overlay(bs, base);
|
||||
|
||||
if (bdrv_freeze_backing_chain(bs, base, errp) < 0) {
|
||||
if (bdrv_freeze_backing_chain(bs, bottom, errp) < 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -253,10 +250,8 @@ void stream_start(const char *job_id, BlockDriverState *bs,
|
||||
* already have our own plans. Also don't allow resize as the image size is
|
||||
* queried only at the job start and then cached. */
|
||||
s = block_job_create(job_id, &stream_job_driver, NULL, bs,
|
||||
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
|
||||
BLK_PERM_GRAPH_MOD,
|
||||
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
|
||||
BLK_PERM_WRITE,
|
||||
basic_flags | BLK_PERM_GRAPH_MOD,
|
||||
basic_flags | BLK_PERM_WRITE,
|
||||
speed, creation_flags, NULL, NULL, errp);
|
||||
if (!s) {
|
||||
goto fail;
|
||||
@ -264,15 +259,18 @@ void stream_start(const char *job_id, BlockDriverState *bs,
|
||||
|
||||
/* Block all intermediate nodes between bs and base, because they will
|
||||
* disappear from the chain after this operation. The streaming job reads
|
||||
* every block only once, assuming that it doesn't change, so block writes
|
||||
* and resizes. */
|
||||
* every block only once, assuming that it doesn't change, so forbid writes
|
||||
* and resizes. Reassign the base node pointer because the backing BS of the
|
||||
* bottom node might change after the call to bdrv_reopen_set_read_only()
|
||||
* due to parallel block jobs running.
|
||||
*/
|
||||
base = backing_bs(bottom);
|
||||
for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
|
||||
block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
|
||||
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED,
|
||||
&error_abort);
|
||||
basic_flags, &error_abort);
|
||||
}
|
||||
|
||||
s->base = base;
|
||||
s->bottom = bottom;
|
||||
s->backing_file_str = g_strdup(backing_file_str);
|
||||
s->bs_read_only = bs_read_only;
|
||||
s->chain_frozen = true;
|
||||
|
@ -449,7 +449,8 @@ int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
|
||||
int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
|
||||
int64_t *pnum);
|
||||
int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
|
||||
int64_t offset, int64_t bytes, int64_t *pnum);
|
||||
bool include_base, int64_t offset, int64_t bytes,
|
||||
int64_t *pnum);
|
||||
|
||||
bool bdrv_is_read_only(BlockDriverState *bs);
|
||||
int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
|
||||
|
@ -3518,7 +3518,7 @@ static int img_rebase(int argc, char **argv)
|
||||
* to take action
|
||||
*/
|
||||
ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs,
|
||||
offset, n, &n);
|
||||
false, offset, n, &n);
|
||||
if (ret < 0) {
|
||||
error_report("error while reading image metadata: %s",
|
||||
strerror(-ret));
|
||||
|
@ -866,9 +866,9 @@ class TestBlockdevReopen(iotests.QMPTestCase):
|
||||
auto_finalize = False)
|
||||
self.assert_qmp(result, 'return', {})
|
||||
|
||||
# We can't remove hd2 while the stream job is ongoing
|
||||
# We can remove hd2 while the stream job is ongoing
|
||||
opts['backing']['backing'] = None
|
||||
self.reopen(opts, {}, "Cannot change 'backing' link from 'hd1' to 'hd2'")
|
||||
self.reopen(opts, {})
|
||||
|
||||
# We can't remove hd1 while the stream job is ongoing
|
||||
opts['backing'] = None
|
||||
|
Loading…
Reference in New Issue
Block a user