Block patches for 4.1-rc0:

- The stream job no longer relies on a fixed base node
 - The rbd block driver can now accommodate growing formats like qcow2
 -----BEGIN PGP SIGNATURE-----
 
 iQFGBAABCAAwFiEEkb62CjDbPohX0Rgp9AfbAGHVz0AFAl0aubYSHG1yZWl0ekBy
 ZWRoYXQuY29tAAoJEPQH2wBh1c9AglcH/1EoMnmibLg4BumWh+kPMKIXthoMSN0Z
 cVK8/OhpR66tAaT0OatX+CKYtfefAdX5Km6Oipa4ygALF21aON6+N2lss+K//ieK
 cg71CxncmgYlAS358wdJBgzCCzE06+fCY2JYLa/vRgVoKFJjXwVvlmYymuOA3RN/
 htJB64tXZheyEX89B7fqBdlDAf2SEomXVVPgKp9QMPKaP+JchGdZ26VxHFXEEJLo
 NZuMZpUUVNSkiDkpEh6ED/6jDFFpTjLzdoNdVnRJmHpeKNEPrLQVxqyjD1bagyt/
 InjEtMGQ9i1v4B1IODdYhyLas/bQsituG7Dj149le02307qcvZbLi7s=
 =RY+3
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2019-07-02' into staging

Block patches for 4.1-rc0:
- The stream job no longer relies on a fixed base node
- The rbd block driver can now accommodate growing formats like qcow2

# gpg: Signature made Tue 02 Jul 2019 02:56:06 BST
# gpg:                using RSA key 91BEB60A30DB3E8857D11829F407DB0061D5CF40
# gpg:                issuer "mreitz@redhat.com"
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>" [full]
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40

* remotes/maxreitz/tags/pull-block-2019-07-02:
  block/stream: introduce a bottom node
  block/stream: refactor stream_run: drop goto
  block: include base when checking image chain for block allocation
  block/rbd: increase dynamically the image size

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2019-07-02 17:41:01 +01:00
commit bf1b9edeb0
10 changed files with 91 additions and 46 deletions

View File

@ -174,7 +174,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
break; break;
} }
/* Copy if allocated above the base */ /* Copy if allocated above the base */
ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base), ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base), false,
offset, COMMIT_BUFFER_SIZE, &n); offset, COMMIT_BUFFER_SIZE, &n);
copy = (ret == 1); copy = (ret == 1);
trace_commit_one_iteration(s, offset, n, ret); trace_commit_one_iteration(s, offset, n, ret);

View File

@ -2295,10 +2295,11 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
/* /*
* Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP] * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
* *
* Return true if (a prefix of) the given range is allocated in any image * Return 1 if (a prefix of) the given range is allocated in any image
* between BASE and TOP (inclusive). BASE can be NULL to check if the given * between BASE and TOP (BASE is only included if include_base is set).
* offset is allocated in any image of the chain. Return false otherwise, * BASE can be NULL to check if the given offset is allocated in any
* or negative errno on failure. * image of the chain. Return 0 otherwise, or negative errno on
* failure.
* *
* 'pnum' is set to the number of bytes (including and immediately * 'pnum' is set to the number of bytes (including and immediately
* following the specified offset) that are known to be in the same * following the specified offset) that are known to be in the same
@ -2310,17 +2311,21 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
*/ */
int bdrv_is_allocated_above(BlockDriverState *top, int bdrv_is_allocated_above(BlockDriverState *top,
BlockDriverState *base, BlockDriverState *base,
int64_t offset, int64_t bytes, int64_t *pnum) bool include_base, int64_t offset,
int64_t bytes, int64_t *pnum)
{ {
BlockDriverState *intermediate; BlockDriverState *intermediate;
int ret; int ret;
int64_t n = bytes; int64_t n = bytes;
assert(base || !include_base);
intermediate = top; intermediate = top;
while (intermediate && intermediate != base) { while (include_base || intermediate != base) {
int64_t pnum_inter; int64_t pnum_inter;
int64_t size_inter; int64_t size_inter;
assert(intermediate);
ret = bdrv_is_allocated(intermediate, offset, bytes, &pnum_inter); ret = bdrv_is_allocated(intermediate, offset, bytes, &pnum_inter);
if (ret < 0) { if (ret < 0) {
return ret; return ret;
@ -2339,6 +2344,10 @@ int bdrv_is_allocated_above(BlockDriverState *top,
n = pnum_inter; n = pnum_inter;
} }
if (intermediate == base) {
break;
}
intermediate = backing_bs(intermediate); intermediate = backing_bs(intermediate);
} }

View File

@ -808,7 +808,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
return 0; return 0;
} }
ret = bdrv_is_allocated_above(bs, base, offset, bytes, &count); ret = bdrv_is_allocated_above(bs, base, false, offset, bytes, &count);
if (ret < 0) { if (ret < 0) {
return ret; return ret;
} }

View File

@ -2148,7 +2148,8 @@ static bool is_unallocated(BlockDriverState *bs, int64_t offset, int64_t bytes)
{ {
int64_t nr; int64_t nr;
return !bytes || return !bytes ||
(!bdrv_is_allocated_above(bs, NULL, offset, bytes, &nr) && nr == bytes); (!bdrv_is_allocated_above(bs, NULL, false, offset, bytes, &nr) &&
nr == bytes);
} }
static bool is_zero_cow(BlockDriverState *bs, QCowL2Meta *m) static bool is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)

View File

@ -103,6 +103,7 @@ typedef struct BDRVRBDState {
rbd_image_t image; rbd_image_t image;
char *image_name; char *image_name;
char *snap; char *snap;
uint64_t image_size;
} BDRVRBDState; } BDRVRBDState;
static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx, static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
@ -778,6 +779,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
goto failed_open; goto failed_open;
} }
r = rbd_get_size(s->image, &s->image_size);
if (r < 0) {
error_setg_errno(errp, -r, "error getting image size from %s",
s->image_name);
rbd_close(s->image);
goto failed_open;
}
/* If we are using an rbd snapshot, we must be r/o, otherwise /* If we are using an rbd snapshot, we must be r/o, otherwise
* leave as-is */ * leave as-is */
if (s->snap != NULL) { if (s->snap != NULL) {
@ -834,6 +843,22 @@ static void qemu_rbd_close(BlockDriverState *bs)
rados_shutdown(s->cluster); rados_shutdown(s->cluster);
} }
/*
 * Resize the RBD image and update the 'image_size' with the current size.
 *
 * @bs:   block driver state; bs->opaque is used as the BDRVRBDState
 * @size: new image size handed to rbd_resize() (presumably bytes - confirm)
 *
 * Returns 0 on success. On failure, returns the negative value reported by
 * rbd_resize() and leaves the cached 'image_size' untouched.
 */
static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size)
{
BDRVRBDState *s = bs->opaque;
int r;
r = rbd_resize(s->image, size);
if (r < 0) {
return r;
}
/* Only refresh the cached size once the resize has actually succeeded */
s->image_size = size;
return 0;
}
static const AIOCBInfo rbd_aiocb_info = { static const AIOCBInfo rbd_aiocb_info = {
.aiocb_size = sizeof(RBDAIOCB), .aiocb_size = sizeof(RBDAIOCB),
}; };
@ -935,13 +960,25 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
} }
switch (cmd) { switch (cmd) {
case RBD_AIO_WRITE: case RBD_AIO_WRITE: {
/*
* RBD APIs don't allow us to write more than actual size, so in order
* to support growing images, we resize the image before write
* operations that exceed the current size.
*/
if (off + size > s->image_size) {
r = qemu_rbd_resize(bs, off + size);
if (r < 0) {
goto failed_completion;
}
}
#ifdef LIBRBD_SUPPORTS_IOVEC #ifdef LIBRBD_SUPPORTS_IOVEC
r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c); r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
#else #else
r = rbd_aio_write(s->image, off, size, rcb->buf, c); r = rbd_aio_write(s->image, off, size, rcb->buf, c);
#endif #endif
break; break;
}
case RBD_AIO_READ: case RBD_AIO_READ:
#ifdef LIBRBD_SUPPORTS_IOVEC #ifdef LIBRBD_SUPPORTS_IOVEC
r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c); r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
@ -1052,7 +1089,6 @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs,
PreallocMode prealloc, PreallocMode prealloc,
Error **errp) Error **errp)
{ {
BDRVRBDState *s = bs->opaque;
int r; int r;
if (prealloc != PREALLOC_MODE_OFF) { if (prealloc != PREALLOC_MODE_OFF) {
@ -1061,7 +1097,7 @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs,
return -ENOTSUP; return -ENOTSUP;
} }
r = rbd_resize(s->image, offset); r = qemu_rbd_resize(bs, offset);
if (r < 0) { if (r < 0) {
error_setg_errno(errp, -r, "Failed to resize file"); error_setg_errno(errp, -r, "Failed to resize file");
return r; return r;

View File

@ -275,7 +275,7 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,
while (remaining_sectors > 0) { while (remaining_sectors > 0) {
int64_t count; int64_t count;
ret = bdrv_is_allocated_above(top->bs, base->bs, ret = bdrv_is_allocated_above(top->bs, base->bs, false,
sector_num * BDRV_SECTOR_SIZE, sector_num * BDRV_SECTOR_SIZE,
remaining_sectors * BDRV_SECTOR_SIZE, remaining_sectors * BDRV_SECTOR_SIZE,
&count); &count);

View File

@ -31,7 +31,7 @@ enum {
typedef struct StreamBlockJob { typedef struct StreamBlockJob {
BlockJob common; BlockJob common;
BlockDriverState *base; BlockDriverState *bottom;
BlockdevOnError on_error; BlockdevOnError on_error;
char *backing_file_str; char *backing_file_str;
bool bs_read_only; bool bs_read_only;
@ -54,7 +54,7 @@ static void stream_abort(Job *job)
if (s->chain_frozen) { if (s->chain_frozen) {
BlockJob *bjob = &s->common; BlockJob *bjob = &s->common;
bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->base); bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->bottom);
} }
} }
@ -63,11 +63,11 @@ static int stream_prepare(Job *job)
StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
BlockJob *bjob = &s->common; BlockJob *bjob = &s->common;
BlockDriverState *bs = blk_bs(bjob->blk); BlockDriverState *bs = blk_bs(bjob->blk);
BlockDriverState *base = s->base; BlockDriverState *base = backing_bs(s->bottom);
Error *local_err = NULL; Error *local_err = NULL;
int ret = 0; int ret = 0;
bdrv_unfreeze_backing_chain(bs, base); bdrv_unfreeze_backing_chain(bs, s->bottom);
s->chain_frozen = false; s->chain_frozen = false;
if (bs->backing) { if (bs->backing) {
@ -110,7 +110,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
BlockBackend *blk = s->common.blk; BlockBackend *blk = s->common.blk;
BlockDriverState *bs = blk_bs(blk); BlockDriverState *bs = blk_bs(blk);
BlockDriverState *base = s->base; bool enable_cor = !backing_bs(s->bottom);
int64_t len; int64_t len;
int64_t offset = 0; int64_t offset = 0;
uint64_t delay_ns = 0; uint64_t delay_ns = 0;
@ -119,14 +119,14 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
int64_t n = 0; /* bytes */ int64_t n = 0; /* bytes */
void *buf; void *buf;
if (!bs->backing) { if (bs == s->bottom) {
goto out; /* Nothing to stream */
return 0;
} }
len = bdrv_getlength(bs); len = bdrv_getlength(bs);
if (len < 0) { if (len < 0) {
ret = len; return len;
goto out;
} }
job_progress_set_remaining(&s->common.job, len); job_progress_set_remaining(&s->common.job, len);
@ -137,7 +137,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
* backing chain since the copy-on-read operation does not take base into * backing chain since the copy-on-read operation does not take base into
* account. * account.
*/ */
if (!base) { if (enable_cor) {
bdrv_enable_copy_on_read(bs); bdrv_enable_copy_on_read(bs);
} }
@ -160,9 +160,8 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
} else if (ret >= 0) { } else if (ret >= 0) {
/* Copy if allocated in the intermediate images. Limit to the /* Copy if allocated in the intermediate images. Limit to the
* known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE). */ * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE). */
ret = bdrv_is_allocated_above(backing_bs(bs), base, ret = bdrv_is_allocated_above(backing_bs(bs), s->bottom, true,
offset, n, &n); offset, n, &n);
/* Finish early if end of backing file has been reached */ /* Finish early if end of backing file has been reached */
if (ret == 0 && n == 0) { if (ret == 0 && n == 0) {
n = len - offset; n = len - offset;
@ -199,18 +198,14 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
} }
} }
if (!base) { if (enable_cor) {
bdrv_disable_copy_on_read(bs); bdrv_disable_copy_on_read(bs);
} }
/* Do not remove the backing file if an error was there but ignored. */
ret = error;
qemu_vfree(buf); qemu_vfree(buf);
out: /* Do not remove the backing file if an error was there but ignored. */
/* Modify backing chain and close BDSes in main loop */ return error;
return ret;
} }
static const BlockJobDriver stream_job_driver = { static const BlockJobDriver stream_job_driver = {
@ -235,8 +230,10 @@ void stream_start(const char *job_id, BlockDriverState *bs,
StreamBlockJob *s; StreamBlockJob *s;
BlockDriverState *iter; BlockDriverState *iter;
bool bs_read_only; bool bs_read_only;
int basic_flags = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
BlockDriverState *bottom = bdrv_find_overlay(bs, base);
if (bdrv_freeze_backing_chain(bs, base, errp) < 0) { if (bdrv_freeze_backing_chain(bs, bottom, errp) < 0) {
return; return;
} }
@ -253,10 +250,8 @@ void stream_start(const char *job_id, BlockDriverState *bs,
* already have our own plans. Also don't allow resize as the image size is * already have our own plans. Also don't allow resize as the image size is
* queried only at the job start and then cached. */ * queried only at the job start and then cached. */
s = block_job_create(job_id, &stream_job_driver, NULL, bs, s = block_job_create(job_id, &stream_job_driver, NULL, bs,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | basic_flags | BLK_PERM_GRAPH_MOD,
BLK_PERM_GRAPH_MOD, basic_flags | BLK_PERM_WRITE,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
BLK_PERM_WRITE,
speed, creation_flags, NULL, NULL, errp); speed, creation_flags, NULL, NULL, errp);
if (!s) { if (!s) {
goto fail; goto fail;
@ -264,15 +259,18 @@ void stream_start(const char *job_id, BlockDriverState *bs,
/* Block all intermediate nodes between bs and base, because they will /* Block all intermediate nodes between bs and base, because they will
* disappear from the chain after this operation. The streaming job reads * disappear from the chain after this operation. The streaming job reads
* every block only once, assuming that it doesn't change, so block writes * every block only once, assuming that it doesn't change, so forbid writes
* and resizes. */ * and resizes. Reassign the base node pointer because the backing BS of the
* bottom node might change after the call to bdrv_reopen_set_read_only()
* due to parallel block jobs running.
*/
base = backing_bs(bottom);
for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) { for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
block_job_add_bdrv(&s->common, "intermediate node", iter, 0, block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED, basic_flags, &error_abort);
&error_abort);
} }
s->base = base; s->bottom = bottom;
s->backing_file_str = g_strdup(backing_file_str); s->backing_file_str = g_strdup(backing_file_str);
s->bs_read_only = bs_read_only; s->bs_read_only = bs_read_only;
s->chain_frozen = true; s->chain_frozen = true;

View File

@ -449,7 +449,8 @@ int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
int64_t *pnum); int64_t *pnum);
int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
int64_t offset, int64_t bytes, int64_t *pnum); bool include_base, int64_t offset, int64_t bytes,
int64_t *pnum);
bool bdrv_is_read_only(BlockDriverState *bs); bool bdrv_is_read_only(BlockDriverState *bs);
int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,

View File

@ -3518,7 +3518,7 @@ static int img_rebase(int argc, char **argv)
* to take action * to take action
*/ */
ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs, ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs,
offset, n, &n); false, offset, n, &n);
if (ret < 0) { if (ret < 0) {
error_report("error while reading image metadata: %s", error_report("error while reading image metadata: %s",
strerror(-ret)); strerror(-ret));

View File

@ -866,9 +866,9 @@ class TestBlockdevReopen(iotests.QMPTestCase):
auto_finalize = False) auto_finalize = False)
self.assert_qmp(result, 'return', {}) self.assert_qmp(result, 'return', {})
# We can't remove hd2 while the stream job is ongoing # We can remove hd2 while the stream job is ongoing
opts['backing']['backing'] = None opts['backing']['backing'] = None
self.reopen(opts, {}, "Cannot change 'backing' link from 'hd1' to 'hd2'") self.reopen(opts, {})
# We can't remove hd1 while the stream job is ongoing # We can't remove hd1 while the stream job is ongoing
opts['backing'] = None opts['backing'] = None