diff --git a/block.c b/block.c index 1b8147c1b3..70a46fdd84 100644 --- a/block.c +++ b/block.c @@ -725,7 +725,7 @@ static int find_image_format(BlockBackend *file, const char *filename, * Set the current 'total_sectors' value * Return 0 on success, -errno on error. */ -static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) +int refresh_total_sectors(BlockDriverState *bs, int64_t hint) { BlockDriver *drv = bs->drv; @@ -2226,16 +2226,6 @@ static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load) } } -static void bdrv_parent_cb_resize(BlockDriverState *bs) -{ - BdrvChild *c; - QLIST_FOREACH(c, &bs->parents, next_parent) { - if (c->role->resize) { - c->role->resize(c); - } - } -} - /* * Sets the backing file link of a BDS. A new reference is created; callers * which don't need their own reference any more must call bdrv_unref(). @@ -3785,58 +3775,6 @@ exit: return ret; } -/** - * Truncate file to 'offset' bytes (needed only for file protocols) - */ -int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, - Error **errp) -{ - BlockDriverState *bs = child->bs; - BlockDriver *drv = bs->drv; - int ret; - - assert(child->perm & BLK_PERM_RESIZE); - - /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ - if (!drv) { - error_setg(errp, "No medium inserted"); - return -ENOMEDIUM; - } - if (offset < 0) { - error_setg(errp, "Image size cannot be negative"); - return -EINVAL; - } - - if (!drv->bdrv_truncate) { - if (bs->file && drv->is_filter) { - return bdrv_truncate(bs->file, offset, prealloc, errp); - } - error_setg(errp, "Image format driver does not support resize"); - return -ENOTSUP; - } - if (bs->read_only) { - error_setg(errp, "Image is read-only"); - return -EACCES; - } - - assert(!(bs->open_flags & BDRV_O_INACTIVE)); - - ret = drv->bdrv_truncate(bs, offset, prealloc, errp); - if (ret < 0) { - return ret; - } - ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); - if (ret < 0) { - error_setg_errno(errp, -ret, "Could not refresh total sector count"); - } else { - offset = bs->total_sectors * BDRV_SECTOR_SIZE; - } - bdrv_dirty_bitmap_truncate(bs, offset); - bdrv_parent_cb_resize(bs); - atomic_inc(&bs->write_gen); - return ret; -} - /** * Length of a allocated file in bytes. Sparse files are counted by actual * allocated space. Return < 0 if error or unknown. diff --git a/block/copy-on-read.c b/block/copy-on-read.c index 6a97208888..1dcdaeed69 100644 --- a/block/copy-on-read.c +++ b/block/copy-on-read.c @@ -80,10 +80,10 @@ static int64_t cor_getlength(BlockDriverState *bs) } -static int cor_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn cor_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { - return bdrv_truncate(bs->file, offset, prealloc, errp); + return bdrv_co_truncate(bs->file, offset, prealloc, errp); } @@ -147,7 +147,7 @@ BlockDriver bdrv_copy_on_read = { .bdrv_child_perm = cor_child_perm, .bdrv_getlength = cor_getlength, - .bdrv_truncate = cor_truncate, + .bdrv_co_truncate = cor_co_truncate, .bdrv_co_preadv = cor_co_preadv, .bdrv_co_pwritev = cor_co_pwritev, diff --git a/block/crypto.c b/block/crypto.c index 82091c5f70..994172a3de 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -148,108 +148,36 @@ static QemuOptsList block_crypto_create_opts_luks = { QCryptoBlockOpenOptions * -block_crypto_open_opts_init(QCryptoBlockFormat format, - QDict *opts, - Error **errp) +block_crypto_open_opts_init(QDict *opts, Error **errp) { Visitor *v; - QCryptoBlockOpenOptions *ret = NULL; - Error *local_err = NULL; + QCryptoBlockOpenOptions *ret; - ret = g_new0(QCryptoBlockOpenOptions, 1); - ret->format = format; - - v = qobject_input_visitor_new_flat_confused(opts, &local_err); - if (local_err) { - goto out; + v = qobject_input_visitor_new_flat_confused(opts, errp); + if (!v) { + return NULL; } - visit_start_struct(v, NULL, NULL, 0, &local_err); - if (local_err) { - goto out; - } + visit_type_QCryptoBlockOpenOptions(v, NULL, &ret, errp); - switch (format) { - case Q_CRYPTO_BLOCK_FORMAT_LUKS: - visit_type_QCryptoBlockOptionsLUKS_members( - v, &ret->u.luks, &local_err); - break; - - case Q_CRYPTO_BLOCK_FORMAT_QCOW: - visit_type_QCryptoBlockOptionsQCow_members( - v, &ret->u.qcow, &local_err); - break; - - default: - error_setg(&local_err, "Unsupported block format %d", format); - break; - } - if (!local_err) { - visit_check_struct(v, &local_err); - } - - visit_end_struct(v, NULL); - - out: - if (local_err) { - error_propagate(errp, local_err); - qapi_free_QCryptoBlockOpenOptions(ret); - ret = NULL; - } visit_free(v); return ret; } QCryptoBlockCreateOptions * -block_crypto_create_opts_init(QCryptoBlockFormat format, - QDict *opts, - Error **errp) +block_crypto_create_opts_init(QDict *opts, Error **errp) { Visitor *v; - QCryptoBlockCreateOptions *ret = NULL; - Error *local_err = NULL; + QCryptoBlockCreateOptions *ret; - ret = g_new0(QCryptoBlockCreateOptions, 1); - ret->format = format; - - v = qobject_input_visitor_new_flat_confused(opts, &local_err); - if (local_err) { - goto out; + v = qobject_input_visitor_new_flat_confused(opts, errp); + if (!v) { + return NULL; } - visit_start_struct(v, NULL, NULL, 0, &local_err); - if (local_err) { - goto out; - } + visit_type_QCryptoBlockCreateOptions(v, NULL, &ret, errp); - switch (format) { - case Q_CRYPTO_BLOCK_FORMAT_LUKS: - visit_type_QCryptoBlockCreateOptionsLUKS_members( - v, &ret->u.luks, &local_err); - break; - - case Q_CRYPTO_BLOCK_FORMAT_QCOW: - visit_type_QCryptoBlockOptionsQCow_members( - v, &ret->u.qcow, &local_err); - break; - - default: - error_setg(&local_err, "Unsupported block format %d", format); - break; - } - if (!local_err) { - visit_check_struct(v, &local_err); - } - - visit_end_struct(v, NULL); - - out: - if (local_err) { - error_propagate(errp, local_err); - qapi_free_QCryptoBlockCreateOptions(ret); - ret = NULL; - } visit_free(v); return ret; } @@ -287,8 +215,9 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, } cryptoopts = qemu_opts_to_qdict(opts, NULL); + qdict_put_str(cryptoopts, "format", QCryptoBlockFormat_str(format)); - open_opts = block_crypto_open_opts_init(format, cryptoopts, errp); + open_opts = block_crypto_open_opts_init(cryptoopts, errp); if (!open_opts) { goto cleanup; } @@ -357,8 +286,9 @@ static int block_crypto_co_create_generic(BlockDriverState *bs, return ret; } -static int block_crypto_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn +block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BlockCrypto *crypto = bs->opaque; uint64_t payload_offset = @@ -371,7 +301,7 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset, offset += payload_offset; - return bdrv_truncate(bs->file, offset, prealloc, errp); + return bdrv_co_truncate(bs->file, offset, prealloc, errp); } static void block_crypto_close(BlockDriverState *bs) @@ -611,8 +541,8 @@ static int coroutine_fn block_crypto_co_create_opts_luks(const char *filename, &block_crypto_create_opts_luks, true); - create_opts = block_crypto_create_opts_init(Q_CRYPTO_BLOCK_FORMAT_LUKS, - cryptoopts, errp); + qdict_put_str(cryptoopts, "format", "luks"); + create_opts = block_crypto_create_opts_init(cryptoopts, errp); if (!create_opts) { ret = -EINVAL; goto fail; @@ -700,7 +630,7 @@ BlockDriver bdrv_crypto_luks = { .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_create = block_crypto_co_create_luks, .bdrv_co_create_opts = block_crypto_co_create_opts_luks, - .bdrv_truncate = block_crypto_truncate, + .bdrv_co_truncate = block_crypto_co_truncate, .create_opts = &block_crypto_create_opts_luks, .bdrv_reopen_prepare = block_crypto_reopen_prepare, diff --git a/block/crypto.h b/block/crypto.h index 0f985ea4e2..dd7d47903c 100644 --- a/block/crypto.h +++ b/block/crypto.h @@ -89,13 +89,9 @@ } QCryptoBlockCreateOptions * -block_crypto_create_opts_init(QCryptoBlockFormat format, - QDict *opts, - Error **errp); +block_crypto_create_opts_init(QDict *opts, Error **errp); QCryptoBlockOpenOptions * -block_crypto_open_opts_init(QCryptoBlockFormat format, - QDict *opts, - Error **errp); +block_crypto_open_opts_init(QDict *opts, Error **errp); #endif /* BLOCK_CRYPTO_H__ */ diff --git a/block/file-posix.c b/block/file-posix.c index 43b963b13e..829ee538d8 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -188,8 +188,16 @@ typedef struct RawPosixAIOData { #define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */ off_t aio_offset; int aio_type; - int aio_fd2; - off_t aio_offset2; + union { + struct { + int aio_fd2; + off_t aio_offset2; + }; + struct { + PreallocMode prealloc; + Error **errp; + }; + }; } RawPosixAIOData; #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) @@ -1480,20 +1488,21 @@ static ssize_t handle_aiocb_copy_range(RawPosixAIOData *aiocb) ssize_t ret = copy_file_range(aiocb->aio_fildes, &in_off, aiocb->aio_fd2, &out_off, bytes, 0); - if (ret == -EINTR) { - continue; + if (ret == 0) { + /* No progress (e.g. when beyond EOF), let the caller fall back to + * buffer I/O. */ + return -ENOSPC; } if (ret < 0) { - if (errno == ENOSYS) { + switch (errno) { + case ENOSYS: return -ENOTSUP; - } else { + case EINTR: + continue; + default: return -errno; } } - if (!ret) { - /* No progress (e.g. when beyond EOF), fall back to buffer I/O. */ - return -ENOTSUP; - } bytes -= ret; } return 0; @@ -1539,6 +1548,122 @@ static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb) return ret; } +static int handle_aiocb_truncate(RawPosixAIOData *aiocb) +{ + int result = 0; + int64_t current_length = 0; + char *buf = NULL; + struct stat st; + int fd = aiocb->aio_fildes; + int64_t offset = aiocb->aio_offset; + Error **errp = aiocb->errp; + + if (fstat(fd, &st) < 0) { + result = -errno; + error_setg_errno(errp, -result, "Could not stat file"); + return result; + } + + current_length = st.st_size; + if (current_length > offset && aiocb->prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Cannot use preallocation for shrinking files"); + return -ENOTSUP; + } + + switch (aiocb->prealloc) { +#ifdef CONFIG_POSIX_FALLOCATE + case PREALLOC_MODE_FALLOC: + /* + * Truncating before posix_fallocate() makes it about twice slower on + * file systems that do not support fallocate(), trying to check if a + * block is allocated before allocating it, so don't do that here. + */ + if (offset != current_length) { + result = -posix_fallocate(fd, current_length, + offset - current_length); + if (result != 0) { + /* posix_fallocate() doesn't set errno. */ + error_setg_errno(errp, -result, + "Could not preallocate new data"); + } + } else { + result = 0; + } + goto out; +#endif + case PREALLOC_MODE_FULL: + { + int64_t num = 0, left = offset - current_length; + off_t seek_result; + + /* + * Knowing the final size from the beginning could allow the file + * system driver to do less allocations and possibly avoid + * fragmentation of the file. + */ + if (ftruncate(fd, offset) != 0) { + result = -errno; + error_setg_errno(errp, -result, "Could not resize file"); + goto out; + } + + buf = g_malloc0(65536); + + seek_result = lseek(fd, current_length, SEEK_SET); + if (seek_result < 0) { + result = -errno; + error_setg_errno(errp, -result, + "Failed to seek to the old end of file"); + goto out; + } + + while (left > 0) { + num = MIN(left, 65536); + result = write(fd, buf, num); + if (result < 0) { + result = -errno; + error_setg_errno(errp, -result, + "Could not write zeros for preallocation"); + goto out; + } + left -= result; + } + if (result >= 0) { + result = fsync(fd); + if (result < 0) { + result = -errno; + error_setg_errno(errp, -result, + "Could not flush file to disk"); + goto out; + } + } + goto out; + } + case PREALLOC_MODE_OFF: + if (ftruncate(fd, offset) != 0) { + result = -errno; + error_setg_errno(errp, -result, "Could not resize file"); + } + return result; + default: + result = -ENOTSUP; + error_setg(errp, "Unsupported preallocation mode: %s", + PreallocMode_str(aiocb->prealloc)); + return result; + } + +out: + if (result < 0) { + if (ftruncate(fd, current_length) < 0) { + error_report("Failed to restore old file length: %s", + strerror(errno)); + } + } + + g_free(buf); + return result; +} + static int aio_worker(void *arg) { RawPosixAIOData *aiocb = arg; @@ -1582,6 +1707,9 @@ static int aio_worker(void *arg) case QEMU_AIO_COPY_RANGE: ret = handle_aiocb_copy_range(aiocb); break; + case QEMU_AIO_TRUNCATE: + ret = handle_aiocb_truncate(aiocb); + break; default: fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); ret = -EINVAL; @@ -1627,31 +1755,6 @@ static inline int paio_submit_co(BlockDriverState *bs, int fd, return paio_submit_co_full(bs, fd, offset, -1, 0, qiov, bytes, type); } -static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd, - int64_t offset, QEMUIOVector *qiov, int bytes, - BlockCompletionFunc *cb, void *opaque, int type) -{ - RawPosixAIOData *acb = g_new(RawPosixAIOData, 1); - ThreadPool *pool; - - acb->bs = bs; - acb->aio_type = type; - acb->aio_fildes = fd; - - acb->aio_nbytes = bytes; - acb->aio_offset = offset; - - if (qiov) { - acb->aio_iov = qiov->iov; - acb->aio_niov = qiov->niov; - assert(qiov->size == acb->aio_nbytes); - } - - trace_paio_submit(acb, opaque, offset, bytes, type); - pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); - return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque); -} - static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int type) { @@ -1718,15 +1821,17 @@ static void raw_aio_unplug(BlockDriverState *bs) #endif } -static BlockAIOCB *raw_aio_flush(BlockDriverState *bs, - BlockCompletionFunc *cb, void *opaque) +static int raw_co_flush_to_disk(BlockDriverState *bs) { BDRVRawState *s = bs->opaque; + int ret; - if (fd_open(bs) < 0) - return NULL; + ret = fd_open(bs); + if (ret < 0) { + return ret; + } - return paio_submit(bs, s->fd, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH); + return paio_submit_co(bs, s->fd, 0, NULL, 0, QEMU_AIO_FLUSH); } static void raw_aio_attach_aio_context(BlockDriverState *bs, @@ -1765,121 +1870,29 @@ static void raw_close(BlockDriverState *bs) * * Returns: 0 on success, -errno on failure. */ -static int raw_regular_truncate(int fd, int64_t offset, PreallocMode prealloc, - Error **errp) +static int coroutine_fn +raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset, + PreallocMode prealloc, Error **errp) { - int result = 0; - int64_t current_length = 0; - char *buf = NULL; - struct stat st; + RawPosixAIOData *acb = g_new(RawPosixAIOData, 1); + ThreadPool *pool; - if (fstat(fd, &st) < 0) { - result = -errno; - error_setg_errno(errp, -result, "Could not stat file"); - return result; - } + *acb = (RawPosixAIOData) { + .bs = bs, + .aio_fildes = fd, + .aio_type = QEMU_AIO_TRUNCATE, + .aio_offset = offset, + .prealloc = prealloc, + .errp = errp, + }; - current_length = st.st_size; - if (current_length > offset && prealloc != PREALLOC_MODE_OFF) { - error_setg(errp, "Cannot use preallocation for shrinking files"); - return -ENOTSUP; - } - - switch (prealloc) { -#ifdef CONFIG_POSIX_FALLOCATE - case PREALLOC_MODE_FALLOC: - /* - * Truncating before posix_fallocate() makes it about twice slower on - * file systems that do not support fallocate(), trying to check if a - * block is allocated before allocating it, so don't do that here. - */ - if (offset != current_length) { - result = -posix_fallocate(fd, current_length, offset - current_length); - if (result != 0) { - /* posix_fallocate() doesn't set errno. */ - error_setg_errno(errp, -result, - "Could not preallocate new data"); - } - } else { - result = 0; - } - goto out; -#endif - case PREALLOC_MODE_FULL: - { - int64_t num = 0, left = offset - current_length; - off_t seek_result; - - /* - * Knowing the final size from the beginning could allow the file - * system driver to do less allocations and possibly avoid - * fragmentation of the file. - */ - if (ftruncate(fd, offset) != 0) { - result = -errno; - error_setg_errno(errp, -result, "Could not resize file"); - goto out; - } - - buf = g_malloc0(65536); - - seek_result = lseek(fd, current_length, SEEK_SET); - if (seek_result < 0) { - result = -errno; - error_setg_errno(errp, -result, - "Failed to seek to the old end of file"); - goto out; - } - - while (left > 0) { - num = MIN(left, 65536); - result = write(fd, buf, num); - if (result < 0) { - result = -errno; - error_setg_errno(errp, -result, - "Could not write zeros for preallocation"); - goto out; - } - left -= result; - } - if (result >= 0) { - result = fsync(fd); - if (result < 0) { - result = -errno; - error_setg_errno(errp, -result, - "Could not flush file to disk"); - goto out; - } - } - goto out; - } - case PREALLOC_MODE_OFF: - if (ftruncate(fd, offset) != 0) { - result = -errno; - error_setg_errno(errp, -result, "Could not resize file"); - } - return result; - default: - result = -ENOTSUP; - error_setg(errp, "Unsupported preallocation mode: %s", - PreallocMode_str(prealloc)); - return result; - } - -out: - if (result < 0) { - if (ftruncate(fd, current_length) < 0) { - error_report("Failed to restore old file length: %s", - strerror(errno)); - } - } - - g_free(buf); - return result; + /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */ + pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); + return thread_pool_submit_co(pool, aio_worker, acb); } -static int raw_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVRawState *s = bs->opaque; struct stat st; @@ -1892,7 +1905,7 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset, } if (S_ISREG(st.st_mode)) { - return raw_regular_truncate(s->fd, offset, prealloc, errp); + return raw_regular_truncate(bs, s->fd, offset, prealloc, errp); } if (prealloc != PREALLOC_MODE_OFF) { @@ -2094,7 +2107,8 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs) return (int64_t)st.st_blocks * 512; } -static int raw_co_create(BlockdevCreateOptions *options, Error **errp) +static int coroutine_fn +raw_co_create(BlockdevCreateOptions *options, Error **errp) { BlockdevCreateOptionsFile *file_opts; int fd; @@ -2146,7 +2160,7 @@ static int raw_co_create(BlockdevCreateOptions *options, Error **errp) } /* Clear the file by truncating it to 0 */ - result = raw_regular_truncate(fd, 0, PREALLOC_MODE_OFF, errp); + result = raw_regular_truncate(NULL, fd, 0, PREALLOC_MODE_OFF, errp); if (result < 0) { goto out_close; } @@ -2168,8 +2182,8 @@ static int raw_co_create(BlockdevCreateOptions *options, Error **errp) /* Resize and potentially preallocate the file to the desired * final size */ - result = raw_regular_truncate(fd, file_opts->size, file_opts->preallocation, - errp); + result = raw_regular_truncate(NULL, fd, file_opts->size, + file_opts->preallocation, errp); if (result < 0) { goto out_close; } @@ -2490,14 +2504,12 @@ static void coroutine_fn raw_co_invalidate_cache(BlockDriverState *bs, #endif /* !__linux__ */ } -static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes, - BlockCompletionFunc *cb, void *opaque) +static coroutine_fn int +raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) { BDRVRawState *s = bs->opaque; - return paio_submit(bs, s->fd, offset, NULL, bytes, - cb, opaque, QEMU_AIO_DISCARD); + return paio_submit_co(bs, s->fd, offset, NULL, bytes, QEMU_AIO_DISCARD); } static int coroutine_fn raw_co_pwrite_zeroes( @@ -2616,8 +2628,8 @@ BlockDriver bdrv_file = { .bdrv_co_preadv = raw_co_preadv, .bdrv_co_pwritev = raw_co_pwritev, - .bdrv_aio_flush = raw_aio_flush, - .bdrv_aio_pdiscard = raw_aio_pdiscard, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, + .bdrv_co_pdiscard = raw_co_pdiscard, .bdrv_co_copy_range_from = raw_co_copy_range_from, .bdrv_co_copy_range_to = raw_co_copy_range_to, .bdrv_refresh_limits = raw_refresh_limits, @@ -2625,7 +2637,7 @@ BlockDriver bdrv_file = { .bdrv_io_unplug = raw_aio_unplug, .bdrv_attach_aio_context = raw_aio_attach_aio_context, - .bdrv_truncate = raw_truncate, + .bdrv_co_truncate = raw_co_truncate, .bdrv_getlength = raw_getlength, .bdrv_get_info = raw_get_info, .bdrv_get_allocated_file_size @@ -2983,17 +2995,18 @@ static int fd_open(BlockDriverState *bs) return -EIO; } -static coroutine_fn BlockAIOCB *hdev_aio_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes, - BlockCompletionFunc *cb, void *opaque) +static coroutine_fn int +hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) { BDRVRawState *s = bs->opaque; + int ret; - if (fd_open(bs) < 0) { - return NULL; + ret = fd_open(bs); + if (ret < 0) { + return ret; } - return paio_submit(bs, s->fd, offset, NULL, bytes, - cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV); + return paio_submit_co(bs, s->fd, offset, NULL, bytes, + QEMU_AIO_DISCARD | QEMU_AIO_BLKDEV); } static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, @@ -3097,15 +3110,15 @@ static BlockDriver bdrv_host_device = { .bdrv_co_preadv = raw_co_preadv, .bdrv_co_pwritev = raw_co_pwritev, - .bdrv_aio_flush = raw_aio_flush, - .bdrv_aio_pdiscard = hdev_aio_pdiscard, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, + .bdrv_co_pdiscard = hdev_co_pdiscard, .bdrv_co_copy_range_from = raw_co_copy_range_from, .bdrv_co_copy_range_to = raw_co_copy_range_to, .bdrv_refresh_limits = raw_refresh_limits, .bdrv_io_plug = raw_aio_plug, .bdrv_io_unplug = raw_aio_unplug, - .bdrv_truncate = raw_truncate, + .bdrv_co_truncate = raw_co_truncate, .bdrv_getlength = raw_getlength, .bdrv_get_info = raw_get_info, .bdrv_get_allocated_file_size @@ -3222,12 +3235,12 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_co_preadv = raw_co_preadv, .bdrv_co_pwritev = raw_co_pwritev, - .bdrv_aio_flush = raw_aio_flush, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, .bdrv_refresh_limits = raw_refresh_limits, .bdrv_io_plug = raw_aio_plug, .bdrv_io_unplug = raw_aio_unplug, - .bdrv_truncate = raw_truncate, + .bdrv_co_truncate = raw_co_truncate, .bdrv_getlength = raw_getlength, .has_variable_length = true, .bdrv_get_allocated_file_size @@ -3352,12 +3365,12 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_co_preadv = raw_co_preadv, .bdrv_co_pwritev = raw_co_pwritev, - .bdrv_aio_flush = raw_aio_flush, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, .bdrv_refresh_limits = raw_refresh_limits, .bdrv_io_plug = raw_aio_plug, .bdrv_io_unplug = raw_aio_unplug, - .bdrv_truncate = raw_truncate, + .bdrv_co_truncate = raw_co_truncate, .bdrv_getlength = raw_getlength, .has_variable_length = true, .bdrv_get_allocated_file_size diff --git a/block/file-win32.c b/block/file-win32.c index 3c67db4336..0411fe80fd 100644 --- a/block/file-win32.c +++ b/block/file-win32.c @@ -467,8 +467,8 @@ static void raw_close(BlockDriverState *bs) } } -static int raw_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVRawState *s = bs->opaque; LONG low, high; @@ -640,7 +640,7 @@ BlockDriver bdrv_file = { .bdrv_aio_pwritev = raw_aio_pwritev, .bdrv_aio_flush = raw_aio_flush, - .bdrv_truncate = raw_truncate, + .bdrv_co_truncate = raw_co_truncate, .bdrv_getlength = raw_getlength, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, diff --git a/block/gluster.c b/block/gluster.c index b5fe7f3e87..a4e1c8ecd8 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -1177,8 +1177,10 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs, return acb.ret; } -static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static coroutine_fn int qemu_gluster_co_truncate(BlockDriverState *bs, + int64_t offset, + PreallocMode prealloc, + Error **errp) { BDRVGlusterState *s = bs->opaque; return qemu_gluster_do_truncate(s->fd, offset, prealloc, errp); @@ -1499,7 +1501,7 @@ static BlockDriver bdrv_gluster = { .bdrv_co_create_opts = qemu_gluster_co_create_opts, .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, - .bdrv_truncate = qemu_gluster_truncate, + .bdrv_co_truncate = qemu_gluster_co_truncate, .bdrv_co_readv = qemu_gluster_co_readv, .bdrv_co_writev = qemu_gluster_co_writev, .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, @@ -1528,7 +1530,7 @@ static BlockDriver bdrv_gluster_tcp = { .bdrv_co_create_opts = qemu_gluster_co_create_opts, .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, - .bdrv_truncate = qemu_gluster_truncate, + .bdrv_co_truncate = qemu_gluster_co_truncate, .bdrv_co_readv = qemu_gluster_co_readv, .bdrv_co_writev = qemu_gluster_co_writev, .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, @@ -1557,7 +1559,7 @@ static BlockDriver bdrv_gluster_unix = { .bdrv_co_create_opts = qemu_gluster_co_create_opts, .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, - .bdrv_truncate = qemu_gluster_truncate, + .bdrv_co_truncate = qemu_gluster_co_truncate, .bdrv_co_readv = qemu_gluster_co_readv, .bdrv_co_writev = qemu_gluster_co_writev, .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, @@ -1592,7 +1594,7 @@ static BlockDriver bdrv_gluster_rdma = { .bdrv_co_create_opts = qemu_gluster_co_create_opts, .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, - .bdrv_truncate = qemu_gluster_truncate, + .bdrv_co_truncate = qemu_gluster_co_truncate, .bdrv_co_readv = qemu_gluster_co_readv, .bdrv_co_writev = qemu_gluster_co_writev, .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, diff --git a/block/io.c b/block/io.c index ef4fedd364..7035b78a20 100644 --- a/block/io.c +++ b/block/io.c @@ -1429,24 +1429,6 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, return ret; } -static int coroutine_fn bdrv_co_do_readv(BdrvChild *child, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, - BdrvRequestFlags flags) -{ - if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { - return -EINVAL; - } - - return bdrv_co_preadv(child, sector_num << BDRV_SECTOR_BITS, - nb_sectors << BDRV_SECTOR_BITS, qiov, flags); -} - -int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) -{ - return bdrv_co_do_readv(child, sector_num, nb_sectors, qiov, 0); -} - static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, BdrvRequestFlags flags) { @@ -1889,24 +1871,6 @@ out: return ret; } -static int coroutine_fn bdrv_co_do_writev(BdrvChild *child, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, - BdrvRequestFlags flags) -{ - if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { - return -EINVAL; - } - - return bdrv_co_pwritev(child, sector_num << BDRV_SECTOR_BITS, - nb_sectors << BDRV_SECTOR_BITS, qiov, flags); -} - -int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) -{ - return bdrv_co_do_writev(child, sector_num, nb_sectors, qiov, 0); -} - int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, int bytes, BdrvRequestFlags flags) { @@ -2932,6 +2896,9 @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src, BdrvRequestFlags flags, bool recurse_src) { + BdrvTrackedRequest src_req, dst_req; + BlockDriverState *src_bs = src->bs; + BlockDriverState *dst_bs = dst->bs; int ret; if (!src || !dst || !src->bs || !dst->bs) { @@ -2955,17 +2922,31 @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src, || src->bs->encrypted || dst->bs->encrypted) { return -ENOTSUP; } + bdrv_inc_in_flight(src_bs); + bdrv_inc_in_flight(dst_bs); + tracked_request_begin(&src_req, src_bs, src_offset, + bytes, BDRV_TRACKED_READ); + tracked_request_begin(&dst_req, dst_bs, dst_offset, + bytes, BDRV_TRACKED_WRITE); + + wait_serialising_requests(&src_req); + wait_serialising_requests(&dst_req); if (recurse_src) { - return src->bs->drv->bdrv_co_copy_range_from(src->bs, - src, src_offset, - dst, dst_offset, - bytes, flags); + ret = src->bs->drv->bdrv_co_copy_range_from(src->bs, + src, src_offset, + dst, dst_offset, + bytes, flags); } else { - return dst->bs->drv->bdrv_co_copy_range_to(dst->bs, - src, src_offset, - dst, dst_offset, - bytes, flags); + ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs, + src, src_offset, + dst, dst_offset, + bytes, flags); } + tracked_request_end(&src_req); + tracked_request_end(&dst_req); + bdrv_dec_in_flight(src_bs); + bdrv_dec_in_flight(dst_bs); + return ret; } /* Copy range from @src to @dst. @@ -2996,27 +2977,141 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset, BdrvChild *dst, uint64_t dst_offset, uint64_t bytes, BdrvRequestFlags flags) { - BdrvTrackedRequest src_req, dst_req; - BlockDriverState *src_bs = src->bs; - BlockDriverState *dst_bs = dst->bs; + return bdrv_co_copy_range_from(src, src_offset, + dst, dst_offset, + bytes, flags); +} + +static void bdrv_parent_cb_resize(BlockDriverState *bs) +{ + BdrvChild *c; + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (c->role->resize) { + c->role->resize(c); + } + } +} + +/** + * Truncate file to 'offset' bytes (needed only for file protocols) + */ +int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, + PreallocMode prealloc, Error **errp) +{ + BlockDriverState *bs = child->bs; + BlockDriver *drv = bs->drv; + BdrvTrackedRequest req; + int64_t old_size, new_bytes; int ret; - bdrv_inc_in_flight(src_bs); - bdrv_inc_in_flight(dst_bs); - tracked_request_begin(&src_req, src_bs, src_offset, - bytes, BDRV_TRACKED_READ); - tracked_request_begin(&dst_req, dst_bs, dst_offset, - bytes, BDRV_TRACKED_WRITE); + assert(child->perm & BLK_PERM_RESIZE); - wait_serialising_requests(&src_req); - wait_serialising_requests(&dst_req); - ret = bdrv_co_copy_range_from(src, src_offset, - dst, dst_offset, - bytes, flags); + /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ + if (!drv) { + error_setg(errp, "No medium inserted"); + return -ENOMEDIUM; + } + if (offset < 0) { + error_setg(errp, "Image size cannot be negative"); + return -EINVAL; + } + + old_size = bdrv_getlength(bs); + if (old_size < 0) { + error_setg_errno(errp, -old_size, "Failed to get old image size"); + return old_size; + } + + if (offset > old_size) { + new_bytes = offset - old_size; + } else { + new_bytes = 0; + } + + bdrv_inc_in_flight(bs); + tracked_request_begin(&req, bs, offset, new_bytes, BDRV_TRACKED_TRUNCATE); + + /* If we are growing the image and potentially using preallocation for the + * new area, we need to make sure that no write requests are made to it + * concurrently or they might be overwritten by preallocation. */ + if (new_bytes) { + mark_request_serialising(&req, 1); + wait_serialising_requests(&req); + } + + if (!drv->bdrv_co_truncate) { + if (bs->file && drv->is_filter) { + ret = bdrv_co_truncate(bs->file, offset, prealloc, errp); + goto out; + } + error_setg(errp, "Image format driver does not support resize"); + ret = -ENOTSUP; + goto out; + } + if (bs->read_only) { + error_setg(errp, "Image is read-only"); + ret = -EACCES; + goto out; + } + + assert(!(bs->open_flags & BDRV_O_INACTIVE)); + + ret = drv->bdrv_co_truncate(bs, offset, prealloc, errp); + if (ret < 0) { + goto out; + } + ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not refresh total sector count"); + } else { + offset = bs->total_sectors * BDRV_SECTOR_SIZE; + } + bdrv_dirty_bitmap_truncate(bs, offset); + bdrv_parent_cb_resize(bs); + atomic_inc(&bs->write_gen); + +out: + tracked_request_end(&req); + bdrv_dec_in_flight(bs); - tracked_request_end(&src_req); - tracked_request_end(&dst_req); - bdrv_dec_in_flight(src_bs); - bdrv_dec_in_flight(dst_bs); return ret; } + +typedef struct TruncateCo { + BdrvChild *child; + int64_t offset; + PreallocMode prealloc; + Error **errp; + int ret; +} TruncateCo; + +static void coroutine_fn bdrv_truncate_co_entry(void *opaque) +{ + TruncateCo *tco = opaque; + tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->prealloc, + tco->errp); +} + +int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, + Error **errp) +{ + Coroutine *co; + TruncateCo tco = { + .child = child, + .offset = offset, + .prealloc = prealloc, + .errp = errp, + .ret = NOT_DONE, + }; + + if (qemu_in_coroutine()) { + /* Fast-path if already in coroutine context */ + bdrv_truncate_co_entry(&tco); + } else { + co = qemu_coroutine_create(bdrv_truncate_co_entry, &tco); + qemu_coroutine_enter(co); + BDRV_POLL_WHILE(child->bs, tco.ret == NOT_DONE); + } + + return tco.ret; +} diff --git a/block/iscsi.c b/block/iscsi.c index 9f00fb47a5..9beb06d498 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -2085,8 +2085,8 @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state) } } -static int iscsi_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { IscsiLun *iscsilun = bs->opaque; Error *local_err = NULL; @@ -2226,7 +2226,7 @@ static void iscsi_populate_target_desc(unsigned char *desc, IscsiLun *lun) desc[5] = (dd->designator_type & 0xF) | ((dd->association & 3) << 4); desc[7] = dd->designator_length; - memcpy(desc + 8, dd->designator, dd->designator_length); + memcpy(desc + 8, dd->designator, MIN(dd->designator_length, 20)); desc[28] = 0; desc[29] = (lun->block_size >> 16) & 0xFF; @@ -2431,7 +2431,7 @@ static BlockDriver bdrv_iscsi = { .bdrv_getlength = iscsi_getlength, .bdrv_get_info = iscsi_get_info, - .bdrv_truncate = iscsi_truncate, + .bdrv_co_truncate = iscsi_co_truncate, .bdrv_refresh_limits = iscsi_refresh_limits, .bdrv_co_block_status = iscsi_co_block_status, @@ -2468,7 +2468,7 @@ static BlockDriver bdrv_iser = { .bdrv_getlength = iscsi_getlength, .bdrv_get_info = iscsi_get_info, - .bdrv_truncate = iscsi_truncate, + .bdrv_co_truncate = iscsi_co_truncate, .bdrv_refresh_limits = iscsi_refresh_limits, .bdrv_co_block_status = iscsi_co_block_status, diff --git a/block/nfs.c b/block/nfs.c index 743ca0450e..eab1a2c408 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -743,8 +743,9 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs) return (task.ret < 0 ? task.ret : st.st_blocks * 512); } -static int nfs_file_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn +nfs_file_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { NFSClient *client = bs->opaque; int ret; @@ -873,7 +874,7 @@ static BlockDriver bdrv_nfs = { .bdrv_has_zero_init = nfs_has_zero_init, .bdrv_get_allocated_file_size = nfs_get_allocated_file_size, - .bdrv_truncate = nfs_file_truncate, + .bdrv_co_truncate = nfs_file_co_truncate, .bdrv_file_open = nfs_file_open, .bdrv_close = nfs_file_close, diff --git a/block/parallels.c b/block/parallels.c index fd215e202a..cc9445879d 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -227,14 +227,15 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, }; qemu_iovec_init_external(&qiov, &iov, 1); - ret = bdrv_co_readv(bs->backing, idx * s->tracks, nb_cow_sectors, - &qiov); + ret = bdrv_co_preadv(bs->backing, idx * s->tracks * BDRV_SECTOR_SIZE, + nb_cow_bytes, &qiov, 0); if (ret < 0) { qemu_vfree(iov.iov_base); return ret; } - ret = bdrv_co_writev(bs->file, s->data_end, nb_cow_sectors, &qiov); + ret = bdrv_co_pwritev(bs->file, s->data_end * BDRV_SECTOR_SIZE, + nb_cow_bytes, &qiov, 0); qemu_vfree(iov.iov_base); if (ret < 0) { return ret; @@ -340,7 +341,8 @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs, qemu_iovec_reset(&hd_qiov); qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes); - ret = bdrv_co_writev(bs->file, position, n, &hd_qiov); + ret = bdrv_co_pwritev(bs->file, position * BDRV_SECTOR_SIZE, nbytes, + &hd_qiov, 0); if (ret < 0) { break; } @@ -379,7 +381,8 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs, if (position < 0) { if (bs->backing) { - ret = bdrv_co_readv(bs->backing, sector_num, n, &hd_qiov); + ret = bdrv_co_preadv(bs->backing, sector_num * BDRV_SECTOR_SIZE, + nbytes, &hd_qiov, 0); if (ret < 0) { break; } @@ -387,7 +390,8 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs, qemu_iovec_memset(&hd_qiov, 0, 0, nbytes); } } else { - ret = bdrv_co_readv(bs->file, position, n, &hd_qiov); + ret = bdrv_co_preadv(bs->file, position * BDRV_SECTOR_SIZE, nbytes, + &hd_qiov, 0); if (ret < 0) { break; } diff --git a/block/qcow.c b/block/qcow.c index 5532731b9f..102d058d1c 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -70,7 +70,6 @@ typedef struct QCowHeader { typedef struct BDRVQcowState { int cluster_bits; int cluster_size; - int cluster_sectors; int l2_bits; int l2_size; unsigned int l1_size; @@ -203,9 +202,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, ret = -EINVAL; goto fail; } - qdict_del(encryptopts, "format"); - crypto_opts = block_crypto_open_opts_init( - Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp); + qdict_put_str(encryptopts, "format", "qcow"); + crypto_opts = block_crypto_open_opts_init(encryptopts, errp); if (!crypto_opts) { ret = -EINVAL; goto fail; @@ -236,7 +234,6 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, } s->cluster_bits = header.cluster_bits; s->cluster_size = 1 << s->cluster_bits; - s->cluster_sectors = 1 << (s->cluster_bits - 9); s->l2_bits = header.l2_bits; s->l2_size = 1 << s->l2_bits; bs->total_sectors = header.size / 512; @@ -346,8 +343,8 @@ static int qcow_reopen_prepare(BDRVReopenState *state, * * 0 to not allocate. * - * 1 to allocate a normal cluster (for sector indexes 'n_start' to - * 'n_end') + * 1 to allocate a normal cluster (for sector-aligned byte offsets 'n_start' + * to 'n_end' within the cluster) * * 2 to allocate a compressed cluster of size * 'compressed_size'. 'compressed_size' must be > 0 and < @@ -441,9 +438,10 @@ static int get_cluster_offset(BlockDriverState *bs, if (!allocate) return 0; BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC); + assert(QEMU_IS_ALIGNED(n_start | n_end, BDRV_SECTOR_SIZE)); /* allocate a new cluster */ if ((cluster_offset & QCOW_OFLAG_COMPRESSED) && - (n_end - n_start) < s->cluster_sectors) { + (n_end - n_start) < s->cluster_size) { /* if the cluster is already compressed, we must decompress it in the case it is not completely overwritten */ @@ -481,16 +479,15 @@ static int get_cluster_offset(BlockDriverState *bs, /* if encrypted, we must initialize the cluster content which won't be written */ if (bs->encrypted && - (n_end - n_start) < s->cluster_sectors) { - uint64_t start_sect; + (n_end - n_start) < s->cluster_size) { + uint64_t start_offset; assert(s->crypto); - start_sect = (offset & ~(s->cluster_size - 1)) >> 9; - for(i = 0; i < s->cluster_sectors; i++) { + start_offset = offset & ~(s->cluster_size - 1); + for (i = 0; i < s->cluster_size; i += BDRV_SECTOR_SIZE) { if (i < n_start || i >= n_end) { - memset(s->cluster_data, 0x00, 512); + memset(s->cluster_data, 0x00, BDRV_SECTOR_SIZE); if (qcrypto_block_encrypt(s->crypto, - (start_sect + i) * - BDRV_SECTOR_SIZE, + start_offset + i, s->cluster_data, BDRV_SECTOR_SIZE, NULL) < 0) { @@ -498,8 +495,9 @@ static int get_cluster_offset(BlockDriverState *bs, } BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); ret = bdrv_pwrite(bs->file, - cluster_offset + i * 512, - s->cluster_data, 512); + cluster_offset + i, + s->cluster_data, + BDRV_SECTOR_SIZE); if (ret < 0) { return ret; } @@ -613,11 +611,21 @@ static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) return 0; } -static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) +static void qcow_refresh_limits(BlockDriverState *bs, Error **errp) +{ + /* At least encrypted images require 512-byte alignment. Apply the + * limit universally, rather than just on encrypted images, as + * it's easier to let the block layer handle rounding than to + * audit this code further. */ + bs->bl.request_alignment = BDRV_SECTOR_SIZE; +} + +static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + int flags) { BDRVQcowState *s = bs->opaque; - int index_in_cluster; + int offset_in_cluster; int ret = 0, n; uint64_t cluster_offset; struct iovec hd_iov; @@ -625,6 +633,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, uint8_t *buf; void *orig_buf; + assert(!flags); if (qiov->niov > 1) { buf = orig_buf = qemu_try_blockalign(bs, qiov->size); if (buf == NULL) { @@ -637,36 +646,35 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, qemu_co_mutex_lock(&s->lock); - while (nb_sectors != 0) { + while (bytes != 0) { /* prepare next request */ - ret = get_cluster_offset(bs, sector_num << 9, - 0, 0, 0, 0, &cluster_offset); + ret = get_cluster_offset(bs, offset, 0, 0, 0, 0, &cluster_offset); if (ret < 0) { break; } - index_in_cluster = sector_num & (s->cluster_sectors - 1); - n = s->cluster_sectors - index_in_cluster; - if (n > nb_sectors) { - n = nb_sectors; + offset_in_cluster = offset & (s->cluster_size - 1); + n = s->cluster_size - offset_in_cluster; + if (n > bytes) { + n = bytes; } if (!cluster_offset) { if (bs->backing) { /* read from the base image */ hd_iov.iov_base = (void *)buf; - hd_iov.iov_len = n * 512; + hd_iov.iov_len = n; qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); qemu_co_mutex_unlock(&s->lock); /* qcow2 emits this on bs->file instead of bs->backing */ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); - ret = bdrv_co_readv(bs->backing, sector_num, n, &hd_qiov); + ret = bdrv_co_preadv(bs->backing, offset, n, &hd_qiov, 0); qemu_co_mutex_lock(&s->lock); if (ret < 0) { break; } } else { /* Note: in this case, no need to wait */ - memset(buf, 0, 512 * n); + memset(buf, 0, n); } } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { /* add AIO support for compressed blocks ? */ @@ -674,21 +682,19 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, ret = -EIO; break; } - memcpy(buf, - s->cluster_cache + index_in_cluster * 512, 512 * n); + memcpy(buf, s->cluster_cache + offset_in_cluster, n); } else { if ((cluster_offset & 511) != 0) { ret = -EIO; break; } hd_iov.iov_base = (void *)buf; - hd_iov.iov_len = n * 512; + hd_iov.iov_len = n; qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); qemu_co_mutex_unlock(&s->lock); BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); - ret = bdrv_co_readv(bs->file, - (cluster_offset >> 9) + index_in_cluster, - n, &hd_qiov); + ret = bdrv_co_preadv(bs->file, cluster_offset + offset_in_cluster, + n, &hd_qiov, 0); qemu_co_mutex_lock(&s->lock); if (ret < 0) { break; @@ -696,8 +702,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, if (bs->encrypted) { assert(s->crypto); if (qcrypto_block_decrypt(s->crypto, - sector_num * BDRV_SECTOR_SIZE, buf, - n * BDRV_SECTOR_SIZE, NULL) < 0) { + offset, buf, n, NULL) < 0) { ret = -EIO; break; } @@ -705,9 +710,9 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, } ret = 0; - nb_sectors -= n; - sector_num += n; - buf += n * 512; + bytes -= n; + offset += n; + buf += n; } qemu_co_mutex_unlock(&s->lock); @@ -720,12 +725,12 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, return ret; } -static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov, - int flags) +static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + int flags) { BDRVQcowState *s = bs->opaque; - int index_in_cluster; + int offset_in_cluster; uint64_t cluster_offset; int ret = 0, n; struct iovec hd_iov; @@ -751,16 +756,14 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, qemu_co_mutex_lock(&s->lock); - while (nb_sectors != 0) { - - index_in_cluster = sector_num & (s->cluster_sectors - 1); - n = s->cluster_sectors - index_in_cluster; - if (n > nb_sectors) { - n = nb_sectors; + while (bytes != 0) { + offset_in_cluster = offset & (s->cluster_size - 1); + n = s->cluster_size - offset_in_cluster; + if (n > bytes) { + n = bytes; } - ret = get_cluster_offset(bs, sector_num << 9, 1, 0, - index_in_cluster, - index_in_cluster + n, &cluster_offset); + ret = get_cluster_offset(bs, offset, 1, 0, offset_in_cluster, + offset_in_cluster + n, &cluster_offset); if (ret < 0) { break; } @@ -770,30 +773,28 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, } if (bs->encrypted) { assert(s->crypto); - if (qcrypto_block_encrypt(s->crypto, sector_num * BDRV_SECTOR_SIZE, - buf, n * BDRV_SECTOR_SIZE, NULL) < 0) { + if (qcrypto_block_encrypt(s->crypto, offset, buf, n, NULL) < 0) { ret = -EIO; break; } } hd_iov.iov_base = (void *)buf; - hd_iov.iov_len = n * 512; + hd_iov.iov_len = n; qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); qemu_co_mutex_unlock(&s->lock); BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); - ret = bdrv_co_writev(bs->file, - (cluster_offset >> 9) + index_in_cluster, - n, &hd_qiov); + ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster, + n, &hd_qiov, 0); qemu_co_mutex_lock(&s->lock); if (ret < 0) { break; } ret = 0; - nb_sectors -= n; - sector_num += n; - buf += n * 512; + bytes -= n; + offset += n; + buf += n; } qemu_co_mutex_unlock(&s->lock); @@ -1108,8 +1109,7 @@ qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, if (ret != Z_STREAM_END || out_len >= s->cluster_size) { /* could not compress: write normal cluster */ - ret = qcow_co_writev(bs, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS, qiov, 0); + ret = qcow_co_pwritev(bs, offset, bytes, qiov, 0); if (ret < 0) { goto fail; } @@ -1194,9 +1194,10 @@ static BlockDriver bdrv_qcow = { .bdrv_co_create_opts = qcow_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, .supports_backing = true, + .bdrv_refresh_limits = qcow_refresh_limits, - .bdrv_co_readv = qcow_co_readv, - .bdrv_co_writev = qcow_co_writev, + .bdrv_co_preadv = qcow_co_preadv, + .bdrv_co_pwritev = qcow_co_pwritev, .bdrv_co_block_status = qcow_co_block_status, .bdrv_make_empty = qcow_make_empty, diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 0d74584c9b..d37fe08b3d 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -994,6 +994,17 @@ err: return ret; } +/** + * Frees the allocated clusters because the request failed and they won't + * actually be linked. + */ +void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) +{ + BDRVQcow2State *s = bs->opaque; + qcow2_free_clusters(bs, m->alloc_offset, m->nb_clusters << s->cluster_bits, + QCOW2_DISCARD_NEVER); +} + /* * Returns the number of contiguous clusters that can be used for an allocating * write, but require COW to be performed (this includes yet unallocated space, diff --git a/block/qcow2.c b/block/qcow2.c index a3a3aa2a97..2f9e58e0c4 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1040,9 +1040,8 @@ static int qcow2_update_options_prepare(BlockDriverState *bs, ret = -EINVAL; goto fail; } - qdict_del(encryptopts, "format"); - r->crypto_opts = block_crypto_open_opts_init( - Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp); + qdict_put_str(encryptopts, "format", "qcow"); + r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp); break; case QCOW_CRYPT_LUKS: @@ -1053,9 +1052,8 @@ static int qcow2_update_options_prepare(BlockDriverState *bs, ret = -EINVAL; goto fail; } - qdict_del(encryptopts, "format"); - r->crypto_opts = block_crypto_open_opts_init( - Q_CRYPTO_BLOCK_FORMAT_LUKS, encryptopts, errp); + qdict_put_str(encryptopts, "format", "luks"); + r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp); break; default: @@ -1772,11 +1770,13 @@ static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs, while (l2meta != NULL) { QCowL2Meta *next; - if (!ret && link_l2) { + if (link_l2) { ret = qcow2_alloc_cluster_link_l2(bs, l2meta); if (ret) { goto out; } + } else { + qcow2_alloc_cluster_abort(bs, l2meta); } /* Take the request off the list of running requests */ @@ -2521,15 +2521,6 @@ static int qcow2_set_up_encryption(BlockDriverState *bs, return ret; } - -typedef struct PreallocCo { - BlockDriverState *bs; - uint64_t offset; - uint64_t new_length; - - int ret; -} PreallocCo; - /** * Preallocates metadata structures for data clusters between @offset (in the * guest disk) and @new_length (which is thus generally the new guest disk @@ -2537,21 +2528,15 @@ typedef struct PreallocCo { * * Returns: 0 on success, -errno on failure. */ -static void coroutine_fn preallocate_co(void *opaque) +static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, + uint64_t new_length) { - PreallocCo *params = opaque; - BlockDriverState *bs = params->bs; - uint64_t offset = params->offset; - uint64_t new_length = params->new_length; - BDRVQcow2State *s = bs->opaque; uint64_t bytes; uint64_t host_offset = 0; unsigned int cur_bytes; int ret; QCowL2Meta *meta; - qemu_co_mutex_lock(&s->lock); - assert(offset <= new_length); bytes = new_length - offset; @@ -2560,7 +2545,7 @@ static void coroutine_fn preallocate_co(void *opaque) ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, &host_offset, &meta); if (ret < 0) { - goto done; + return ret; } while (meta) { @@ -2570,7 +2555,7 @@ static void coroutine_fn preallocate_co(void *opaque) if (ret < 0) { qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters, QCOW2_DISCARD_NEVER); - goto done; + return ret; } /* There are no dependent requests, but we need to remove our @@ -2597,35 +2582,11 @@ static void coroutine_fn preallocate_co(void *opaque) ret = bdrv_pwrite(bs->file, (host_offset + cur_bytes) - 1, &data, 1); if (ret < 0) { - goto done; + return ret; } } - ret = 0; - -done: - qemu_co_mutex_unlock(&s->lock); - params->ret = ret; -} - -static int preallocate(BlockDriverState *bs, - uint64_t offset, uint64_t new_length) -{ - PreallocCo params = { - .bs = bs, - .offset = offset, - .new_length = new_length, - .ret = -EINPROGRESS, - }; - - if (qemu_in_coroutine()) { - preallocate_co(¶ms); - } else { - Coroutine *co = qemu_coroutine_create(preallocate_co, ¶ms); - bdrv_coroutine_enter(bs, co); - BDRV_POLL_WHILE(bs, params.ret == -EINPROGRESS); - } - return params.ret; + return 0; } /* qcow2_refcount_metadata_size: @@ -3041,7 +3002,11 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) /* And if we're supposed to preallocate metadata, do that now */ if (qcow2_opts->preallocation != PREALLOC_MODE_OFF) { - ret = preallocate(blk_bs(blk), 0, qcow2_opts->size); + BDRVQcow2State *s = blk_bs(blk)->opaque; + qemu_co_mutex_lock(&s->lock); + ret = preallocate_co(blk_bs(blk), 0, qcow2_opts->size); + qemu_co_mutex_unlock(&s->lock); + if (ret < 0) { error_setg_errno(errp, -ret, "Could not preallocate metadata"); goto out; @@ -3422,6 +3387,7 @@ qcow2_co_copy_range_to(BlockDriverState *bs, } bytes -= cur_bytes; + src_offset += cur_bytes; dst_offset += cur_bytes; } ret = 0; @@ -3437,8 +3403,8 @@ fail: return ret; } -static int qcow2_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVQcow2State *s = bs->opaque; uint64_t old_length; @@ -3458,17 +3424,21 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, return -EINVAL; } + qemu_co_mutex_lock(&s->lock); + /* cannot proceed if image has snapshots */ if (s->nb_snapshots) { error_setg(errp, "Can't resize an image which has snapshots"); - return -ENOTSUP; + ret = -ENOTSUP; + goto fail; } /* cannot proceed if image has bitmaps */ if (s->nb_bitmaps) { /* TODO: resize bitmaps in the image */ error_setg(errp, "Can't resize an image which has bitmaps"); - return -ENOTSUP; + ret = -ENOTSUP; + goto fail; } old_length = bs->total_sectors * 512; @@ -3479,7 +3449,8 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, if (prealloc != PREALLOC_MODE_OFF) { error_setg(errp, "Preallocation can't be used for shrinking an image"); - return -EINVAL; + ret = -EINVAL; + goto fail; } ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size), @@ -3488,40 +3459,42 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, QCOW2_DISCARD_ALWAYS, true); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to discard cropped clusters"); - return ret; + goto fail; } ret = qcow2_shrink_l1_table(bs, new_l1_size); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to reduce the number of L2 tables"); - return ret; + goto fail; } ret = qcow2_shrink_reftable(bs); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to discard unused refblocks"); - return ret; + goto fail; } old_file_size = bdrv_getlength(bs->file->bs); if (old_file_size < 0) { error_setg_errno(errp, -old_file_size, "Failed to inquire current file length"); - return old_file_size; + ret = old_file_size; + goto fail; } last_cluster = qcow2_get_last_cluster(bs, old_file_size); if (last_cluster < 0) { error_setg_errno(errp, -last_cluster, "Failed to find the last cluster"); - return last_cluster; + ret = last_cluster; + goto fail; } if ((last_cluster + 1) * s->cluster_size < old_file_size) { Error *local_err = NULL; - bdrv_truncate(bs->file, (last_cluster + 1) * s->cluster_size, - PREALLOC_MODE_OFF, &local_err); + bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size, + PREALLOC_MODE_OFF, &local_err); if (local_err) { warn_reportf_err(local_err, "Failed to truncate the tail of the image: "); @@ -3531,7 +3504,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, ret = qcow2_grow_l1_table(bs, new_l1_size, true); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to grow the L1 table"); - return ret; + goto fail; } } @@ -3540,10 +3513,10 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, break; case PREALLOC_MODE_METADATA: - ret = preallocate(bs, old_length, offset); + ret = preallocate_co(bs, old_length, offset); if (ret < 0) { error_setg_errno(errp, -ret, "Preallocation failed"); - return ret; + goto fail; } break; @@ -3559,7 +3532,8 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, if (old_file_size < 0) { error_setg_errno(errp, -old_file_size, "Failed to inquire current file length"); - return old_file_size; + ret = old_file_size; + goto fail; } old_file_size = ROUND_UP(old_file_size, s->cluster_size); @@ -3589,7 +3563,8 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, if (allocation_start < 0) { error_setg_errno(errp, -allocation_start, "Failed to resize refcount structures"); - return allocation_start; + ret = allocation_start; + goto fail; } clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start, @@ -3597,7 +3572,8 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, if (clusters_allocated < 0) { error_setg_errno(errp, -clusters_allocated, "Failed to allocate data clusters"); - return -clusters_allocated; + ret = clusters_allocated; + goto fail; } assert(clusters_allocated == nb_new_data_clusters); @@ -3605,13 +3581,13 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, /* Allocate the data area */ new_file_size = allocation_start + nb_new_data_clusters * s->cluster_size; - ret = bdrv_truncate(bs->file, new_file_size, prealloc, errp); + ret = bdrv_co_truncate(bs->file, new_file_size, prealloc, errp); if (ret < 0) { error_prepend(errp, "Failed to resize underlying file: "); qcow2_free_clusters(bs, allocation_start, nb_new_data_clusters * s->cluster_size, QCOW2_DISCARD_OTHER); - return ret; + goto fail; } /* Create the necessary L2 entries */ @@ -3634,7 +3610,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, qcow2_free_clusters(bs, host_offset, nb_new_data_clusters * s->cluster_size, QCOW2_DISCARD_OTHER); - return ret; + goto fail; } guest_offset += nb_clusters * s->cluster_size; @@ -3650,11 +3626,11 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, if (prealloc != PREALLOC_MODE_OFF) { /* Flush metadata before actually changing the image size */ - ret = bdrv_flush(bs); + ret = qcow2_write_caches(bs); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to flush the preallocated area to disk"); - return ret; + goto fail; } } @@ -3664,11 +3640,14 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, &offset, sizeof(uint64_t)); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to update the image size"); - return ret; + goto fail; } s->l1_vm_state_index = new_l1_size; - return 0; + ret = 0; +fail: + qemu_co_mutex_unlock(&s->lock); + return ret; } /* XXX: put compressed sectors first, then all the cluster aligned @@ -3692,7 +3671,8 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, if (cluster_offset < 0) { return cluster_offset; } - return bdrv_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF, NULL); + return bdrv_co_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF, + NULL); } if (offset_into_cluster(s, offset)) { @@ -4696,7 +4676,7 @@ BlockDriver bdrv_qcow2 = { .bdrv_co_pdiscard = qcow2_co_pdiscard, .bdrv_co_copy_range_from = qcow2_co_copy_range_from, .bdrv_co_copy_range_to = qcow2_co_copy_range_to, - .bdrv_truncate = qcow2_truncate, + .bdrv_co_truncate = qcow2_co_truncate, .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed, .bdrv_make_empty = qcow2_make_empty, diff --git a/block/qcow2.h b/block/qcow2.h index 01b5250415..1c9c0d3631 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -614,6 +614,7 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, int compressed_size); int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); +void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m); int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, uint64_t bytes, enum qcow2_discard_type type, bool full_discard); diff --git a/block/qed.c b/block/qed.c index 2363814538..689ea9d4d5 100644 --- a/block/qed.c +++ b/block/qed.c @@ -1467,8 +1467,10 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, QED_AIOCB_WRITE | QED_AIOCB_ZERO); } -static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn bdrv_qed_co_truncate(BlockDriverState *bs, + int64_t offset, + PreallocMode prealloc, + Error **errp) { BDRVQEDState *s = bs->opaque; uint64_t old_image_size; @@ -1678,7 +1680,7 @@ static BlockDriver bdrv_qed = { .bdrv_co_readv = bdrv_qed_co_readv, .bdrv_co_writev = bdrv_qed_co_writev, .bdrv_co_pwrite_zeroes = bdrv_qed_co_pwrite_zeroes, - .bdrv_truncate = bdrv_qed_truncate, + .bdrv_co_truncate = bdrv_qed_co_truncate, .bdrv_getlength = bdrv_qed_getlength, .bdrv_get_info = bdrv_qed_get_info, .bdrv_refresh_limits = bdrv_qed_refresh_limits, diff --git a/block/raw-format.c b/block/raw-format.c index f2e468df6f..b78da564d4 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -366,8 +366,8 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) } } -static int raw_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVRawState *s = bs->opaque; @@ -383,7 +383,7 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset, s->size = offset; offset += s->offset; - return bdrv_truncate(bs->file, offset, prealloc, errp); + return bdrv_co_truncate(bs->file, offset, prealloc, errp); } static void raw_eject(BlockDriverState *bs, bool eject_flag) @@ -545,7 +545,7 @@ BlockDriver bdrv_raw = { .bdrv_co_block_status = &raw_co_block_status, .bdrv_co_copy_range_from = &raw_co_copy_range_from, .bdrv_co_copy_range_to = &raw_co_copy_range_to, - .bdrv_truncate = &raw_truncate, + .bdrv_co_truncate = &raw_co_truncate, .bdrv_getlength = &raw_getlength, .has_variable_length = true, .bdrv_measure = &raw_measure, diff --git a/block/rbd.c b/block/rbd.c index f2c6965418..ca8e5bbace 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -990,8 +990,10 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs) return info.size; } -static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, + int64_t offset, + PreallocMode prealloc, + Error **errp) { BDRVRBDState *s = bs->opaque; int r; @@ -1184,7 +1186,7 @@ static BlockDriver bdrv_rbd = { .bdrv_get_info = qemu_rbd_getinfo, .create_opts = &qemu_rbd_create_opts, .bdrv_getlength = qemu_rbd_getlength, - .bdrv_truncate = qemu_rbd_truncate, + .bdrv_co_truncate = qemu_rbd_co_truncate, .protocol_name = "rbd", .bdrv_aio_preadv = qemu_rbd_aio_preadv, diff --git a/block/replication.c b/block/replication.c index 826db7b304..6349d6958e 100644 --- a/block/replication.c +++ b/block/replication.c @@ -246,13 +246,14 @@ static coroutine_fn int replication_co_readv(BlockDriverState *bs, backup_cow_request_begin(&req, child->bs->job, sector_num * BDRV_SECTOR_SIZE, remaining_bytes); - ret = bdrv_co_readv(bs->file, sector_num, remaining_sectors, - qiov); + ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE, + remaining_bytes, qiov, 0); backup_cow_request_end(&req); goto out; } - ret = bdrv_co_readv(bs->file, sector_num, remaining_sectors, qiov); + ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE, + remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0); out: return replication_return_value(s, ret); } @@ -279,8 +280,8 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs, } if (ret == 0) { - ret = bdrv_co_writev(top, sector_num, - remaining_sectors, qiov); + ret = bdrv_co_pwritev(top, sector_num * BDRV_SECTOR_SIZE, + remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0); return replication_return_value(s, ret); } @@ -306,7 +307,8 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs, qemu_iovec_concat(&hd_qiov, qiov, bytes_done, count); target = ret ? top : base; - ret = bdrv_co_writev(target, sector_num, n, &hd_qiov); + ret = bdrv_co_pwritev(target, sector_num * BDRV_SECTOR_SIZE, + n * BDRV_SECTOR_SIZE, &hd_qiov, 0); if (ret < 0) { goto out1; } diff --git a/block/sheepdog.c b/block/sheepdog.c index 665b1763eb..b229a664d9 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -2292,8 +2292,8 @@ static int64_t sd_getlength(BlockDriverState *bs) return s->inode.vdi_size; } -static int sd_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVSheepdogState *s = bs->opaque; int ret, fd; @@ -2609,7 +2609,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, assert(!flags); if (offset > s->inode.vdi_size) { - ret = sd_truncate(bs, offset, PREALLOC_MODE_OFF, NULL); + ret = sd_co_truncate(bs, offset, PREALLOC_MODE_OFF, NULL); if (ret < 0) { return ret; } @@ -3231,7 +3231,7 @@ static BlockDriver bdrv_sheepdog = { .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_getlength = sd_getlength, .bdrv_get_allocated_file_size = sd_get_allocated_file_size, - .bdrv_truncate = sd_truncate, + .bdrv_co_truncate = sd_co_truncate, .bdrv_co_readv = sd_co_readv, .bdrv_co_writev = sd_co_writev, @@ -3268,7 +3268,7 @@ static BlockDriver bdrv_sheepdog_tcp = { .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_getlength = sd_getlength, .bdrv_get_allocated_file_size = sd_get_allocated_file_size, - .bdrv_truncate = sd_truncate, + .bdrv_co_truncate = sd_co_truncate, .bdrv_co_readv = sd_co_readv, .bdrv_co_writev = sd_co_writev, @@ -3305,7 +3305,7 @@ static BlockDriver bdrv_sheepdog_unix = { .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_getlength = sd_getlength, .bdrv_get_allocated_file_size = sd_get_allocated_file_size, - .bdrv_truncate = sd_truncate, + .bdrv_co_truncate = sd_co_truncate, .bdrv_co_readv = sd_co_readv, .bdrv_co_writev = sd_co_writev, diff --git a/block/ssh.c b/block/ssh.c index da7bbf73e2..7fbc27abdf 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -1243,8 +1243,8 @@ static int64_t ssh_getlength(BlockDriverState *bs) return length; } -static int ssh_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) +static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVSSHState *s = bs->opaque; @@ -1279,7 +1279,7 @@ static BlockDriver bdrv_ssh = { .bdrv_co_readv = ssh_co_readv, .bdrv_co_writev = ssh_co_writev, .bdrv_getlength = ssh_getlength, - .bdrv_truncate = ssh_truncate, + .bdrv_co_truncate = ssh_co_truncate, .bdrv_co_flush_to_disk = ssh_co_flush, .create_opts = &ssh_create_opts, }; diff --git a/block/vhdx.c b/block/vhdx.c index a677703a9e..4d0819750f 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1127,9 +1127,9 @@ static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num, break; case PAYLOAD_BLOCK_FULLY_PRESENT: qemu_co_mutex_unlock(&s->lock); - ret = bdrv_co_readv(bs->file, - sinfo.file_offset >> BDRV_SECTOR_BITS, - sinfo.sectors_avail, &hd_qiov); + ret = bdrv_co_preadv(bs->file, sinfo.file_offset, + sinfo.sectors_avail * BDRV_SECTOR_SIZE, + &hd_qiov, 0); qemu_co_mutex_lock(&s->lock); if (ret < 0) { goto exit; @@ -1349,9 +1349,9 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, } /* block exists, so we can just overwrite it */ qemu_co_mutex_unlock(&s->lock); - ret = bdrv_co_writev(bs->file, - sinfo.file_offset >> BDRV_SECTOR_BITS, - sectors_to_write, &hd_qiov); + ret = bdrv_co_pwritev(bs->file, sinfo.file_offset, + sectors_to_write * BDRV_SECTOR_SIZE, + &hd_qiov, 0); qemu_co_mutex_lock(&s->lock); if (ret < 0) { goto error_bat_restore; diff --git a/hw/block/nvme.c b/hw/block/nvme.c index d5bf95b79b..156ecf3c41 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -18,7 +18,8 @@ * Usage: add options: * -drive file=,if=none,id= * -device nvme,drive=,serial=,id=, \ - * cmb_size_mb= + * cmb_size_mb=, \ + * num_queues= * * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. @@ -1232,7 +1233,6 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) pcie_endpoint_cap_init(&n->parent_obj, 0x80); n->num_namespaces = 1; - n->num_queues = 64; n->reg_size = pow2ceil(0x1004 + 2 * (n->num_queues + 1) * 4); n->ns_size = bs_size / (uint64_t)n->num_namespaces; @@ -1341,6 +1341,7 @@ static Property nvme_props[] = { DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf), DEFINE_PROP_STRING("serial", NvmeCtrl, serial), DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0), + DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index 9646743a7d..5905f6bf29 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -226,6 +226,8 @@ static void scsi_qdev_unrealize(DeviceState *qdev, Error **errp) SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, int unit, bool removable, int bootindex, bool share_rw, + BlockdevOnError rerror, + BlockdevOnError werror, const char *serial, Error **errp) { const char *driver; @@ -262,6 +264,10 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, object_unparent(OBJECT(dev)); return NULL; } + + qdev_prop_set_enum(dev, "rerror", rerror); + qdev_prop_set_enum(dev, "werror", werror); + object_property_set_bool(OBJECT(dev), true, "realized", &err); if (err != NULL) { error_propagate(errp, err); @@ -285,7 +291,10 @@ void scsi_bus_legacy_handle_cmdline(SCSIBus *bus) } qemu_opts_loc_restore(dinfo->opts); scsi_bus_legacy_add_drive(bus, blk_by_legacy_dinfo(dinfo), - unit, false, -1, false, NULL, &error_fatal); + unit, false, -1, false, + BLOCKDEV_ON_ERROR_AUTO, + BLOCKDEV_ON_ERROR_AUTO, + NULL, &error_fatal); } loc_pop(&loc); } diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c index c99398b7f6..cd5551d94f 100644 --- a/hw/usb/dev-storage.c +++ b/hw/usb/dev-storage.c @@ -625,6 +625,7 @@ static void usb_msd_storage_realize(USBDevice *dev, Error **errp) &usb_msd_scsi_info_storage, NULL); scsi_dev = scsi_bus_legacy_add_drive(&s->bus, blk, 0, !!s->removable, s->conf.bootindex, s->conf.share_rw, + s->conf.rerror, s->conf.werror, dev->serial, errp); blk_unref(blk); @@ -671,6 +672,7 @@ static const VMStateDescription vmstate_usb_msd = { static Property msd_properties[] = { DEFINE_BLOCK_PROPERTIES(MSDState, conf), + DEFINE_BLOCK_ERROR_PROPERTIES(MSDState, conf), DEFINE_PROP_BIT("removable", MSDState, removable, 0, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/block/block.h b/include/block/block.h index b1d6fdb97a..2ffc1c64c6 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -285,10 +285,6 @@ int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes); int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov); int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, const void *buf, int count); -int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov); -int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov); /* * Efficiently zero a region of the disk image. Note that this is a regular * I/O request like read or write and should have a reasonable size. This @@ -300,8 +296,12 @@ int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, const char *backing_file); void bdrv_refresh_filename(BlockDriverState *bs); + +int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, + PreallocMode prealloc, Error **errp); int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, Error **errp); + int64_t bdrv_nb_sectors(BlockDriverState *bs); int64_t bdrv_getlength(BlockDriverState *bs); int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index 74646ed722..af71b414be 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -63,6 +63,7 @@ enum BdrvTrackedRequestType { BDRV_TRACKED_READ, BDRV_TRACKED_WRITE, BDRV_TRACKED_DISCARD, + BDRV_TRACKED_TRUNCATE, }; typedef struct BdrvTrackedRequest { @@ -289,8 +290,8 @@ struct BlockDriver { * bdrv_parse_filename. */ const char *protocol_name; - int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp); + int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp); int64_t (*bdrv_getlength)(BlockDriverState *bs); bool has_variable_length; @@ -1157,4 +1158,6 @@ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset, BdrvChild *dst, uint64_t dst_offset, uint64_t bytes, BdrvRequestFlags flags); +int refresh_total_sectors(BlockDriverState *bs, int64_t hint); + #endif /* BLOCK_INT_H */ diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h index 8d698ccd31..6799614e56 100644 --- a/include/block/raw-aio.h +++ b/include/block/raw-aio.h @@ -26,6 +26,7 @@ #define QEMU_AIO_DISCARD 0x0010 #define QEMU_AIO_WRITE_ZEROES 0x0020 #define QEMU_AIO_COPY_RANGE 0x0040 +#define QEMU_AIO_TRUNCATE 0x0080 #define QEMU_AIO_TYPE_MASK \ (QEMU_AIO_READ | \ QEMU_AIO_WRITE | \ @@ -33,7 +34,8 @@ QEMU_AIO_FLUSH | \ QEMU_AIO_DISCARD | \ QEMU_AIO_WRITE_ZEROES | \ - QEMU_AIO_COPY_RANGE) + QEMU_AIO_COPY_RANGE | \ + QEMU_AIO_TRUNCATE) /* AIO flags */ #define QEMU_AIO_MISALIGNED 0x1000 diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h index 21a3a6fec2..ee3a4118fb 100644 --- a/include/hw/scsi/scsi.h +++ b/include/hw/scsi/scsi.h @@ -155,6 +155,8 @@ static inline SCSIBus *scsi_bus_from_device(SCSIDevice *d) SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, int unit, bool removable, int bootindex, bool share_rw, + BlockdevOnError rerror, + BlockdevOnError werror, const char *serial, Error **errp); void scsi_bus_legacy_handle_cmdline(SCSIBus *bus); void scsi_legacy_handle_cmdline(void); diff --git a/qapi/job.json b/qapi/job.json index 9d074eb8d2..a121b615fb 100644 --- a/qapi/job.json +++ b/qapi/job.json @@ -104,7 +104,7 @@ # @id: The job identifier # @status: The new job status # -# Since: 2.13 +# Since: 3.0 ## { 'event': 'JOB_STATUS_CHANGE', 'data': { 'id': 'str', @@ -126,7 +126,7 @@ # # @id: The job identifier. # -# Since: 2.13 +# Since: 3.0 ## { 'command': 'job-pause', 'data': { 'id': 'str' } } @@ -140,7 +140,7 @@ # # @id : The job identifier. # -# Since: 2.13 +# Since: 3.0 ## { 'command': 'job-resume', 'data': { 'id': 'str' } } @@ -159,7 +159,7 @@ # # @id: The job identifier. # -# Since: 2.13 +# Since: 3.0 ## { 'command': 'job-cancel', 'data': { 'id': 'str' } } @@ -171,7 +171,7 @@ # # @id: The job identifier. # -# Since: 2.13 +# Since: 3.0 ## { 'command': 'job-complete', 'data': { 'id': 'str' } } @@ -187,7 +187,7 @@ # # @id: The job identifier. # -# Since: 2.13 +# Since: 3.0 ## { 'command': 'job-dismiss', 'data': { 'id': 'str' } } @@ -205,7 +205,7 @@ # @id: The identifier of any job in the transaction, or of a job that is not # part of any transaction. # -# Since: 2.13 +# Since: 3.0 ## { 'command': 'job-finalize', 'data': { 'id': 'str' } } @@ -237,7 +237,7 @@ # the reason for the job failure. It should not be parsed # by applications. # -# Since: 2.13 +# Since: 3.0 ## { 'struct': 'JobInfo', 'data': { 'id': 'str', 'type': 'JobType', 'status': 'JobStatus', @@ -251,6 +251,6 @@ # # Returns: a list with a @JobInfo for each active job # -# Since: 2.13 +# Since: 3.0 ## { 'command': 'query-jobs', 'returns': ['JobInfo'] } diff --git a/qobject/block-qdict.c b/qobject/block-qdict.c index 36129e7379..80c653013f 100644 --- a/qobject/block-qdict.c +++ b/qobject/block-qdict.c @@ -97,7 +97,7 @@ static void qdict_flatten_qdict(QDict *qdict, QDict *target, const char *prefix) const QDictEntry *entry, *next; QDict *dict_val; QList *list_val; - char *new_key; + char *key, *new_key; entry = qdict_first(qdict); @@ -106,10 +106,12 @@ static void qdict_flatten_qdict(QDict *qdict, QDict *target, const char *prefix) value = qdict_entry_value(entry); dict_val = qobject_to(QDict, value); list_val = qobject_to(QList, value); - new_key = NULL; if (prefix) { - new_key = g_strdup_printf("%s.%s", prefix, entry->key); + key = new_key = g_strdup_printf("%s.%s", prefix, entry->key); + } else { + key = entry->key; + new_key = NULL; } /* @@ -125,19 +127,17 @@ static void qdict_flatten_qdict(QDict *qdict, QDict *target, const char *prefix) * well advised not to modify them altogether.) */ if (dict_val && qdict_size(dict_val)) { - qdict_flatten_qdict(dict_val, target, - new_key ? new_key : entry->key); + qdict_flatten_qdict(dict_val, target, key); if (target == qdict) { qdict_del(qdict, entry->key); } } else if (list_val && !qlist_empty(list_val)) { - qdict_flatten_qlist(list_val, target, - new_key ? new_key : entry->key); + qdict_flatten_qlist(list_val, target, key); if (target == qdict) { qdict_del(qdict, entry->key); } } else if (target != qdict) { - qdict_put_obj(target, new_key, qobject_ref(value)); + qdict_put_obj(target, key, qobject_ref(value)); } g_free(new_key); diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026 index 7fadfbace5..582d254195 100755 --- a/tests/qemu-iotests/026 +++ b/tests/qemu-iotests/026 @@ -200,6 +200,23 @@ done done done +echo +echo === Avoid cluster leaks after temporary failure === +echo + +cat > "$TEST_DIR/blkdebug.conf" </dev exit 1 fi +echo "== Regression testing for copy offloading bug ==" + +_make_test_img 1M +TEST_IMG="$TEST_IMG.target" _make_test_img 1M +$QEMU_IO -c 'write -P 1 0 512k' -c 'write -P 2 512k 512k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c 'write -P 4 512k 512k' -c 'write -P 3 0 512k' "$TEST_IMG.target" | _filter_qemu_io +$QEMU_IMG convert -n -O $IMGFMT "$TEST_IMG" "$TEST_IMG.target" +$QEMU_IMG compare "$TEST_IMG" "$TEST_IMG.target" + echo "*** done" rm -f $seq.full status=0 diff --git a/tests/qemu-iotests/063.out b/tests/qemu-iotests/063.out index de1c99afd8..7b691b2c9e 100644 --- a/tests/qemu-iotests/063.out +++ b/tests/qemu-iotests/063.out @@ -7,4 +7,16 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 No errors were found on the image. == Testing conversion to a smaller file fails == Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2097152 +== Regression testing for copy offloading bug == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +Formatting 'TEST_DIR/t.IMGFMT.target', fmt=IMGFMT size=1048576 +wrote 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 524288/524288 bytes at offset 524288 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 524288/524288 bytes at offset 524288 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Images are identical. *** done