Block patches:
- One patch to make qcow2's discard-no-unref option do better what it is supposed to do (i.e. prevent fragmentation) - Two fixes for zoned requests -----BEGIN PGP SIGNATURE----- iQJGBAABCAAwFiEEy2LXoO44KeRfAE00ofpA0JgBnN8FAmVJHbgSHGhyZWl0ekBy ZWRoYXQuY29tAAoJEKH6QNCYAZzfLn4QAKxuUYZaXirv6K4U2tW4aAJtc5uESdwv WYhG7YU7MleBGCY0fRoih5thrPrzRLC8o1QhbRcA36+/PAZf4BYrJEfqLUdzuN5x 6Vb1n3NRUzPD1+VfL/B9hVZhFbtTOUZuxPGEqCoHAmqBaeKuYRT1bLZbtRtPVLSk 5eTMiyrpRMlBWc7O71eGKLqU4k0vAznwHBGf2Z93qWAsKcRZCwbAWYa7Q6rJ9jJ8 1jNsQuAk0p74/uGEpFhoEVrFEcV6pMbI4+jB9i0t9YYxT0tLIdIX1VUx+AHJfItk IF2stB6SFOaAy2W3Fn+0oJvz40aMLzg9VjEeTpGmdlKC67ZTYa6Obwzy5WNLPIap k7VUheUEe8qoKUtxQNxGLR/HKEJSFXyhU0lgAGxE1gl2xc1QFFFsrimpwFd3d37j 3PwfhjARHonf4ZXgsvtIjb7nG9seMZYO7Vht0OztJyW8c2XN5OFVPir9xLbd9VUg wZNGB8jAsHgj77+S/mRIwpP+laKL8wB7zYZ1mgFI98QJIYqL8tGdV/IiUhLljHzc XAmwekOhBMMbgHhliBy9zDuTy59+zZ0FoxZPn/JvBjqBAkEnz9EbhHxi2imQg+1d XSoLbx1X1yEbepWz8mCGiveLIPkt+3qMJuuQF76nURaA+nm3tCl/nKca6QLnVKzU 2QtPWS0qRmwd =5w7S -----END PGP SIGNATURE----- Merge tag 'pull-block-2023-11-06' of https://gitlab.com/hreitz/qemu into staging Block patches: - One patch to make qcow2's discard-no-unref option do better what it is supposed to do (i.e. 
prevent fragmentation) - Two fixes for zoned requests # -----BEGIN PGP SIGNATURE----- # # iQJGBAABCAAwFiEEy2LXoO44KeRfAE00ofpA0JgBnN8FAmVJHbgSHGhyZWl0ekBy # ZWRoYXQuY29tAAoJEKH6QNCYAZzfLn4QAKxuUYZaXirv6K4U2tW4aAJtc5uESdwv # WYhG7YU7MleBGCY0fRoih5thrPrzRLC8o1QhbRcA36+/PAZf4BYrJEfqLUdzuN5x # 6Vb1n3NRUzPD1+VfL/B9hVZhFbtTOUZuxPGEqCoHAmqBaeKuYRT1bLZbtRtPVLSk # 5eTMiyrpRMlBWc7O71eGKLqU4k0vAznwHBGf2Z93qWAsKcRZCwbAWYa7Q6rJ9jJ8 # 1jNsQuAk0p74/uGEpFhoEVrFEcV6pMbI4+jB9i0t9YYxT0tLIdIX1VUx+AHJfItk # IF2stB6SFOaAy2W3Fn+0oJvz40aMLzg9VjEeTpGmdlKC67ZTYa6Obwzy5WNLPIap # k7VUheUEe8qoKUtxQNxGLR/HKEJSFXyhU0lgAGxE1gl2xc1QFFFsrimpwFd3d37j # 3PwfhjARHonf4ZXgsvtIjb7nG9seMZYO7Vht0OztJyW8c2XN5OFVPir9xLbd9VUg # wZNGB8jAsHgj77+S/mRIwpP+laKL8wB7zYZ1mgFI98QJIYqL8tGdV/IiUhLljHzc # XAmwekOhBMMbgHhliBy9zDuTy59+zZ0FoxZPn/JvBjqBAkEnz9EbhHxi2imQg+1d # XSoLbx1X1yEbepWz8mCGiveLIPkt+3qMJuuQF76nURaA+nm3tCl/nKca6QLnVKzU # 2QtPWS0qRmwd # =5w7S # -----END PGP SIGNATURE----- # gpg: Signature made Tue 07 Nov 2023 01:09:12 HKT # gpg: using RSA key CB62D7A0EE3829E45F004D34A1FA40D098019CDF # gpg: issuer "hreitz@redhat.com" # gpg: Good signature from "Hanna Reitz <hreitz@redhat.com>" [unknown] # gpg: WARNING: The key's User ID is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: CB62 D7A0 EE38 29E4 5F00 4D34 A1FA 40D0 9801 9CDF * tag 'pull-block-2023-11-06' of https://gitlab.com/hreitz/qemu: file-posix: fix over-writing of returning zone_append offset block/file-posix: fix update_zones_wp() caller qcow2: keep reference on zeroize with discard-no-unref enabled Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
commit
80aaef96b1
@ -160,7 +160,6 @@ typedef struct BDRVRawState {
|
||||
bool has_write_zeroes:1;
|
||||
bool use_linux_aio:1;
|
||||
bool use_linux_io_uring:1;
|
||||
int64_t *offset; /* offset of zone append operation */
|
||||
int page_cache_inconsistent; /* errno from fdatasync failure */
|
||||
bool has_fallocate;
|
||||
bool needs_alignment;
|
||||
@ -2445,12 +2444,13 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
|
||||
return true;
|
||||
}
|
||||
|
||||
static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
|
||||
static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
|
||||
uint64_t bytes, QEMUIOVector *qiov, int type)
|
||||
{
|
||||
BDRVRawState *s = bs->opaque;
|
||||
RawPosixAIOData acb;
|
||||
int ret;
|
||||
uint64_t offset = *offset_ptr;
|
||||
|
||||
if (fd_open(bs) < 0)
|
||||
return -EIO;
|
||||
@ -2513,8 +2513,8 @@ out:
|
||||
uint64_t *wp = &wps->wp[offset / bs->bl.zone_size];
|
||||
if (!BDRV_ZT_IS_CONV(*wp)) {
|
||||
if (type & QEMU_AIO_ZONE_APPEND) {
|
||||
*s->offset = *wp;
|
||||
trace_zbd_zone_append_complete(bs, *s->offset
|
||||
*offset_ptr = *wp;
|
||||
trace_zbd_zone_append_complete(bs, *offset_ptr
|
||||
>> BDRV_SECTOR_BITS);
|
||||
}
|
||||
/* Advance the wp if needed */
|
||||
@ -2523,7 +2523,10 @@ out:
|
||||
}
|
||||
}
|
||||
} else {
|
||||
update_zones_wp(bs, s->fd, 0, 1);
|
||||
/*
|
||||
* write and zone append requests are not allowed to cross zone boundaries
|
||||
*/
|
||||
update_zones_wp(bs, s->fd, offset, 1);
|
||||
}
|
||||
|
||||
qemu_co_mutex_unlock(&wps->colock);
|
||||
@ -2536,14 +2539,14 @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
|
||||
int64_t bytes, QEMUIOVector *qiov,
|
||||
BdrvRequestFlags flags)
|
||||
{
|
||||
return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ);
|
||||
return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ);
|
||||
}
|
||||
|
||||
static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
|
||||
int64_t bytes, QEMUIOVector *qiov,
|
||||
BdrvRequestFlags flags)
|
||||
{
|
||||
return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
|
||||
return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE);
|
||||
}
|
||||
|
||||
static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
|
||||
@ -3470,7 +3473,7 @@ static int coroutine_fn raw_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
|
||||
len >> BDRV_SECTOR_BITS);
|
||||
ret = raw_thread_pool_submit(handle_aiocb_zone_mgmt, &acb);
|
||||
if (ret != 0) {
|
||||
update_zones_wp(bs, s->fd, offset, i);
|
||||
update_zones_wp(bs, s->fd, offset, nrz);
|
||||
error_report("ioctl %s failed %d", op_name, ret);
|
||||
return ret;
|
||||
}
|
||||
@ -3506,8 +3509,6 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
|
||||
int64_t zone_size_mask = bs->bl.zone_size - 1;
|
||||
int64_t iov_len = 0;
|
||||
int64_t len = 0;
|
||||
BDRVRawState *s = bs->opaque;
|
||||
s->offset = offset;
|
||||
|
||||
if (*offset & zone_size_mask) {
|
||||
error_report("sector offset %" PRId64 " is not aligned to zone size "
|
||||
@ -3528,7 +3529,7 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
|
||||
}
|
||||
|
||||
trace_zbd_zone_append(bs, *offset >> BDRV_SECTOR_BITS);
|
||||
return raw_co_prw(bs, *offset, len, qiov, QEMU_AIO_ZONE_APPEND);
|
||||
return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1983,7 +1983,7 @@ discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, uint64_t nb_clusters,
|
||||
/* If we keep the reference, pass on the discard still */
|
||||
bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
|
||||
s->cluster_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
|
||||
@ -2061,9 +2061,15 @@ zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
|
||||
QCow2ClusterType type = qcow2_get_cluster_type(bs, old_l2_entry);
|
||||
bool unmap = (type == QCOW2_CLUSTER_COMPRESSED) ||
|
||||
((flags & BDRV_REQ_MAY_UNMAP) && qcow2_cluster_is_allocated(type));
|
||||
uint64_t new_l2_entry = unmap ? 0 : old_l2_entry;
|
||||
bool keep_reference =
|
||||
(s->discard_no_unref && type != QCOW2_CLUSTER_COMPRESSED);
|
||||
uint64_t new_l2_entry = old_l2_entry;
|
||||
uint64_t new_l2_bitmap = old_l2_bitmap;
|
||||
|
||||
if (unmap && !keep_reference) {
|
||||
new_l2_entry = 0;
|
||||
}
|
||||
|
||||
if (has_subclusters(s)) {
|
||||
new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES;
|
||||
} else {
|
||||
@ -2081,9 +2087,17 @@ zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
|
||||
set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap);
|
||||
}
|
||||
|
||||
/* Then decrease the refcount */
|
||||
if (unmap) {
|
||||
qcow2_free_any_cluster(bs, old_l2_entry, QCOW2_DISCARD_REQUEST);
|
||||
if (!keep_reference) {
|
||||
/* Then decrease the refcount */
|
||||
qcow2_free_any_cluster(bs, old_l2_entry, QCOW2_DISCARD_REQUEST);
|
||||
} else if (s->discard_passthrough[QCOW2_DISCARD_REQUEST] &&
|
||||
(type == QCOW2_CLUSTER_NORMAL ||
|
||||
type == QCOW2_CLUSTER_ZERO_ALLOC)) {
|
||||
/* If we keep the reference, pass on the discard still */
|
||||
bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
|
||||
s->cluster_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3528,16 +3528,20 @@
|
||||
# @pass-discard-other: whether discard requests for the data source
|
||||
# should be issued on other occasions where a cluster gets freed
|
||||
#
|
||||
# @discard-no-unref: when enabled, discards from the guest will not
|
||||
# cause cluster allocations to be relinquished. This prevents
|
||||
# qcow2 fragmentation that would be caused by such discards.
|
||||
# Besides potential performance degradation, such fragmentation
|
||||
# can lead to increased allocation of clusters past the end of the
|
||||
# image file, resulting in image files whose file length can grow
|
||||
# much larger than their guest disk size would suggest. If image
|
||||
# file length is of concern (e.g. when storing qcow2 images
|
||||
# directly on block devices), you should consider enabling this
|
||||
# option. (since 8.1)
|
||||
# @discard-no-unref: when enabled, data clusters will remain
|
||||
# preallocated when they are no longer used, e.g. because they are
|
||||
# discarded or converted to zero clusters. As usual, whether the
|
||||
# old data is discarded or kept on the protocol level (i.e. in the
|
||||
# image file) depends on the setting of the pass-discard-request
|
||||
# option. Keeping the clusters preallocated prevents qcow2
|
||||
# fragmentation that would otherwise be caused by freeing and
|
||||
# re-allocating them later. Besides potential performance
|
||||
# degradation, such fragmentation can lead to increased allocation
|
||||
# of clusters past the end of the image file, resulting in image
|
||||
# files whose file length can grow much larger than their guest disk
|
||||
# size would suggest. If image file length is of concern (e.g. when
|
||||
# storing qcow2 images directly on block devices), you should
|
||||
# consider enabling this option. (since 8.1)
|
||||
#
|
||||
# @overlap-check: which overlap checks to perform for writes to the
|
||||
# image, defaults to 'cached' (since 2.2)
|
||||
|
@ -1457,9 +1457,13 @@ SRST
|
||||
(on/off; default: off)
|
||||
|
||||
``discard-no-unref``
|
||||
When enabled, discards from the guest will not cause cluster
|
||||
allocations to be relinquished. This prevents qcow2 fragmentation
|
||||
that would be caused by such discards. Besides potential
|
||||
When enabled, data clusters will remain preallocated when they are
|
||||
no longer used, e.g. because they are discarded or converted to
|
||||
zero clusters. As usual, whether the old data is discarded or kept
|
||||
on the protocol level (i.e. in the image file) depends on the
|
||||
setting of the pass-discard-request option. Keeping the clusters
|
||||
preallocated prevents qcow2 fragmentation that would otherwise be
|
||||
caused by freeing and re-allocating them later. Besides potential
|
||||
performance degradation, such fragmentation can lead to increased
|
||||
allocation of clusters past the end of the image file,
|
||||
resulting in image files whose file length can grow much larger
|
||||
|
Loading…
Reference in New Issue
Block a user