block: add BDRV_REQ_REGISTERED_BUF request flag
Block drivers may optimize I/O requests accessing buffers previously registered with bdrv_register_buf(). Checking whether all elements of a request's QEMUIOVector are within previously registered buffers is expensive, so we need a hint from the user to avoid costly checks. Add a BDRV_REQ_REGISTERED_BUF request flag to indicate that all QEMUIOVector elements in an I/O request are known to be within previously registered buffers. Always pass the flag through to driver read/write functions. There is little harm in passing the flag to a driver that does not use it. Passing the flag to drivers avoids changes across many block drivers. Filter drivers would need to explicitly support the flag and pass it through to their children when the children support it. That's a lot of code changes and it's hard to remember to do that everywhere, leading to silently reduced performance when the flag is accidentally dropped. The only problematic scenario with the approach in this patch is when a driver passes the flag through to internal I/O requests that don't use the same I/O buffer. In that case the hint may be set when it should actually be clear. This is a rare case though, so the risk is low. Some drivers have assert(!flags), which no longer works when BDRV_REQ_REGISTERED_BUF is passed in. These assertions aren't very useful anyway, since the functions are called almost exclusively by bdrv_driver_preadv/pwritev(); if we get flags handling right there, the assertions are not needed. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Message-id: 20221013185908.1297568-7-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
parent
98b3ddc78b
commit
e8b6535533
14
block.c
14
block.c
@ -1641,6 +1641,20 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
|
||||
goto open_failed;
|
||||
}
|
||||
|
||||
assert(!(bs->supported_read_flags & ~BDRV_REQ_MASK));
|
||||
assert(!(bs->supported_write_flags & ~BDRV_REQ_MASK));
|
||||
|
||||
/*
|
||||
* Always allow the BDRV_REQ_REGISTERED_BUF optimization hint. This saves
|
||||
* drivers that pass read/write requests through to a child the trouble of
|
||||
* declaring support explicitly.
|
||||
*
|
||||
* Drivers must not propagate this flag accidentally when they initiate I/O
|
||||
* to a bounce buffer. That case should be rare though.
|
||||
*/
|
||||
bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF;
|
||||
bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF;
|
||||
|
||||
ret = refresh_total_sectors(bs, bs->total_sectors);
|
||||
if (ret < 0) {
|
||||
error_setg_errno(errp, -ret, "Could not refresh total sector count");
|
||||
|
@ -235,8 +235,8 @@ blkverify_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
|
||||
qemu_iovec_init(&raw_qiov, qiov->niov);
|
||||
qemu_iovec_clone(&raw_qiov, qiov, buf);
|
||||
|
||||
ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov, flags,
|
||||
false);
|
||||
ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov,
|
||||
flags & ~BDRV_REQ_REGISTERED_BUF, false);
|
||||
|
||||
cmp_offset = qemu_iovec_compare(qiov, &raw_qiov);
|
||||
if (cmp_offset != -1) {
|
||||
|
@ -410,7 +410,6 @@ block_crypto_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
|
||||
uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
|
||||
uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
|
||||
|
||||
assert(!flags);
|
||||
assert(payload_offset < INT64_MAX);
|
||||
assert(QEMU_IS_ALIGNED(offset, sector_size));
|
||||
assert(QEMU_IS_ALIGNED(bytes, sector_size));
|
||||
@ -473,7 +472,8 @@ block_crypto_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
|
||||
uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
|
||||
uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
|
||||
|
||||
assert(!(flags & ~BDRV_REQ_FUA));
|
||||
flags &= ~BDRV_REQ_REGISTERED_BUF;
|
||||
|
||||
assert(payload_offset < INT64_MAX);
|
||||
assert(QEMU_IS_ALIGNED(offset, sector_size));
|
||||
assert(QEMU_IS_ALIGNED(bytes, sector_size));
|
||||
|
@ -2133,7 +2133,6 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
|
||||
int64_t bytes, QEMUIOVector *qiov,
|
||||
BdrvRequestFlags flags)
|
||||
{
|
||||
assert(flags == 0);
|
||||
return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
|
||||
}
|
||||
|
||||
|
@ -1236,7 +1236,6 @@ static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
|
||||
QEMUIOVector *qiov,
|
||||
int flags)
|
||||
{
|
||||
assert(!flags);
|
||||
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
|
||||
}
|
||||
|
||||
|
61
block/io.c
61
block/io.c
@ -1130,8 +1130,7 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
|
||||
int ret;
|
||||
|
||||
bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
|
||||
assert(!(flags & ~BDRV_REQ_MASK));
|
||||
assert(!(flags & BDRV_REQ_NO_FALLBACK));
|
||||
assert(!(flags & ~bs->supported_read_flags));
|
||||
|
||||
if (!drv) {
|
||||
return -ENOMEDIUM;
|
||||
@ -1195,23 +1194,29 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
|
||||
BdrvRequestFlags flags)
|
||||
{
|
||||
BlockDriver *drv = bs->drv;
|
||||
bool emulate_fua = false;
|
||||
int64_t sector_num;
|
||||
unsigned int nb_sectors;
|
||||
QEMUIOVector local_qiov;
|
||||
int ret;
|
||||
|
||||
bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
|
||||
assert(!(flags & ~BDRV_REQ_MASK));
|
||||
assert(!(flags & BDRV_REQ_NO_FALLBACK));
|
||||
|
||||
if (!drv) {
|
||||
return -ENOMEDIUM;
|
||||
}
|
||||
|
||||
if ((flags & BDRV_REQ_FUA) &&
|
||||
(~bs->supported_write_flags & BDRV_REQ_FUA)) {
|
||||
flags &= ~BDRV_REQ_FUA;
|
||||
emulate_fua = true;
|
||||
}
|
||||
|
||||
flags &= bs->supported_write_flags;
|
||||
|
||||
if (drv->bdrv_co_pwritev_part) {
|
||||
ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset,
|
||||
flags & bs->supported_write_flags);
|
||||
flags &= ~bs->supported_write_flags;
|
||||
flags);
|
||||
goto emulate_flags;
|
||||
}
|
||||
|
||||
@ -1221,9 +1226,7 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
|
||||
}
|
||||
|
||||
if (drv->bdrv_co_pwritev) {
|
||||
ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
|
||||
flags & bs->supported_write_flags);
|
||||
flags &= ~bs->supported_write_flags;
|
||||
ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, flags);
|
||||
goto emulate_flags;
|
||||
}
|
||||
|
||||
@ -1233,10 +1236,8 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
|
||||
.coroutine = qemu_coroutine_self(),
|
||||
};
|
||||
|
||||
acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov,
|
||||
flags & bs->supported_write_flags,
|
||||
acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov, flags,
|
||||
bdrv_co_io_em_complete, &co);
|
||||
flags &= ~bs->supported_write_flags;
|
||||
if (acb == NULL) {
|
||||
ret = -EIO;
|
||||
} else {
|
||||
@ -1254,12 +1255,10 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
|
||||
assert(bytes <= BDRV_REQUEST_MAX_BYTES);
|
||||
|
||||
assert(drv->bdrv_co_writev);
|
||||
ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov,
|
||||
flags & bs->supported_write_flags);
|
||||
flags &= ~bs->supported_write_flags;
|
||||
ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov, flags);
|
||||
|
||||
emulate_flags:
|
||||
if (ret == 0 && (flags & BDRV_REQ_FUA)) {
|
||||
if (ret == 0 && emulate_fua) {
|
||||
ret = bdrv_co_flush(bs);
|
||||
}
|
||||
|
||||
@ -1487,11 +1486,14 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
|
||||
max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
|
||||
align);
|
||||
|
||||
/* TODO: We would need a per-BDS .supported_read_flags and
|
||||
/*
|
||||
* TODO: We would need a per-BDS .supported_read_flags and
|
||||
* potential fallback support, if we ever implement any read flags
|
||||
* to pass through to drivers. For now, there aren't any
|
||||
* passthrough flags. */
|
||||
assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH)));
|
||||
* passthrough flags except the BDRV_REQ_REGISTERED_BUF optimization hint.
|
||||
*/
|
||||
assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH |
|
||||
BDRV_REQ_REGISTERED_BUF)));
|
||||
|
||||
/* Handle Copy on Read and associated serialisation */
|
||||
if (flags & BDRV_REQ_COPY_ON_READ) {
|
||||
@ -1532,7 +1534,7 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
|
||||
goto out;
|
||||
}
|
||||
|
||||
assert(!(flags & ~bs->supported_read_flags));
|
||||
assert(!(flags & ~(bs->supported_read_flags | BDRV_REQ_REGISTERED_BUF)));
|
||||
|
||||
max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
|
||||
if (bytes <= max_bytes && bytes <= max_transfer) {
|
||||
@ -1721,7 +1723,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
static int bdrv_pad_request(BlockDriverState *bs,
|
||||
QEMUIOVector **qiov, size_t *qiov_offset,
|
||||
int64_t *offset, int64_t *bytes,
|
||||
BdrvRequestPadding *pad, bool *padded)
|
||||
BdrvRequestPadding *pad, bool *padded,
|
||||
BdrvRequestFlags *flags)
|
||||
{
|
||||
int ret;
|
||||
|
||||
@ -1749,6 +1752,10 @@ static int bdrv_pad_request(BlockDriverState *bs,
|
||||
if (padded) {
|
||||
*padded = true;
|
||||
}
|
||||
if (flags) {
|
||||
/* Can't use optimization hint with bounce buffer */
|
||||
*flags &= ~BDRV_REQ_REGISTERED_BUF;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1803,7 +1810,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
|
||||
}
|
||||
|
||||
ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
|
||||
NULL);
|
||||
NULL, &flags);
|
||||
if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
@ -1848,6 +1855,11 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
|
||||
return -ENOTSUP;
|
||||
}
|
||||
|
||||
/* By definition there is no user buffer so this flag doesn't make sense */
|
||||
if (flags & BDRV_REQ_REGISTERED_BUF) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Invalidate the cached block-status data range if this write overlaps */
|
||||
bdrv_bsc_invalidate_range(bs, offset, bytes);
|
||||
|
||||
@ -2133,6 +2145,9 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
|
||||
bool padding;
|
||||
BdrvRequestPadding pad;
|
||||
|
||||
/* This flag doesn't make sense for padding or zero writes */
|
||||
flags &= ~BDRV_REQ_REGISTERED_BUF;
|
||||
|
||||
padding = bdrv_init_padding(bs, offset, bytes, &pad);
|
||||
if (padding) {
|
||||
assert(!(flags & BDRV_REQ_NO_WAIT));
|
||||
@ -2250,7 +2265,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
|
||||
* alignment only if there is no ZERO flag.
|
||||
*/
|
||||
ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
|
||||
&padded);
|
||||
&padded, &flags);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
|
@ -1486,6 +1486,8 @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
|
||||
qemu_iovec_init(&bounce_qiov, 1);
|
||||
qemu_iovec_add(&bounce_qiov, bounce_buf, bytes);
|
||||
qiov = &bounce_qiov;
|
||||
|
||||
flags &= ~BDRV_REQ_REGISTERED_BUF;
|
||||
}
|
||||
|
||||
ret = bdrv_mirror_top_do_write(bs, MIRROR_METHOD_COPY, offset, bytes, qiov,
|
||||
|
@ -1222,7 +1222,6 @@ static int coroutine_fn nbd_client_co_preadv(BlockDriverState *bs, int64_t offse
|
||||
};
|
||||
|
||||
assert(bytes <= NBD_MAX_BUFFER_SIZE);
|
||||
assert(!flags);
|
||||
|
||||
if (!bytes) {
|
||||
return 0;
|
||||
|
@ -329,7 +329,6 @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
|
||||
QEMUIOVector hd_qiov;
|
||||
int ret = 0;
|
||||
|
||||
assert(!flags);
|
||||
qemu_iovec_init(&hd_qiov, qiov->niov);
|
||||
|
||||
while (nb_sectors > 0) {
|
||||
|
@ -628,7 +628,6 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, int64_t offset,
|
||||
uint8_t *buf;
|
||||
void *orig_buf;
|
||||
|
||||
assert(!flags);
|
||||
if (qiov->niov > 1) {
|
||||
buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
|
||||
if (buf == NULL) {
|
||||
@ -725,7 +724,6 @@ static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, int64_t offset,
|
||||
uint8_t *buf;
|
||||
void *orig_buf;
|
||||
|
||||
assert(!flags);
|
||||
s->cluster_cache_offset = -1; /* disable compressed cache */
|
||||
|
||||
/* We must always copy the iov when encrypting, so we
|
||||
|
@ -1395,7 +1395,6 @@ static int coroutine_fn bdrv_qed_co_writev(BlockDriverState *bs,
|
||||
int64_t sector_num, int nb_sectors,
|
||||
QEMUIOVector *qiov, int flags)
|
||||
{
|
||||
assert(!flags);
|
||||
return qed_co_request(bs, sector_num, qiov, nb_sectors, QED_AIOCB_WRITE);
|
||||
}
|
||||
|
||||
|
@ -258,6 +258,8 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
|
||||
qemu_iovec_add(&local_qiov, buf, 512);
|
||||
qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512);
|
||||
qiov = &local_qiov;
|
||||
|
||||
flags &= ~BDRV_REQ_REGISTERED_BUF;
|
||||
}
|
||||
|
||||
ret = raw_adjust_offset(bs, &offset, bytes, true);
|
||||
|
@ -261,7 +261,6 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,
|
||||
int ret;
|
||||
int64_t n;
|
||||
|
||||
assert(!flags);
|
||||
ret = replication_get_io_status(s);
|
||||
if (ret < 0) {
|
||||
goto out;
|
||||
|
@ -1196,7 +1196,6 @@ static coroutine_fn int ssh_co_writev(BlockDriverState *bs,
|
||||
BDRVSSHState *s = bs->opaque;
|
||||
int ret;
|
||||
|
||||
assert(!flags);
|
||||
qemu_co_mutex_lock(&s->lock);
|
||||
ret = ssh_write(s, bs, sector_num * BDRV_SECTOR_SIZE,
|
||||
nb_sectors * BDRV_SECTOR_SIZE, qiov);
|
||||
|
@ -1342,7 +1342,6 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
|
||||
uint64_t bat_prior_offset = 0;
|
||||
bool bat_update = false;
|
||||
|
||||
assert(!flags);
|
||||
qemu_iovec_init(&hd_qiov, qiov->niov);
|
||||
|
||||
qemu_co_mutex_lock(&s->lock);
|
||||
|
@ -80,6 +80,15 @@ typedef enum {
|
||||
*/
|
||||
BDRV_REQ_MAY_UNMAP = 0x4,
|
||||
|
||||
/*
|
||||
* An optimization hint when all QEMUIOVector elements are within
|
||||
* previously registered bdrv_register_buf() memory ranges.
|
||||
*
|
||||
* Code that replaces the user's QEMUIOVector elements with bounce buffers
|
||||
* must take care to clear this flag.
|
||||
*/
|
||||
BDRV_REQ_REGISTERED_BUF = 0x8,
|
||||
|
||||
BDRV_REQ_FUA = 0x10,
|
||||
BDRV_REQ_WRITE_COMPRESSED = 0x20,
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user