Block layer patches

-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.22 (GNU/Linux)
 
 iQIcBAABAgAGBQJXV9fqAAoJEH8JsnLIjy/W3/MQAKImbFe+Qm0eHzABKA9rLqRN
 N6RsDgU0/qRs5wOzfsnX5QCIt82cawPWdRL/Ke0JagMJ/A4mV8TsJ7zHk4pgFSyg
 yjhVUqFZy4J+BSkqaFrHbEgyXCDsa6b7Xg30Mrxm4OF5t4Nj96KejzWcyXz6QTGb
 zPwLVCsHlItPPql3w1IUZPMIYaDZbpu+VgULJZ5D4T5RKv7H4i3wi/Nuw4x6XH3E
 zA8Ky4Ws17wM17rUYr3LlcWfULLC8EAPYRYjS6Jii1eknKDQqQ/iQVYbQNJ2gqrs
 /lYygzCOWBGZCAONRnODSO7666CI9ofWhwdyl4gJ85k8YzrZw/ckVmVH3dFYU4u3
 2JVaiZ1PwjgwRuSznsSaK7NhbpPpCbvOuC7Ry3wKlnUgyy1VrhVMlSU0rmVA260k
 2bhnMFnNbf2GsBnibdZCbSAoO5Lb7BJZ29mk+CiP7BnWq1iijfZAm+k+lIOQ3LxD
 ahorWzU3HYcK/ZZAWttPvyhD/n4rbNnodp+Fl7QRc5AaavPRIdfwGpmf0hst4bXa
 xQ+spQWFSfXPpc37qcNbGTdJh9d7NmypSiTeIUeYoSydca9emlQ+M26bSB0lZsCv
 ZzysFz2+UvCoJCwpZXnPL8os7GLCbClDhfLyONfqgcir6cRUIV6TdT5HgwDymG9r
 1M9AfPY5xP73Tx+7voC9
 =joMD
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer patches

# gpg: Signature made Wed 08 Jun 2016 09:31:38 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"

* remotes/kevin/tags/for-upstream: (31 commits)
  qemu-img bench: Add --flush-interval
  qemu-img bench: Implement -S (step size)
  qemu-img bench: Make start offset configurable
  qemu-img bench: Sequential writes
  qemu-img bench
  block: Don't emulate natively supported pwritev flags
  blockdev: clean up error handling in do_open_tray
  block: Fix bdrv_all_delete_snapshot() error handling
  qcow2: avoid extra flushes in qcow2
  raw-posix: Fetch max sectors for host block device
  block: assert that bs->request_alignment is a power of 2
  migration/block: Convert saving to BlockBackend
  migration/block: Convert load to BlockBackend
  block: Kill bdrv_co_write_zeroes()
  vmdk: Convert to bdrv_co_pwrite_zeroes()
  raw_bsd: Convert to bdrv_co_pwrite_zeroes()
  raw-posix: Convert to bdrv_co_pwrite_zeroes()
  qed: Convert to bdrv_co_pwrite_zeroes()
  gluster: Convert to bdrv_co_pwrite_zeroes()
  blkreplay: Convert to bdrv_co_pwrite_zeroes()
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit 6f50f25c82
Peter Maydell <peter.maydell@linaro.org>, 2016-06-08 17:17:16 +01:00
26 changed files with 831 additions and 303 deletions

block.c

@ -1018,7 +1018,7 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
assert(bdrv_opt_mem_align(bs) != 0);
assert(bdrv_min_mem_align(bs) != 0);
assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
assert(is_power_of_2(bs->request_alignment) || bdrv_is_sg(bs));
qemu_opts_del(opts);
return 0;
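
The new assert relies on the standard constant-time power-of-two test: a power of two has exactly one bit set, so clearing the lowest set bit with x & (x - 1) must leave zero. A minimal standalone sketch of that check (illustrative only; QEMU ships its own is_power_of_2() helper):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool is_pow2(uint64_t x)
{
    /* x & (x - 1) clears the lowest set bit; only a power of two
     * (zero excluded explicitly) has no bits left afterwards. */
    return x != 0 && (x & (x - 1)) == 0;
}

int main(void)
{
    assert(is_pow2(512));   /* a typical request_alignment */
    assert(!is_pow2(768));  /* would now trip the new assert */
    return 0;
}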

block/blkreplay.c

@ -103,11 +103,11 @@ static int coroutine_fn blkreplay_co_writev(BlockDriverState *bs,
return ret;
}
static int coroutine_fn blkreplay_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count, BdrvRequestFlags flags)
{
uint64_t reqid = request_id++;
int ret = bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
int ret = bdrv_co_pwrite_zeroes(bs->file->bs, offset, count, flags);
block_request_create(reqid, bs, qemu_coroutine_self());
qemu_coroutine_yield();
@ -147,7 +147,7 @@ static BlockDriver bdrv_blkreplay = {
.bdrv_co_readv = blkreplay_co_readv,
.bdrv_co_writev = blkreplay_co_writev,
.bdrv_co_write_zeroes = blkreplay_co_write_zeroes,
.bdrv_co_pwrite_zeroes = blkreplay_co_pwrite_zeroes,
.bdrv_co_discard = blkreplay_co_discard,
.bdrv_co_flush = blkreplay_co_flush,
};

block/gluster.c

@ -454,14 +454,12 @@ static void qemu_gluster_reopen_abort(BDRVReopenState *state)
}
#ifdef CONFIG_GLUSTERFS_ZEROFILL
static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int size, BdrvRequestFlags flags)
{
int ret;
GlusterAIOCB acb;
BDRVGlusterState *s = bs->opaque;
off_t size = nb_sectors * BDRV_SECTOR_SIZE;
off_t offset = sector_num * BDRV_SECTOR_SIZE;
acb.size = size;
acb.ret = 0;
@ -769,7 +767,7 @@ static BlockDriver bdrv_gluster = {
.bdrv_co_discard = qemu_gluster_co_discard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
#endif
.create_opts = &qemu_gluster_create_opts,
};
@ -796,7 +794,7 @@ static BlockDriver bdrv_gluster_tcp = {
.bdrv_co_discard = qemu_gluster_co_discard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
#endif
.create_opts = &qemu_gluster_create_opts,
};
@ -823,7 +821,7 @@ static BlockDriver bdrv_gluster_unix = {
.bdrv_co_discard = qemu_gluster_co_discard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
#endif
.create_opts = &qemu_gluster_create_opts,
};
@ -850,7 +848,7 @@ static BlockDriver bdrv_gluster_rdma = {
.bdrv_co_discard = qemu_gluster_co_discard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
#endif
.create_opts = &qemu_gluster_create_opts,
};

block/io.c

@ -42,8 +42,8 @@ static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
void *opaque,
bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count, BdrvRequestFlags flags);
static void bdrv_parent_drained_begin(BlockDriverState *bs)
{
@ -620,18 +620,25 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num,
return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
}
int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, BdrvRequestFlags flags)
int bdrv_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
int count, BdrvRequestFlags flags)
{
return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
BDRV_REQ_ZERO_WRITE | flags);
QEMUIOVector qiov;
struct iovec iov = {
.iov_base = NULL,
.iov_len = count,
};
qemu_iovec_init_external(&qiov, &iov, 1);
return bdrv_prwv_co(bs, offset, &qiov, true,
BDRV_REQ_ZERO_WRITE | flags);
}
/*
* Completely zero out a block device with the help of bdrv_write_zeroes.
* Completely zero out a block device with the help of bdrv_pwrite_zeroes.
* The operation is sped up by checking the block status and only writing
* zeroes to the device if they currently do not return zeroes. Optional
* flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP,
* flags are passed through to bdrv_pwrite_zeroes (e.g. BDRV_REQ_MAY_UNMAP,
* BDRV_REQ_FUA).
*
* Returns < 0 on error, 0 on success. For error codes see bdrv_write().
@ -662,7 +669,8 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
sector_num += n;
continue;
}
ret = bdrv_write_zeroes(bs, sector_num, n, flags);
ret = bdrv_pwrite_zeroes(bs, sector_num << BDRV_SECTOR_BITS,
n << BDRV_SECTOR_BITS, flags);
if (ret < 0) {
error_report("error writing zeroes at sector %" PRId64 ": %s",
sector_num, strerror(-ret));
@ -808,7 +816,9 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
int ret;
if (drv->bdrv_co_pwritev) {
ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, flags);
ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
flags & bs->supported_write_flags);
flags &= ~bs->supported_write_flags;
goto emulate_flags;
}
@ -893,10 +903,12 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
goto err;
}
if (drv->bdrv_co_write_zeroes &&
if (drv->bdrv_co_pwrite_zeroes &&
buffer_is_zero(bounce_buffer, iov.iov_len)) {
ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
cluster_nb_sectors, 0);
ret = bdrv_co_do_pwrite_zeroes(bs,
cluster_sector_num * BDRV_SECTOR_SIZE,
cluster_nb_sectors * BDRV_SECTOR_SIZE,
0);
} else {
/* This does not change the data on the disk, it is not necessary
* to flush even in cache=writethrough mode.
@ -1110,36 +1122,40 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count, BdrvRequestFlags flags)
{
BlockDriver *drv = bs->drv;
QEMUIOVector qiov;
struct iovec iov = {0};
int ret = 0;
bool need_flush = false;
int head = 0;
int tail = 0;
int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
BDRV_REQUEST_MAX_SECTORS);
int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
int alignment = MAX(bs->bl.pwrite_zeroes_alignment ?: 1,
bs->request_alignment);
while (nb_sectors > 0 && !ret) {
int num = nb_sectors;
assert(is_power_of_2(alignment));
head = offset & (alignment - 1);
tail = (offset + count) & (alignment - 1);
max_write_zeroes &= ~(alignment - 1);
while (count > 0 && !ret) {
int num = count;
/* Align request. Block drivers can expect the "bulk" of the request
* to be aligned.
* to be aligned, and that unaligned requests do not cross cluster
* boundaries.
*/
if (bs->bl.write_zeroes_alignment
&& num > bs->bl.write_zeroes_alignment) {
if (sector_num % bs->bl.write_zeroes_alignment != 0) {
/* Make a small request up to the first aligned sector. */
num = bs->bl.write_zeroes_alignment;
num -= sector_num % bs->bl.write_zeroes_alignment;
} else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
/* Shorten the request to the last aligned sector. num cannot
* underflow because num > bs->bl.write_zeroes_alignment.
*/
num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
}
if (head) {
/* Make a small request up to the first aligned sector. */
num = MIN(count, alignment - head);
head = 0;
} else if (tail && num > alignment) {
/* Shorten the request to the last aligned sector. */
num -= tail;
}
/* limit request size */
@ -1149,9 +1165,9 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
ret = -ENOTSUP;
/* First try the efficient write zeroes operation */
if (drv->bdrv_co_write_zeroes) {
ret = drv->bdrv_co_write_zeroes(bs, sector_num, num,
flags & bs->supported_zero_flags);
if (drv->bdrv_co_pwrite_zeroes) {
ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
flags & bs->supported_zero_flags);
if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
!(bs->supported_zero_flags & BDRV_REQ_FUA)) {
need_flush = true;
@ -1173,33 +1189,31 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
write_flags &= ~BDRV_REQ_FUA;
need_flush = true;
}
num = MIN(num, max_xfer_len);
iov.iov_len = num * BDRV_SECTOR_SIZE;
num = MIN(num, max_xfer_len << BDRV_SECTOR_BITS);
iov.iov_len = num;
if (iov.iov_base == NULL) {
iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
iov.iov_base = qemu_try_blockalign(bs, num);
if (iov.iov_base == NULL) {
ret = -ENOMEM;
goto fail;
}
memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
memset(iov.iov_base, 0, num);
}
qemu_iovec_init_external(&qiov, &iov, 1);
ret = bdrv_driver_pwritev(bs, sector_num * BDRV_SECTOR_SIZE,
num * BDRV_SECTOR_SIZE, &qiov,
write_flags);
ret = bdrv_driver_pwritev(bs, offset, num, &qiov, write_flags);
/* Keep bounce buffer around if it is big enough for all
* future requests.
*/
if (num < max_xfer_len) {
if (num < max_xfer_len << BDRV_SECTOR_BITS) {
qemu_vfree(iov.iov_base);
iov.iov_base = NULL;
}
}
sector_num += num;
nb_sectors -= num;
offset += num;
count -= num;
}
fail:
@ -1237,7 +1251,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
!(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
!(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
qemu_iovec_is_zero(qiov)) {
flags |= BDRV_REQ_ZERO_WRITE;
if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
@ -1249,7 +1263,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
/* Do nothing, write notifier decided to fail this request */
} else if (flags & BDRV_REQ_ZERO_WRITE) {
bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
ret = bdrv_co_do_pwrite_zeroes(bs, sector_num << BDRV_SECTOR_BITS,
nb_sectors << BDRV_SECTOR_BITS, flags);
} else {
bdrv_debug_event(bs, BLKDBG_PWRITEV);
ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
@ -1510,18 +1525,18 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
}
int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
BdrvRequestFlags flags)
int coroutine_fn bdrv_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count,
BdrvRequestFlags flags)
{
trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
trace_bdrv_co_pwrite_zeroes(bs, offset, count, flags);
if (!(bs->open_flags & BDRV_O_UNMAP)) {
flags &= ~BDRV_REQ_MAY_UNMAP;
}
return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
BDRV_REQ_ZERO_WRITE | flags);
return bdrv_co_pwritev(bs, offset, count, NULL,
BDRV_REQ_ZERO_WRITE | flags);
}
typedef struct BdrvCoGetBlockStatusData {
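
Two idioms recur throughout the byte-based conversion above and are worth spelling out: sector counts become byte counts via BDRV_SECTOR_BITS (9, i.e. 512-byte sectors), and a zeroing request is split into an unaligned head, an aligned bulk, and an unaligned tail using power-of-two masks. A standalone sketch of that arithmetic with illustrative numbers (helper names and values are not from QEMU):

#include <inttypes.h>
#include <stdio.h>

#define SECTOR_BITS 9   /* mirrors BDRV_SECTOR_BITS: 1 sector = 512 bytes */

int main(void)
{
    /* Sector-to-byte scaling, as in sector_num << BDRV_SECTOR_BITS: */
    int64_t sector_num = 8;
    printf("sector %" PRId64 " starts at byte %" PRId64 "\n",
           sector_num, sector_num << SECTOR_BITS);   /* 4096 */

    /* Head/tail split for a zeroing request; alignment is a power of two: */
    int64_t offset = 5120, count = 10240;
    int alignment = 4096;
    int head = offset & (alignment - 1);             /* 1024 */
    int tail = (offset + count) & (alignment - 1);   /* 3072 */
    int first = count < alignment - head ? (int)count : alignment - head;

    /* Sub-requests: 3072-byte head, 4096-byte aligned bulk, 3072-byte tail */
    printf("head=%d tail=%d first=%d bulk=%" PRId64 "\n",
           head, tail, first, count - first - tail);
    return 0;
}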

block/iscsi.c

@ -401,18 +401,26 @@ static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
}
static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
IscsiLun *iscsilun)
static bool is_byte_request_lun_aligned(int64_t offset, int count,
IscsiLun *iscsilun)
{
if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
(nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
error_report("iSCSI misaligned request: "
"iscsilun->block_size %u, sector_num %" PRIi64
", nb_sectors %d",
iscsilun->block_size, sector_num, nb_sectors);
return 0;
if (offset % iscsilun->block_size || count % iscsilun->block_size) {
error_report("iSCSI misaligned request: "
"iscsilun->block_size %u, offset %" PRIi64
", count %d",
iscsilun->block_size, offset, count);
return false;
}
return 1;
return true;
}
static bool is_sector_request_lun_aligned(int64_t sector_num, int nb_sectors,
IscsiLun *iscsilun)
{
assert(nb_sectors < BDRV_REQUEST_MAX_SECTORS);
return is_byte_request_lun_aligned(sector_num << BDRV_SECTOR_BITS,
nb_sectors << BDRV_SECTOR_BITS,
iscsilun);
}
static unsigned long *iscsi_allocationmap_init(IscsiLun *iscsilun)
@ -461,7 +469,7 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
if (fua) {
assert(iscsilun->dpofua);
}
if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return -EINVAL;
}
@ -541,7 +549,7 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
iscsi_co_init_iscsitask(iscsilun, &iTask);
if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
ret = -EINVAL;
goto out;
}
@ -638,7 +646,7 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
uint64_t lba;
uint32_t num_sectors;
if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return -EINVAL;
}
@ -926,7 +934,7 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
struct IscsiTask iTask;
struct unmap_list list;
if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return -EINVAL;
}
@ -977,8 +985,8 @@ retry:
}
static int
coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, BdrvRequestFlags flags)
coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
int count, BdrvRequestFlags flags)
{
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
@ -986,8 +994,8 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
uint32_t nb_blocks;
bool use_16_for_ws = iscsilun->use_16_for_rw;
if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return -EINVAL;
if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
return -ENOTSUP;
}
if (flags & BDRV_REQ_MAY_UNMAP) {
@ -1008,8 +1016,8 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
return -ENOTSUP;
}
lba = sector_qemu2lun(sector_num, iscsilun);
nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
lba = offset / iscsilun->block_size;
nb_blocks = count / iscsilun->block_size;
if (iscsilun->zeroblock == NULL) {
iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
@ -1065,9 +1073,11 @@ retry:
}
if (flags & BDRV_REQ_MAY_UNMAP) {
iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
iscsi_allocationmap_clear(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
} else {
iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
iscsi_allocationmap_set(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
}
return 0;
@ -1711,15 +1721,19 @@ static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
}
bs->bl.discard_alignment =
sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
} else {
bs->bl.discard_alignment = iscsilun->block_size >> BDRV_SECTOR_BITS;
}
if (iscsilun->bl.max_ws_len < 0xffffffff) {
bs->bl.max_write_zeroes =
sector_limits_lun2qemu(iscsilun->bl.max_ws_len, iscsilun);
if (iscsilun->bl.max_ws_len < 0xffffffff / iscsilun->block_size) {
bs->bl.max_pwrite_zeroes =
iscsilun->bl.max_ws_len * iscsilun->block_size;
}
if (iscsilun->lbp.lbpws) {
bs->bl.write_zeroes_alignment =
sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
bs->bl.pwrite_zeroes_alignment =
iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
} else {
bs->bl.pwrite_zeroes_alignment = iscsilun->block_size;
}
bs->bl.opt_transfer_length =
sector_limits_lun2qemu(iscsilun->bl.opt_xfer_len, iscsilun);
@ -1852,7 +1866,7 @@ static BlockDriver bdrv_iscsi = {
.bdrv_co_get_block_status = iscsi_co_get_block_status,
.bdrv_co_discard = iscsi_co_discard,
.bdrv_co_write_zeroes = iscsi_co_write_zeroes,
.bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
.bdrv_co_readv = iscsi_co_readv,
.bdrv_co_writev_flags = iscsi_co_writev_flags,
.bdrv_co_flush_to_disk = iscsi_co_flush,
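
A subtle improvement hidden in the limits hunk above: the old code compared max_ws_len against 0xffffffff before scaling, while the new code compares it against 0xffffffff / block_size, so the multiplication max_ws_len * block_size cannot overflow when producing the byte-based limit. The guard in isolation, with hypothetical values:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    uint32_t max_ws_len = 0x3fffff;  /* hypothetical WRITE SAME limit (blocks) */
    uint32_t block_size = 512;

    if (max_ws_len < 0xffffffff / block_size) {
        /* Safe: the product is provably below 0xffffffff. */
        uint64_t max_bytes = (uint64_t)max_ws_len * block_size;
        printf("max_pwrite_zeroes = %" PRIu64 " bytes\n", max_bytes);
    } else {
        printf("limit would overflow; leave the default in place\n");
    }
    return 0;
}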

block/parallels.c

@ -210,7 +210,9 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
int ret;
space += s->prealloc_size;
if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
ret = bdrv_write_zeroes(bs->file->bs, s->data_end, space, 0);
ret = bdrv_pwrite_zeroes(bs->file->bs,
s->data_end << BDRV_SECTOR_BITS,
space << BDRV_SECTOR_BITS, 0);
} else {
ret = bdrv_truncate(bs->file->bs,
(s->data_end + space) << BDRV_SECTOR_BITS);

block/qcow2-cache.c

@ -226,7 +226,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
return 0;
}
int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c)
{
BDRVQcow2State *s = bs->opaque;
int result = 0;
@ -242,8 +242,15 @@ int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
}
}
return result;
}
int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
{
int result = qcow2_cache_write(bs, c);
if (result == 0) {
ret = bdrv_flush(bs->file->bs);
int ret = bdrv_flush(bs->file->bs);
if (ret < 0) {
result = ret;
}
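
Reassembled for readability, the split above gives qcow2 a way to write back dirty cache entries without paying for a disk flush; qcow2_cache_flush() then composes the two steps, and qcow2_co_flush_to_os (in the qcow2.c hunks below) switches to the cheaper qcow2_cache_write(). A sketch of the resulting composition, with QEMU types assumed from the surrounding context:

int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
{
    /* Write back all dirty table entries to bs->file... */
    int result = qcow2_cache_write(bs, c);

    /* ...and only then force them down to stable storage. */
    if (result == 0) {
        int ret = bdrv_flush(bs->file->bs);
        if (ret < 0) {
            result = ret;
        }
    }
    return result;
}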

block/qcow2-cluster.c

@ -1765,8 +1765,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
goto fail;
}
ret = bdrv_write_zeroes(bs->file->bs, offset / BDRV_SECTOR_SIZE,
s->cluster_sectors, 0);
ret = bdrv_pwrite_zeroes(bs->file->bs, offset, s->cluster_size, 0);
if (ret < 0) {
if (!preallocated) {
qcow2_free_clusters(bs, offset, s->cluster_size,

block/qcow2.c

@ -1193,7 +1193,7 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVQcow2State *s = bs->opaque;
bs->bl.write_zeroes_alignment = s->cluster_sectors;
bs->bl.pwrite_zeroes_alignment = s->cluster_size;
}
static int qcow2_set_key(BlockDriverState *bs, const char *key)
@ -2406,65 +2406,55 @@ finish:
}
static bool is_zero_cluster(BlockDriverState *bs, int64_t start)
static bool is_zero_sectors(BlockDriverState *bs, int64_t start,
uint32_t count)
{
BDRVQcow2State *s = bs->opaque;
int nr;
BlockDriverState *file;
int64_t res = bdrv_get_block_status_above(bs, NULL, start,
s->cluster_sectors, &nr, &file);
return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == s->cluster_sectors;
int64_t res;
if (!count) {
return true;
}
res = bdrv_get_block_status_above(bs, NULL, start, count,
&nr, &file);
return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == count;
}
static bool is_zero_cluster_top_locked(BlockDriverState *bs, int64_t start)
{
BDRVQcow2State *s = bs->opaque;
int nr = s->cluster_sectors;
uint64_t off;
int ret;
ret = qcow2_get_cluster_offset(bs, start << BDRV_SECTOR_BITS, &nr, &off);
assert(nr == s->cluster_sectors);
return ret == QCOW2_CLUSTER_UNALLOCATED || ret == QCOW2_CLUSTER_ZERO;
}
static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count, BdrvRequestFlags flags)
{
int ret;
BDRVQcow2State *s = bs->opaque;
int head = sector_num % s->cluster_sectors;
int tail = (sector_num + nb_sectors) % s->cluster_sectors;
uint32_t head = offset % s->cluster_size;
uint32_t tail = (offset + count) % s->cluster_size;
if (head != 0 || tail != 0) {
int64_t cl_end = -1;
trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, count);
sector_num -= head;
nb_sectors += head;
if (head || tail) {
int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS;
uint64_t off;
int nr;
if (tail != 0) {
nb_sectors += s->cluster_sectors - tail;
}
assert(head + count <= s->cluster_size);
if (!is_zero_cluster(bs, sector_num)) {
/* check whether remainder of cluster already reads as zero */
if (!(is_zero_sectors(bs, cl_start,
DIV_ROUND_UP(head, BDRV_SECTOR_SIZE)) &&
is_zero_sectors(bs, (offset + count) >> BDRV_SECTOR_BITS,
DIV_ROUND_UP(-tail & (s->cluster_size - 1),
BDRV_SECTOR_SIZE)))) {
return -ENOTSUP;
}
if (nb_sectors > s->cluster_sectors) {
/* Technically the request can cover 2 clusters, e.g. a 4k write
   at s->cluster_sectors - 2k offset. One of these clusters can
   be zeroed, one unallocated */
cl_end = sector_num + nb_sectors - s->cluster_sectors;
if (!is_zero_cluster(bs, cl_end)) {
return -ENOTSUP;
}
}
qemu_co_mutex_lock(&s->lock);
/* We can have new write after previous check */
if (!is_zero_cluster_top_locked(bs, sector_num) ||
(cl_end > 0 && !is_zero_cluster_top_locked(bs, cl_end))) {
offset = cl_start << BDRV_SECTOR_BITS;
count = s->cluster_size;
nr = s->cluster_sectors;
ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
if (ret != QCOW2_CLUSTER_UNALLOCATED && ret != QCOW2_CLUSTER_ZERO) {
qemu_co_mutex_unlock(&s->lock);
return -ENOTSUP;
}
@ -2472,8 +2462,10 @@ static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
qemu_co_mutex_lock(&s->lock);
}
trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, count);
/* Whatever is left can use real zero clusters */
ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS, nb_sectors);
ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS);
qemu_co_mutex_unlock(&s->lock);
return ret;
@ -2664,8 +2656,8 @@ static int make_completely_empty(BlockDriverState *bs)
/* After this call, neither the in-memory nor the on-disk refcount
* information accurately describe the actual references */
ret = bdrv_write_zeroes(bs->file->bs, s->l1_table_offset / BDRV_SECTOR_SIZE,
l1_clusters * s->cluster_sectors, 0);
ret = bdrv_pwrite_zeroes(bs->file->bs, s->l1_table_offset,
l1_clusters * s->cluster_size, 0);
if (ret < 0) {
goto fail_broken_refcounts;
}
@ -2678,9 +2670,8 @@ static int make_completely_empty(BlockDriverState *bs)
* overwrite parts of the existing refcount and L1 table, which is not
* an issue because the dirty flag is set, complete data loss is in fact
* desired and partial data loss is consequently fine as well */
ret = bdrv_write_zeroes(bs->file->bs, s->cluster_size / BDRV_SECTOR_SIZE,
(2 + l1_clusters) * s->cluster_size /
BDRV_SECTOR_SIZE, 0);
ret = bdrv_pwrite_zeroes(bs->file->bs, s->cluster_size,
(2 + l1_clusters) * s->cluster_size, 0);
/* This call (even if it failed overall) may have overwritten on-disk
* refcount structures; in that case, the in-memory refcount information
* will probably differ from the on-disk information which makes the BDS
@ -2822,14 +2813,14 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
int ret;
qemu_co_mutex_lock(&s->lock);
ret = qcow2_cache_flush(bs, s->l2_table_cache);
ret = qcow2_cache_write(bs, s->l2_table_cache);
if (ret < 0) {
qemu_co_mutex_unlock(&s->lock);
return ret;
}
if (qcow2_need_accurate_refcounts(s)) {
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
ret = qcow2_cache_write(bs, s->refcount_block_cache);
if (ret < 0) {
qemu_co_mutex_unlock(&s->lock);
return ret;
@ -3381,7 +3372,7 @@ BlockDriver bdrv_qcow2 = {
.bdrv_co_writev = qcow2_co_writev,
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
.bdrv_co_write_zeroes = qcow2_co_write_zeroes,
.bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
.bdrv_co_discard = qcow2_co_discard,
.bdrv_truncate = qcow2_truncate,
.bdrv_write_compressed = qcow2_write_compressed,
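
One idiom in the rewritten qcow2_co_pwrite_zeroes deserves a gloss: -tail & (s->cluster_size - 1) computes how many bytes remain between the end of the request and the next cluster boundary (yielding 0 when the end is already aligned); that is the span which must still read as zeroes for the optimization to be safe. A worked example with illustrative numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t cluster_size = 65536;   /* 64 KiB clusters, a common default */
    uint32_t tail = 4096;            /* request ends 4 KiB into a cluster */

    /* Modular negation against a power-of-two mask: */
    uint32_t to_boundary = -tail & (cluster_size - 1);   /* 61440 */
    printf("%u bytes up to the next cluster boundary\n", to_boundary);

    tail = 0;                        /* already aligned... */
    printf("%u bytes when aligned\n", -tail & (cluster_size - 1));  /* 0 */
    return 0;
}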

block/qcow2.h

@ -583,6 +583,7 @@ int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
void *table);
int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c);
int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
Qcow2Cache *dependency);
void qcow2_cache_depends_on_flush(Qcow2Cache *c);

block/qed.c

@ -517,7 +517,7 @@ static void bdrv_qed_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVQEDState *s = bs->opaque;
bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS;
bs->bl.pwrite_zeroes_alignment = s->header.cluster_size;
}
/* We have nothing to do for QED reopen, stubs just return
@ -1418,7 +1418,7 @@ typedef struct {
bool done;
} QEDWriteZeroesCB;
static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret)
static void coroutine_fn qed_co_pwrite_zeroes_cb(void *opaque, int ret)
{
QEDWriteZeroesCB *cb = opaque;
@ -1429,10 +1429,10 @@ static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret)
}
}
static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors,
BdrvRequestFlags flags)
static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset,
int count,
BdrvRequestFlags flags)
{
BlockAIOCB *blockacb;
BDRVQEDState *s = bs->opaque;
@ -1440,25 +1440,22 @@ static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
QEMUIOVector qiov;
struct iovec iov;
/* Refuse if there are untouched backing file sectors */
if (bs->backing) {
if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) {
return -ENOTSUP;
}
if (qed_offset_into_cluster(s, nb_sectors * BDRV_SECTOR_SIZE) != 0) {
return -ENOTSUP;
}
/* Fall back if the request is not aligned */
if (qed_offset_into_cluster(s, offset) ||
qed_offset_into_cluster(s, count)) {
return -ENOTSUP;
}
/* Zero writes start without an I/O buffer. If a buffer becomes necessary
* then it will be allocated during request processing.
*/
iov.iov_base = NULL,
iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE,
iov.iov_base = NULL;
iov.iov_len = count;
qemu_iovec_init_external(&qiov, &iov, 1);
blockacb = qed_aio_setup(bs, sector_num, &qiov, nb_sectors,
qed_co_write_zeroes_cb, &cb,
blockacb = qed_aio_setup(bs, offset >> BDRV_SECTOR_BITS, &qiov,
count >> BDRV_SECTOR_BITS,
qed_co_pwrite_zeroes_cb, &cb,
QED_AIOCB_WRITE | QED_AIOCB_ZERO);
if (!blockacb) {
return -EIO;
@ -1663,7 +1660,7 @@ static BlockDriver bdrv_qed = {
.bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
.bdrv_aio_readv = bdrv_qed_aio_readv,
.bdrv_aio_writev = bdrv_qed_aio_writev,
.bdrv_co_write_zeroes = bdrv_qed_co_write_zeroes,
.bdrv_co_pwrite_zeroes = bdrv_qed_co_pwrite_zeroes,
.bdrv_truncate = bdrv_qed_truncate,
.bdrv_getlength = bdrv_qed_getlength,
.bdrv_get_info = bdrv_qed_get_info,

block/raw-posix.c

@ -729,9 +729,33 @@ static void raw_reopen_abort(BDRVReopenState *state)
state->opaque = NULL;
}
static int hdev_get_max_transfer_length(int fd)
{
#ifdef BLKSECTGET
int max_sectors = 0;
if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
return max_sectors;
} else {
return -errno;
}
#else
return -ENOSYS;
#endif
}
static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVRawState *s = bs->opaque;
struct stat st;
if (!fstat(s->fd, &st)) {
if (S_ISBLK(st.st_mode)) {
int ret = hdev_get_max_transfer_length(s->fd);
if (ret >= 0) {
bs->bl.max_transfer_length = ret;
}
}
}
raw_probe_alignment(bs, s->fd, errp);
bs->bl.min_mem_alignment = s->buf_align;
@ -1252,8 +1276,8 @@ static int aio_worker(void *arg)
}
static int paio_submit_co(BlockDriverState *bs, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
int type)
int64_t offset, QEMUIOVector *qiov,
int count, int type)
{
RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
ThreadPool *pool;
@ -1262,16 +1286,16 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
acb->aio_type = type;
acb->aio_fildes = fd;
acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
acb->aio_nbytes = count;
acb->aio_offset = offset;
if (qiov) {
acb->aio_iov = qiov->iov;
acb->aio_niov = qiov->niov;
assert(qiov->size == acb->aio_nbytes);
assert(qiov->size == count);
}
trace_paio_submit_co(sector_num, nb_sectors, type);
trace_paio_submit_co(offset, count, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_co(pool, aio_worker, acb);
}
@ -1868,17 +1892,17 @@ static coroutine_fn BlockAIOCB *raw_aio_discard(BlockDriverState *bs,
cb, opaque, QEMU_AIO_DISCARD);
}
static int coroutine_fn raw_co_write_zeroes(
BlockDriverState *bs, int64_t sector_num,
int nb_sectors, BdrvRequestFlags flags)
static int coroutine_fn raw_co_pwrite_zeroes(
BlockDriverState *bs, int64_t offset,
int count, BdrvRequestFlags flags)
{
BDRVRawState *s = bs->opaque;
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
return paio_submit_co(bs, s->fd, offset, NULL, count,
QEMU_AIO_WRITE_ZEROES);
} else if (s->discard_zeroes) {
return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
return paio_submit_co(bs, s->fd, offset, NULL, count,
QEMU_AIO_DISCARD);
}
return -ENOTSUP;
@ -1931,7 +1955,7 @@ BlockDriver bdrv_file = {
.bdrv_create = raw_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = raw_co_get_block_status,
.bdrv_co_write_zeroes = raw_co_write_zeroes,
.bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
@ -2293,8 +2317,8 @@ static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
}
static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count, BdrvRequestFlags flags)
{
BDRVRawState *s = bs->opaque;
int rc;
@ -2304,10 +2328,10 @@ static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs,
return rc;
}
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
return paio_submit_co(bs, s->fd, offset, NULL, count,
QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV);
} else if (s->discard_zeroes) {
return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
return paio_submit_co(bs, s->fd, offset, NULL, count,
QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
}
return -ENOTSUP;
@ -2379,7 +2403,7 @@ static BlockDriver bdrv_host_device = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
.bdrv_co_write_zeroes = hdev_co_write_zeroes,
.bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,

block/raw_bsd.c

@ -127,11 +127,11 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
(sector_num << BDRV_SECTOR_BITS);
}
static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
BdrvRequestFlags flags)
static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count,
BdrvRequestFlags flags)
{
return bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
return bdrv_co_pwrite_zeroes(bs->file->bs, offset, count, flags);
}
static int coroutine_fn raw_co_discard(BlockDriverState *bs,
@ -252,7 +252,7 @@ BlockDriver bdrv_raw = {
.bdrv_create = &raw_create,
.bdrv_co_readv = &raw_co_readv,
.bdrv_co_writev_flags = &raw_co_writev_flags,
.bdrv_co_write_zeroes = &raw_co_write_zeroes,
.bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
.bdrv_co_discard = &raw_co_discard,
.bdrv_co_get_block_status = &raw_co_get_block_status,
.bdrv_truncate = &raw_truncate,

block/snapshot.c

@ -409,9 +409,6 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
if (bdrv_can_snapshot(bs) &&
bdrv_snapshot_find(bs, snapshot, name) >= 0) {
ret = bdrv_snapshot_delete_by_id_or_name(bs, name, err);
if (ret < 0) {
goto fail;
}
}
aio_context_release(ctx);
if (ret < 0) {

block/vmdk.c

@ -997,9 +997,9 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
for (i = 0; i < s->num_extents; i++) {
if (!s->extents[i].flat) {
bs->bl.write_zeroes_alignment =
MAX(bs->bl.write_zeroes_alignment,
s->extents[i].cluster_sectors);
bs->bl.pwrite_zeroes_alignment =
MAX(bs->bl.pwrite_zeroes_alignment,
s->extents[i].cluster_sectors << BDRV_SECTOR_BITS);
}
}
}
@ -1703,15 +1703,13 @@ static int vmdk_write_compressed(BlockDriverState *bs,
}
}
static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors,
BdrvRequestFlags flags)
static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset,
int bytes,
BdrvRequestFlags flags)
{
int ret;
BDRVVmdkState *s = bs->opaque;
uint64_t offset = sector_num * BDRV_SECTOR_SIZE;
uint64_t bytes = nb_sectors * BDRV_SECTOR_SIZE;
qemu_co_mutex_lock(&s->lock);
/* write zeroes could fail if sectors not aligned to cluster, test it with
@ -2402,7 +2400,7 @@ static BlockDriver bdrv_vmdk = {
.bdrv_co_preadv = vmdk_co_preadv,
.bdrv_co_pwritev = vmdk_co_pwritev,
.bdrv_write_compressed = vmdk_write_compressed,
.bdrv_co_write_zeroes = vmdk_co_write_zeroes,
.bdrv_co_pwrite_zeroes = vmdk_co_pwrite_zeroes,
.bdrv_close = vmdk_close,
.bdrv_create = vmdk_create,
.bdrv_co_flush_to_disk = vmdk_co_flush,

blockdev.c

@ -56,6 +56,8 @@
static QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states);
static int do_open_tray(const char *device, bool force, Error **errp);
static const char *const if_name[IF_COUNT] = {
[IF_NONE] = "none",
[IF_IDE] = "ide",
@ -2274,8 +2276,6 @@ exit:
block_job_txn_unref(block_job_txn);
}
static int do_open_tray(const char *device, bool force, Error **errp);
void qmp_eject(const char *device, bool has_force, bool force, Error **errp)
{
Error *local_err = NULL;
@ -2286,16 +2286,11 @@ void qmp_eject(const char *device, bool has_force, bool force, Error **errp)
}
rc = do_open_tray(device, force, &local_err);
if (local_err) {
if (rc && rc != -ENOSYS) {
error_propagate(errp, local_err);
return;
}
if (rc == EINPROGRESS) {
error_setg(errp, "Device '%s' is locked and force was not specified, "
"wait for tray to open and try again", device);
return;
}
error_free(local_err);
qmp_x_blockdev_remove_medium(device, errp);
}
@ -2324,11 +2319,16 @@ void qmp_block_passwd(bool has_device, const char *device,
aio_context_release(aio_context);
}
/**
* returns -errno on fatal error, +errno for non-fatal situations.
* errp will always be set when the return code is negative.
* May return +ENOSYS if the device has no tray,
* or +EINPROGRESS if the tray is locked and the guest has been notified.
/*
* Attempt to open the tray of @device.
* If @force, ignore its tray lock.
* Else, if the tray is locked, don't open it, but ask the guest to open it.
* On error, store an error through @errp and return -errno.
* If @device does not exist, return -ENODEV.
* If it has no removable media, return -ENOTSUP.
* If it has no tray, return -ENOSYS.
* If the guest was asked to open the tray, return -EINPROGRESS.
* Else, return 0.
*/
static int do_open_tray(const char *device, bool force, Error **errp)
{
@ -2348,8 +2348,8 @@ static int do_open_tray(const char *device, bool force, Error **errp)
}
if (!blk_dev_has_tray(blk)) {
/* Ignore this command on tray-less devices */
return ENOSYS;
error_setg(errp, "Device '%s' does not have a tray", device);
return -ENOSYS;
}
if (blk_dev_is_tray_open(blk)) {
@ -2366,7 +2366,9 @@ static int do_open_tray(const char *device, bool force, Error **errp)
}
if (locked && !force) {
return EINPROGRESS;
error_setg(errp, "Device '%s' is locked and force was not specified, "
"wait for tray to open and try again", device);
return -EINPROGRESS;
}
return 0;
@ -2375,10 +2377,18 @@ static int do_open_tray(const char *device, bool force, Error **errp)
void qmp_blockdev_open_tray(const char *device, bool has_force, bool force,
Error **errp)
{
Error *local_err = NULL;
int rc;
if (!has_force) {
force = false;
}
do_open_tray(device, force, errp);
rc = do_open_tray(device, force, &local_err);
if (rc && rc != -ENOSYS && rc != -EINPROGRESS) {
error_propagate(errp, local_err);
return;
}
error_free(local_err);
}
void qmp_blockdev_close_tray(const char *device, Error **errp)

include/block/block.h

@ -33,7 +33,7 @@ typedef struct BlockDriverInfo {
* True if the driver can optimize writing zeroes by unmapping
* sectors. This is equivalent to the BLKDISCARDZEROES ioctl in Linux
* with the difference that in qemu a discard is allowed to silently
* fail. Therefore we have to use bdrv_write_zeroes with the
* fail. Therefore we have to use bdrv_pwrite_zeroes with the
* BDRV_REQ_MAY_UNMAP flag for an optimized zero write with unmapping.
* After this call the driver has to guarantee that the contents read
* back as zero. It is additionally required that the block device is
@ -227,8 +227,8 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors);
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, BdrvRequestFlags flags);
int bdrv_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
int count, BdrvRequestFlags flags);
int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags);
int bdrv_pread(BlockDriverState *bs, int64_t offset,
void *buf, int count);
@ -247,8 +247,8 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
* function is not suitable for zeroing the entire image in a single request
* because it may allocate memory for the entire region.
*/
int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, BdrvRequestFlags flags);
int coroutine_fn bdrv_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
int count, BdrvRequestFlags flags);
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
const char *backing_file);
int bdrv_get_backing_file_depth(BlockDriverState *bs);

include/block/block_int.h

@ -163,8 +163,8 @@ struct BlockDriver {
function pointer may be NULL or return -ENOTSUP and .bdrv_co_writev()
* will be called instead.
*/
int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs,
int64_t offset, int count, BdrvRequestFlags flags);
int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors);
int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
@ -328,11 +328,13 @@ typedef struct BlockLimits {
/* optimal alignment for discard requests in sectors */
int64_t discard_alignment;
/* maximum number of sectors that can be zeroized at once */
int max_write_zeroes;
/* maximum number of bytes that can be zeroized at once (since it is
* signed, it must be < 2G, if set) */
int32_t max_pwrite_zeroes;
/* optimal alignment for write zeroes requests in sectors */
int64_t write_zeroes_alignment;
/* optimal alignment for write zeroes requests in bytes, must be
* a power of 2, and less than max_pwrite_zeroes if that is set */
uint32_t pwrite_zeroes_alignment;
/* optimal transfer length in sectors */
int opt_transfer_length;
@ -454,7 +456,7 @@ struct BlockDriverState {
unsigned int request_alignment;
/* Flags honored during pwrite (so far: BDRV_REQ_FUA) */
unsigned int supported_write_flags;
/* Flags honored during write_zeroes (so far: BDRV_REQ_FUA,
/* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
* BDRV_REQ_MAY_UNMAP) */
unsigned int supported_zero_flags;
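
Summarizing the new contract: both zero-write limits are byte-based now, pwrite_zeroes_alignment must be a power of two, and max_pwrite_zeroes, if set, must fit in a signed 32-bit value (the core code in block/io.c additionally rounds it down to the alignment). A hypothetical driver filling them in, purely for illustration (QEMU types and limits.h assumed):

static void mydrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
    const uint32_t cluster_size = 64 * 1024;   /* hypothetical cluster size */

    /* Byte-based and a power of two: */
    bs->bl.pwrite_zeroes_alignment = cluster_size;

    /* Byte-based, signed 32-bit, kept a multiple of the alignment: */
    bs->bl.max_pwrite_zeroes = INT_MAX & ~(cluster_size - 1);
}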

migration/block.c

@ -52,7 +52,8 @@
typedef struct BlkMigDevState {
/* Written during setup phase. Can be read without a lock. */
BlockDriverState *bs;
BlockBackend *blk;
char *blk_name;
int shared_base;
int64_t total_sectors;
QSIMPLEQ_ENTRY(BlkMigDevState) entry;
@ -145,9 +146,9 @@ static void blk_send(QEMUFile *f, BlkMigBlock * blk)
| flags);
/* device name */
len = strlen(bdrv_get_device_name(blk->bmds->bs));
len = strlen(blk->bmds->blk_name);
qemu_put_byte(f, len);
qemu_put_buffer(f, (uint8_t *)bdrv_get_device_name(blk->bmds->bs), len);
qemu_put_buffer(f, (uint8_t *) blk->bmds->blk_name, len);
/* if a block is zero we need to flush here since the network
* bandwidth is now a lot higher than the storage device bandwidth.
@ -201,7 +202,7 @@ static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
{
int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
if (sector < bdrv_nb_sectors(bmds->bs)) {
if (sector < blk_nb_sectors(bmds->blk)) {
return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
(1UL << (chunk % (sizeof(unsigned long) * 8))));
} else {
@ -235,10 +236,10 @@ static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
static void alloc_aio_bitmap(BlkMigDevState *bmds)
{
BlockDriverState *bs = bmds->bs;
BlockBackend *bb = bmds->blk;
int64_t bitmap_size;
bitmap_size = bdrv_nb_sectors(bs) + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
bitmap_size = blk_nb_sectors(bb) + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
bmds->aio_bitmap = g_malloc0(bitmap_size);
@ -268,19 +269,19 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
{
int64_t total_sectors = bmds->total_sectors;
int64_t cur_sector = bmds->cur_sector;
BlockDriverState *bs = bmds->bs;
BlockBackend *bb = bmds->blk;
BlkMigBlock *blk;
int nr_sectors;
if (bmds->shared_base) {
qemu_mutex_lock_iothread();
aio_context_acquire(bdrv_get_aio_context(bs));
aio_context_acquire(blk_get_aio_context(bb));
while (cur_sector < total_sectors &&
!bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
&nr_sectors)) {
!bdrv_is_allocated(blk_bs(bb), cur_sector,
MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) {
cur_sector += nr_sectors;
}
aio_context_release(bdrv_get_aio_context(bs));
aio_context_release(blk_get_aio_context(bb));
qemu_mutex_unlock_iothread();
}
@ -323,12 +324,12 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
* without the need to acquire the AioContext.
*/
qemu_mutex_lock_iothread();
aio_context_acquire(bdrv_get_aio_context(bmds->bs));
blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
nr_sectors, blk_mig_read_cb, blk);
aio_context_acquire(blk_get_aio_context(bmds->blk));
blk->aiocb = blk_aio_preadv(bb, cur_sector * BDRV_SECTOR_SIZE, &blk->qiov,
0, blk_mig_read_cb, blk);
bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector, nr_sectors);
aio_context_release(bdrv_get_aio_context(bmds->bs));
aio_context_release(blk_get_aio_context(bmds->blk));
qemu_mutex_unlock_iothread();
bmds->cur_sector = cur_sector + nr_sectors;
@ -343,10 +344,10 @@ static int set_dirty_tracking(void)
int ret;
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
aio_context_acquire(bdrv_get_aio_context(bmds->bs));
bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE,
NULL, NULL);
aio_context_release(bdrv_get_aio_context(bmds->bs));
aio_context_acquire(blk_get_aio_context(bmds->blk));
bmds->dirty_bitmap = bdrv_create_dirty_bitmap(blk_bs(bmds->blk),
BLOCK_SIZE, NULL, NULL);
aio_context_release(blk_get_aio_context(bmds->blk));
if (!bmds->dirty_bitmap) {
ret = -errno;
goto fail;
@ -357,9 +358,9 @@ static int set_dirty_tracking(void)
fail:
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
if (bmds->dirty_bitmap) {
aio_context_acquire(bdrv_get_aio_context(bmds->bs));
bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
aio_context_release(bdrv_get_aio_context(bmds->bs));
aio_context_acquire(blk_get_aio_context(bmds->blk));
bdrv_release_dirty_bitmap(blk_bs(bmds->blk), bmds->dirty_bitmap);
aio_context_release(blk_get_aio_context(bmds->blk));
}
}
return ret;
@ -372,9 +373,9 @@ static void unset_dirty_tracking(void)
BlkMigDevState *bmds;
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
aio_context_acquire(bdrv_get_aio_context(bmds->bs));
bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
aio_context_release(bdrv_get_aio_context(bmds->bs));
aio_context_acquire(blk_get_aio_context(bmds->blk));
bdrv_release_dirty_bitmap(blk_bs(bmds->blk), bmds->dirty_bitmap);
aio_context_release(blk_get_aio_context(bmds->blk));
}
}
@ -384,6 +385,11 @@ static void init_blk_migration(QEMUFile *f)
BlkMigDevState *bmds;
int64_t sectors;
BdrvNextIterator it;
int i, num_bs = 0;
struct {
BlkMigDevState *bmds;
BlockDriverState *bs;
} *bmds_bs;
block_mig_state.submitted = 0;
block_mig_state.read_done = 0;
@ -393,27 +399,32 @@ static void init_blk_migration(QEMUFile *f)
block_mig_state.bulk_completed = 0;
block_mig_state.zero_blocks = migrate_zero_blocks();
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
num_bs++;
}
bmds_bs = g_malloc0(num_bs * sizeof(*bmds_bs));
for (i = 0, bs = bdrv_first(&it); bs; bs = bdrv_next(&it), i++) {
if (bdrv_is_read_only(bs)) {
continue;
}
sectors = bdrv_nb_sectors(bs);
if (sectors <= 0) {
return;
goto out;
}
bmds = g_new0(BlkMigDevState, 1);
bmds->bs = bs;
bmds->blk = blk_new();
bmds->blk_name = g_strdup(bdrv_get_device_name(bs));
bmds->bulk_completed = 0;
bmds->total_sectors = sectors;
bmds->completed_sectors = 0;
bmds->shared_base = block_mig_state.shared_base;
alloc_aio_bitmap(bmds);
error_setg(&bmds->blocker, "block device is in use by migration");
bdrv_op_block_all(bs, bmds->blocker);
bdrv_ref(bs);
assert(i < num_bs);
bmds_bs[i].bmds = bmds;
bmds_bs[i].bs = bs;
block_mig_state.total_sector_sum += sectors;
@ -426,6 +437,24 @@ static void init_blk_migration(QEMUFile *f)
QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
}
/* Can only insert new BDSes now because doing so while iterating block
* devices may end up in a deadlock (iterating the new BDSes, too). */
for (i = 0; i < num_bs; i++) {
BlkMigDevState *bmds = bmds_bs[i].bmds;
BlockDriverState *bs = bmds_bs[i].bs;
if (bmds) {
blk_insert_bs(bmds->blk, bs);
alloc_aio_bitmap(bmds);
error_setg(&bmds->blocker, "block device is in use by migration");
bdrv_op_block_all(bs, bmds->blocker);
}
}
out:
g_free(bmds_bs);
}
/* Called with no lock taken. */
@ -482,6 +511,7 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
int is_async)
{
BlkMigBlock *blk;
BlockDriverState *bs = blk_bs(bmds->blk);
int64_t total_sectors = bmds->total_sectors;
int64_t sector;
int nr_sectors;
@ -491,11 +521,11 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
blk_mig_lock();
if (bmds_aio_inflight(bmds, sector)) {
blk_mig_unlock();
bdrv_drain(bmds->bs);
blk_drain(bmds->blk);
} else {
blk_mig_unlock();
}
if (bdrv_get_dirty(bmds->bs, bmds->dirty_bitmap, sector)) {
if (bdrv_get_dirty(bs, bmds->dirty_bitmap, sector)) {
if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
nr_sectors = total_sectors - sector;
@ -513,15 +543,18 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
nr_sectors, blk_mig_read_cb, blk);
blk->aiocb = blk_aio_preadv(bmds->blk,
sector * BDRV_SECTOR_SIZE,
&blk->qiov, 0, blk_mig_read_cb,
blk);
blk_mig_lock();
block_mig_state.submitted++;
bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
blk_mig_unlock();
} else {
ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
ret = blk_pread(bmds->blk, sector * BDRV_SECTOR_SIZE, blk->buf,
nr_sectors * BDRV_SECTOR_SIZE);
if (ret < 0) {
goto error;
}
@ -559,9 +592,9 @@ static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
int ret = 1;
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
aio_context_acquire(bdrv_get_aio_context(bmds->bs));
aio_context_acquire(blk_get_aio_context(bmds->blk));
ret = mig_save_device_dirty(f, bmds, is_async);
aio_context_release(bdrv_get_aio_context(bmds->bs));
aio_context_release(blk_get_aio_context(bmds->blk));
if (ret <= 0) {
break;
}
@ -619,9 +652,9 @@ static int64_t get_remaining_dirty(void)
int64_t dirty = 0;
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
aio_context_acquire(bdrv_get_aio_context(bmds->bs));
aio_context_acquire(blk_get_aio_context(bmds->blk));
dirty += bdrv_get_dirty_count(bmds->dirty_bitmap);
aio_context_release(bdrv_get_aio_context(bmds->bs));
aio_context_release(blk_get_aio_context(bmds->blk));
}
return dirty << BDRV_SECTOR_BITS;
@ -641,15 +674,16 @@ static void block_migration_cleanup(void *opaque)
while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
bdrv_op_unblock_all(bmds->bs, bmds->blocker);
bdrv_op_unblock_all(blk_bs(bmds->blk), bmds->blocker);
error_free(bmds->blocker);
/* Save ctx, because bmds->bs can disappear during bdrv_unref. */
ctx = bdrv_get_aio_context(bmds->bs);
/* Save ctx, because bmds->blk can disappear during blk_unref. */
ctx = blk_get_aio_context(bmds->blk);
aio_context_acquire(ctx);
bdrv_unref(bmds->bs);
blk_unref(bmds->blk);
aio_context_release(ctx);
g_free(bmds->blk_name);
g_free(bmds->aio_bitmap);
g_free(bmds);
}
@ -827,8 +861,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
int len, flags;
char device_name[256];
int64_t addr;
BlockDriverState *bs, *bs_prev = NULL;
BlockBackend *blk;
BlockBackend *blk, *blk_prev = NULL;
Error *local_err = NULL;
uint8_t *buf;
int64_t total_sectors = 0;
@ -853,23 +886,17 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
device_name);
return -EINVAL;
}
bs = blk_bs(blk);
if (!bs) {
fprintf(stderr, "Block device %s has no medium\n",
device_name);
return -EINVAL;
}
if (bs != bs_prev) {
bs_prev = bs;
total_sectors = bdrv_nb_sectors(bs);
if (blk != blk_prev) {
blk_prev = blk;
total_sectors = blk_nb_sectors(blk);
if (total_sectors <= 0) {
error_report("Error getting length of block device %s",
device_name);
return -EINVAL;
}
bdrv_invalidate_cache(bs, &local_err);
blk_invalidate_cache(blk, &local_err);
if (local_err) {
error_report_err(local_err);
return -EINVAL;
@ -883,12 +910,14 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
}
if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
ret = bdrv_write_zeroes(bs, addr, nr_sectors,
ret = blk_pwrite_zeroes(blk, addr * BDRV_SECTOR_SIZE,
nr_sectors * BDRV_SECTOR_SIZE,
BDRV_REQ_MAY_UNMAP);
} else {
buf = g_malloc(BLOCK_SIZE);
qemu_get_buffer(f, buf, BLOCK_SIZE);
ret = bdrv_write(bs, addr, buf, nr_sectors);
ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf,
nr_sectors * BDRV_SECTOR_SIZE, 0);
g_free(buf);
}
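
The most mechanical change in the hunks above is the unit conversion: BlockDriverState calls take sector numbers, while the BlockBackend equivalents take byte offsets, so every sector quantity is scaled by BDRV_SECTOR_SIZE. Reduced to a runnable sketch (values illustrative):

#include <inttypes.h>
#include <stdio.h>

#define BDRV_SECTOR_SIZE ((int64_t)512)

int main(void)
{
    /* What was bdrv_read(bs, sector, buf, nr_sectors) becomes
     * blk_pread(blk, sector * BDRV_SECTOR_SIZE, buf,
     *           nr_sectors * BDRV_SECTOR_SIZE) in the code above. */
    int64_t sector = 2048, nr_sectors = 128;
    printf("offset=%" PRId64 " bytes, count=%" PRId64 " bytes\n",
           sector * BDRV_SECTOR_SIZE, nr_sectors * BDRV_SECTOR_SIZE);
    return 0;
}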

qemu-img-cmds.hx

@ -9,6 +9,12 @@ STEXI
@table @option
ETEXI
DEF("bench", img_bench,
"bench [-c count] [-d depth] [-f fmt] [--flush-interval=flush_interval] [-n] [--no-drain] [-o offset] [--pattern=pattern] [-q] [-s buffer_size] [-S step_size] [-t cache] [-w] filename")
STEXI
@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] @var{filename}
ETEXI
DEF("check", img_check,
"check [-q] [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [-r [leaks | all]] [-T src_cache] filename")
STEXI

qemu-img.c

@ -54,6 +54,9 @@ enum {
OPTION_BACKING_CHAIN = 257,
OPTION_OBJECT = 258,
OPTION_IMAGE_OPTS = 259,
OPTION_PATTERN = 260,
OPTION_FLUSH_INTERVAL = 261,
OPTION_NO_DRAIN = 262,
};
typedef enum OutputFormat {
@ -3460,6 +3463,332 @@ out_no_progress:
return 0;
}
typedef struct BenchData {
BlockBackend *blk;
uint64_t image_size;
bool write;
int bufsize;
int step;
int nrreq;
int n;
int flush_interval;
bool drain_on_flush;
uint8_t *buf;
QEMUIOVector *qiov;
int in_flight;
bool in_flush;
uint64_t offset;
} BenchData;
static void bench_undrained_flush_cb(void *opaque, int ret)
{
if (ret < 0) {
error_report("Failed flush request: %s\n", strerror(-ret));
exit(EXIT_FAILURE);
}
}
static void bench_cb(void *opaque, int ret)
{
BenchData *b = opaque;
BlockAIOCB *acb;
if (ret < 0) {
error_report("Failed request: %s\n", strerror(-ret));
exit(EXIT_FAILURE);
}
if (b->in_flush) {
/* Just finished a flush with drained queue: Start next requests */
assert(b->in_flight == 0);
b->in_flush = false;
} else if (b->in_flight > 0) {
int remaining = b->n - b->in_flight;
b->n--;
b->in_flight--;
/* Time for flush? Drain queue if requested, then flush */
if (b->flush_interval && remaining % b->flush_interval == 0) {
if (!b->in_flight || !b->drain_on_flush) {
BlockCompletionFunc *cb;
if (b->drain_on_flush) {
b->in_flush = true;
cb = bench_cb;
} else {
cb = bench_undrained_flush_cb;
}
acb = blk_aio_flush(b->blk, cb, b);
if (!acb) {
error_report("Failed to issue flush request");
exit(EXIT_FAILURE);
}
}
if (b->drain_on_flush) {
return;
}
}
}
while (b->n > b->in_flight && b->in_flight < b->nrreq) {
if (b->write) {
acb = blk_aio_pwritev(b->blk, b->offset, b->qiov, 0,
bench_cb, b);
} else {
acb = blk_aio_preadv(b->blk, b->offset, b->qiov, 0,
bench_cb, b);
}
if (!acb) {
error_report("Failed to issue request");
exit(EXIT_FAILURE);
}
b->in_flight++;
b->offset += b->step;
b->offset %= b->image_size;
}
}
static int img_bench(int argc, char **argv)
{
int c, ret = 0;
const char *fmt = NULL, *filename;
bool quiet = false;
bool image_opts = false;
bool is_write = false;
int count = 75000;
int depth = 64;
int64_t offset = 0;
size_t bufsize = 4096;
int pattern = 0;
size_t step = 0;
int flush_interval = 0;
bool drain_on_flush = true;
int64_t image_size;
BlockBackend *blk = NULL;
BenchData data = {};
int flags = 0;
bool writethrough;
struct timeval t1, t2;
int i;
for (;;) {
static const struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
{"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
{"pattern", required_argument, 0, OPTION_PATTERN},
{"no-drain", no_argument, 0, OPTION_NO_DRAIN},
{0, 0, 0, 0}
};
c = getopt_long(argc, argv, "hc:d:f:no:qs:S:t:w", long_options, NULL);
if (c == -1) {
break;
}
switch (c) {
case 'h':
case '?':
help();
break;
case 'c':
{
char *end;
errno = 0;
count = strtoul(optarg, &end, 0);
if (errno || *end || count > INT_MAX) {
error_report("Invalid request count specified");
return 1;
}
break;
}
case 'd':
{
char *end;
errno = 0;
depth = strtoul(optarg, &end, 0);
if (errno || *end || depth > INT_MAX) {
error_report("Invalid queue depth specified");
return 1;
}
break;
}
case 'f':
fmt = optarg;
break;
case 'n':
flags |= BDRV_O_NATIVE_AIO;
break;
case 'o':
{
char *end;
errno = 0;
offset = qemu_strtosz_suffix(optarg, &end,
QEMU_STRTOSZ_DEFSUFFIX_B);
if (offset < 0 || *end) {
error_report("Invalid offset specified");
return 1;
}
break;
}
case 'q':
quiet = true;
break;
case 's':
{
int64_t sval;
char *end;
sval = qemu_strtosz_suffix(optarg, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
if (sval < 0 || sval > INT_MAX || *end) {
error_report("Invalid buffer size specified");
return 1;
}
bufsize = sval;
break;
}
case 'S':
{
int64_t sval;
char *end;
sval = qemu_strtosz_suffix(optarg, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
if (sval < 0 || sval > INT_MAX || *end) {
error_report("Invalid step size specified");
return 1;
}
step = sval;
break;
}
case 't':
ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
if (ret < 0) {
error_report("Invalid cache mode");
ret = -1;
goto out;
}
break;
case 'w':
flags |= BDRV_O_RDWR;
is_write = true;
break;
case OPTION_PATTERN:
{
char *end;
errno = 0;
pattern = strtoul(optarg, &end, 0);
if (errno || *end || pattern > 0xff) {
error_report("Invalid pattern byte specified");
return 1;
}
break;
}
case OPTION_FLUSH_INTERVAL:
{
char *end;
errno = 0;
flush_interval = strtoul(optarg, &end, 0);
if (errno || *end || flush_interval > INT_MAX) {
error_report("Invalid flush interval specified");
return 1;
}
break;
}
case OPTION_NO_DRAIN:
drain_on_flush = false;
break;
case OPTION_IMAGE_OPTS:
image_opts = true;
break;
}
}
if (optind != argc - 1) {
error_exit("Expecting one image file name");
}
filename = argv[argc - 1];
if (!is_write && flush_interval) {
error_report("--flush-interval is only available in write tests");
ret = -1;
goto out;
}
if (flush_interval && flush_interval < depth) {
error_report("Flush interval can't be smaller than depth");
ret = -1;
goto out;
}
blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
if (!blk) {
ret = -1;
goto out;
}
image_size = blk_getlength(blk);
if (image_size < 0) {
ret = image_size;
goto out;
}
data = (BenchData) {
.blk = blk,
.image_size = image_size,
.bufsize = bufsize,
.step = step ?: bufsize,
.nrreq = depth,
.n = count,
.offset = offset,
.write = is_write,
.flush_interval = flush_interval,
.drain_on_flush = drain_on_flush,
};
printf("Sending %d %s requests, %d bytes each, %d in parallel "
"(starting at offset %" PRId64 ", step size %d)\n",
data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
data.offset, data.step);
if (flush_interval) {
printf("Sending flush every %d requests\n", flush_interval);
}
data.buf = blk_blockalign(blk, data.nrreq * data.bufsize);
memset(data.buf, pattern, data.nrreq * data.bufsize);
data.qiov = g_new(QEMUIOVector, data.nrreq);
for (i = 0; i < data.nrreq; i++) {
qemu_iovec_init(&data.qiov[i], 1);
qemu_iovec_add(&data.qiov[i],
data.buf + i * data.bufsize, data.bufsize);
}
gettimeofday(&t1, NULL);
bench_cb(&data, 0);
while (data.n > 0) {
main_loop_wait(false);
}
gettimeofday(&t2, NULL);
printf("Run completed in %3.3f seconds.\n",
(t2.tv_sec - t1.tv_sec)
+ ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
out:
qemu_vfree(data.buf);
blk_unref(blk);
if (ret) {
return 1;
}
return 0;
}

static const img_cmd_t img_cmds[] = {
#define DEF(option, callback, arg_string) \
{ option, callback },

View File

@ -131,6 +131,30 @@ Skip the creation of the target volume
Command description:
@table @option
@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] @var{filename}

Run a simple sequential I/O benchmark on the specified image. If @code{-w} is
specified, a write test is performed, otherwise a read test is performed.

A total of @var{count} I/O requests is performed, each @var{buffer_size} bytes
in size, with @var{depth} requests in parallel. The first request starts at
the position given by @var{offset}; each following request advances the
current position by @var{step_size}. If @var{step_size} is not given, it
defaults to @var{buffer_size}.

If @var{flush_interval} is specified for a write test, the request queue is
drained and a flush is issued before new writes are made whenever the number
of remaining requests is a multiple of @var{flush_interval}. If
@code{--no-drain} is additionally specified, the flush is issued without
draining the request queue first.

If @code{-n} is specified, the native AIO backend is used if possible. On
Linux, this option only works if @code{-t none} or @code{-t directsync} is
also specified.

For write tests, a buffer filled with zeros is written by default. This can be
overridden with a pattern byte specified by @var{pattern}.
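
As an illustrative invocation (the image name is a placeholder, not part of
the patch), a sequential write test with a flush after every 1024 requests
could look like:

@example
qemu-img bench -w -t none -c 75000 -d 64 -s 4k --flush-interval=1024 test.img
@end example
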
@item check [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] [-T @var{src_cache}] @var{filename}
Perform a consistency check on the disk image @var{filename}. The command can

View File

@ -1,6 +1,6 @@
#!/bin/bash
#
# Test bdrv_write_zeroes with backing files
# Test bdrv_pwrite_zeroes with backing files (see also 154)
#
# Copyright (C) 2012 Red Hat, Inc.
#

View File

@ -1,6 +1,6 @@
#!/bin/bash
#
# qcow2 specific bdrv_write_zeroes tests with backing files (complements 034)
# qcow2 specific bdrv_pwrite_zeroes tests with backing files (complements 034)
#
# Copyright (C) 2016 Red Hat, Inc.
#
@ -114,6 +114,46 @@ $QEMU_IO -c "read -P 0 40k 3k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
echo
echo == write_zeroes covers non-zero data ==

CLUSTER_SIZE=512 TEST_IMG="$TEST_IMG.base" _make_test_img $size
_make_test_img -b "$TEST_IMG.base"

# non-zero data at front of request
# Backing file: -- XX -- --
# Active layer: -- 00 00 --
$QEMU_IO -c "write -P 0x11 5k 1k" "$TEST_IMG.base" | _filter_qemu_io
$QEMU_IO -c "write -z 5k 2k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IO -c "read -P 0 4k 4k" "$TEST_IMG" | _filter_qemu_io

# non-zero data at end of request
# Backing file: -- -- XX --
# Active layer: -- 00 00 --
$QEMU_IO -c "write -P 0x11 14k 1k" "$TEST_IMG.base" | _filter_qemu_io
$QEMU_IO -c "write -z 13k 2k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IO -c "read -P 0 12k 4k" "$TEST_IMG" | _filter_qemu_io

# non-zero data matches size of request
# Backing file: -- XX XX --
# Active layer: -- 00 00 --
$QEMU_IO -c "write -P 0x11 21k 2k" "$TEST_IMG.base" | _filter_qemu_io
$QEMU_IO -c "write -z 21k 2k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IO -c "read -P 0 20k 4k" "$TEST_IMG" | _filter_qemu_io

# non-zero data smaller than request
# Backing file: -- -X X- --
# Active layer: -- 00 00 --
# (30208 bytes = 29.5k, so the 1k of old data sits in the middle of the
#  2k write_zeroes request below)
$QEMU_IO -c "write -P 0x11 30208 1k" "$TEST_IMG.base" | _filter_qemu_io
$QEMU_IO -c "write -z 29k 2k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IO -c "read -P 0 28k 4k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
echo
echo == spanning two clusters, non-zero before request ==

View File

@ -74,6 +74,43 @@ read 3072/3072 bytes at offset 40960
{ "start": 40960, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 24576},
{ "start": 45056, "length": 134172672, "depth": 1, "zero": true, "data": false}]
== write_zeroes covers non-zero data ==
Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/t.IMGFMT.base
wrote 1024/1024 bytes at offset 5120
1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2048/2048 bytes at offset 5120
2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 4096/4096 bytes at offset 4096
4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 1024/1024 bytes at offset 14336
1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2048/2048 bytes at offset 13312
2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 4096/4096 bytes at offset 12288
4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2048/2048 bytes at offset 21504
2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2048/2048 bytes at offset 21504
2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 4096/4096 bytes at offset 20480
4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 1024/1024 bytes at offset 30208
1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2048/2048 bytes at offset 29696
2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 4096/4096 bytes at offset 28672
4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
[{ "start": 0, "length": 4096, "depth": 1, "zero": true, "data": false},
{ "start": 4096, "length": 4096, "depth": 0, "zero": true, "data": false},
{ "start": 8192, "length": 4096, "depth": 1, "zero": true, "data": false},
{ "start": 12288, "length": 4096, "depth": 0, "zero": true, "data": false},
{ "start": 16384, "length": 4096, "depth": 1, "zero": true, "data": false},
{ "start": 20480, "length": 4096, "depth": 0, "zero": true, "data": false},
{ "start": 24576, "length": 4096, "depth": 1, "zero": true, "data": false},
{ "start": 28672, "length": 4096, "depth": 0, "zero": true, "data": false},
{ "start": 32768, "length": 134184960, "depth": 1, "zero": true, "data": false}]
== spanning two clusters, non-zero before request ==
Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/t.IMGFMT.base
@ -106,11 +143,14 @@ read 1024/1024 bytes at offset 67584
read 5120/5120 bytes at offset 68608
5 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
[{ "start": 0, "length": 32768, "depth": 1, "zero": true, "data": false},
{ "start": 32768, "length": 8192, "depth": 0, "zero": false, "data": true, "offset": 20480},
{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480},
{ "start": 36864, "length": 4096, "depth": 0, "zero": true, "data": false},
{ "start": 40960, "length": 8192, "depth": 1, "zero": true, "data": false},
{ "start": 49152, "length": 8192, "depth": 0, "zero": false, "data": true, "offset": 28672},
{ "start": 49152, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 24576},
{ "start": 53248, "length": 4096, "depth": 0, "zero": true, "data": false},
{ "start": 57344, "length": 8192, "depth": 1, "zero": true, "data": false},
{ "start": 65536, "length": 8192, "depth": 0, "zero": false, "data": true, "offset": 36864},
{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 28672},
{ "start": 69632, "length": 4096, "depth": 0, "zero": true, "data": false},
{ "start": 73728, "length": 134144000, "depth": 1, "zero": true, "data": false}]
== spanning two clusters, non-zero after request ==
@ -145,11 +185,14 @@ read 7168/7168 bytes at offset 65536
read 1024/1024 bytes at offset 72704
1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
[{ "start": 0, "length": 32768, "depth": 1, "zero": true, "data": false},
{ "start": 32768, "length": 8192, "depth": 0, "zero": false, "data": true, "offset": 20480},
{ "start": 32768, "length": 4096, "depth": 0, "zero": true, "data": false},
{ "start": 36864, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480},
{ "start": 40960, "length": 8192, "depth": 1, "zero": true, "data": false},
{ "start": 49152, "length": 8192, "depth": 0, "zero": false, "data": true, "offset": 28672},
{ "start": 49152, "length": 4096, "depth": 0, "zero": true, "data": false},
{ "start": 53248, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 24576},
{ "start": 57344, "length": 8192, "depth": 1, "zero": true, "data": false},
{ "start": 65536, "length": 8192, "depth": 0, "zero": false, "data": true, "offset": 36864},
{ "start": 65536, "length": 4096, "depth": 0, "zero": true, "data": false},
{ "start": 69632, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 28672},
{ "start": 73728, "length": 134144000, "depth": 1, "zero": true, "data": false}]
== spanning two clusters, partially overwriting backing file ==

View File

@ -72,7 +72,7 @@ bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %
bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
bdrv_co_write_zeroes(void *bs, int64_t sector_num, int nb_sector, int flags) "bs %p sector_num %"PRId64" nb_sectors %d flags %#x"
bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags %#x"
bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64" nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d"
# block/stream.c
@ -131,7 +131,7 @@ thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
# block/raw-win32.c
# block/raw-posix.c
paio_submit_co(int64_t sector_num, int nb_sectors, int type) "sector_num %"PRId64" nb_sectors %d type %d"
paio_submit_co(int64_t offset, int count, int type) "offset %"PRId64" count %d type %d"
paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d"
# ioport.c
@ -611,6 +611,8 @@ qcow2_writev_done_req(void *co, int ret) "co %p ret %d"
qcow2_writev_start_part(void *co) "co %p"
qcow2_writev_done_part(void *co, int cur_nr_sectors) "co %p cur_nr_sectors %d"
qcow2_writev_data(void *co, uint64_t offset) "co %p offset %" PRIx64
qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p offset %" PRIx64 " count %d"
qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset %" PRIx64 " count %d"
# block/qcow2-cluster.c
qcow2_alloc_clusters_offset(void *co, uint64_t offset, int num) "co %p offset %" PRIx64 " num %d"