2003-06-30 14:03:06 +04:00
|
|
|
/*
|
|
|
|
* QEMU System Emulator block driver
|
2007-09-17 01:08:06 +04:00
|
|
|
*
|
2003-06-30 14:03:06 +04:00
|
|
|
* Copyright (c) 2003 Fabrice Bellard
|
2007-09-17 01:08:06 +04:00
|
|
|
*
|
2003-06-30 14:03:06 +04:00
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
|
|
* in the Software without restriction, including without limitation the rights
|
|
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
|
|
* furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice shall be included in
|
|
|
|
* all copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
* THE SOFTWARE.
|
|
|
|
*/
|
2008-12-05 20:53:21 +03:00
|
|
|
#include "config-host.h"
|
2007-11-11 05:51:17 +03:00
|
|
|
#include "qemu-common.h"
|
2010-05-22 21:15:08 +04:00
|
|
|
#include "trace.h"
|
2012-12-17 21:19:44 +04:00
|
|
|
#include "block/block_int.h"
|
|
|
|
#include "block/blockjob.h"
|
2012-12-17 21:20:00 +04:00
|
|
|
#include "qemu/module.h"
|
2012-12-17 21:19:43 +04:00
|
|
|
#include "qapi/qmp/qjson.h"
|
2014-10-07 15:59:11 +04:00
|
|
|
#include "sysemu/block-backend.h"
|
2012-12-17 21:20:04 +04:00
|
|
|
#include "sysemu/sysemu.h"
|
2012-12-17 21:20:00 +04:00
|
|
|
#include "qemu/notify.h"
|
2012-12-17 21:19:44 +04:00
|
|
|
#include "block/coroutine.h"
|
2014-01-24 00:31:34 +04:00
|
|
|
#include "block/qapi.h"
|
2011-09-22 00:16:47 +04:00
|
|
|
#include "qmp-commands.h"
|
2012-12-17 21:20:00 +04:00
|
|
|
#include "qemu/timer.h"
|
2014-06-18 10:43:44 +04:00
|
|
|
#include "qapi-event.h"
|
2003-06-30 14:03:06 +04:00
|
|
|
|
2009-07-27 18:12:56 +04:00
|
|
|
#ifdef CONFIG_BSD
|
2005-04-27 01:59:26 +04:00
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <sys/ioctl.h>
|
2009-09-12 11:36:22 +04:00
|
|
|
#include <sys/queue.h>
|
2009-03-07 23:06:23 +03:00
|
|
|
#ifndef __DragonFly__
|
2005-04-27 01:59:26 +04:00
|
|
|
#include <sys/disk.h>
|
|
|
|
#endif
|
2009-03-07 23:06:23 +03:00
|
|
|
#endif
|
2005-04-27 01:59:26 +04:00
|
|
|
|
2009-03-08 19:26:59 +03:00
|
|
|
#ifdef _WIN32
|
|
|
|
#include <windows.h>
|
|
|
|
#endif
|
|
|
|
|
2013-11-13 14:29:43 +04:00
|
|
|
struct BdrvDirtyBitmap {
|
|
|
|
HBitmap *bitmap;
|
|
|
|
QLIST_ENTRY(BdrvDirtyBitmap) list;
|
|
|
|
};
|
|
|
|
|
2011-10-13 16:08:22 +04:00
|
|
|
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
|
|
|
|
|
2014-10-07 15:59:14 +04:00
|
|
|
static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
|
2009-04-07 22:43:24 +04:00
|
|
|
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
2014-10-07 15:59:15 +04:00
|
|
|
BlockCompletionFunc *cb, void *opaque);
|
2014-10-07 15:59:14 +04:00
|
|
|
static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
|
2009-04-07 22:43:24 +04:00
|
|
|
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
2014-10-07 15:59:15 +04:00
|
|
|
BlockCompletionFunc *cb, void *opaque);
|
2011-07-15 15:50:26 +04:00
|
|
|
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
|
|
|
|
int64_t sector_num, int nb_sectors,
|
|
|
|
QEMUIOVector *iov);
|
|
|
|
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
|
|
|
|
int64_t sector_num, int nb_sectors,
|
|
|
|
QEMUIOVector *iov);
|
2013-12-05 15:09:38 +04:00
|
|
|
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
|
|
|
|
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
|
2012-01-18 18:40:42 +04:00
|
|
|
BdrvRequestFlags flags);
|
2013-12-05 15:09:38 +04:00
|
|
|
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
|
|
|
|
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
|
2012-02-07 17:27:25 +04:00
|
|
|
BdrvRequestFlags flags);
|
2014-10-07 15:59:14 +04:00
|
|
|
static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
|
|
|
|
int64_t sector_num,
|
|
|
|
QEMUIOVector *qiov,
|
|
|
|
int nb_sectors,
|
|
|
|
BdrvRequestFlags flags,
|
2014-10-07 15:59:15 +04:00
|
|
|
BlockCompletionFunc *cb,
|
2014-10-07 15:59:14 +04:00
|
|
|
void *opaque,
|
|
|
|
bool is_write);
|
2011-10-13 16:08:23 +04:00
|
|
|
static void coroutine_fn bdrv_co_do_rw(void *opaque);
|
2012-03-20 18:12:58 +04:00
|
|
|
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
|
2013-10-24 14:06:51 +04:00
|
|
|
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
|
2006-04-26 02:36:06 +04:00
|
|
|
|
2010-04-10 10:02:42 +04:00
|
|
|
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
|
|
|
|
QTAILQ_HEAD_INITIALIZER(bdrv_states);
|
2008-09-17 23:04:14 +04:00
|
|
|
|
2014-01-24 00:31:32 +04:00
|
|
|
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
|
|
|
|
QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
|
|
|
|
|
2010-04-13 13:29:33 +04:00
|
|
|
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
|
|
|
|
QLIST_HEAD_INITIALIZER(bdrv_drivers);
|
2004-08-02 01:59:26 +04:00
|
|
|
|
2014-11-27 12:40:46 +03:00
|
|
|
static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
|
|
|
|
int nr_sectors);
|
|
|
|
static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
|
|
|
|
int nr_sectors);
|
2009-10-27 20:41:44 +03:00
|
|
|
/* If non-zero, use only whitelisted block drivers */
|
|
|
|
static int use_bdrv_whitelist;
|
|
|
|
|
2010-12-09 14:53:00 +03:00
|
|
|
#ifdef _WIN32
|
|
|
|
static int is_windows_drive_prefix(const char *filename)
|
|
|
|
{
|
|
|
|
return (((filename[0] >= 'a' && filename[0] <= 'z') ||
|
|
|
|
(filename[0] >= 'A' && filename[0] <= 'Z')) &&
|
|
|
|
filename[1] == ':');
|
|
|
|
}
|
|
|
|
|
|
|
|
int is_windows_drive(const char *filename)
|
|
|
|
{
|
|
|
|
if (is_windows_drive_prefix(filename) &&
|
|
|
|
filename[2] == '\0')
|
|
|
|
return 1;
|
|
|
|
if (strstart(filename, "\\\\.\\", NULL) ||
|
|
|
|
strstart(filename, "//./", NULL))
|
|
|
|
return 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2011-11-03 12:57:25 +04:00
|
|
|
/* throttling disk I/O limits */
|
2013-09-02 16:14:39 +04:00
|
|
|
void bdrv_set_io_limits(BlockDriverState *bs,
|
|
|
|
ThrottleConfig *cfg)
|
2011-11-08 09:00:14 +04:00
|
|
|
{
|
2013-09-02 16:14:39 +04:00
|
|
|
int i;
|
2011-11-08 09:00:14 +04:00
|
|
|
|
2013-09-02 16:14:39 +04:00
|
|
|
throttle_config(&bs->throttle_state, cfg);
|
2011-11-08 09:00:14 +04:00
|
|
|
|
2013-09-02 16:14:39 +04:00
|
|
|
for (i = 0; i < 2; i++) {
|
|
|
|
qemu_co_enter_next(&bs->throttled_reqs[i]);
|
2011-11-08 09:00:14 +04:00
|
|
|
}
|
2013-09-02 16:14:39 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* this function drain all the throttled IOs */
|
|
|
|
static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
bool drained = false;
|
|
|
|
bool enabled = bs->io_limits_enabled;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
bs->io_limits_enabled = false;
|
|
|
|
|
|
|
|
for (i = 0; i < 2; i++) {
|
|
|
|
while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
|
|
|
|
drained = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bs->io_limits_enabled = enabled;
|
2011-11-08 09:00:14 +04:00
|
|
|
|
2013-09-02 16:14:39 +04:00
|
|
|
return drained;
|
2011-11-08 09:00:14 +04:00
|
|
|
}
|
|
|
|
|
2013-09-02 16:14:39 +04:00
|
|
|
void bdrv_io_limits_disable(BlockDriverState *bs)
|
2011-11-03 12:57:25 +04:00
|
|
|
{
|
2013-09-02 16:14:39 +04:00
|
|
|
bs->io_limits_enabled = false;
|
2011-11-03 12:57:25 +04:00
|
|
|
|
2013-09-02 16:14:39 +04:00
|
|
|
bdrv_start_throttled_reqs(bs);
|
|
|
|
|
|
|
|
throttle_destroy(&bs->throttle_state);
|
2011-11-03 12:57:25 +04:00
|
|
|
}
|
|
|
|
|
2013-09-02 16:14:39 +04:00
|
|
|
static void bdrv_throttle_read_timer_cb(void *opaque)
|
2011-11-03 12:57:25 +04:00
|
|
|
{
|
2013-09-02 16:14:39 +04:00
|
|
|
BlockDriverState *bs = opaque;
|
|
|
|
qemu_co_enter_next(&bs->throttled_reqs[0]);
|
2011-11-03 12:57:25 +04:00
|
|
|
}
|
|
|
|
|
2013-09-02 16:14:39 +04:00
|
|
|
static void bdrv_throttle_write_timer_cb(void *opaque)
|
2011-11-03 12:57:25 +04:00
|
|
|
{
|
2013-09-02 16:14:39 +04:00
|
|
|
BlockDriverState *bs = opaque;
|
|
|
|
qemu_co_enter_next(&bs->throttled_reqs[1]);
|
2011-11-03 12:57:25 +04:00
|
|
|
}
|
|
|
|
|
2013-09-02 16:14:39 +04:00
|
|
|
/* should be called before bdrv_set_io_limits if a limit is set */
|
|
|
|
void bdrv_io_limits_enable(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
assert(!bs->io_limits_enabled);
|
|
|
|
throttle_init(&bs->throttle_state,
|
2014-05-14 18:22:45 +04:00
|
|
|
bdrv_get_aio_context(bs),
|
2013-09-02 16:14:39 +04:00
|
|
|
QEMU_CLOCK_VIRTUAL,
|
|
|
|
bdrv_throttle_read_timer_cb,
|
|
|
|
bdrv_throttle_write_timer_cb,
|
|
|
|
bs);
|
|
|
|
bs->io_limits_enabled = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* This function makes an IO wait if needed
|
|
|
|
*
|
|
|
|
* @nb_sectors: the number of sectors of the IO
|
|
|
|
* @is_write: is the IO a write
|
|
|
|
*/
|
2011-11-08 09:00:14 +04:00
|
|
|
static void bdrv_io_limits_intercept(BlockDriverState *bs,
|
2014-01-16 16:29:10 +04:00
|
|
|
unsigned int bytes,
|
2013-09-02 16:14:39 +04:00
|
|
|
bool is_write)
|
2011-11-08 09:00:14 +04:00
|
|
|
{
|
2013-09-02 16:14:39 +04:00
|
|
|
/* does this io must wait */
|
|
|
|
bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
|
2011-11-08 09:00:14 +04:00
|
|
|
|
2013-09-02 16:14:39 +04:00
|
|
|
/* if must wait or any request of this type throttled queue the IO */
|
|
|
|
if (must_wait ||
|
|
|
|
!qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
|
|
|
|
qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
|
2011-11-08 09:00:14 +04:00
|
|
|
}
|
|
|
|
|
2013-09-02 16:14:39 +04:00
|
|
|
/* the IO will be executed, do the accounting */
|
2014-01-16 16:29:10 +04:00
|
|
|
throttle_account(&bs->throttle_state, is_write, bytes);
|
|
|
|
|
2011-11-08 09:00:14 +04:00
|
|
|
|
2013-09-02 16:14:39 +04:00
|
|
|
/* if the next request must wait -> do nothing */
|
|
|
|
if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
|
|
|
|
return;
|
2011-11-08 09:00:14 +04:00
|
|
|
}
|
|
|
|
|
2013-09-02 16:14:39 +04:00
|
|
|
/* else queue next request for execution */
|
|
|
|
qemu_co_queue_next(&bs->throttled_reqs[is_write]);
|
2011-11-08 09:00:14 +04:00
|
|
|
}
|
|
|
|
|
2013-11-28 13:23:32 +04:00
|
|
|
size_t bdrv_opt_mem_align(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
if (!bs || !bs->drv) {
|
|
|
|
/* 4k should be on the safe side */
|
|
|
|
return 4096;
|
|
|
|
}
|
|
|
|
|
|
|
|
return bs->bl.opt_mem_alignment;
|
|
|
|
}
|
|
|
|
|
2010-12-09 14:53:00 +03:00
|
|
|
/* check if the path starts with "<protocol>:" */
|
2014-12-03 16:57:22 +03:00
|
|
|
int path_has_protocol(const char *path)
|
2010-12-09 14:53:00 +03:00
|
|
|
{
|
2012-05-08 18:51:48 +04:00
|
|
|
const char *p;
|
|
|
|
|
2010-12-09 14:53:00 +03:00
|
|
|
#ifdef _WIN32
|
|
|
|
if (is_windows_drive(path) ||
|
|
|
|
is_windows_drive_prefix(path)) {
|
|
|
|
return 0;
|
|
|
|
}
|
2012-05-08 18:51:48 +04:00
|
|
|
p = path + strcspn(path, ":/\\");
|
|
|
|
#else
|
|
|
|
p = path + strcspn(path, ":/");
|
2010-12-09 14:53:00 +03:00
|
|
|
#endif
|
|
|
|
|
2012-05-08 18:51:48 +04:00
|
|
|
return *p == ':';
|
2010-12-09 14:53:00 +03:00
|
|
|
}
|
|
|
|
|
2006-08-01 20:21:11 +04:00
|
|
|
int path_is_absolute(const char *path)
|
2005-10-30 21:30:10 +03:00
|
|
|
{
|
2007-01-07 21:22:37 +03:00
|
|
|
#ifdef _WIN32
|
|
|
|
/* specific case for names like: "\\.\d:" */
|
2012-05-08 18:51:47 +04:00
|
|
|
if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
|
2007-01-07 21:22:37 +03:00
|
|
|
return 1;
|
2012-05-08 18:51:47 +04:00
|
|
|
}
|
|
|
|
return (*path == '/' || *path == '\\');
|
2007-01-07 20:27:07 +03:00
|
|
|
#else
|
2012-05-08 18:51:47 +04:00
|
|
|
return (*path == '/');
|
2007-01-07 20:27:07 +03:00
|
|
|
#endif
|
2005-10-30 21:30:10 +03:00
|
|
|
}
|
|
|
|
|
2006-08-01 20:21:11 +04:00
|
|
|
/* if filename is absolute, just copy it to dest. Otherwise, build a
|
|
|
|
path to it by considering it is relative to base_path. URL are
|
|
|
|
supported. */
|
|
|
|
void path_combine(char *dest, int dest_size,
|
|
|
|
const char *base_path,
|
|
|
|
const char *filename)
|
2005-10-30 21:30:10 +03:00
|
|
|
{
|
2006-08-01 20:21:11 +04:00
|
|
|
const char *p, *p1;
|
|
|
|
int len;
|
|
|
|
|
|
|
|
if (dest_size <= 0)
|
|
|
|
return;
|
|
|
|
if (path_is_absolute(filename)) {
|
|
|
|
pstrcpy(dest, dest_size, filename);
|
|
|
|
} else {
|
|
|
|
p = strchr(base_path, ':');
|
|
|
|
if (p)
|
|
|
|
p++;
|
|
|
|
else
|
|
|
|
p = base_path;
|
2007-01-07 20:27:07 +03:00
|
|
|
p1 = strrchr(base_path, '/');
|
|
|
|
#ifdef _WIN32
|
|
|
|
{
|
|
|
|
const char *p2;
|
|
|
|
p2 = strrchr(base_path, '\\');
|
|
|
|
if (!p1 || p2 > p1)
|
|
|
|
p1 = p2;
|
|
|
|
}
|
|
|
|
#endif
|
2006-08-01 20:21:11 +04:00
|
|
|
if (p1)
|
|
|
|
p1++;
|
|
|
|
else
|
|
|
|
p1 = base_path;
|
|
|
|
if (p1 > p)
|
|
|
|
p = p1;
|
|
|
|
len = p - base_path;
|
|
|
|
if (len > dest_size - 1)
|
|
|
|
len = dest_size - 1;
|
|
|
|
memcpy(dest, base_path, len);
|
|
|
|
dest[len] = '\0';
|
|
|
|
pstrcat(dest, dest_size, filename);
|
2005-10-30 21:30:10 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-11-26 19:20:25 +03:00
|
|
|
void bdrv_get_full_backing_filename_from_filename(const char *backed,
|
|
|
|
const char *backing,
|
2014-11-26 19:20:26 +03:00
|
|
|
char *dest, size_t sz,
|
|
|
|
Error **errp)
|
2012-05-08 18:51:50 +04:00
|
|
|
{
|
2014-11-26 19:20:26 +03:00
|
|
|
if (backing[0] == '\0' || path_has_protocol(backing) ||
|
|
|
|
path_is_absolute(backing))
|
|
|
|
{
|
2014-11-26 19:20:25 +03:00
|
|
|
pstrcpy(dest, sz, backing);
|
2014-11-26 19:20:26 +03:00
|
|
|
} else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
|
|
|
|
error_setg(errp, "Cannot use relative backing file names for '%s'",
|
|
|
|
backed);
|
2012-05-08 18:51:50 +04:00
|
|
|
} else {
|
2014-11-26 19:20:25 +03:00
|
|
|
path_combine(dest, sz, backed, backing);
|
2012-05-08 18:51:50 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-11-26 19:20:26 +03:00
|
|
|
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
|
|
|
|
Error **errp)
|
2014-11-26 19:20:25 +03:00
|
|
|
{
|
2014-11-26 19:20:26 +03:00
|
|
|
char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
|
|
|
|
|
|
|
|
bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
|
|
|
|
dest, sz, errp);
|
2014-11-26 19:20:25 +03:00
|
|
|
}
|
|
|
|
|
2009-05-10 02:03:42 +04:00
|
|
|
void bdrv_register(BlockDriver *bdrv)
|
2004-08-02 01:59:26 +04:00
|
|
|
{
|
2011-10-14 00:09:28 +04:00
|
|
|
/* Block drivers without coroutine functions need emulation */
|
|
|
|
if (!bdrv->bdrv_co_readv) {
|
2011-07-15 15:50:26 +04:00
|
|
|
bdrv->bdrv_co_readv = bdrv_co_readv_em;
|
|
|
|
bdrv->bdrv_co_writev = bdrv_co_writev_em;
|
|
|
|
|
2011-10-14 00:09:31 +04:00
|
|
|
/* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
|
|
|
|
* the block driver lacks aio we need to emulate that too.
|
|
|
|
*/
|
2011-07-15 15:50:26 +04:00
|
|
|
if (!bdrv->bdrv_aio_readv) {
|
|
|
|
/* add AIO emulation layer */
|
|
|
|
bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
|
|
|
|
bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
|
|
|
|
}
|
2006-08-01 20:21:11 +04:00
|
|
|
}
|
2009-09-04 21:01:49 +04:00
|
|
|
|
2010-04-13 13:29:33 +04:00
|
|
|
QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
|
2004-08-02 01:59:26 +04:00
|
|
|
}
|
2004-03-15 00:38:54 +03:00
|
|
|
|
2014-10-07 15:59:12 +04:00
|
|
|
BlockDriverState *bdrv_new_root(void)
|
2004-03-15 00:38:54 +03:00
|
|
|
{
|
2014-10-07 15:59:12 +04:00
|
|
|
BlockDriverState *bs = bdrv_new();
|
2014-10-07 15:59:03 +04:00
|
|
|
|
|
|
|
QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
|
|
|
|
return bs;
|
|
|
|
}
|
|
|
|
|
|
|
|
BlockDriverState *bdrv_new(void)
|
|
|
|
{
|
|
|
|
BlockDriverState *bs;
|
|
|
|
int i;
|
|
|
|
|
block: Use g_new() & friends where that makes obvious sense
g_new(T, n) is neater than g_malloc(sizeof(T) * n). It's also safer,
for two reasons. One, it catches multiplication overflowing size_t.
Two, it returns T * rather than void *, which lets the compiler catch
more type errors.
Patch created with Coccinelle, with two manual changes on top:
* Add const to bdrv_iterate_format() to keep the types straight
* Convert the allocation in bdrv_drop_intermediate(), which Coccinelle
inexplicably misses
Coccinelle semantic patch:
@@
type T;
@@
-g_malloc(sizeof(T))
+g_new(T, 1)
@@
type T;
@@
-g_try_malloc(sizeof(T))
+g_try_new(T, 1)
@@
type T;
@@
-g_malloc0(sizeof(T))
+g_new0(T, 1)
@@
type T;
@@
-g_try_malloc0(sizeof(T))
+g_try_new0(T, 1)
@@
type T;
expression n;
@@
-g_malloc(sizeof(T) * (n))
+g_new(T, n)
@@
type T;
expression n;
@@
-g_try_malloc(sizeof(T) * (n))
+g_try_new(T, n)
@@
type T;
expression n;
@@
-g_malloc0(sizeof(T) * (n))
+g_new0(T, n)
@@
type T;
expression n;
@@
-g_try_malloc0(sizeof(T) * (n))
+g_try_new0(T, n)
@@
type T;
expression p, n;
@@
-g_realloc(p, sizeof(T) * (n))
+g_renew(T, p, n)
@@
type T;
expression p, n;
@@
-g_try_realloc(p, sizeof(T) * (n))
+g_try_renew(T, p, n)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-08-19 12:31:08 +04:00
|
|
|
bs = g_new0(BlockDriverState, 1);
|
2013-11-13 14:29:43 +04:00
|
|
|
QLIST_INIT(&bs->dirty_bitmaps);
|
2014-05-23 17:29:42 +04:00
|
|
|
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
|
|
|
|
QLIST_INIT(&bs->op_blockers[i]);
|
|
|
|
}
|
2011-09-27 00:43:50 +04:00
|
|
|
bdrv_iostatus_disable(bs);
|
2012-08-23 13:20:36 +04:00
|
|
|
notifier_list_init(&bs->close_notifiers);
|
2013-06-24 19:13:10 +04:00
|
|
|
notifier_with_return_list_init(&bs->before_write_notifiers);
|
2013-09-02 16:14:39 +04:00
|
|
|
qemu_co_queue_init(&bs->throttled_reqs[0]);
|
|
|
|
qemu_co_queue_init(&bs->throttled_reqs[1]);
|
2013-08-23 05:14:46 +04:00
|
|
|
bs->refcnt = 1;
|
2014-05-08 18:34:37 +04:00
|
|
|
bs->aio_context = qemu_get_aio_context();
|
2012-08-23 13:20:36 +04:00
|
|
|
|
2004-03-15 00:38:54 +03:00
|
|
|
return bs;
|
|
|
|
}
|
|
|
|
|
2012-08-23 13:20:36 +04:00
|
|
|
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
|
|
|
|
{
|
|
|
|
notifier_list_add(&bs->close_notifiers, notify);
|
|
|
|
}
|
|
|
|
|
2004-08-02 01:59:26 +04:00
|
|
|
BlockDriver *bdrv_find_format(const char *format_name)
|
|
|
|
{
|
|
|
|
BlockDriver *drv1;
|
2010-04-13 13:29:33 +04:00
|
|
|
QLIST_FOREACH(drv1, &bdrv_drivers, list) {
|
|
|
|
if (!strcmp(drv1->format_name, format_name)) {
|
2004-08-02 01:59:26 +04:00
|
|
|
return drv1;
|
2010-04-13 13:29:33 +04:00
|
|
|
}
|
2004-08-02 01:59:26 +04:00
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2013-05-29 15:35:40 +04:00
|
|
|
static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
|
2009-10-27 20:41:44 +03:00
|
|
|
{
|
2013-05-29 15:35:40 +04:00
|
|
|
static const char *whitelist_rw[] = {
|
|
|
|
CONFIG_BDRV_RW_WHITELIST
|
|
|
|
};
|
|
|
|
static const char *whitelist_ro[] = {
|
|
|
|
CONFIG_BDRV_RO_WHITELIST
|
2009-10-27 20:41:44 +03:00
|
|
|
};
|
|
|
|
const char **p;
|
|
|
|
|
2013-05-29 15:35:40 +04:00
|
|
|
if (!whitelist_rw[0] && !whitelist_ro[0]) {
|
2009-10-27 20:41:44 +03:00
|
|
|
return 1; /* no whitelist, anything goes */
|
2013-05-29 15:35:40 +04:00
|
|
|
}
|
2009-10-27 20:41:44 +03:00
|
|
|
|
2013-05-29 15:35:40 +04:00
|
|
|
for (p = whitelist_rw; *p; p++) {
|
2009-10-27 20:41:44 +03:00
|
|
|
if (!strcmp(drv->format_name, *p)) {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
2013-05-29 15:35:40 +04:00
|
|
|
if (read_only) {
|
|
|
|
for (p = whitelist_ro; *p; p++) {
|
|
|
|
if (!strcmp(drv->format_name, *p)) {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2009-10-27 20:41:44 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-05-29 15:35:40 +04:00
|
|
|
BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
|
|
|
|
bool read_only)
|
2009-10-27 20:41:44 +03:00
|
|
|
{
|
|
|
|
BlockDriver *drv = bdrv_find_format(format_name);
|
2013-05-29 15:35:40 +04:00
|
|
|
return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
|
2009-10-27 20:41:44 +03:00
|
|
|
}
|
|
|
|
|
2012-05-07 12:50:42 +04:00
|
|
|
typedef struct CreateCo {
|
|
|
|
BlockDriver *drv;
|
|
|
|
char *filename;
|
2014-06-05 13:20:51 +04:00
|
|
|
QemuOpts *opts;
|
2012-05-07 12:50:42 +04:00
|
|
|
int ret;
|
2013-09-06 19:14:26 +04:00
|
|
|
Error *err;
|
2012-05-07 12:50:42 +04:00
|
|
|
} CreateCo;
|
|
|
|
|
|
|
|
static void coroutine_fn bdrv_create_co_entry(void *opaque)
|
|
|
|
{
|
2013-09-06 19:14:26 +04:00
|
|
|
Error *local_err = NULL;
|
|
|
|
int ret;
|
|
|
|
|
2012-05-07 12:50:42 +04:00
|
|
|
CreateCo *cco = opaque;
|
|
|
|
assert(cco->drv);
|
|
|
|
|
2014-06-05 13:21:11 +04:00
|
|
|
ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
|
2014-01-30 18:07:28 +04:00
|
|
|
if (local_err) {
|
2013-09-06 19:14:26 +04:00
|
|
|
error_propagate(&cco->err, local_err);
|
|
|
|
}
|
|
|
|
cco->ret = ret;
|
2012-05-07 12:50:42 +04:00
|
|
|
}
|
|
|
|
|
2009-05-18 18:42:10 +04:00
|
|
|
int bdrv_create(BlockDriver *drv, const char* filename,
|
2014-06-05 13:20:51 +04:00
|
|
|
QemuOpts *opts, Error **errp)
|
2004-08-02 01:59:26 +04:00
|
|
|
{
|
2012-05-07 12:50:42 +04:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
Coroutine *co;
|
|
|
|
CreateCo cco = {
|
|
|
|
.drv = drv,
|
|
|
|
.filename = g_strdup(filename),
|
2014-06-05 13:20:51 +04:00
|
|
|
.opts = opts,
|
2012-05-07 12:50:42 +04:00
|
|
|
.ret = NOT_DONE,
|
2013-09-06 19:14:26 +04:00
|
|
|
.err = NULL,
|
2012-05-07 12:50:42 +04:00
|
|
|
};
|
|
|
|
|
2014-06-05 13:21:11 +04:00
|
|
|
if (!drv->bdrv_create) {
|
2013-09-06 19:14:26 +04:00
|
|
|
error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
|
2012-10-17 23:45:25 +04:00
|
|
|
ret = -ENOTSUP;
|
|
|
|
goto out;
|
2012-05-07 12:50:42 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
if (qemu_in_coroutine()) {
|
|
|
|
/* Fast-path if already in coroutine context */
|
|
|
|
bdrv_create_co_entry(&cco);
|
|
|
|
} else {
|
|
|
|
co = qemu_coroutine_create(bdrv_create_co_entry);
|
|
|
|
qemu_coroutine_enter(co, &cco);
|
|
|
|
while (cco.ret == NOT_DONE) {
|
2014-07-07 17:18:01 +04:00
|
|
|
aio_poll(qemu_get_aio_context(), true);
|
2012-05-07 12:50:42 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = cco.ret;
|
2013-09-06 19:14:26 +04:00
|
|
|
if (ret < 0) {
|
2014-01-30 18:07:28 +04:00
|
|
|
if (cco.err) {
|
2013-09-06 19:14:26 +04:00
|
|
|
error_propagate(errp, cco.err);
|
|
|
|
} else {
|
|
|
|
error_setg_errno(errp, -ret, "Could not create image");
|
|
|
|
}
|
|
|
|
}
|
2009-05-18 18:42:10 +04:00
|
|
|
|
2012-10-17 23:45:25 +04:00
|
|
|
out:
|
|
|
|
g_free(cco.filename);
|
2012-05-07 12:50:42 +04:00
|
|
|
return ret;
|
2004-08-02 01:59:26 +04:00
|
|
|
}
|
|
|
|
|
2014-06-05 13:21:11 +04:00
|
|
|
int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
|
2010-04-08 00:30:24 +04:00
|
|
|
{
|
|
|
|
BlockDriver *drv;
|
2013-09-06 19:14:26 +04:00
|
|
|
Error *local_err = NULL;
|
|
|
|
int ret;
|
2010-04-08 00:30:24 +04:00
|
|
|
|
2015-02-05 21:58:12 +03:00
|
|
|
drv = bdrv_find_protocol(filename, true, errp);
|
2010-04-08 00:30:24 +04:00
|
|
|
if (drv == NULL) {
|
2010-11-30 18:14:14 +03:00
|
|
|
return -ENOENT;
|
2010-04-08 00:30:24 +04:00
|
|
|
}
|
|
|
|
|
2014-06-05 13:21:11 +04:00
|
|
|
ret = bdrv_create(drv, filename, opts, &local_err);
|
2014-01-30 18:07:28 +04:00
|
|
|
if (local_err) {
|
2013-09-06 19:14:26 +04:00
|
|
|
error_propagate(errp, local_err);
|
|
|
|
}
|
|
|
|
return ret;
|
2010-04-08 00:30:24 +04:00
|
|
|
}
|
|
|
|
|
2014-07-16 19:48:16 +04:00
|
|
|
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
|
2013-12-11 22:26:16 +04:00
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
2014-07-16 19:48:16 +04:00
|
|
|
Error *local_err = NULL;
|
2013-12-11 22:26:16 +04:00
|
|
|
|
|
|
|
memset(&bs->bl, 0, sizeof(bs->bl));
|
|
|
|
|
2013-12-11 22:50:32 +04:00
|
|
|
if (!drv) {
|
2014-07-16 19:48:16 +04:00
|
|
|
return;
|
2013-12-11 22:50:32 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Take some limits from the children as a default */
|
|
|
|
if (bs->file) {
|
2014-07-16 19:48:16 +04:00
|
|
|
bdrv_refresh_limits(bs->file, &local_err);
|
|
|
|
if (local_err) {
|
|
|
|
error_propagate(errp, local_err);
|
|
|
|
return;
|
|
|
|
}
|
2013-12-11 22:50:32 +04:00
|
|
|
bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
|
2014-10-27 12:18:44 +03:00
|
|
|
bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
|
2013-11-28 13:23:32 +04:00
|
|
|
bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
|
|
|
|
} else {
|
|
|
|
bs->bl.opt_mem_alignment = 512;
|
2013-12-11 22:50:32 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
if (bs->backing_hd) {
|
2014-07-16 19:48:16 +04:00
|
|
|
bdrv_refresh_limits(bs->backing_hd, &local_err);
|
|
|
|
if (local_err) {
|
|
|
|
error_propagate(errp, local_err);
|
|
|
|
return;
|
|
|
|
}
|
2013-12-11 22:50:32 +04:00
|
|
|
bs->bl.opt_transfer_length =
|
|
|
|
MAX(bs->bl.opt_transfer_length,
|
|
|
|
bs->backing_hd->bl.opt_transfer_length);
|
2014-10-27 12:18:44 +03:00
|
|
|
bs->bl.max_transfer_length =
|
|
|
|
MIN_NON_ZERO(bs->bl.max_transfer_length,
|
|
|
|
bs->backing_hd->bl.max_transfer_length);
|
2013-11-28 13:23:32 +04:00
|
|
|
bs->bl.opt_mem_alignment =
|
|
|
|
MAX(bs->bl.opt_mem_alignment,
|
|
|
|
bs->backing_hd->bl.opt_mem_alignment);
|
2013-12-11 22:50:32 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Then let the driver override it */
|
|
|
|
if (drv->bdrv_refresh_limits) {
|
2014-07-16 19:48:16 +04:00
|
|
|
drv->bdrv_refresh_limits(bs, errp);
|
2013-12-11 22:26:16 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-02-16 14:47:54 +03:00
|
|
|
/**
|
|
|
|
* Try to get @bs's logical and physical block size.
|
|
|
|
* On success, store them in @bsz struct and return 0.
|
|
|
|
* On failure return -errno.
|
|
|
|
* @bs must not be empty.
|
|
|
|
*/
|
|
|
|
int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
|
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
|
|
|
|
|
|
|
if (drv && drv->bdrv_probe_blocksizes) {
|
|
|
|
return drv->bdrv_probe_blocksizes(bs, bsz);
|
|
|
|
}
|
|
|
|
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Try to get @bs's geometry (cyls, heads, sectors).
|
|
|
|
* On success, store them in @geo struct and return 0.
|
|
|
|
* On failure return -errno.
|
|
|
|
* @bs must not be empty.
|
|
|
|
*/
|
|
|
|
int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
|
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
|
|
|
|
|
|
|
if (drv && drv->bdrv_probe_geometry) {
|
|
|
|
return drv->bdrv_probe_geometry(bs, geo);
|
|
|
|
}
|
|
|
|
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
2012-05-28 11:27:54 +04:00
|
|
|
/*
|
|
|
|
* Create a uniquely-named empty temporary file.
|
|
|
|
* Return 0 upon success, otherwise a negative errno value.
|
|
|
|
*/
|
|
|
|
int get_tmp_filename(char *filename, int size)
|
2004-08-04 01:14:23 +04:00
|
|
|
{
|
2012-05-28 11:27:54 +04:00
|
|
|
#ifdef _WIN32
|
2007-01-07 20:27:07 +03:00
|
|
|
char temp_dir[MAX_PATH];
|
2012-05-28 11:27:54 +04:00
|
|
|
/* GetTempFileName requires that its output buffer (4th param)
|
|
|
|
have length MAX_PATH or greater. */
|
|
|
|
assert(size >= MAX_PATH);
|
|
|
|
return (GetTempPath(MAX_PATH, temp_dir)
|
|
|
|
&& GetTempFileName(temp_dir, "qem", 0, filename)
|
|
|
|
? 0 : -GetLastError());
|
2004-08-04 01:14:23 +04:00
|
|
|
#else
|
2004-04-01 03:37:16 +04:00
|
|
|
int fd;
|
2008-09-14 10:45:34 +04:00
|
|
|
const char *tmpdir;
|
2008-03-10 03:05:34 +03:00
|
|
|
tmpdir = getenv("TMPDIR");
|
2014-02-26 13:42:37 +04:00
|
|
|
if (!tmpdir) {
|
|
|
|
tmpdir = "/var/tmp";
|
|
|
|
}
|
2012-05-28 11:27:54 +04:00
|
|
|
if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
|
|
|
|
return -EOVERFLOW;
|
|
|
|
}
|
2004-08-02 01:59:26 +04:00
|
|
|
fd = mkstemp(filename);
|
2012-09-05 17:26:22 +04:00
|
|
|
if (fd < 0) {
|
|
|
|
return -errno;
|
|
|
|
}
|
|
|
|
if (close(fd) != 0) {
|
|
|
|
unlink(filename);
|
2012-05-28 11:27:54 +04:00
|
|
|
return -errno;
|
|
|
|
}
|
|
|
|
return 0;
|
2004-08-04 01:14:23 +04:00
|
|
|
#endif
|
2012-05-28 11:27:54 +04:00
|
|
|
}
|
2003-06-30 14:03:06 +04:00
|
|
|
|
2010-04-08 00:30:24 +04:00
|
|
|
/*
|
|
|
|
* Detect host devices. By convention, /dev/cdrom[N] is always
|
|
|
|
* recognized as a host CDROM.
|
|
|
|
*/
|
|
|
|
static BlockDriver *find_hdev_driver(const char *filename)
|
|
|
|
{
|
|
|
|
int score_max = 0, score;
|
|
|
|
BlockDriver *drv = NULL, *d;
|
|
|
|
|
|
|
|
QLIST_FOREACH(d, &bdrv_drivers, list) {
|
|
|
|
if (d->bdrv_probe_device) {
|
|
|
|
score = d->bdrv_probe_device(filename);
|
|
|
|
if (score > score_max) {
|
|
|
|
score_max = score;
|
|
|
|
drv = d;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return drv;
|
|
|
|
}
|
|
|
|
|
2013-07-10 17:47:39 +04:00
|
|
|
BlockDriver *bdrv_find_protocol(const char *filename,
|
2015-02-05 21:58:12 +03:00
|
|
|
bool allow_protocol_prefix,
|
|
|
|
Error **errp)
|
2006-08-01 20:21:11 +04:00
|
|
|
{
|
|
|
|
BlockDriver *drv1;
|
|
|
|
char protocol[128];
|
2009-07-02 17:12:26 +04:00
|
|
|
int len;
|
2006-08-01 20:21:11 +04:00
|
|
|
const char *p;
|
2006-08-19 15:45:59 +04:00
|
|
|
|
2010-04-14 16:17:38 +04:00
|
|
|
/* TODO Drivers without bdrv_file_open must be specified explicitly */
|
|
|
|
|
2010-06-23 14:25:17 +04:00
|
|
|
/*
|
|
|
|
* XXX(hch): we really should not let host device detection
|
|
|
|
* override an explicit protocol specification, but moving this
|
|
|
|
* later breaks access to device names with colons in them.
|
|
|
|
* Thanks to the brain-dead persistent naming schemes on udev-
|
|
|
|
* based Linux systems those actually are quite common.
|
|
|
|
*/
|
|
|
|
drv1 = find_hdev_driver(filename);
|
|
|
|
if (drv1) {
|
|
|
|
return drv1;
|
|
|
|
}
|
|
|
|
|
2013-07-10 17:47:39 +04:00
|
|
|
if (!path_has_protocol(filename) || !allow_protocol_prefix) {
|
2014-12-02 20:32:42 +03:00
|
|
|
return &bdrv_file;
|
2010-04-08 00:30:24 +04:00
|
|
|
}
|
2013-07-10 17:47:39 +04:00
|
|
|
|
2010-12-09 14:53:00 +03:00
|
|
|
p = strchr(filename, ':');
|
|
|
|
assert(p != NULL);
|
2009-07-02 17:12:26 +04:00
|
|
|
len = p - filename;
|
|
|
|
if (len > sizeof(protocol) - 1)
|
|
|
|
len = sizeof(protocol) - 1;
|
|
|
|
memcpy(protocol, filename, len);
|
|
|
|
protocol[len] = '\0';
|
2010-04-13 13:29:33 +04:00
|
|
|
QLIST_FOREACH(drv1, &bdrv_drivers, list) {
|
2007-09-17 01:08:06 +04:00
|
|
|
if (drv1->protocol_name &&
|
2010-04-13 13:29:33 +04:00
|
|
|
!strcmp(drv1->protocol_name, protocol)) {
|
2006-08-01 20:21:11 +04:00
|
|
|
return drv1;
|
2010-04-13 13:29:33 +04:00
|
|
|
}
|
2006-08-01 20:21:11 +04:00
|
|
|
}
|
2015-02-05 21:58:12 +03:00
|
|
|
|
|
|
|
error_setg(errp, "Unknown protocol '%s'", protocol);
|
2006-08-01 20:21:11 +04:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2014-11-20 18:27:10 +03:00
|
|
|
/*
|
|
|
|
* Guess image format by probing its contents.
|
|
|
|
* This is not a good idea when your image is raw (CVE-2008-2004), but
|
|
|
|
* we do it anyway for backward compatibility.
|
|
|
|
*
|
|
|
|
* @buf contains the image's first @buf_size bytes.
|
2014-11-20 18:27:11 +03:00
|
|
|
* @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
|
|
|
|
* but can be smaller if the image file is smaller)
|
2014-11-20 18:27:10 +03:00
|
|
|
* @filename is its filename.
|
|
|
|
*
|
|
|
|
* For all block drivers, call the bdrv_probe() method to get its
|
|
|
|
* probing score.
|
|
|
|
* Return the first block driver with the highest probing score.
|
|
|
|
*/
|
raw: Prohibit dangerous writes for probed images
If the user neglects to specify the image format, QEMU probes the
image to guess it automatically, for convenience.
Relying on format probing is insecure for raw images (CVE-2008-2004).
If the guest writes a suitable header to the device, the next probe
will recognize a format chosen by the guest. A malicious guest can
abuse this to gain access to host files, e.g. by crafting a QCOW2
header with backing file /etc/shadow.
Commit 1e72d3b (April 2008) provided -drive parameter format to let
users disable probing. Commit f965509 (March 2009) extended QCOW2 to
optionally store the backing file format, to let users disable backing
file probing. QED has had a flag to suppress probing since the
beginning (2010), set whenever a raw backing file is assigned.
All of these additions that allow to avoid format probing have to be
specified explicitly. The default still allows the attack.
In order to fix this, commit 79368c8 (July 2010) put probed raw images
in a restricted mode, in which they wouldn't be able to overwrite the
first few bytes of the image so that they would identify as a different
image. If a write to the first sector would write one of the signatures
of another driver, qemu would instead zero out the first four bytes.
This patch was later reverted in commit 8b33d9e (September 2010) because
it didn't get the handling of unaligned qiov members right.
Today's block layer that is based on coroutines and has qiov utility
functions makes it much easier to get this functionality right, so this
patch implements it.
The other differences of this patch to the old one are that it doesn't
silently write something different than the guest requested by zeroing
out some bytes (it fails the request instead) and that it doesn't
maintain a list of signatures in the raw driver (it calls the usual
probe function instead).
Note that this change doesn't introduce new breakage for false positive
cases where the guest legitimately writes data into the first sector
that matches the signatures of an image format (e.g. for nested virt):
These cases were broken before, only the failure mode changes from
corruption after the next restart (when the wrong format is probed) to
failing the problematic write request.
Also note that like in the original patch, the restrictions only apply
if the image format has been guessed by probing. Explicitly specifying a
format allows guests to write anything they like.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1416497234-29880-8-git-send-email-kwolf@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-11-20 18:27:12 +03:00
|
|
|
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
|
|
|
|
const char *filename)
|
2014-11-20 18:27:10 +03:00
|
|
|
{
|
|
|
|
int score_max = 0, score;
|
|
|
|
BlockDriver *drv = NULL, *d;
|
|
|
|
|
|
|
|
QLIST_FOREACH(d, &bdrv_drivers, list) {
|
|
|
|
if (d->bdrv_probe) {
|
|
|
|
score = d->bdrv_probe(buf, buf_size, filename);
|
|
|
|
if (score > score_max) {
|
|
|
|
score_max = score;
|
|
|
|
drv = d;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return drv;
|
|
|
|
}
|
|
|
|
|
2012-11-12 20:35:27 +04:00
|
|
|
static int find_image_format(BlockDriverState *bs, const char *filename,
|
2013-09-05 16:45:29 +04:00
|
|
|
BlockDriver **pdrv, Error **errp)
|
2009-06-15 15:55:19 +04:00
|
|
|
{
|
2014-11-20 18:27:10 +03:00
|
|
|
BlockDriver *drv;
|
2014-11-20 18:27:11 +03:00
|
|
|
uint8_t buf[BLOCK_PROBE_BUF_SIZE];
|
2012-11-12 20:35:27 +04:00
|
|
|
int ret = 0;
|
2010-05-17 20:45:57 +04:00
|
|
|
|
2010-06-01 20:37:31 +04:00
|
|
|
/* Return the raw BlockDriver * to scsi-generic devices or empty drives */
|
2013-01-10 18:39:27 +04:00
|
|
|
if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
|
2014-12-02 20:32:42 +03:00
|
|
|
*pdrv = &bdrv_raw;
|
2010-07-21 23:51:51 +04:00
|
|
|
return ret;
|
2010-05-27 19:56:28 +04:00
|
|
|
}
|
2010-05-17 20:45:57 +04:00
|
|
|
|
2006-08-01 20:21:11 +04:00
|
|
|
ret = bdrv_pread(bs, 0, buf, sizeof(buf));
|
|
|
|
if (ret < 0) {
|
2013-09-05 16:45:29 +04:00
|
|
|
error_setg_errno(errp, -ret, "Could not read image for determining its "
|
|
|
|
"format");
|
2010-07-21 23:51:51 +04:00
|
|
|
*pdrv = NULL;
|
|
|
|
return ret;
|
2006-08-01 20:21:11 +04:00
|
|
|
}
|
|
|
|
|
2014-11-20 18:27:10 +03:00
|
|
|
drv = bdrv_probe_all(buf, ret, filename);
|
2010-07-21 23:51:51 +04:00
|
|
|
if (!drv) {
|
2013-09-05 16:45:29 +04:00
|
|
|
error_setg(errp, "Could not determine image format: No compatible "
|
|
|
|
"driver found");
|
2010-07-21 23:51:51 +04:00
|
|
|
ret = -ENOENT;
|
|
|
|
}
|
|
|
|
*pdrv = drv;
|
|
|
|
return ret;
|
2004-08-02 01:59:26 +04:00
|
|
|
}
|
|
|
|
|
2010-04-19 19:56:41 +04:00
|
|
|
/**
|
|
|
|
* Set the current 'total_sectors' value
|
2014-06-26 15:23:17 +04:00
|
|
|
* Return 0 on success, -errno on error.
|
2010-04-19 19:56:41 +04:00
|
|
|
*/
|
|
|
|
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
|
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
|
|
|
|
2010-05-17 20:46:04 +04:00
|
|
|
/* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
|
|
|
|
if (bs->sg)
|
|
|
|
return 0;
|
|
|
|
|
2010-04-19 19:56:41 +04:00
|
|
|
/* query actual device if possible, otherwise just trust the hint */
|
|
|
|
if (drv->bdrv_getlength) {
|
|
|
|
int64_t length = drv->bdrv_getlength(bs);
|
|
|
|
if (length < 0) {
|
|
|
|
return length;
|
|
|
|
}
|
2013-11-06 15:48:06 +04:00
|
|
|
hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
|
2010-04-19 19:56:41 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
bs->total_sectors = hint;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-02-08 17:06:11 +04:00
|
|
|
/**
|
|
|
|
* Set open flags for a given discard mode
|
|
|
|
*
|
|
|
|
* Return 0 on success, -1 if the discard mode was invalid.
|
|
|
|
*/
|
|
|
|
int bdrv_parse_discard_flags(const char *mode, int *flags)
|
|
|
|
{
|
|
|
|
*flags &= ~BDRV_O_UNMAP;
|
|
|
|
|
|
|
|
if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
|
|
|
|
/* do nothing */
|
|
|
|
} else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
|
|
|
|
*flags |= BDRV_O_UNMAP;
|
|
|
|
} else {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-08-04 15:26:51 +04:00
|
|
|
/**
|
|
|
|
* Set open flags for a given cache mode
|
|
|
|
*
|
|
|
|
* Return 0 on success, -1 if the cache mode was invalid.
|
|
|
|
*/
|
|
|
|
int bdrv_parse_cache_flags(const char *mode, int *flags)
|
|
|
|
{
|
|
|
|
*flags &= ~BDRV_O_CACHE_MASK;
|
|
|
|
|
|
|
|
if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
|
|
|
|
*flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
|
2011-08-04 15:26:52 +04:00
|
|
|
} else if (!strcmp(mode, "directsync")) {
|
|
|
|
*flags |= BDRV_O_NOCACHE;
|
2011-08-04 15:26:51 +04:00
|
|
|
} else if (!strcmp(mode, "writeback")) {
|
|
|
|
*flags |= BDRV_O_CACHE_WB;
|
|
|
|
} else if (!strcmp(mode, "unsafe")) {
|
|
|
|
*flags |= BDRV_O_CACHE_WB;
|
|
|
|
*flags |= BDRV_O_NO_FLUSH;
|
|
|
|
} else if (!strcmp(mode, "writethrough")) {
|
|
|
|
/* this is the default */
|
|
|
|
} else {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-11-28 20:08:47 +04:00
|
|
|
/**
|
|
|
|
* The copy-on-read flag is actually a reference count so multiple users may
|
|
|
|
* use the feature without worrying about clobbering its previous state.
|
|
|
|
* Copy-on-read stays enabled until all users have called to disable it.
|
|
|
|
*/
|
|
|
|
void bdrv_enable_copy_on_read(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
bs->copy_on_read++;
|
|
|
|
}
|
|
|
|
|
|
|
|
void bdrv_disable_copy_on_read(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
assert(bs->copy_on_read > 0);
|
|
|
|
bs->copy_on_read--;
|
|
|
|
}
|
|
|
|
|
2014-05-06 14:11:42 +04:00
|
|
|
/*
|
|
|
|
* Returns the flags that a temporary snapshot should get, based on the
|
|
|
|
* originally requested flags (the originally requested image will have flags
|
|
|
|
* like a backing file)
|
|
|
|
*/
|
|
|
|
static int bdrv_temp_snapshot_flags(int flags)
|
|
|
|
{
|
|
|
|
return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
|
|
|
|
}
|
|
|
|
|
2014-04-11 23:29:52 +04:00
|
|
|
/*
|
|
|
|
* Returns the flags that bs->file should get, based on the given flags for
|
|
|
|
* the parent BDS
|
|
|
|
*/
|
|
|
|
static int bdrv_inherited_flags(int flags)
|
|
|
|
{
|
|
|
|
/* Enable protocol handling, disable format probing for bs->file */
|
|
|
|
flags |= BDRV_O_PROTOCOL;
|
|
|
|
|
|
|
|
/* Our block drivers take care to send flushes and respect unmap policy,
|
|
|
|
* so we can enable both unconditionally on lower layers. */
|
|
|
|
flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
|
|
|
|
|
|
|
|
/* Clear flags that only apply to the top layer */
|
2014-04-11 23:36:45 +04:00
|
|
|
flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
|
2014-04-11 23:29:52 +04:00
|
|
|
|
|
|
|
return flags;
|
|
|
|
}
|
|
|
|
|
2014-04-25 15:27:34 +04:00
|
|
|
/*
|
|
|
|
* Returns the flags that bs->backing_hd should get, based on the given flags
|
|
|
|
* for the parent BDS
|
|
|
|
*/
|
|
|
|
static int bdrv_backing_flags(int flags)
|
|
|
|
{
|
|
|
|
/* backing files always opened read-only */
|
|
|
|
flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
|
|
|
|
|
|
|
|
/* snapshot=on is handled on the top layer */
|
2014-04-11 21:16:36 +04:00
|
|
|
flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
|
2014-04-25 15:27:34 +04:00
|
|
|
|
|
|
|
return flags;
|
|
|
|
}
|
|
|
|
|
2012-11-12 20:05:39 +04:00
|
|
|
static int bdrv_open_flags(BlockDriverState *bs, int flags)
|
|
|
|
{
|
|
|
|
int open_flags = flags | BDRV_O_CACHE_WB;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Clear flags that are internal to the block layer before opening the
|
|
|
|
* image.
|
|
|
|
*/
|
2014-06-04 16:33:27 +04:00
|
|
|
open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
|
2012-11-12 20:05:39 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Snapshots should be writable.
|
|
|
|
*/
|
2014-04-11 21:16:36 +04:00
|
|
|
if (flags & BDRV_O_TEMPORARY) {
|
2012-11-12 20:05:39 +04:00
|
|
|
open_flags |= BDRV_O_RDWR;
|
|
|
|
}
|
|
|
|
|
|
|
|
return open_flags;
|
|
|
|
}
|
|
|
|
|
2014-01-24 17:11:52 +04:00
|
|
|
static void bdrv_assign_node_name(BlockDriverState *bs,
|
|
|
|
const char *node_name,
|
|
|
|
Error **errp)
|
2014-01-24 00:31:33 +04:00
|
|
|
{
|
|
|
|
if (!node_name) {
|
2014-01-24 17:11:52 +04:00
|
|
|
return;
|
2014-01-24 00:31:33 +04:00
|
|
|
}
|
|
|
|
|
2014-09-25 11:54:02 +04:00
|
|
|
/* Check for empty string or invalid characters */
|
2014-09-30 15:59:30 +04:00
|
|
|
if (!id_wellformed(node_name)) {
|
2014-09-25 11:54:02 +04:00
|
|
|
error_setg(errp, "Invalid node name");
|
2014-01-24 17:11:52 +04:00
|
|
|
return;
|
2014-01-24 00:31:33 +04:00
|
|
|
}
|
|
|
|
|
2014-02-12 20:15:07 +04:00
|
|
|
/* takes care of avoiding namespaces collisions */
|
2014-10-07 15:59:12 +04:00
|
|
|
if (blk_by_name(node_name)) {
|
2014-02-12 20:15:07 +04:00
|
|
|
error_setg(errp, "node-name=%s is conflicting with a device id",
|
|
|
|
node_name);
|
2014-01-24 17:11:52 +04:00
|
|
|
return;
|
2014-02-12 20:15:07 +04:00
|
|
|
}
|
|
|
|
|
2014-01-24 00:31:33 +04:00
|
|
|
/* takes care of avoiding duplicates node names */
|
|
|
|
if (bdrv_find_node(node_name)) {
|
|
|
|
error_setg(errp, "Duplicate node name");
|
2014-01-24 17:11:52 +04:00
|
|
|
return;
|
2014-01-24 00:31:33 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* copy node name into the bs and insert it into the graph list */
|
|
|
|
pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
|
|
|
|
QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
|
|
|
|
}
|
|
|
|
|
2010-04-14 17:24:50 +04:00
|
|
|
/*
|
|
|
|
* Common part for opening disk images and files
|
2013-03-15 13:35:04 +04:00
|
|
|
*
|
|
|
|
* Removes all processed options from *options.
|
2010-04-14 17:24:50 +04:00
|
|
|
*/
|
2012-11-12 20:35:27 +04:00
|
|
|
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
|
2013-09-05 16:45:29 +04:00
|
|
|
QDict *options, int flags, BlockDriver *drv, Error **errp)
|
2010-04-14 17:24:50 +04:00
|
|
|
{
|
|
|
|
int ret, open_flags;
|
2013-04-09 16:34:19 +04:00
|
|
|
const char *filename;
|
2014-01-24 00:31:33 +04:00
|
|
|
const char *node_name = NULL;
|
2013-09-05 16:45:29 +04:00
|
|
|
Error *local_err = NULL;
|
2010-04-14 17:24:50 +04:00
|
|
|
|
|
|
|
assert(drv != NULL);
|
2012-05-08 18:51:49 +04:00
|
|
|
assert(bs->file == NULL);
|
2013-03-06 15:20:31 +04:00
|
|
|
assert(options != NULL && bs->options != options);
|
2010-04-14 17:24:50 +04:00
|
|
|
|
2013-04-22 19:48:40 +04:00
|
|
|
if (file != NULL) {
|
|
|
|
filename = file->filename;
|
|
|
|
} else {
|
|
|
|
filename = qdict_get_try_str(options, "filename");
|
|
|
|
}
|
|
|
|
|
2014-02-03 17:49:42 +04:00
|
|
|
if (drv->bdrv_needs_filename && !filename) {
|
|
|
|
error_setg(errp, "The '%s' block driver requires a file name",
|
|
|
|
drv->format_name);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2013-04-22 19:48:40 +04:00
|
|
|
trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
|
2011-09-22 23:14:12 +04:00
|
|
|
|
2014-01-24 00:31:33 +04:00
|
|
|
node_name = qdict_get_try_str(options, "node-name");
|
2014-01-24 17:11:52 +04:00
|
|
|
bdrv_assign_node_name(bs, node_name, &local_err);
|
2014-04-25 18:50:31 +04:00
|
|
|
if (local_err) {
|
2014-01-24 17:11:52 +04:00
|
|
|
error_propagate(errp, local_err);
|
|
|
|
return -EINVAL;
|
2014-01-24 00:31:33 +04:00
|
|
|
}
|
|
|
|
qdict_del(options, "node-name");
|
|
|
|
|
2013-03-27 20:28:18 +04:00
|
|
|
/* bdrv_open() with directly using a protocol as drv. This layer is already
|
|
|
|
* opened, so assign it to bs (while file becomes a closed BlockDriverState)
|
|
|
|
* and return immediately. */
|
|
|
|
if (file != NULL && drv->bdrv_file_open) {
|
|
|
|
bdrv_swap(file, bs);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-04-14 17:24:50 +04:00
|
|
|
bs->open_flags = flags;
|
2011-11-29 14:35:47 +04:00
|
|
|
bs->guest_block_size = 512;
|
2011-11-29 15:42:20 +04:00
|
|
|
bs->request_alignment = 512;
|
2013-08-22 11:24:14 +04:00
|
|
|
bs->zero_beyond_eof = true;
|
2013-05-29 15:35:40 +04:00
|
|
|
open_flags = bdrv_open_flags(bs, flags);
|
|
|
|
bs->read_only = !(open_flags & BDRV_O_RDWR);
|
|
|
|
|
|
|
|
if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
|
2013-10-10 13:45:55 +04:00
|
|
|
error_setg(errp,
|
|
|
|
!bs->read_only && bdrv_is_whitelisted(drv, true)
|
|
|
|
? "Driver '%s' can only be used for read-only devices"
|
|
|
|
: "Driver '%s' is not whitelisted",
|
|
|
|
drv->format_name);
|
2013-05-29 15:35:40 +04:00
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
2010-04-14 17:24:50 +04:00
|
|
|
|
2011-11-28 20:08:47 +04:00
|
|
|
assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
|
2013-09-19 17:12:18 +04:00
|
|
|
if (flags & BDRV_O_COPY_ON_READ) {
|
|
|
|
if (!bs->read_only) {
|
|
|
|
bdrv_enable_copy_on_read(bs);
|
|
|
|
} else {
|
|
|
|
error_setg(errp, "Can't use copy-on-read on read-only device");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2011-11-28 20:08:47 +04:00
|
|
|
}
|
|
|
|
|
2013-03-18 19:40:51 +04:00
|
|
|
if (filename != NULL) {
|
|
|
|
pstrcpy(bs->filename, sizeof(bs->filename), filename);
|
|
|
|
} else {
|
|
|
|
bs->filename[0] = '\0';
|
|
|
|
}
|
2014-07-18 22:24:56 +04:00
|
|
|
pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
|
2010-04-14 17:24:50 +04:00
|
|
|
|
|
|
|
bs->drv = drv;
|
2011-08-21 07:09:37 +04:00
|
|
|
bs->opaque = g_malloc0(drv->instance_size);
|
2010-04-14 17:24:50 +04:00
|
|
|
|
2011-10-27 13:54:28 +04:00
|
|
|
bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
|
2011-10-27 13:54:27 +04:00
|
|
|
|
2010-04-14 16:17:38 +04:00
|
|
|
/* Open the image, either directly or using a protocol */
|
|
|
|
if (drv->bdrv_file_open) {
|
2013-03-27 20:28:18 +04:00
|
|
|
assert(file == NULL);
|
2013-09-24 19:07:04 +04:00
|
|
|
assert(!drv->bdrv_needs_filename || filename != NULL);
|
2013-09-05 16:45:29 +04:00
|
|
|
ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
|
2012-11-12 20:35:27 +04:00
|
|
|
} else {
|
2013-04-09 15:19:18 +04:00
|
|
|
if (file == NULL) {
|
2013-09-05 16:45:29 +04:00
|
|
|
error_setg(errp, "Can't use '%s' as a block driver for the "
|
|
|
|
"protocol level", drv->format_name);
|
2013-04-09 15:19:18 +04:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto free_and_fail;
|
|
|
|
}
|
2012-11-12 20:35:27 +04:00
|
|
|
bs->file = file;
|
2013-09-05 16:45:29 +04:00
|
|
|
ret = drv->bdrv_open(bs, options, open_flags, &local_err);
|
2010-04-14 16:17:38 +04:00
|
|
|
}
|
|
|
|
|
2010-04-14 17:24:50 +04:00
|
|
|
if (ret < 0) {
|
2014-01-30 18:07:28 +04:00
|
|
|
if (local_err) {
|
2013-09-05 16:45:29 +04:00
|
|
|
error_propagate(errp, local_err);
|
2013-09-24 14:14:01 +04:00
|
|
|
} else if (bs->filename[0]) {
|
|
|
|
error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
|
2013-09-05 16:45:29 +04:00
|
|
|
} else {
|
|
|
|
error_setg_errno(errp, -ret, "Could not open image");
|
|
|
|
}
|
2010-04-14 17:24:50 +04:00
|
|
|
goto free_and_fail;
|
|
|
|
}
|
|
|
|
|
block: Deprecate QCOW/QCOW2 encryption
We've steered users away from QCOW/QCOW2 encryption for a while,
because it's a flawed design (commit 136cd19 Describe flaws in
qcow/qcow2 encryption in the docs).
In addition to flawed crypto, we have comically bad usability, and
plain old bugs. Let me show you.
= Example images =
I'm going to use a raw image as backing file, and two QCOW2 images,
one encrypted, and one not:
$ qemu-img create -f raw backing.img 4m
Formatting 'backing.img', fmt=raw size=4194304
$ qemu-img create -f qcow2 -o encryption,backing_file=backing.img,backing_fmt=raw geheim.qcow2 4m
Formatting 'geheim.qcow2', fmt=qcow2 size=4194304 backing_file='backing.img' backing_fmt='raw' encryption=on cluster_size=65536 lazy_refcounts=off
$ qemu-img create -f qcow2 -o backing_file=backing.img,backing_fmt=raw normal.qcow2 4m
Formatting 'normal.qcow2', fmt=qcow2 size=4194304 backing_file='backing.img' backing_fmt='raw' encryption=off cluster_size=65536 lazy_refcounts=off
= Usability issues =
== Confusing startup ==
When no image is encrypted, and you don't give -S, QEMU starts the
guest immediately:
$ qemu-system-x86_64 -nodefaults -display none -monitor stdio normal.qcow2
QEMU 2.2.50 monitor - type 'help' for more information
(qemu) info status
VM status: running
But as soon as there's an encrypted image in play, the guest is *not*
started, with no notification whatsoever:
$ qemu-system-x86_64 -nodefaults -display none -monitor stdio geheim.qcow2
QEMU 2.2.50 monitor - type 'help' for more information
(qemu) info status
VM status: paused (prelaunch)
If the user figured out that he needs to type "cont" to enter his
keys, the confusion enters the next level: "cont" asks for at most
*one* key. If more are needed, it then silently does nothing. The
user has to type "cont" once per encrypted image:
$ qemu-system-x86_64 -nodefaults -display none -monitor stdio -drive if=none,file=geheim.qcow2 -drive if=none,file=geheim.qcow2
QEMU 2.2.50 monitor - type 'help' for more information
(qemu) info status
VM status: paused (prelaunch)
(qemu) c
none0 (geheim.qcow2) is encrypted.
Password: ******
(qemu) info status
VM status: paused (prelaunch)
(qemu) c
none1 (geheim.qcow2) is encrypted.
Password: ******
(qemu) info status
VM status: running
== Incorrect passwords not caught ==
All existing encryption schemes give you the GIGO treatment: garbage
password in, garbage data out. Guests usually refuse to mount
garbage, but other usage is prone to data loss.
== Need to stop the guest to add an encrypted image ==
$ qemu-system-x86_64 -nodefaults -display none -monitor stdio
QEMU 2.2.50 monitor - type 'help' for more information
(qemu) info status
VM status: running
(qemu) drive_add "" if=none,file=geheim.qcow2
Guest must be stopped for opening of encrypted image
(qemu) stop
(qemu) drive_add "" if=none,file=geheim.qcow2
OK
Commit c3adb58 added this restriction. Before, we could expose images
lacking an encryption key to guests, with potentially catastrophic
results. See also "Use without key is not always caught".
= Bugs =
== Use without key is not always caught ==
Encrypted images can be in an intermediate state "opened, but no key".
The weird startup behavior and the need to stop the guest are there to
ensure the guest isn't exposed to that state. But other things still
are!
* drive_backup
$ qemu-system-x86_64 -nodefaults -display none -monitor stdio geheim.qcow2
QEMU 2.2.50 monitor - type 'help' for more information
(qemu) drive_backup -f ide0-hd0 out.img raw
Formatting 'out.img', fmt=raw size=4194304
I guess this writes encrypted data to raw image out.img. Good luck
with figuring out how to decrypt that again.
* commit
$ qemu-system-x86_64 -nodefaults -display none -monitor stdio geheim.qcow2
QEMU 2.2.50 monitor - type 'help' for more information
(qemu) commit ide0-hd0
I guess this writes encrypted data into the unencrypted raw backing
image, effectively destroying it.
== QMP device_add of usb-storage fails when it shouldn't ==
When the image is encrypted, device_add creates the device, defers
actually attaching it to when the key becomes available, then fails.
This is wrong. device_add must either create the device and succeed,
or do nothing and fail.
$ qemu-system-x86_64 -nodefaults -display none -usb -qmp stdio -drive if=none,id=foo,file=geheim.qcow2
{"QMP": {"version": {"qemu": {"micro": 50, "minor": 2, "major": 2}, "package": ""}, "capabilities": []}}
{ "execute": "qmp_capabilities" }
{"return": {}}
{ "execute": "device_add", "arguments": { "driver": "usb-storage", "id": "bar", "drive": "foo" } }
{"error": {"class": "DeviceEncrypted", "desc": "'foo' (geheim.qcow2) is encrypted"}}
{"execute":"device_del","arguments": { "id": "bar" } }
{"timestamp": {"seconds": 1426003440, "microseconds": 237181}, "event": "DEVICE_DELETED", "data": {"path": "/machine/peripheral/bar/bar.0/legacy[0]"}}
{"timestamp": {"seconds": 1426003440, "microseconds": 238231}, "event": "DEVICE_DELETED", "data": {"device": "bar", "path": "/machine/peripheral/bar"}}
{"return": {}}
This stuff is worse than useless, it's a trap for users.
If people become sufficiently interested in encrypted images to
contribute a cryptographically sane implementation for QCOW2 (or
whatever other format), then rewriting the necessary support around it
from scratch will likely be easier and yield better results than
fixing up the existing mess.
Let's deprecate the mess now, drop it after a grace period, and move
on.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2015-03-13 23:09:40 +03:00
|
|
|
if (bs->encrypted) {
|
|
|
|
error_report("Encrypted images are deprecated");
|
|
|
|
error_printf("Support for them will be removed in a future release.\n"
|
|
|
|
"You can use 'qemu-img convert' to convert your image"
|
|
|
|
" to an unencrypted one.\n");
|
|
|
|
}
|
|
|
|
|
2010-04-19 19:56:41 +04:00
|
|
|
ret = refresh_total_sectors(bs, bs->total_sectors);
|
|
|
|
if (ret < 0) {
|
2013-09-05 16:45:29 +04:00
|
|
|
error_setg_errno(errp, -ret, "Could not refresh total sector count");
|
2010-04-19 19:56:41 +04:00
|
|
|
goto free_and_fail;
|
2010-04-14 17:24:50 +04:00
|
|
|
}
|
2010-04-19 19:56:41 +04:00
|
|
|
|
2014-07-16 19:48:16 +04:00
|
|
|
bdrv_refresh_limits(bs, &local_err);
|
|
|
|
if (local_err) {
|
|
|
|
error_propagate(errp, local_err);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto free_and_fail;
|
|
|
|
}
|
|
|
|
|
2011-11-29 15:42:20 +04:00
|
|
|
assert(bdrv_opt_mem_align(bs) != 0);
|
2014-03-05 18:49:55 +04:00
|
|
|
assert((bs->request_alignment != 0) || bs->sg);
|
2010-04-14 17:24:50 +04:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
free_and_fail:
|
2012-11-12 20:35:27 +04:00
|
|
|
bs->file = NULL;
|
2011-08-21 07:09:37 +04:00
|
|
|
g_free(bs->opaque);
|
2010-04-14 17:24:50 +04:00
|
|
|
bs->opaque = NULL;
|
|
|
|
bs->drv = NULL;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-05-26 13:45:08 +04:00
|
|
|
static QDict *parse_json_filename(const char *filename, Error **errp)
|
|
|
|
{
|
|
|
|
QObject *options_obj;
|
|
|
|
QDict *options;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = strstart(filename, "json:", &filename);
|
|
|
|
assert(ret);
|
|
|
|
|
|
|
|
options_obj = qobject_from_json(filename);
|
|
|
|
if (!options_obj) {
|
|
|
|
error_setg(errp, "Could not parse the JSON options");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (qobject_type(options_obj) != QTYPE_QDICT) {
|
|
|
|
qobject_decref(options_obj);
|
|
|
|
error_setg(errp, "Invalid JSON object given");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
options = qobject_to_qdict(options_obj);
|
|
|
|
qdict_flatten(options);
|
|
|
|
|
|
|
|
return options;
|
|
|
|
}
|
|
|
|
|
2010-04-12 18:37:13 +04:00
|
|
|
/*
|
2014-05-26 13:09:59 +04:00
|
|
|
* Fills in default options for opening images and converts the legacy
|
|
|
|
* filename/flags pair to option QDict entries.
|
2010-04-12 18:37:13 +04:00
|
|
|
*/
|
2014-05-26 13:45:08 +04:00
|
|
|
static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
|
2014-05-27 12:50:29 +04:00
|
|
|
BlockDriver *drv, Error **errp)
|
2004-08-02 01:59:26 +04:00
|
|
|
{
|
2014-05-26 13:45:08 +04:00
|
|
|
const char *filename = *pfilename;
|
2013-03-18 19:40:51 +04:00
|
|
|
const char *drvname;
|
2014-05-26 13:39:55 +04:00
|
|
|
bool protocol = flags & BDRV_O_PROTOCOL;
|
2014-04-03 14:45:51 +04:00
|
|
|
bool parse_filename = false;
|
2013-09-05 16:45:29 +04:00
|
|
|
Error *local_err = NULL;
|
2006-08-01 20:21:11 +04:00
|
|
|
|
2014-05-26 13:45:08 +04:00
|
|
|
/* Parse json: pseudo-protocol */
|
|
|
|
if (filename && g_str_has_prefix(filename, "json:")) {
|
|
|
|
QDict *json_options = parse_json_filename(filename, &local_err);
|
|
|
|
if (local_err) {
|
|
|
|
error_propagate(errp, local_err);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Options given in the filename have lower priority than options
|
|
|
|
* specified directly */
|
|
|
|
qdict_join(*options, json_options, false);
|
|
|
|
QDECREF(json_options);
|
|
|
|
*pfilename = filename = NULL;
|
|
|
|
}
|
|
|
|
|
2013-04-09 16:34:19 +04:00
|
|
|
/* Fetch the file name from the options QDict if necessary */
|
2014-05-27 12:50:29 +04:00
|
|
|
if (protocol && filename) {
|
2014-05-26 13:09:59 +04:00
|
|
|
if (!qdict_haskey(*options, "filename")) {
|
|
|
|
qdict_put(*options, "filename", qstring_from_str(filename));
|
|
|
|
parse_filename = true;
|
|
|
|
} else {
|
|
|
|
error_setg(errp, "Can't specify 'file' and 'filename' options at "
|
|
|
|
"the same time");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2013-04-09 16:34:19 +04:00
|
|
|
}
|
|
|
|
|
2013-03-18 19:40:51 +04:00
|
|
|
/* Find the right block driver */
|
2014-05-26 13:09:59 +04:00
|
|
|
filename = qdict_get_try_str(*options, "filename");
|
2014-02-18 21:33:11 +04:00
|
|
|
drvname = qdict_get_try_str(*options, "driver");
|
2014-05-26 13:09:59 +04:00
|
|
|
|
2014-05-27 12:50:29 +04:00
|
|
|
if (drv) {
|
|
|
|
if (drvname) {
|
|
|
|
error_setg(errp, "Driver specified twice");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
drvname = drv->format_name;
|
|
|
|
qdict_put(*options, "driver", qstring_from_str(drvname));
|
|
|
|
} else {
|
|
|
|
if (!drvname && protocol) {
|
|
|
|
if (filename) {
|
2015-02-05 21:58:12 +03:00
|
|
|
drv = bdrv_find_protocol(filename, parse_filename, errp);
|
2014-05-27 12:50:29 +04:00
|
|
|
if (!drv) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
drvname = drv->format_name;
|
|
|
|
qdict_put(*options, "driver", qstring_from_str(drvname));
|
|
|
|
} else {
|
|
|
|
error_setg(errp, "Must specify either driver or file");
|
2014-05-26 13:09:59 +04:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2014-05-27 12:50:29 +04:00
|
|
|
} else if (drvname) {
|
|
|
|
drv = bdrv_find_format(drvname);
|
|
|
|
if (!drv) {
|
|
|
|
error_setg(errp, "Unknown driver '%s'", drvname);
|
|
|
|
return -ENOENT;
|
|
|
|
}
|
2013-07-10 17:47:39 +04:00
|
|
|
}
|
2013-03-18 19:40:51 +04:00
|
|
|
}
|
|
|
|
|
2014-05-27 12:50:29 +04:00
|
|
|
assert(drv || !protocol);
|
2013-03-18 19:40:51 +04:00
|
|
|
|
2014-05-26 13:09:59 +04:00
|
|
|
/* Driver-specific filename parsing */
|
2014-05-27 12:50:29 +04:00
|
|
|
if (drv && drv->bdrv_parse_filename && parse_filename) {
|
2014-02-18 21:33:11 +04:00
|
|
|
drv->bdrv_parse_filename(filename, *options, &local_err);
|
2014-01-30 18:07:28 +04:00
|
|
|
if (local_err) {
|
2013-09-05 16:45:29 +04:00
|
|
|
error_propagate(errp, local_err);
|
2014-05-26 13:09:59 +04:00
|
|
|
return -EINVAL;
|
2013-03-15 21:47:22 +04:00
|
|
|
}
|
2014-03-06 01:41:36 +04:00
|
|
|
|
|
|
|
if (!drv->bdrv_needs_filename) {
|
|
|
|
qdict_del(*options, "filename");
|
|
|
|
}
|
2013-03-15 21:47:22 +04:00
|
|
|
}
|
|
|
|
|
2014-05-26 13:09:59 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-05-23 17:29:45 +04:00
|
|
|
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
|
|
|
|
{
|
|
|
|
|
2014-05-23 17:29:47 +04:00
|
|
|
if (bs->backing_hd) {
|
|
|
|
assert(bs->backing_blocker);
|
|
|
|
bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
|
|
|
|
} else if (backing_hd) {
|
|
|
|
error_setg(&bs->backing_blocker,
|
|
|
|
"device is used as backing hd of '%s'",
|
2014-10-07 15:59:11 +04:00
|
|
|
bdrv_get_device_name(bs));
|
2014-05-23 17:29:47 +04:00
|
|
|
}
|
|
|
|
|
2014-05-23 17:29:45 +04:00
|
|
|
bs->backing_hd = backing_hd;
|
|
|
|
if (!backing_hd) {
|
2014-05-23 17:29:47 +04:00
|
|
|
error_free(bs->backing_blocker);
|
|
|
|
bs->backing_blocker = NULL;
|
2014-05-23 17:29:45 +04:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
bs->open_flags &= ~BDRV_O_NO_BACKING;
|
|
|
|
pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
|
|
|
|
pstrcpy(bs->backing_format, sizeof(bs->backing_format),
|
|
|
|
backing_hd->drv ? backing_hd->drv->format_name : "");
|
2014-05-23 17:29:47 +04:00
|
|
|
|
|
|
|
bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
|
|
|
|
/* Otherwise we won't be able to commit due to check in bdrv_commit */
|
2014-09-11 09:14:00 +04:00
|
|
|
bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
|
2014-05-23 17:29:47 +04:00
|
|
|
bs->backing_blocker);
|
2014-05-23 17:29:45 +04:00
|
|
|
out:
|
2014-07-16 19:48:16 +04:00
|
|
|
bdrv_refresh_limits(bs, NULL);
|
2014-05-23 17:29:45 +04:00
|
|
|
}
|
|
|
|
|
2013-03-28 18:29:24 +04:00
|
|
|
/*
|
|
|
|
* Opens the backing file for a BlockDriverState if not yet open
|
|
|
|
*
|
|
|
|
* options is a QDict of options to pass to the block drivers, or NULL for an
|
|
|
|
* empty set of options. The reference to the QDict is transferred to this
|
|
|
|
* function (even on failure), so if the caller intends to reuse the dictionary,
|
|
|
|
* it needs to use QINCREF() before calling bdrv_file_open.
|
|
|
|
*/
|
2013-09-05 16:45:29 +04:00
|
|
|
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
|
2012-10-18 18:49:17 +04:00
|
|
|
{
|
2014-04-22 19:05:27 +04:00
|
|
|
char *backing_filename = g_malloc0(PATH_MAX);
|
2014-04-25 15:27:34 +04:00
|
|
|
int ret = 0;
|
2014-05-23 17:29:45 +04:00
|
|
|
BlockDriverState *backing_hd;
|
2013-09-05 16:45:29 +04:00
|
|
|
Error *local_err = NULL;
|
2012-10-18 18:49:17 +04:00
|
|
|
|
|
|
|
if (bs->backing_hd != NULL) {
|
2013-03-28 18:29:24 +04:00
|
|
|
QDECREF(options);
|
2014-04-22 19:05:27 +04:00
|
|
|
goto free_exit;
|
2012-10-18 18:49:17 +04:00
|
|
|
}
|
|
|
|
|
2013-03-28 18:29:24 +04:00
|
|
|
/* NULL means an empty set of options */
|
|
|
|
if (options == NULL) {
|
|
|
|
options = qdict_new();
|
|
|
|
}
|
|
|
|
|
2012-10-18 18:49:17 +04:00
|
|
|
bs->open_flags &= ~BDRV_O_NO_BACKING;
|
2013-04-12 22:27:07 +04:00
|
|
|
if (qdict_haskey(options, "file.filename")) {
|
|
|
|
backing_filename[0] = '\0';
|
|
|
|
} else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
|
2013-03-28 18:29:24 +04:00
|
|
|
QDECREF(options);
|
2014-04-22 19:05:27 +04:00
|
|
|
goto free_exit;
|
2013-09-22 16:05:06 +04:00
|
|
|
} else {
|
2014-11-26 19:20:26 +03:00
|
|
|
bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
|
|
|
|
&local_err);
|
|
|
|
if (local_err) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
error_propagate(errp, local_err);
|
|
|
|
QDECREF(options);
|
|
|
|
goto free_exit;
|
|
|
|
}
|
2012-10-18 18:49:17 +04:00
|
|
|
}
|
|
|
|
|
2014-06-04 17:09:35 +04:00
|
|
|
if (!bs->drv || !bs->drv->supports_backing) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
error_setg(errp, "Driver doesn't support backing files");
|
|
|
|
QDECREF(options);
|
|
|
|
goto free_exit;
|
|
|
|
}
|
|
|
|
|
2014-10-07 15:59:03 +04:00
|
|
|
backing_hd = bdrv_new();
|
2014-05-23 17:29:45 +04:00
|
|
|
|
2014-11-25 20:12:42 +03:00
|
|
|
if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
|
|
|
|
qdict_put(options, "driver", qstring_from_str(bs->backing_format));
|
2012-10-18 18:49:17 +04:00
|
|
|
}
|
|
|
|
|
2014-02-18 21:33:05 +04:00
|
|
|
assert(bs->backing_hd == NULL);
|
2014-05-23 17:29:45 +04:00
|
|
|
ret = bdrv_open(&backing_hd,
|
2014-02-18 21:33:06 +04:00
|
|
|
*backing_filename ? backing_filename : NULL, NULL, options,
|
2014-11-25 20:12:42 +03:00
|
|
|
bdrv_backing_flags(bs->open_flags), NULL, &local_err);
|
2012-10-18 18:49:17 +04:00
|
|
|
if (ret < 0) {
|
2014-05-23 17:29:45 +04:00
|
|
|
bdrv_unref(backing_hd);
|
|
|
|
backing_hd = NULL;
|
2012-10-18 18:49:17 +04:00
|
|
|
bs->open_flags |= BDRV_O_NO_BACKING;
|
2013-11-08 07:26:49 +04:00
|
|
|
error_setg(errp, "Could not open backing file: %s",
|
|
|
|
error_get_pretty(local_err));
|
|
|
|
error_free(local_err);
|
2014-04-22 19:05:27 +04:00
|
|
|
goto free_exit;
|
2012-10-18 18:49:17 +04:00
|
|
|
}
|
2014-05-23 17:29:45 +04:00
|
|
|
bdrv_set_backing_hd(bs, backing_hd);
|
2014-01-08 23:43:25 +04:00
|
|
|
|
2014-04-22 19:05:27 +04:00
|
|
|
free_exit:
|
|
|
|
g_free(backing_filename);
|
|
|
|
return ret;
|
2012-10-18 18:49:17 +04:00
|
|
|
}
|
|
|
|
|
2013-12-20 22:28:11 +04:00
|
|
|
/*
|
|
|
|
* Opens a disk image whose options are given as BlockdevRef in another block
|
|
|
|
* device's options.
|
|
|
|
*
|
|
|
|
* If allow_none is true, no image will be opened if filename is false and no
|
|
|
|
* BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
|
|
|
|
*
|
|
|
|
* bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
|
|
|
|
* That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
|
|
|
|
* itself, all options starting with "${bdref_key}." are considered part of the
|
|
|
|
* BlockdevRef.
|
|
|
|
*
|
|
|
|
* The BlockdevRef will be removed from the options QDict.
|
2014-02-18 21:33:05 +04:00
|
|
|
*
|
|
|
|
* To conform with the behavior of bdrv_open(), *pbs has to be NULL.
|
2013-12-20 22:28:11 +04:00
|
|
|
*/
|
|
|
|
int bdrv_open_image(BlockDriverState **pbs, const char *filename,
|
|
|
|
QDict *options, const char *bdref_key, int flags,
|
2014-02-18 21:33:12 +04:00
|
|
|
bool allow_none, Error **errp)
|
2013-12-20 22:28:11 +04:00
|
|
|
{
|
|
|
|
QDict *image_options;
|
|
|
|
int ret;
|
|
|
|
char *bdref_key_dot;
|
|
|
|
const char *reference;
|
|
|
|
|
2014-02-18 21:33:05 +04:00
|
|
|
assert(pbs);
|
|
|
|
assert(*pbs == NULL);
|
|
|
|
|
2013-12-20 22:28:11 +04:00
|
|
|
bdref_key_dot = g_strdup_printf("%s.", bdref_key);
|
|
|
|
qdict_extract_subqdict(options, &image_options, bdref_key_dot);
|
|
|
|
g_free(bdref_key_dot);
|
|
|
|
|
|
|
|
reference = qdict_get_try_str(options, bdref_key);
|
|
|
|
if (!filename && !reference && !qdict_size(image_options)) {
|
|
|
|
if (allow_none) {
|
|
|
|
ret = 0;
|
|
|
|
} else {
|
|
|
|
error_setg(errp, "A block device must be specified for \"%s\"",
|
|
|
|
bdref_key);
|
|
|
|
ret = -EINVAL;
|
|
|
|
}
|
2014-05-28 13:16:57 +04:00
|
|
|
QDECREF(image_options);
|
2013-12-20 22:28:11 +04:00
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
2014-02-18 21:33:12 +04:00
|
|
|
ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
|
2013-12-20 22:28:11 +04:00
|
|
|
|
|
|
|
done:
|
|
|
|
qdict_del(options, bdref_key);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-06-23 19:28:23 +04:00
|
|
|
int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
|
2014-04-03 14:09:34 +04:00
|
|
|
{
|
|
|
|
/* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
|
2014-04-22 19:05:27 +04:00
|
|
|
char *tmp_filename = g_malloc0(PATH_MAX + 1);
|
2014-04-03 14:09:34 +04:00
|
|
|
int64_t total_size;
|
2014-06-05 13:20:51 +04:00
|
|
|
QemuOpts *opts = NULL;
|
2014-04-03 14:09:34 +04:00
|
|
|
QDict *snapshot_options;
|
|
|
|
BlockDriverState *bs_snapshot;
|
|
|
|
Error *local_err;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* if snapshot, we create a temporary backing file and open it
|
|
|
|
instead of opening 'filename' directly */
|
|
|
|
|
|
|
|
/* Get the required size from the image */
|
2014-04-04 19:07:19 +04:00
|
|
|
total_size = bdrv_getlength(bs);
|
|
|
|
if (total_size < 0) {
|
2014-06-23 19:28:23 +04:00
|
|
|
ret = total_size;
|
2014-04-04 19:07:19 +04:00
|
|
|
error_setg_errno(errp, -total_size, "Could not get image size");
|
2014-04-22 19:05:27 +04:00
|
|
|
goto out;
|
2014-04-04 19:07:19 +04:00
|
|
|
}
|
2014-04-03 14:09:34 +04:00
|
|
|
|
|
|
|
/* Create the temporary image */
|
2014-04-22 19:05:27 +04:00
|
|
|
ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
|
2014-04-03 14:09:34 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Could not get temporary filename");
|
2014-04-22 19:05:27 +04:00
|
|
|
goto out;
|
2014-04-03 14:09:34 +04:00
|
|
|
}
|
|
|
|
|
2014-12-02 20:32:42 +03:00
|
|
|
opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
|
2014-06-05 13:21:11 +04:00
|
|
|
&error_abort);
|
2015-02-12 18:46:36 +03:00
|
|
|
qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
|
2014-12-02 20:32:42 +03:00
|
|
|
ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
|
2014-06-05 13:20:51 +04:00
|
|
|
qemu_opts_del(opts);
|
2014-04-03 14:09:34 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Could not create temporary overlay "
|
|
|
|
"'%s': %s", tmp_filename,
|
|
|
|
error_get_pretty(local_err));
|
|
|
|
error_free(local_err);
|
2014-04-22 19:05:27 +04:00
|
|
|
goto out;
|
2014-04-03 14:09:34 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Prepare a new options QDict for the temporary file */
|
|
|
|
snapshot_options = qdict_new();
|
|
|
|
qdict_put(snapshot_options, "file.driver",
|
|
|
|
qstring_from_str("file"));
|
|
|
|
qdict_put(snapshot_options, "file.filename",
|
|
|
|
qstring_from_str(tmp_filename));
|
|
|
|
|
2014-10-07 15:59:03 +04:00
|
|
|
bs_snapshot = bdrv_new();
|
2014-04-03 14:09:34 +04:00
|
|
|
|
|
|
|
ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
|
2014-12-02 20:32:42 +03:00
|
|
|
flags, &bdrv_qcow2, &local_err);
|
2014-04-03 14:09:34 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
error_propagate(errp, local_err);
|
2014-04-22 19:05:27 +04:00
|
|
|
goto out;
|
2014-04-03 14:09:34 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
bdrv_append(bs_snapshot, bs);
|
2014-04-22 19:05:27 +04:00
|
|
|
|
|
|
|
out:
|
|
|
|
g_free(tmp_filename);
|
2014-06-23 19:28:23 +04:00
|
|
|
return ret;
|
2014-04-03 14:09:34 +04:00
|
|
|
}
|
|
|
|
|
2010-04-12 18:37:13 +04:00
|
|
|
/*
|
|
|
|
* Opens a disk image (raw, qcow2, vmdk, ...)
|
2013-03-15 13:35:02 +04:00
|
|
|
*
|
|
|
|
* options is a QDict of options to pass to the block drivers, or NULL for an
|
|
|
|
* empty set of options. The reference to the QDict belongs to the block layer
|
|
|
|
* after the call (even on failure), so if the caller intends to reuse the
|
|
|
|
* dictionary, it needs to use QINCREF() before calling bdrv_open.
|
2014-02-18 21:33:05 +04:00
|
|
|
*
|
|
|
|
* If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
|
|
|
|
* If it is not NULL, the referenced BDS will be reused.
|
2014-02-18 21:33:06 +04:00
|
|
|
*
|
|
|
|
* The reference parameter may be used to specify an existing block device which
|
|
|
|
* should be opened. If specified, neither options nor a filename may be given,
|
|
|
|
* nor can an existing BDS be reused (that is, *pbs has to be NULL).
|
2010-04-12 18:37:13 +04:00
|
|
|
*/
|
2014-02-18 21:33:06 +04:00
|
|
|
int bdrv_open(BlockDriverState **pbs, const char *filename,
|
|
|
|
const char *reference, QDict *options, int flags,
|
|
|
|
BlockDriver *drv, Error **errp)
|
2004-08-02 01:59:26 +04:00
|
|
|
{
|
2010-04-12 18:37:13 +04:00
|
|
|
int ret;
|
2014-02-18 21:33:05 +04:00
|
|
|
BlockDriverState *file = NULL, *bs;
|
2013-07-09 13:09:02 +04:00
|
|
|
const char *drvname;
|
2013-09-05 16:45:29 +04:00
|
|
|
Error *local_err = NULL;
|
2014-05-06 14:11:42 +04:00
|
|
|
int snapshot_flags = 0;
|
2005-04-29 01:09:32 +04:00
|
|
|
|
2014-02-18 21:33:05 +04:00
|
|
|
assert(pbs);
|
|
|
|
|
2014-02-18 21:33:06 +04:00
|
|
|
if (reference) {
|
|
|
|
bool options_non_empty = options ? qdict_size(options) : false;
|
|
|
|
QDECREF(options);
|
|
|
|
|
|
|
|
if (*pbs) {
|
|
|
|
error_setg(errp, "Cannot reuse an existing BDS when referencing "
|
|
|
|
"another block device");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (filename || options_non_empty) {
|
|
|
|
error_setg(errp, "Cannot reference an existing block device with "
|
|
|
|
"additional options or a new filename");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
bs = bdrv_lookup_bs(reference, reference, errp);
|
|
|
|
if (!bs) {
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
bdrv_ref(bs);
|
|
|
|
*pbs = bs;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-02-18 21:33:05 +04:00
|
|
|
if (*pbs) {
|
|
|
|
bs = *pbs;
|
|
|
|
} else {
|
2014-10-07 15:59:03 +04:00
|
|
|
bs = bdrv_new();
|
2014-02-18 21:33:05 +04:00
|
|
|
}
|
|
|
|
|
2013-03-15 13:35:02 +04:00
|
|
|
/* NULL means an empty set of options */
|
|
|
|
if (options == NULL) {
|
|
|
|
options = qdict_new();
|
|
|
|
}
|
|
|
|
|
2014-05-27 12:50:29 +04:00
|
|
|
ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
|
2014-05-26 13:39:55 +04:00
|
|
|
if (local_err) {
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2014-06-04 16:19:44 +04:00
|
|
|
/* Find the right image format driver */
|
|
|
|
drv = NULL;
|
|
|
|
drvname = qdict_get_try_str(options, "driver");
|
|
|
|
if (drvname) {
|
|
|
|
drv = bdrv_find_format(drvname);
|
|
|
|
qdict_del(options, "driver");
|
|
|
|
if (!drv) {
|
|
|
|
error_setg(errp, "Unknown driver: '%s'", drvname);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(drvname || !(flags & BDRV_O_PROTOCOL));
|
|
|
|
if (drv && !drv->bdrv_file_open) {
|
|
|
|
/* If the user explicitly wants a format driver here, we'll need to add
|
|
|
|
* another layer for the protocol in bs->file */
|
|
|
|
flags &= ~BDRV_O_PROTOCOL;
|
|
|
|
}
|
|
|
|
|
2013-03-15 13:35:02 +04:00
|
|
|
bs->options = options;
|
2013-03-15 13:35:04 +04:00
|
|
|
options = qdict_clone_shallow(options);
|
2013-03-15 13:35:02 +04:00
|
|
|
|
2012-11-12 20:35:27 +04:00
|
|
|
/* Open image file without format layer */
|
2014-06-03 18:44:19 +04:00
|
|
|
if ((flags & BDRV_O_PROTOCOL) == 0) {
|
|
|
|
if (flags & BDRV_O_RDWR) {
|
|
|
|
flags |= BDRV_O_ALLOW_RDWR;
|
|
|
|
}
|
|
|
|
if (flags & BDRV_O_SNAPSHOT) {
|
|
|
|
snapshot_flags = bdrv_temp_snapshot_flags(flags);
|
|
|
|
flags = bdrv_backing_flags(flags);
|
|
|
|
}
|
2012-11-12 20:35:27 +04:00
|
|
|
|
2014-06-03 18:44:19 +04:00
|
|
|
assert(file == NULL);
|
|
|
|
ret = bdrv_open_image(&file, filename, options, "file",
|
|
|
|
bdrv_inherited_flags(flags),
|
|
|
|
true, &local_err);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
2012-11-12 20:35:27 +04:00
|
|
|
}
|
|
|
|
|
2014-06-04 16:19:44 +04:00
|
|
|
/* Image format probing */
|
raw: Prohibit dangerous writes for probed images
If the user neglects to specify the image format, QEMU probes the
image to guess it automatically, for convenience.
Relying on format probing is insecure for raw images (CVE-2008-2004).
If the guest writes a suitable header to the device, the next probe
will recognize a format chosen by the guest. A malicious guest can
abuse this to gain access to host files, e.g. by crafting a QCOW2
header with backing file /etc/shadow.
Commit 1e72d3b (April 2008) provided -drive parameter format to let
users disable probing. Commit f965509 (March 2009) extended QCOW2 to
optionally store the backing file format, to let users disable backing
file probing. QED has had a flag to suppress probing since the
beginning (2010), set whenever a raw backing file is assigned.
All of these additions that allow to avoid format probing have to be
specified explicitly. The default still allows the attack.
In order to fix this, commit 79368c8 (July 2010) put probed raw images
in a restricted mode, in which they wouldn't be able to overwrite the
first few bytes of the image so that they would identify as a different
image. If a write to the first sector would write one of the signatures
of another driver, qemu would instead zero out the first four bytes.
This patch was later reverted in commit 8b33d9e (September 2010) because
it didn't get the handling of unaligned qiov members right.
Today's block layer that is based on coroutines and has qiov utility
functions makes it much easier to get this functionality right, so this
patch implements it.
The other differences of this patch to the old one are that it doesn't
silently write something different than the guest requested by zeroing
out some bytes (it fails the request instead) and that it doesn't
maintain a list of signatures in the raw driver (it calls the usual
probe function instead).
Note that this change doesn't introduce new breakage for false positive
cases where the guest legitimately writes data into the first sector
that matches the signatures of an image format (e.g. for nested virt):
These cases were broken before, only the failure mode changes from
corruption after the next restart (when the wrong format is probed) to
failing the problematic write request.
Also note that like in the original patch, the restrictions only apply
if the image format has been guessed by probing. Explicitly specifying a
format allows guests to write anything they like.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1416497234-29880-8-git-send-email-kwolf@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-11-20 18:27:12 +03:00
|
|
|
bs->probed = !drv;
|
2014-06-04 16:19:44 +04:00
|
|
|
if (!drv && file) {
|
2014-05-27 12:50:29 +04:00
|
|
|
ret = find_image_format(file, filename, &drv, &local_err);
|
|
|
|
if (ret < 0) {
|
2014-04-11 21:16:36 +04:00
|
|
|
goto fail;
|
2013-12-20 22:28:10 +04:00
|
|
|
}
|
2014-06-04 16:19:44 +04:00
|
|
|
} else if (!drv) {
|
2014-05-27 12:50:29 +04:00
|
|
|
error_setg(errp, "Must specify either driver or file");
|
|
|
|
ret = -EINVAL;
|
2014-04-11 21:16:36 +04:00
|
|
|
goto fail;
|
2004-08-02 01:59:26 +04:00
|
|
|
}
|
2010-04-12 18:37:13 +04:00
|
|
|
|
|
|
|
/* Open the image */
|
2013-09-05 16:45:29 +04:00
|
|
|
ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
|
2010-04-12 18:37:13 +04:00
|
|
|
if (ret < 0) {
|
2014-04-11 21:16:36 +04:00
|
|
|
goto fail;
|
2010-01-20 20:13:25 +03:00
|
|
|
}
|
|
|
|
|
2013-12-20 22:28:10 +04:00
|
|
|
if (file && (bs->file != file)) {
|
2013-08-23 05:14:47 +04:00
|
|
|
bdrv_unref(file);
|
2012-11-12 20:35:27 +04:00
|
|
|
file = NULL;
|
|
|
|
}
|
|
|
|
|
2010-04-12 18:37:13 +04:00
|
|
|
/* If there is a backing file, use it */
|
2012-10-18 18:49:17 +04:00
|
|
|
if ((flags & BDRV_O_NO_BACKING) == 0) {
|
2013-03-28 18:29:24 +04:00
|
|
|
QDict *backing_options;
|
|
|
|
|
2013-09-25 15:30:01 +04:00
|
|
|
qdict_extract_subqdict(options, &backing_options, "backing.");
|
2013-09-05 16:45:29 +04:00
|
|
|
ret = bdrv_open_backing_file(bs, backing_options, &local_err);
|
2010-04-12 18:37:13 +04:00
|
|
|
if (ret < 0) {
|
2013-03-15 13:35:04 +04:00
|
|
|
goto close_and_fail;
|
2010-04-12 18:37:13 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-07-18 22:24:56 +04:00
|
|
|
bdrv_refresh_filename(bs);
|
|
|
|
|
2014-04-03 14:09:34 +04:00
|
|
|
/* For snapshot=on, create a temporary qcow2 overlay. bs points to the
|
|
|
|
* temporary snapshot afterwards. */
|
2014-05-06 14:11:42 +04:00
|
|
|
if (snapshot_flags) {
|
2014-06-23 19:28:23 +04:00
|
|
|
ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
|
2014-04-03 14:09:34 +04:00
|
|
|
if (local_err) {
|
|
|
|
goto close_and_fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-03-15 13:35:04 +04:00
|
|
|
/* Check if any unknown options were used */
|
2014-02-18 21:33:11 +04:00
|
|
|
if (options && (qdict_size(options) != 0)) {
|
2013-03-15 13:35:04 +04:00
|
|
|
const QDictEntry *entry = qdict_first(options);
|
2014-02-18 21:33:11 +04:00
|
|
|
if (flags & BDRV_O_PROTOCOL) {
|
|
|
|
error_setg(errp, "Block protocol '%s' doesn't support the option "
|
|
|
|
"'%s'", drv->format_name, entry->key);
|
|
|
|
} else {
|
|
|
|
error_setg(errp, "Block format '%s' used by device '%s' doesn't "
|
|
|
|
"support the option '%s'", drv->format_name,
|
2014-10-07 15:59:11 +04:00
|
|
|
bdrv_get_device_name(bs), entry->key);
|
2014-02-18 21:33:11 +04:00
|
|
|
}
|
2013-03-15 13:35:04 +04:00
|
|
|
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto close_and_fail;
|
|
|
|
}
|
|
|
|
|
2010-04-12 18:37:13 +04:00
|
|
|
if (!bdrv_key_required(bs)) {
|
2014-10-07 15:59:25 +04:00
|
|
|
if (bs->blk) {
|
|
|
|
blk_dev_change_media_cb(bs->blk, true);
|
|
|
|
}
|
2014-03-14 12:22:48 +04:00
|
|
|
} else if (!runstate_check(RUN_STATE_PRELAUNCH)
|
|
|
|
&& !runstate_check(RUN_STATE_INMIGRATE)
|
|
|
|
&& !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
|
|
|
|
error_setg(errp,
|
|
|
|
"Guest must be stopped for opening of encrypted image");
|
|
|
|
ret = -EBUSY;
|
|
|
|
goto close_and_fail;
|
2010-04-12 18:37:13 +04:00
|
|
|
}
|
|
|
|
|
2014-03-14 12:22:48 +04:00
|
|
|
QDECREF(options);
|
2014-02-18 21:33:05 +04:00
|
|
|
*pbs = bs;
|
2010-04-12 18:37:13 +04:00
|
|
|
return 0;
|
|
|
|
|
2014-04-11 21:16:36 +04:00
|
|
|
fail:
|
2012-11-12 20:35:27 +04:00
|
|
|
if (file != NULL) {
|
2013-08-23 05:14:47 +04:00
|
|
|
bdrv_unref(file);
|
2012-11-12 20:35:27 +04:00
|
|
|
}
|
2013-03-15 13:35:02 +04:00
|
|
|
QDECREF(bs->options);
|
2013-03-15 13:35:04 +04:00
|
|
|
QDECREF(options);
|
2013-03-15 13:35:02 +04:00
|
|
|
bs->options = NULL;
|
2014-02-18 21:33:05 +04:00
|
|
|
if (!*pbs) {
|
|
|
|
/* If *pbs is NULL, a new BDS has been created in this function and
|
|
|
|
needs to be freed now. Otherwise, it does not need to be closed,
|
|
|
|
since it has not really been opened yet. */
|
|
|
|
bdrv_unref(bs);
|
|
|
|
}
|
2014-01-30 18:07:28 +04:00
|
|
|
if (local_err) {
|
2013-09-05 16:45:29 +04:00
|
|
|
error_propagate(errp, local_err);
|
|
|
|
}
|
2013-03-15 13:35:04 +04:00
|
|
|
return ret;
|
2013-03-15 13:35:02 +04:00
|
|
|
|
2013-03-15 13:35:04 +04:00
|
|
|
close_and_fail:
|
2014-02-18 21:33:05 +04:00
|
|
|
/* See fail path, but now the BDS has to be always closed */
|
|
|
|
if (*pbs) {
|
|
|
|
bdrv_close(bs);
|
|
|
|
} else {
|
|
|
|
bdrv_unref(bs);
|
|
|
|
}
|
2013-03-15 13:35:04 +04:00
|
|
|
QDECREF(options);
|
2014-01-30 18:07:28 +04:00
|
|
|
if (local_err) {
|
2013-09-05 16:45:29 +04:00
|
|
|
error_propagate(errp, local_err);
|
|
|
|
}
|
2010-04-12 18:37:13 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2012-09-20 23:13:19 +04:00
|
|
|
typedef struct BlockReopenQueueEntry {
|
|
|
|
bool prepared;
|
|
|
|
BDRVReopenState state;
|
|
|
|
QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
|
|
|
|
} BlockReopenQueueEntry;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Adds a BlockDriverState to a simple queue for an atomic, transactional
|
|
|
|
* reopen of multiple devices.
|
|
|
|
*
|
|
|
|
* bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
|
|
|
|
* already performed, or alternatively may be NULL a new BlockReopenQueue will
|
|
|
|
* be created and initialized. This newly created BlockReopenQueue should be
|
|
|
|
* passed back in for subsequent calls that are intended to be of the same
|
|
|
|
* atomic 'set'.
|
|
|
|
*
|
|
|
|
* bs is the BlockDriverState to add to the reopen queue.
|
|
|
|
*
|
|
|
|
* flags contains the open flags for the associated bs
|
|
|
|
*
|
|
|
|
* returns a pointer to bs_queue, which is either the newly allocated
|
|
|
|
* bs_queue, or the existing bs_queue being used.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
|
|
|
|
BlockDriverState *bs, int flags)
|
|
|
|
{
|
|
|
|
assert(bs != NULL);
|
|
|
|
|
|
|
|
BlockReopenQueueEntry *bs_entry;
|
|
|
|
if (bs_queue == NULL) {
|
|
|
|
bs_queue = g_new0(BlockReopenQueue, 1);
|
|
|
|
QSIMPLEQ_INIT(bs_queue);
|
|
|
|
}
|
|
|
|
|
2014-04-25 21:04:55 +04:00
|
|
|
/* bdrv_open() masks this flag out */
|
|
|
|
flags &= ~BDRV_O_PROTOCOL;
|
|
|
|
|
2012-09-20 23:13:19 +04:00
|
|
|
if (bs->file) {
|
2014-04-25 21:04:55 +04:00
|
|
|
bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
|
2012-09-20 23:13:19 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
bs_entry = g_new0(BlockReopenQueueEntry, 1);
|
|
|
|
QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
|
|
|
|
|
|
|
|
bs_entry->state.bs = bs;
|
|
|
|
bs_entry->state.flags = flags;
|
|
|
|
|
|
|
|
return bs_queue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Reopen multiple BlockDriverStates atomically & transactionally.
|
|
|
|
*
|
|
|
|
* The queue passed in (bs_queue) must have been built up previous
|
|
|
|
* via bdrv_reopen_queue().
|
|
|
|
*
|
|
|
|
* Reopens all BDS specified in the queue, with the appropriate
|
|
|
|
* flags. All devices are prepared for reopen, and failure of any
|
|
|
|
* device will cause all device changes to be abandonded, and intermediate
|
|
|
|
* data cleaned up.
|
|
|
|
*
|
|
|
|
* If all devices prepare successfully, then the changes are committed
|
|
|
|
* to all devices.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
|
|
|
|
{
|
|
|
|
int ret = -1;
|
|
|
|
BlockReopenQueueEntry *bs_entry, *next;
|
|
|
|
Error *local_err = NULL;
|
|
|
|
|
|
|
|
assert(bs_queue != NULL);
|
|
|
|
|
|
|
|
bdrv_drain_all();
|
|
|
|
|
|
|
|
QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
|
|
|
|
if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
|
|
|
|
error_propagate(errp, local_err);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
bs_entry->prepared = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If we reach this point, we have success and just need to apply the
|
|
|
|
* changes
|
|
|
|
*/
|
|
|
|
QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
|
|
|
|
bdrv_reopen_commit(&bs_entry->state);
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = 0;
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
|
|
|
|
if (ret && bs_entry->prepared) {
|
|
|
|
bdrv_reopen_abort(&bs_entry->state);
|
|
|
|
}
|
|
|
|
g_free(bs_entry);
|
|
|
|
}
|
|
|
|
g_free(bs_queue);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Reopen a single BlockDriverState with the specified flags. */
|
|
|
|
int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
|
|
|
|
{
|
|
|
|
int ret = -1;
|
|
|
|
Error *local_err = NULL;
|
|
|
|
BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
|
|
|
|
|
|
|
|
ret = bdrv_reopen_multiple(queue, &local_err);
|
|
|
|
if (local_err != NULL) {
|
|
|
|
error_propagate(errp, local_err);
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Prepares a BlockDriverState for reopen. All changes are staged in the
|
|
|
|
* 'opaque' field of the BDRVReopenState, which is used and allocated by
|
|
|
|
* the block driver layer .bdrv_reopen_prepare()
|
|
|
|
*
|
|
|
|
* bs is the BlockDriverState to reopen
|
|
|
|
* flags are the new open flags
|
|
|
|
* queue is the reopen queue
|
|
|
|
*
|
|
|
|
* Returns 0 on success, non-zero on error. On error errp will be set
|
|
|
|
* as well.
|
|
|
|
*
|
|
|
|
* On failure, bdrv_reopen_abort() will be called to clean up any data.
|
|
|
|
* It is the responsibility of the caller to then call the abort() or
|
|
|
|
* commit() for any other BDS that have been left in a prepare() state
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
int ret = -1;
|
|
|
|
Error *local_err = NULL;
|
|
|
|
BlockDriver *drv;
|
|
|
|
|
|
|
|
assert(reopen_state != NULL);
|
|
|
|
assert(reopen_state->bs->drv != NULL);
|
|
|
|
drv = reopen_state->bs->drv;
|
|
|
|
|
|
|
|
/* if we are to stay read-only, do not allow permission change
|
|
|
|
* to r/w */
|
|
|
|
if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
|
|
|
|
reopen_state->flags & BDRV_O_RDWR) {
|
|
|
|
error_set(errp, QERR_DEVICE_IS_READ_ONLY,
|
2014-10-07 15:59:11 +04:00
|
|
|
bdrv_get_device_name(reopen_state->bs));
|
2012-09-20 23:13:19 +04:00
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
ret = bdrv_flush(reopen_state->bs);
|
|
|
|
if (ret) {
|
|
|
|
error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
|
|
|
|
strerror(-ret));
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (drv->bdrv_reopen_prepare) {
|
|
|
|
ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
|
|
|
|
if (ret) {
|
|
|
|
if (local_err != NULL) {
|
|
|
|
error_propagate(errp, local_err);
|
|
|
|
} else {
|
2013-06-10 19:29:27 +04:00
|
|
|
error_setg(errp, "failed while preparing to reopen image '%s'",
|
|
|
|
reopen_state->bs->filename);
|
2012-09-20 23:13:19 +04:00
|
|
|
}
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* It is currently mandatory to have a bdrv_reopen_prepare()
|
|
|
|
* handler for each supported drv. */
|
|
|
|
error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
|
2014-10-07 15:59:11 +04:00
|
|
|
drv->format_name, bdrv_get_device_name(reopen_state->bs),
|
2012-09-20 23:13:19 +04:00
|
|
|
"reopening of file");
|
|
|
|
ret = -1;
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = 0;
|
|
|
|
|
|
|
|
error:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
|
|
|
|
* makes them final by swapping the staging BlockDriverState contents into
|
|
|
|
* the active BlockDriverState contents.
|
|
|
|
*/
|
|
|
|
void bdrv_reopen_commit(BDRVReopenState *reopen_state)
|
|
|
|
{
|
|
|
|
BlockDriver *drv;
|
|
|
|
|
|
|
|
assert(reopen_state != NULL);
|
|
|
|
drv = reopen_state->bs->drv;
|
|
|
|
assert(drv != NULL);
|
|
|
|
|
|
|
|
/* If there are any driver level actions to take */
|
|
|
|
if (drv->bdrv_reopen_commit) {
|
|
|
|
drv->bdrv_reopen_commit(reopen_state);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* set BDS specific flags now */
|
|
|
|
reopen_state->bs->open_flags = reopen_state->flags;
|
|
|
|
reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
|
|
|
|
BDRV_O_CACHE_WB);
|
|
|
|
reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
|
2013-12-11 23:14:09 +04:00
|
|
|
|
2014-07-16 19:48:16 +04:00
|
|
|
bdrv_refresh_limits(reopen_state->bs, NULL);
|
2012-09-20 23:13:19 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Abort the reopen, and delete and free the staged changes in
|
|
|
|
* reopen_state
|
|
|
|
*/
|
|
|
|
void bdrv_reopen_abort(BDRVReopenState *reopen_state)
|
|
|
|
{
|
|
|
|
BlockDriver *drv;
|
|
|
|
|
|
|
|
assert(reopen_state != NULL);
|
|
|
|
drv = reopen_state->bs->drv;
|
|
|
|
assert(drv != NULL);
|
|
|
|
|
|
|
|
if (drv->bdrv_reopen_abort) {
|
|
|
|
drv->bdrv_reopen_abort(reopen_state);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-06-30 14:03:06 +04:00
|
|
|
void bdrv_close(BlockDriverState *bs)
|
|
|
|
{
|
2014-06-20 23:57:33 +04:00
|
|
|
BdrvAioNotifier *ban, *ban_next;
|
|
|
|
|
2012-10-19 13:36:48 +04:00
|
|
|
if (bs->job) {
|
|
|
|
block_job_cancel_sync(bs->job);
|
|
|
|
}
|
2013-07-02 17:36:25 +04:00
|
|
|
bdrv_drain_all(); /* complete I/O */
|
|
|
|
bdrv_flush(bs);
|
|
|
|
bdrv_drain_all(); /* in case flush left pending I/O */
|
2012-08-23 13:20:36 +04:00
|
|
|
notifier_list_notify(&bs->close_notifiers, bs);
|
2012-04-11 13:06:37 +04:00
|
|
|
|
2012-10-19 13:36:48 +04:00
|
|
|
if (bs->drv) {
|
2010-04-17 13:49:06 +04:00
|
|
|
if (bs->backing_hd) {
|
2014-05-23 17:29:47 +04:00
|
|
|
BlockDriverState *backing_hd = bs->backing_hd;
|
|
|
|
bdrv_set_backing_hd(bs, NULL);
|
|
|
|
bdrv_unref(backing_hd);
|
2010-04-17 13:49:06 +04:00
|
|
|
}
|
2004-08-02 01:59:26 +04:00
|
|
|
bs->drv->bdrv_close(bs);
|
2011-08-21 07:09:37 +04:00
|
|
|
g_free(bs->opaque);
|
2004-08-02 01:59:26 +04:00
|
|
|
bs->opaque = NULL;
|
|
|
|
bs->drv = NULL;
|
2011-11-28 20:08:47 +04:00
|
|
|
bs->copy_on_read = 0;
|
2012-05-08 18:51:43 +04:00
|
|
|
bs->backing_file[0] = '\0';
|
|
|
|
bs->backing_format[0] = '\0';
|
2012-05-08 18:51:49 +04:00
|
|
|
bs->total_sectors = 0;
|
|
|
|
bs->encrypted = 0;
|
|
|
|
bs->valid_key = 0;
|
|
|
|
bs->sg = 0;
|
2013-08-22 11:24:14 +04:00
|
|
|
bs->zero_beyond_eof = false;
|
2013-03-15 13:35:02 +04:00
|
|
|
QDECREF(bs->options);
|
|
|
|
bs->options = NULL;
|
2014-07-18 22:24:56 +04:00
|
|
|
QDECREF(bs->full_open_options);
|
|
|
|
bs->full_open_options = NULL;
|
2004-03-15 00:38:54 +03:00
|
|
|
|
2010-04-14 16:17:38 +04:00
|
|
|
if (bs->file != NULL) {
|
2013-08-23 05:14:47 +04:00
|
|
|
bdrv_unref(bs->file);
|
2012-05-08 18:51:44 +04:00
|
|
|
bs->file = NULL;
|
2010-04-14 16:17:38 +04:00
|
|
|
}
|
2004-03-15 00:38:54 +03:00
|
|
|
}
|
2011-11-08 09:00:14 +04:00
|
|
|
|
2014-10-07 15:59:25 +04:00
|
|
|
if (bs->blk) {
|
|
|
|
blk_dev_change_media_cb(bs->blk, false);
|
|
|
|
}
|
2012-08-09 14:44:48 +04:00
|
|
|
|
2011-11-08 09:00:14 +04:00
|
|
|
/*throttling disk I/O limits*/
|
|
|
|
if (bs->io_limits_enabled) {
|
|
|
|
bdrv_io_limits_disable(bs);
|
|
|
|
}
|
2014-06-20 23:57:33 +04:00
|
|
|
|
|
|
|
QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
|
|
|
|
g_free(ban);
|
|
|
|
}
|
|
|
|
QLIST_INIT(&bs->aio_notifiers);
|
2004-03-15 00:38:54 +03:00
|
|
|
}
|
|
|
|
|
2010-05-28 06:44:57 +04:00
|
|
|
void bdrv_close_all(void)
|
|
|
|
{
|
|
|
|
BlockDriverState *bs;
|
|
|
|
|
2014-01-24 00:31:32 +04:00
|
|
|
QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
|
2014-05-08 18:34:35 +04:00
|
|
|
AioContext *aio_context = bdrv_get_aio_context(bs);
|
|
|
|
|
|
|
|
aio_context_acquire(aio_context);
|
2010-05-28 06:44:57 +04:00
|
|
|
bdrv_close(bs);
|
2014-05-08 18:34:35 +04:00
|
|
|
aio_context_release(aio_context);
|
2010-05-28 06:44:57 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-04-11 17:41:13 +04:00
|
|
|
/* Check if any requests are in-flight (including throttled requests) */
|
|
|
|
static bool bdrv_requests_pending(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
if (!QLIST_EMPTY(&bs->tracked_requests)) {
|
|
|
|
return true;
|
|
|
|
}
|
2013-09-02 16:14:39 +04:00
|
|
|
if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
|
2013-04-11 17:41:13 +04:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if (bs->file && bdrv_requests_pending(bs->file)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2014-10-21 15:03:55 +04:00
|
|
|
static bool bdrv_drain_one(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
bool bs_busy;
|
|
|
|
|
|
|
|
bdrv_flush_io_queue(bs);
|
|
|
|
bdrv_start_throttled_reqs(bs);
|
|
|
|
bs_busy = bdrv_requests_pending(bs);
|
|
|
|
bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy);
|
|
|
|
return bs_busy;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Wait for pending requests to complete on a single BlockDriverState subtree
|
|
|
|
*
|
|
|
|
* See the warning in bdrv_drain_all(). This function can only be called if
|
|
|
|
* you are sure nothing can generate I/O because you have op blockers
|
|
|
|
* installed.
|
|
|
|
*
|
|
|
|
* Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
|
|
|
|
* AioContext.
|
|
|
|
*/
|
|
|
|
void bdrv_drain(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
while (bdrv_drain_one(bs)) {
|
|
|
|
/* Keep iterating */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-11-30 16:23:43 +04:00
|
|
|
/*
|
|
|
|
* Wait for pending requests to complete across all BlockDriverStates
|
|
|
|
*
|
|
|
|
* This function does not flush data to disk, use bdrv_flush_all() for that
|
|
|
|
* after calling this function.
|
2012-04-12 16:00:57 +04:00
|
|
|
*
|
|
|
|
* Note that completion of an asynchronous I/O operation can trigger any
|
|
|
|
* number of other I/O operations on other devices---for example a coroutine
|
|
|
|
* can be arbitrarily complex and a constant flow of I/O can come until the
|
|
|
|
* coroutine is complete. Because of this, it is not possible to have a
|
|
|
|
* function to drain a single device's I/O queue.
|
2011-11-30 16:23:43 +04:00
|
|
|
*/
|
|
|
|
void bdrv_drain_all(void)
|
|
|
|
{
|
2013-04-11 17:41:13 +04:00
|
|
|
/* Always run first iteration so any pending completion BHs run */
|
|
|
|
bool busy = true;
|
2011-11-30 16:23:43 +04:00
|
|
|
BlockDriverState *bs;
|
|
|
|
|
2013-04-11 17:41:13 +04:00
|
|
|
while (busy) {
|
2014-05-08 18:34:36 +04:00
|
|
|
busy = false;
|
|
|
|
|
2014-01-24 00:31:32 +04:00
|
|
|
QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
|
2014-05-08 18:34:36 +04:00
|
|
|
AioContext *aio_context = bdrv_get_aio_context(bs);
|
|
|
|
|
|
|
|
aio_context_acquire(aio_context);
|
2014-10-21 15:03:55 +04:00
|
|
|
busy |= bdrv_drain_one(bs);
|
2014-05-08 18:34:36 +04:00
|
|
|
aio_context_release(aio_context);
|
|
|
|
}
|
2011-11-30 16:23:43 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-01-24 00:31:32 +04:00
|
|
|
/* make a BlockDriverState anonymous by removing from bdrv_state and
|
|
|
|
* graph_bdrv_state list.
|
Do not delete BlockDriverState when deleting the drive
When removing a drive from the host-side via drive_del we currently have
the following path:
drive_del
qemu_aio_flush()
bdrv_close() // zaps bs->drv, which makes any subsequent I/O get
// dropped. Works as designed
drive_uninit()
bdrv_delete() // frees the bs. Since the device is still connected to
// bs, any subsequent I/O is a use-after-free.
The value of bs->drv becomes unpredictable on free. As long as it
remains null, I/O still gets dropped, however it could become non-null
at any point after the free resulting SEGVs or other QEMU state
corruption.
To resolve this issue as simply as possible, we can chose to not
actually delete the BlockDriverState pointer. Since bdrv_close()
handles setting the drv pointer to NULL, we just need to remove the
BlockDriverState from the QLIST that is used to enumerate the block
devices. This is currently handled within bdrv_delete, so move this
into its own function, bdrv_make_anon().
The result is that we can now invoke drive_del, this closes the file
descriptors and sets BlockDriverState->drv to NULL which prevents futher
IO to the device, and since we do not free BlockDriverState, we don't
have to worry about the copy retained in the block devices.
We also don't attempt to remove the qdev property since we are no longer
deleting the BlockDriverState on drives with associated drives. This
also allows for removing Drives with no devices associated either.
Reported-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Ryan Harper <ryanh@us.ibm.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2011-03-30 05:51:47 +04:00
|
|
|
Also, NULL terminate the device_name to prevent double remove */
|
|
|
|
void bdrv_make_anon(BlockDriverState *bs)
|
|
|
|
{
|
2014-10-07 15:59:11 +04:00
|
|
|
/*
|
|
|
|
* Take care to remove bs from bdrv_states only when it's actually
|
|
|
|
* in it. Note that bs->device_list.tqe_prev is initially null,
|
|
|
|
* and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
|
|
|
|
* the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
|
|
|
|
* resetting it to null on remove.
|
|
|
|
*/
|
|
|
|
if (bs->device_list.tqe_prev) {
|
2014-01-24 00:31:32 +04:00
|
|
|
QTAILQ_REMOVE(&bdrv_states, bs, device_list);
|
2014-10-07 15:59:11 +04:00
|
|
|
bs->device_list.tqe_prev = NULL;
|
Do not delete BlockDriverState when deleting the drive
When removing a drive from the host-side via drive_del we currently have
the following path:
drive_del
qemu_aio_flush()
bdrv_close() // zaps bs->drv, which makes any subsequent I/O get
// dropped. Works as designed
drive_uninit()
bdrv_delete() // frees the bs. Since the device is still connected to
// bs, any subsequent I/O is a use-after-free.
The value of bs->drv becomes unpredictable on free. As long as it
remains null, I/O still gets dropped, however it could become non-null
at any point after the free resulting SEGVs or other QEMU state
corruption.
To resolve this issue as simply as possible, we can chose to not
actually delete the BlockDriverState pointer. Since bdrv_close()
handles setting the drv pointer to NULL, we just need to remove the
BlockDriverState from the QLIST that is used to enumerate the block
devices. This is currently handled within bdrv_delete, so move this
into its own function, bdrv_make_anon().
The result is that we can now invoke drive_del, this closes the file
descriptors and sets BlockDriverState->drv to NULL which prevents futher
IO to the device, and since we do not free BlockDriverState, we don't
have to worry about the copy retained in the block devices.
We also don't attempt to remove the qdev property since we are no longer
deleting the BlockDriverState on drives with associated drives. This
also allows for removing Drives with no devices associated either.
Reported-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Ryan Harper <ryanh@us.ibm.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2011-03-30 05:51:47 +04:00
|
|
|
}
|
2014-01-24 00:31:32 +04:00
|
|
|
if (bs->node_name[0] != '\0') {
|
|
|
|
QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
|
|
|
|
}
|
|
|
|
bs->node_name[0] = '\0';
|
Do not delete BlockDriverState when deleting the drive
When removing a drive from the host-side via drive_del we currently have
the following path:
drive_del
qemu_aio_flush()
bdrv_close() // zaps bs->drv, which makes any subsequent I/O get
// dropped. Works as designed
drive_uninit()
bdrv_delete() // frees the bs. Since the device is still connected to
// bs, any subsequent I/O is a use-after-free.
The value of bs->drv becomes unpredictable on free. As long as it
remains null, I/O still gets dropped, however it could become non-null
at any point after the free resulting SEGVs or other QEMU state
corruption.
To resolve this issue as simply as possible, we can chose to not
actually delete the BlockDriverState pointer. Since bdrv_close()
handles setting the drv pointer to NULL, we just need to remove the
BlockDriverState from the QLIST that is used to enumerate the block
devices. This is currently handled within bdrv_delete, so move this
into its own function, bdrv_make_anon().
The result is that we can now invoke drive_del, this closes the file
descriptors and sets BlockDriverState->drv to NULL which prevents futher
IO to the device, and since we do not free BlockDriverState, we don't
have to worry about the copy retained in the block devices.
We also don't attempt to remove the qdev property since we are no longer
deleting the BlockDriverState on drives with associated drives. This
also allows for removing Drives with no devices associated either.
Reported-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Ryan Harper <ryanh@us.ibm.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2011-03-30 05:51:47 +04:00
|
|
|
}
|
|
|
|
|
2012-05-08 18:51:41 +04:00
|
|
|
static void bdrv_rebind(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
if (bs->drv && bs->drv->bdrv_rebind) {
|
|
|
|
bs->drv->bdrv_rebind(bs);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-06-14 18:55:02 +04:00
|
|
|
static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
|
|
|
|
BlockDriverState *bs_src)
|
2012-02-29 00:54:06 +04:00
|
|
|
{
|
2012-06-14 18:55:02 +04:00
|
|
|
/* move some fields that need to stay attached to the device */
|
2012-02-29 00:54:06 +04:00
|
|
|
|
|
|
|
/* dev info */
|
2011-11-29 14:35:47 +04:00
|
|
|
bs_dest->guest_block_size = bs_src->guest_block_size;
|
2012-06-14 18:55:02 +04:00
|
|
|
bs_dest->copy_on_read = bs_src->copy_on_read;
|
2012-02-29 00:54:06 +04:00
|
|
|
|
2012-06-14 18:55:02 +04:00
|
|
|
bs_dest->enable_write_cache = bs_src->enable_write_cache;
|
2012-06-06 02:04:51 +04:00
|
|
|
|
2013-09-02 16:14:39 +04:00
|
|
|
/* i/o throttled req */
|
|
|
|
memcpy(&bs_dest->throttle_state,
|
|
|
|
&bs_src->throttle_state,
|
|
|
|
sizeof(ThrottleState));
|
|
|
|
bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
|
|
|
|
bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
|
2012-06-14 18:55:02 +04:00
|
|
|
bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
|
2012-02-29 00:54:06 +04:00
|
|
|
|
|
|
|
/* r/w error */
|
2012-06-14 18:55:02 +04:00
|
|
|
bs_dest->on_read_error = bs_src->on_read_error;
|
|
|
|
bs_dest->on_write_error = bs_src->on_write_error;
|
2012-02-29 00:54:06 +04:00
|
|
|
|
|
|
|
/* i/o status */
|
2012-06-14 18:55:02 +04:00
|
|
|
bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
|
|
|
|
bs_dest->iostatus = bs_src->iostatus;
|
2012-02-29 00:54:06 +04:00
|
|
|
|
2012-06-14 18:55:01 +04:00
|
|
|
/* dirty bitmap */
|
2013-11-13 14:29:43 +04:00
|
|
|
bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
|
2012-06-14 18:55:01 +04:00
|
|
|
|
2013-08-23 05:14:46 +04:00
|
|
|
/* reference count */
|
|
|
|
bs_dest->refcnt = bs_src->refcnt;
|
|
|
|
|
2012-06-14 18:55:01 +04:00
|
|
|
/* job */
|
2012-06-14 18:55:02 +04:00
|
|
|
bs_dest->job = bs_src->job;
|
2012-06-14 18:55:01 +04:00
|
|
|
|
2012-02-29 00:54:06 +04:00
|
|
|
/* keep the same entry in bdrv_states */
|
2014-01-24 00:31:32 +04:00
|
|
|
bs_dest->device_list = bs_src->device_list;
|
2014-10-07 15:59:05 +04:00
|
|
|
bs_dest->blk = bs_src->blk;
|
|
|
|
|
2014-05-23 17:29:42 +04:00
|
|
|
memcpy(bs_dest->op_blockers, bs_src->op_blockers,
|
|
|
|
sizeof(bs_dest->op_blockers));
|
2012-06-14 18:55:02 +04:00
|
|
|
}
|
2012-02-29 00:54:06 +04:00
|
|
|
|
2012-06-14 18:55:02 +04:00
|
|
|
/*
|
|
|
|
* Swap bs contents for two image chains while they are live,
|
|
|
|
* while keeping required fields on the BlockDriverState that is
|
|
|
|
* actually attached to a device.
|
|
|
|
*
|
|
|
|
* This will modify the BlockDriverState fields, and swap contents
|
|
|
|
* between bs_new and bs_old. Both bs_new and bs_old are modified.
|
|
|
|
*
|
2014-10-07 15:59:11 +04:00
|
|
|
* bs_new must not be attached to a BlockBackend.
|
2012-06-14 18:55:02 +04:00
|
|
|
*
|
|
|
|
* This function does not create any image files.
|
|
|
|
*/
|
|
|
|
void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
|
|
|
|
{
|
|
|
|
BlockDriverState tmp;
|
2012-03-28 00:30:19 +04:00
|
|
|
|
2014-03-06 02:48:29 +04:00
|
|
|
/* The code needs to swap the node_name but simply swapping node_list won't
|
|
|
|
* work so first remove the nodes from the graph list, do the swap then
|
|
|
|
* insert them back if needed.
|
|
|
|
*/
|
|
|
|
if (bs_new->node_name[0] != '\0') {
|
|
|
|
QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
|
|
|
|
}
|
|
|
|
if (bs_old->node_name[0] != '\0') {
|
|
|
|
QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
|
|
|
|
}
|
|
|
|
|
2014-10-07 15:59:11 +04:00
|
|
|
/* bs_new must be unattached and shouldn't have anything fancy enabled */
|
2014-10-07 15:59:05 +04:00
|
|
|
assert(!bs_new->blk);
|
2013-11-13 14:29:43 +04:00
|
|
|
assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
|
2012-06-14 18:55:02 +04:00
|
|
|
assert(bs_new->job == NULL);
|
|
|
|
assert(bs_new->io_limits_enabled == false);
|
2013-09-02 16:14:39 +04:00
|
|
|
assert(!throttle_have_timer(&bs_new->throttle_state));
|
2012-02-29 00:54:06 +04:00
|
|
|
|
2012-06-14 18:55:02 +04:00
|
|
|
tmp = *bs_new;
|
|
|
|
*bs_new = *bs_old;
|
|
|
|
*bs_old = tmp;
|
2012-06-14 18:55:01 +04:00
|
|
|
|
2012-06-14 18:55:02 +04:00
|
|
|
/* there are some fields that should not be swapped, move them back */
|
|
|
|
bdrv_move_feature_fields(&tmp, bs_old);
|
|
|
|
bdrv_move_feature_fields(bs_old, bs_new);
|
|
|
|
bdrv_move_feature_fields(bs_new, &tmp);
|
2012-02-29 00:54:06 +04:00
|
|
|
|
2014-10-07 15:59:11 +04:00
|
|
|
/* bs_new must remain unattached */
|
2014-10-07 15:59:05 +04:00
|
|
|
assert(!bs_new->blk);
|
2012-06-14 18:55:02 +04:00
|
|
|
|
|
|
|
/* Check a few fields that should remain attached to the device */
|
|
|
|
assert(bs_new->job == NULL);
|
|
|
|
assert(bs_new->io_limits_enabled == false);
|
2013-09-02 16:14:39 +04:00
|
|
|
assert(!throttle_have_timer(&bs_new->throttle_state));
|
2012-05-08 18:51:41 +04:00
|
|
|
|
2014-03-06 02:48:29 +04:00
|
|
|
/* insert the nodes back into the graph node list if needed */
|
|
|
|
if (bs_new->node_name[0] != '\0') {
|
|
|
|
QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
|
|
|
|
}
|
|
|
|
if (bs_old->node_name[0] != '\0') {
|
|
|
|
QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
|
|
|
|
}
|
|
|
|
|
2012-05-08 18:51:41 +04:00
|
|
|
bdrv_rebind(bs_new);
|
2012-06-14 18:55:02 +04:00
|
|
|
bdrv_rebind(bs_old);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Add new bs contents at the top of an image chain while the chain is
|
|
|
|
* live, while keeping required fields on the top layer.
|
|
|
|
*
|
|
|
|
* This will modify the BlockDriverState fields, and swap contents
|
|
|
|
* between bs_new and bs_top. Both bs_new and bs_top are modified.
|
|
|
|
*
|
2014-10-07 15:59:11 +04:00
|
|
|
* bs_new must not be attached to a BlockBackend.
|
2012-06-14 18:55:02 +04:00
|
|
|
*
|
|
|
|
* This function does not create any image files.
|
|
|
|
*/
|
|
|
|
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
|
|
|
|
{
|
|
|
|
bdrv_swap(bs_new, bs_top);
|
|
|
|
|
|
|
|
/* The contents of 'tmp' will become bs_top, as we are
|
|
|
|
* swapping bs_new and bs_top contents. */
|
2014-05-23 17:29:45 +04:00
|
|
|
bdrv_set_backing_hd(bs_top, bs_new);
|
2012-02-29 00:54:06 +04:00
|
|
|
}
|
|
|
|
|
2013-08-23 05:14:47 +04:00
|
|
|
static void bdrv_delete(BlockDriverState *bs)
|
2004-03-15 00:38:54 +03:00
|
|
|
{
|
2012-03-30 15:17:11 +04:00
|
|
|
assert(!bs->job);
|
2014-05-23 17:29:43 +04:00
|
|
|
assert(bdrv_op_blocker_is_empty(bs));
|
2013-08-23 05:14:47 +04:00
|
|
|
assert(!bs->refcnt);
|
2013-11-13 14:29:43 +04:00
|
|
|
assert(QLIST_EMPTY(&bs->dirty_bitmaps));
|
2010-06-29 18:58:30 +04:00
|
|
|
|
2013-06-27 17:32:26 +04:00
|
|
|
bdrv_close(bs);
|
|
|
|
|
2010-04-10 10:02:42 +04:00
|
|
|
/* remove from list, if necessary */
|
Do not delete BlockDriverState when deleting the drive
When removing a drive from the host-side via drive_del we currently have
the following path:
drive_del
qemu_aio_flush()
bdrv_close() // zaps bs->drv, which makes any subsequent I/O get
// dropped. Works as designed
drive_uninit()
bdrv_delete() // frees the bs. Since the device is still connected to
// bs, any subsequent I/O is a use-after-free.
The value of bs->drv becomes unpredictable on free. As long as it
remains null, I/O still gets dropped, however it could become non-null
at any point after the free resulting SEGVs or other QEMU state
corruption.
To resolve this issue as simply as possible, we can chose to not
actually delete the BlockDriverState pointer. Since bdrv_close()
handles setting the drv pointer to NULL, we just need to remove the
BlockDriverState from the QLIST that is used to enumerate the block
devices. This is currently handled within bdrv_delete, so move this
into its own function, bdrv_make_anon().
The result is that we can now invoke drive_del, this closes the file
descriptors and sets BlockDriverState->drv to NULL which prevents futher
IO to the device, and since we do not free BlockDriverState, we don't
have to worry about the copy retained in the block devices.
We also don't attempt to remove the qdev property since we are no longer
deleting the BlockDriverState on drives with associated drives. This
also allows for removing Drives with no devices associated either.
Reported-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Ryan Harper <ryanh@us.ibm.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2011-03-30 05:51:47 +04:00
|
|
|
bdrv_make_anon(bs);
|
2008-04-08 23:51:21 +04:00
|
|
|
|
2011-08-21 07:09:37 +04:00
|
|
|
g_free(bs);
|
2003-06-30 14:03:06 +04:00
|
|
|
}
|
|
|
|
|
2009-04-22 03:11:50 +04:00
|
|
|
/*
|
|
|
|
* Run consistency checks on an image
|
|
|
|
*
|
2010-06-29 13:43:13 +04:00
|
|
|
* Returns 0 if the check could be completed (it doesn't mean that the image is
|
2011-04-28 19:20:38 +04:00
|
|
|
* free of errors) or -errno when an internal error occurred. The results of the
|
2010-06-29 13:43:13 +04:00
|
|
|
* check are stored in res.
|
2009-04-22 03:11:50 +04:00
|
|
|
*/
|
2012-05-11 18:07:02 +04:00
|
|
|
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
|
2009-04-22 03:11:50 +04:00
|
|
|
{
|
2014-08-08 00:47:55 +04:00
|
|
|
if (bs->drv == NULL) {
|
|
|
|
return -ENOMEDIUM;
|
|
|
|
}
|
2009-04-22 03:11:50 +04:00
|
|
|
if (bs->drv->bdrv_check == NULL) {
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
2010-06-29 13:43:13 +04:00
|
|
|
memset(res, 0, sizeof(*res));
|
2012-05-11 18:07:02 +04:00
|
|
|
return bs->drv->bdrv_check(bs, res, fix);
|
2009-04-22 03:11:50 +04:00
|
|
|
}
|
|
|
|
|
2010-07-16 19:17:01 +04:00
|
|
|
#define COMMIT_BUF_SECTORS 2048
|
|
|
|
|
2003-07-06 21:15:21 +04:00
|
|
|
/* commit COW file into the raw image */
|
|
|
|
int bdrv_commit(BlockDriverState *bs)
|
|
|
|
{
|
2006-08-19 15:45:59 +04:00
|
|
|
BlockDriver *drv = bs->drv;
|
2014-01-24 18:02:35 +04:00
|
|
|
int64_t sector, total_sectors, length, backing_length;
|
2010-07-16 19:17:01 +04:00
|
|
|
int n, ro, open_flags;
|
2012-09-20 23:13:34 +04:00
|
|
|
int ret = 0;
|
2014-01-24 18:02:35 +04:00
|
|
|
uint8_t *buf = NULL;
|
2003-07-06 21:15:21 +04:00
|
|
|
|
2006-08-19 15:45:59 +04:00
|
|
|
if (!drv)
|
|
|
|
return -ENOMEDIUM;
|
2014-09-01 09:35:21 +04:00
|
|
|
|
2010-02-14 14:39:18 +03:00
|
|
|
if (!bs->backing_hd) {
|
|
|
|
return -ENOTSUP;
|
2003-07-06 21:15:21 +04:00
|
|
|
}
|
|
|
|
|
2014-09-11 09:14:00 +04:00
|
|
|
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
|
|
|
|
bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
|
2012-01-18 18:40:41 +04:00
|
|
|
return -EBUSY;
|
|
|
|
}
|
|
|
|
|
2010-02-14 14:39:18 +03:00
|
|
|
ro = bs->backing_hd->read_only;
|
|
|
|
open_flags = bs->backing_hd->open_flags;
|
|
|
|
|
|
|
|
if (ro) {
|
2012-09-20 23:13:34 +04:00
|
|
|
if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
|
|
|
|
return -EACCES;
|
2010-02-14 14:39:18 +03:00
|
|
|
}
|
2004-08-02 01:59:26 +04:00
|
|
|
}
|
2003-07-06 21:15:21 +04:00
|
|
|
|
2014-01-24 18:02:35 +04:00
|
|
|
length = bdrv_getlength(bs);
|
|
|
|
if (length < 0) {
|
|
|
|
ret = length;
|
|
|
|
goto ro_cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
backing_length = bdrv_getlength(bs->backing_hd);
|
|
|
|
if (backing_length < 0) {
|
|
|
|
ret = backing_length;
|
|
|
|
goto ro_cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If our top snapshot is larger than the backing file image,
|
|
|
|
* grow the backing file image if possible. If not possible,
|
|
|
|
* we must return an error */
|
|
|
|
if (length > backing_length) {
|
|
|
|
ret = bdrv_truncate(bs->backing_hd, length);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto ro_cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
total_sectors = length >> BDRV_SECTOR_BITS;
|
2014-05-20 15:16:51 +04:00
|
|
|
|
|
|
|
/* qemu_try_blockalign() for bs will choose an alignment that works for
|
|
|
|
* bs->backing_hd as well, so no need to compare the alignment manually. */
|
|
|
|
buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
|
|
|
|
if (buf == NULL) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto ro_cleanup;
|
|
|
|
}
|
2010-07-16 19:17:01 +04:00
|
|
|
|
|
|
|
for (sector = 0; sector < total_sectors; sector += n) {
|
2013-09-04 21:00:25 +04:00
|
|
|
ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto ro_cleanup;
|
|
|
|
}
|
|
|
|
if (ret) {
|
2014-01-24 17:00:43 +04:00
|
|
|
ret = bdrv_read(bs, sector, buf, n);
|
|
|
|
if (ret < 0) {
|
2010-07-16 19:17:01 +04:00
|
|
|
goto ro_cleanup;
|
|
|
|
}
|
|
|
|
|
2014-01-24 17:00:43 +04:00
|
|
|
ret = bdrv_write(bs->backing_hd, sector, buf, n);
|
|
|
|
if (ret < 0) {
|
2010-07-16 19:17:01 +04:00
|
|
|
goto ro_cleanup;
|
|
|
|
}
|
2004-08-02 01:59:26 +04:00
|
|
|
}
|
2003-07-06 21:15:21 +04:00
|
|
|
}
|
2005-12-18 21:28:15 +03:00
|
|
|
|
2010-01-17 14:32:30 +03:00
|
|
|
if (drv->bdrv_make_empty) {
|
|
|
|
ret = drv->bdrv_make_empty(bs);
|
2014-01-24 17:00:43 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
goto ro_cleanup;
|
|
|
|
}
|
2010-01-17 14:32:30 +03:00
|
|
|
bdrv_flush(bs);
|
|
|
|
}
|
2005-12-18 21:28:15 +03:00
|
|
|
|
2010-01-12 15:49:23 +03:00
|
|
|
/*
|
|
|
|
* Make sure all data we wrote to the backing device is actually
|
|
|
|
* stable on disk.
|
|
|
|
*/
|
2014-01-24 17:00:43 +04:00
|
|
|
if (bs->backing_hd) {
|
2010-01-12 15:49:23 +03:00
|
|
|
bdrv_flush(bs->backing_hd);
|
2014-01-24 17:00:43 +04:00
|
|
|
}
|
2010-02-14 14:39:18 +03:00
|
|
|
|
2014-01-24 17:00:43 +04:00
|
|
|
ret = 0;
|
2010-02-14 14:39:18 +03:00
|
|
|
ro_cleanup:
|
2014-05-20 15:16:51 +04:00
|
|
|
qemu_vfree(buf);
|
2010-02-14 14:39:18 +03:00
|
|
|
|
|
|
|
if (ro) {
|
2012-09-20 23:13:34 +04:00
|
|
|
/* ignoring error return here */
|
|
|
|
bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
|
2010-02-14 14:39:18 +03:00
|
|
|
}
|
|
|
|
|
2010-01-17 14:32:30 +03:00
|
|
|
return ret;
|
2003-07-06 21:15:21 +04:00
|
|
|
}
|
|
|
|
|
2012-03-05 22:10:11 +04:00
|
|
|
int bdrv_commit_all(void)
|
2010-06-02 20:55:18 +04:00
|
|
|
{
|
|
|
|
BlockDriverState *bs;
|
|
|
|
|
2014-01-24 00:31:32 +04:00
|
|
|
QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
|
2014-05-08 18:34:35 +04:00
|
|
|
AioContext *aio_context = bdrv_get_aio_context(bs);
|
|
|
|
|
|
|
|
aio_context_acquire(aio_context);
|
2013-02-26 18:55:48 +04:00
|
|
|
if (bs->drv && bs->backing_hd) {
|
|
|
|
int ret = bdrv_commit(bs);
|
|
|
|
if (ret < 0) {
|
2014-05-08 18:34:35 +04:00
|
|
|
aio_context_release(aio_context);
|
2013-02-26 18:55:48 +04:00
|
|
|
return ret;
|
|
|
|
}
|
2012-03-05 22:10:11 +04:00
|
|
|
}
|
2014-05-08 18:34:35 +04:00
|
|
|
aio_context_release(aio_context);
|
2010-06-02 20:55:18 +04:00
|
|
|
}
|
2012-03-05 22:10:11 +04:00
|
|
|
return 0;
|
2010-06-02 20:55:18 +04:00
|
|
|
}
|
|
|
|
|
2011-11-17 17:40:27 +04:00
|
|
|
/**
|
|
|
|
* Remove an active request from the tracked requests list
|
|
|
|
*
|
|
|
|
* This function should be called when a tracked request is completing.
|
|
|
|
*/
|
|
|
|
static void tracked_request_end(BdrvTrackedRequest *req)
|
|
|
|
{
|
2013-12-04 19:43:44 +04:00
|
|
|
if (req->serialising) {
|
|
|
|
req->bs->serialising_in_flight--;
|
|
|
|
}
|
|
|
|
|
2011-11-17 17:40:27 +04:00
|
|
|
QLIST_REMOVE(req, list);
|
2011-11-17 17:40:29 +04:00
|
|
|
qemu_co_queue_restart_all(&req->wait_queue);
|
2011-11-17 17:40:27 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Add an active request to the tracked requests list
|
|
|
|
*/
|
|
|
|
static void tracked_request_begin(BdrvTrackedRequest *req,
|
|
|
|
BlockDriverState *bs,
|
2013-12-03 18:31:25 +04:00
|
|
|
int64_t offset,
|
|
|
|
unsigned int bytes, bool is_write)
|
2011-11-17 17:40:27 +04:00
|
|
|
{
|
|
|
|
*req = (BdrvTrackedRequest){
|
|
|
|
.bs = bs,
|
2013-12-04 19:43:44 +04:00
|
|
|
.offset = offset,
|
|
|
|
.bytes = bytes,
|
|
|
|
.is_write = is_write,
|
|
|
|
.co = qemu_coroutine_self(),
|
|
|
|
.serialising = false,
|
2013-12-04 20:08:50 +04:00
|
|
|
.overlap_offset = offset,
|
|
|
|
.overlap_bytes = bytes,
|
2011-11-17 17:40:27 +04:00
|
|
|
};
|
|
|
|
|
2011-11-17 17:40:29 +04:00
|
|
|
qemu_co_queue_init(&req->wait_queue);
|
|
|
|
|
2011-11-17 17:40:27 +04:00
|
|
|
QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
|
|
|
|
}
|
|
|
|
|
2014-02-08 13:42:18 +04:00
|
|
|
static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
|
2013-12-04 19:43:44 +04:00
|
|
|
{
|
2013-12-04 20:08:50 +04:00
|
|
|
int64_t overlap_offset = req->offset & ~(align - 1);
|
2014-02-08 13:42:18 +04:00
|
|
|
unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
|
|
|
|
- overlap_offset;
|
2013-12-04 20:08:50 +04:00
|
|
|
|
2013-12-04 19:43:44 +04:00
|
|
|
if (!req->serialising) {
|
|
|
|
req->bs->serialising_in_flight++;
|
|
|
|
req->serialising = true;
|
|
|
|
}
|
2013-12-04 20:08:50 +04:00
|
|
|
|
|
|
|
req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
|
|
|
|
req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
|
2013-12-04 19:43:44 +04:00
|
|
|
}
|
|
|
|
|
2011-11-23 15:47:56 +04:00
|
|
|
/**
|
|
|
|
* Round a region to cluster boundaries
|
|
|
|
*/
|
2013-01-21 20:09:42 +04:00
|
|
|
void bdrv_round_to_clusters(BlockDriverState *bs,
|
|
|
|
int64_t sector_num, int nb_sectors,
|
|
|
|
int64_t *cluster_sector_num,
|
|
|
|
int *cluster_nb_sectors)
|
2011-11-23 15:47:56 +04:00
|
|
|
{
|
|
|
|
BlockDriverInfo bdi;
|
|
|
|
|
|
|
|
if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
|
|
|
|
*cluster_sector_num = sector_num;
|
|
|
|
*cluster_nb_sectors = nb_sectors;
|
|
|
|
} else {
|
|
|
|
int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
|
|
|
|
*cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
|
|
|
|
*cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
|
|
|
|
nb_sectors, c);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-04 20:08:50 +04:00
|
|
|
static int bdrv_get_cluster_size(BlockDriverState *bs)
|
2013-12-03 18:31:25 +04:00
|
|
|
{
|
|
|
|
BlockDriverInfo bdi;
|
2013-12-04 20:08:50 +04:00
|
|
|
int ret;
|
2013-12-03 18:31:25 +04:00
|
|
|
|
2013-12-04 20:08:50 +04:00
|
|
|
ret = bdrv_get_info(bs, &bdi);
|
|
|
|
if (ret < 0 || bdi.cluster_size == 0) {
|
|
|
|
return bs->request_alignment;
|
2013-12-03 18:31:25 +04:00
|
|
|
} else {
|
2013-12-04 20:08:50 +04:00
|
|
|
return bdi.cluster_size;
|
2013-12-03 18:31:25 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-11-17 17:40:29 +04:00
|
|
|
static bool tracked_request_overlaps(BdrvTrackedRequest *req,
|
2013-12-03 18:31:25 +04:00
|
|
|
int64_t offset, unsigned int bytes)
|
|
|
|
{
|
2011-11-23 15:47:56 +04:00
|
|
|
/* aaaa bbbb */
|
2013-12-04 20:08:50 +04:00
|
|
|
if (offset >= req->overlap_offset + req->overlap_bytes) {
|
2011-11-23 15:47:56 +04:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
/* bbbb aaaa */
|
2013-12-04 20:08:50 +04:00
|
|
|
if (req->overlap_offset >= offset + bytes) {
|
2011-11-23 15:47:56 +04:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
2011-11-17 17:40:29 +04:00
|
|
|
}
|
|
|
|
|
2014-01-14 14:41:35 +04:00
|
|
|
static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
|
2011-11-17 17:40:29 +04:00
|
|
|
{
|
2013-12-04 19:43:44 +04:00
|
|
|
BlockDriverState *bs = self->bs;
|
2011-11-17 17:40:29 +04:00
|
|
|
BdrvTrackedRequest *req;
|
|
|
|
bool retry;
|
2014-01-14 14:41:35 +04:00
|
|
|
bool waited = false;
|
2011-11-17 17:40:29 +04:00
|
|
|
|
2013-12-04 19:43:44 +04:00
|
|
|
if (!bs->serialising_in_flight) {
|
2014-01-14 14:41:35 +04:00
|
|
|
return false;
|
2013-12-04 19:43:44 +04:00
|
|
|
}
|
|
|
|
|
2011-11-17 17:40:29 +04:00
|
|
|
do {
|
|
|
|
retry = false;
|
|
|
|
QLIST_FOREACH(req, &bs->tracked_requests, list) {
|
2013-12-04 19:43:44 +04:00
|
|
|
if (req == self || (!req->serialising && !self->serialising)) {
|
2013-12-03 17:55:55 +04:00
|
|
|
continue;
|
|
|
|
}
|
2013-12-04 20:08:50 +04:00
|
|
|
if (tracked_request_overlaps(req, self->overlap_offset,
|
|
|
|
self->overlap_bytes))
|
|
|
|
{
|
2011-11-30 16:23:42 +04:00
|
|
|
/* Hitting this means there was a reentrant request, for
|
|
|
|
* example, a block driver issuing nested requests. This must
|
|
|
|
* never happen since it means deadlock.
|
|
|
|
*/
|
|
|
|
assert(qemu_coroutine_self() != req->co);
|
|
|
|
|
2013-12-13 16:04:35 +04:00
|
|
|
/* If the request is already (indirectly) waiting for us, or
|
|
|
|
* will wait for us as soon as it wakes up, then just go on
|
|
|
|
* (instead of producing a deadlock in the former case). */
|
|
|
|
if (!req->waiting_for) {
|
|
|
|
self->waiting_for = req;
|
|
|
|
qemu_co_queue_wait(&req->wait_queue);
|
|
|
|
self->waiting_for = NULL;
|
|
|
|
retry = true;
|
2014-01-14 14:41:35 +04:00
|
|
|
waited = true;
|
2013-12-13 16:04:35 +04:00
|
|
|
break;
|
|
|
|
}
|
2011-11-17 17:40:29 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
} while (retry);
|
2014-01-14 14:41:35 +04:00
|
|
|
|
|
|
|
return waited;
|
2011-11-17 17:40:29 +04:00
|
|
|
}
|
|
|
|
|
2010-01-12 14:55:17 +03:00
|
|
|
/*
|
|
|
|
* Return values:
|
|
|
|
* 0 - success
|
|
|
|
* -EINVAL - backing format specified, but no file
|
|
|
|
* -ENOSPC - can't update the backing file because no space is left in the
|
|
|
|
* image file header
|
|
|
|
* -ENOTSUP - format driver doesn't support changing the backing file
|
|
|
|
*/
|
|
|
|
int bdrv_change_backing_file(BlockDriverState *bs,
|
|
|
|
const char *backing_file, const char *backing_fmt)
|
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
2012-04-12 16:01:02 +04:00
|
|
|
int ret;
|
2010-01-12 14:55:17 +03:00
|
|
|
|
2012-04-12 16:01:01 +04:00
|
|
|
/* Backing file format doesn't make sense without a backing file */
|
|
|
|
if (backing_fmt && !backing_file) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2010-01-12 14:55:17 +03:00
|
|
|
if (drv->bdrv_change_backing_file != NULL) {
|
2012-04-12 16:01:02 +04:00
|
|
|
ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
|
2010-01-12 14:55:17 +03:00
|
|
|
} else {
|
2012-04-12 16:01:02 +04:00
|
|
|
ret = -ENOTSUP;
|
2010-01-12 14:55:17 +03:00
|
|
|
}
|
2012-04-12 16:01:02 +04:00
|
|
|
|
|
|
|
if (ret == 0) {
|
|
|
|
pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
|
|
|
|
pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
|
|
|
|
}
|
|
|
|
return ret;
|
2010-01-12 14:55:17 +03:00
|
|
|
}
|
|
|
|
|
2012-09-27 21:29:12 +04:00
|
|
|
/*
|
|
|
|
* Finds the image layer in the chain that has 'bs' as its backing file.
|
|
|
|
*
|
|
|
|
* active is the current topmost image.
|
|
|
|
*
|
|
|
|
* Returns NULL if bs is not found in active's image chain,
|
|
|
|
* or if active == bs.
|
2014-06-25 23:35:26 +04:00
|
|
|
*
|
|
|
|
* Returns the bottommost base image if bs == NULL.
|
2012-09-27 21:29:12 +04:00
|
|
|
*/
|
|
|
|
BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
|
|
|
|
BlockDriverState *bs)
|
|
|
|
{
|
2014-06-25 23:35:26 +04:00
|
|
|
while (active && bs != active->backing_hd) {
|
|
|
|
active = active->backing_hd;
|
2012-09-27 21:29:12 +04:00
|
|
|
}
|
|
|
|
|
2014-06-25 23:35:26 +04:00
|
|
|
return active;
|
|
|
|
}
|
2012-09-27 21:29:12 +04:00
|
|
|
|
2014-06-25 23:35:26 +04:00
|
|
|
/* Given a BDS, searches for the base layer. */
|
|
|
|
BlockDriverState *bdrv_find_base(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
return bdrv_find_overlay(bs, NULL);
|
2012-09-27 21:29:12 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
typedef struct BlkIntermediateStates {
|
|
|
|
BlockDriverState *bs;
|
|
|
|
QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
|
|
|
|
} BlkIntermediateStates;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Drops images above 'base' up to and including 'top', and sets the image
|
|
|
|
* above 'top' to have base as its backing file.
|
|
|
|
*
|
|
|
|
* Requires that the overlay to 'top' is opened r/w, so that the backing file
|
|
|
|
* information in 'bs' can be properly updated.
|
|
|
|
*
|
|
|
|
* E.g., this will convert the following chain:
|
|
|
|
* bottom <- base <- intermediate <- top <- active
|
|
|
|
*
|
|
|
|
* to
|
|
|
|
*
|
|
|
|
* bottom <- base <- active
|
|
|
|
*
|
|
|
|
* It is allowed for bottom==base, in which case it converts:
|
|
|
|
*
|
|
|
|
* base <- intermediate <- top <- active
|
|
|
|
*
|
|
|
|
* to
|
|
|
|
*
|
|
|
|
* base <- active
|
|
|
|
*
|
block: extend block-commit to accept a string for the backing file
On some image chains, QEMU may not always be able to resolve the
filenames properly, when updating the backing file of an image
after a block commit.
For instance, certain relative pathnames may fail, or drives may
have been specified originally by file descriptor (e.g. /dev/fd/???),
or a relative protocol pathname may have been used.
In these instances, QEMU may lack the information to be able to make
the correct choice, but the user or management layer most likely does
have that knowledge.
With this extension to the block-commit api, the user is able to change
the backing file of the overlay image as part of the block-commit
operation.
This allows the change to be 'safe', in the sense that if the attempt
to write the overlay image metadata fails, then the block-commit
operation returns failure, without disrupting the guest.
If the commit top is the active layer, then specifying the backing
file string will be treated as an error (there is no overlay image
to modify in that case).
If a backing file string is not specified in the command, the backing
file string to use is determined in the same manner as it was
previously.
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2014-06-25 23:40:10 +04:00
|
|
|
* If backing_file_str is non-NULL, it will be used when modifying top's
|
|
|
|
* overlay image metadata.
|
|
|
|
*
|
2012-09-27 21:29:12 +04:00
|
|
|
* Error conditions:
|
|
|
|
* if active == top, that is considered an error
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
|
block: extend block-commit to accept a string for the backing file
On some image chains, QEMU may not always be able to resolve the
filenames properly, when updating the backing file of an image
after a block commit.
For instance, certain relative pathnames may fail, or drives may
have been specified originally by file descriptor (e.g. /dev/fd/???),
or a relative protocol pathname may have been used.
In these instances, QEMU may lack the information to be able to make
the correct choice, but the user or management layer most likely does
have that knowledge.
With this extension to the block-commit api, the user is able to change
the backing file of the overlay image as part of the block-commit
operation.
This allows the change to be 'safe', in the sense that if the attempt
to write the overlay image metadata fails, then the block-commit
operation returns failure, without disrupting the guest.
If the commit top is the active layer, then specifying the backing
file string will be treated as an error (there is no overlay image
to modify in that case).
If a backing file string is not specified in the command, the backing
file string to use is determined in the same manner as it was
previously.
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2014-06-25 23:40:10 +04:00
|
|
|
BlockDriverState *base, const char *backing_file_str)
|
2012-09-27 21:29:12 +04:00
|
|
|
{
|
|
|
|
BlockDriverState *intermediate;
|
|
|
|
BlockDriverState *base_bs = NULL;
|
|
|
|
BlockDriverState *new_top_bs = NULL;
|
|
|
|
BlkIntermediateStates *intermediate_state, *next;
|
|
|
|
int ret = -EIO;
|
|
|
|
|
|
|
|
QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
|
|
|
|
QSIMPLEQ_INIT(&states_to_delete);
|
|
|
|
|
|
|
|
if (!top->drv || !base->drv) {
|
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
new_top_bs = bdrv_find_overlay(active, top);
|
|
|
|
|
|
|
|
if (new_top_bs == NULL) {
|
|
|
|
/* we could not find the image above 'top', this is an error */
|
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* special case of new_top_bs->backing_hd already pointing to base - nothing
|
|
|
|
* to do, no intermediate images */
|
|
|
|
if (new_top_bs->backing_hd == base) {
|
|
|
|
ret = 0;
|
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
intermediate = top;
|
|
|
|
|
|
|
|
/* now we will go down through the list, and add each BDS we find
|
|
|
|
* into our deletion queue, until we hit the 'base'
|
|
|
|
*/
|
|
|
|
while (intermediate) {
|
block: Use g_new() & friends where that makes obvious sense
g_new(T, n) is neater than g_malloc(sizeof(T) * n). It's also safer,
for two reasons. One, it catches multiplication overflowing size_t.
Two, it returns T * rather than void *, which lets the compiler catch
more type errors.
Patch created with Coccinelle, with two manual changes on top:
* Add const to bdrv_iterate_format() to keep the types straight
* Convert the allocation in bdrv_drop_intermediate(), which Coccinelle
inexplicably misses
Coccinelle semantic patch:
@@
type T;
@@
-g_malloc(sizeof(T))
+g_new(T, 1)
@@
type T;
@@
-g_try_malloc(sizeof(T))
+g_try_new(T, 1)
@@
type T;
@@
-g_malloc0(sizeof(T))
+g_new0(T, 1)
@@
type T;
@@
-g_try_malloc0(sizeof(T))
+g_try_new0(T, 1)
@@
type T;
expression n;
@@
-g_malloc(sizeof(T) * (n))
+g_new(T, n)
@@
type T;
expression n;
@@
-g_try_malloc(sizeof(T) * (n))
+g_try_new(T, n)
@@
type T;
expression n;
@@
-g_malloc0(sizeof(T) * (n))
+g_new0(T, n)
@@
type T;
expression n;
@@
-g_try_malloc0(sizeof(T) * (n))
+g_try_new0(T, n)
@@
type T;
expression p, n;
@@
-g_realloc(p, sizeof(T) * (n))
+g_renew(T, p, n)
@@
type T;
expression p, n;
@@
-g_try_realloc(p, sizeof(T) * (n))
+g_try_renew(T, p, n)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-08-19 12:31:08 +04:00
|
|
|
intermediate_state = g_new0(BlkIntermediateStates, 1);
|
2012-09-27 21:29:12 +04:00
|
|
|
intermediate_state->bs = intermediate;
|
|
|
|
QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
|
|
|
|
|
|
|
|
if (intermediate->backing_hd == base) {
|
|
|
|
base_bs = intermediate->backing_hd;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
intermediate = intermediate->backing_hd;
|
|
|
|
}
|
|
|
|
if (base_bs == NULL) {
|
|
|
|
/* something went wrong, we did not end at the base. safely
|
|
|
|
* unravel everything, and exit with error */
|
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* success - we can delete the intermediate states, and link top->base */
|
block: extend block-commit to accept a string for the backing file
On some image chains, QEMU may not always be able to resolve the
filenames properly, when updating the backing file of an image
after a block commit.
For instance, certain relative pathnames may fail, or drives may
have been specified originally by file descriptor (e.g. /dev/fd/???),
or a relative protocol pathname may have been used.
In these instances, QEMU may lack the information to be able to make
the correct choice, but the user or management layer most likely does
have that knowledge.
With this extension to the block-commit api, the user is able to change
the backing file of the overlay image as part of the block-commit
operation.
This allows the change to be 'safe', in the sense that if the attempt
to write the overlay image metadata fails, then the block-commit
operation returns failure, without disrupting the guest.
If the commit top is the active layer, then specifying the backing
file string will be treated as an error (there is no overlay image
to modify in that case).
If a backing file string is not specified in the command, the backing
file string to use is determined in the same manner as it was
previously.
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2014-06-25 23:40:10 +04:00
|
|
|
backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
|
|
|
|
ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
|
2012-09-27 21:29:12 +04:00
|
|
|
base_bs->drv ? base_bs->drv->format_name : "");
|
|
|
|
if (ret) {
|
|
|
|
goto exit;
|
|
|
|
}
|
2014-05-23 17:29:46 +04:00
|
|
|
bdrv_set_backing_hd(new_top_bs, base_bs);
|
2012-09-27 21:29:12 +04:00
|
|
|
|
|
|
|
QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
|
|
|
|
/* so that bdrv_close() does not recursively close the chain */
|
2014-05-23 17:29:46 +04:00
|
|
|
bdrv_set_backing_hd(intermediate_state->bs, NULL);
|
2013-08-23 05:14:47 +04:00
|
|
|
bdrv_unref(intermediate_state->bs);
|
2012-09-27 21:29:12 +04:00
|
|
|
}
|
|
|
|
ret = 0;
|
|
|
|
|
|
|
|
exit:
|
|
|
|
QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
|
|
|
|
g_free(intermediate_state);
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-03-03 20:37:16 +03:00
|
|
|
static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
|
|
|
|
size_t size)
|
|
|
|
{
|
2015-02-06 13:54:11 +03:00
|
|
|
if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
|
2014-04-14 16:48:16 +04:00
|
|
|
return -EIO;
|
|
|
|
}
|
|
|
|
|
2015-02-05 21:58:24 +03:00
|
|
|
if (!bdrv_is_inserted(bs)) {
|
2009-03-03 20:37:16 +03:00
|
|
|
return -ENOMEDIUM;
|
2015-02-05 21:58:24 +03:00
|
|
|
}
|
2009-03-03 20:37:16 +03:00
|
|
|
|
2015-02-05 21:58:24 +03:00
|
|
|
if (offset < 0) {
|
2009-03-03 20:37:16 +03:00
|
|
|
return -EIO;
|
2015-02-05 21:58:24 +03:00
|
|
|
}
|
2009-03-03 20:37:16 +03:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
|
|
|
|
int nb_sectors)
|
|
|
|
{
|
2015-02-06 13:54:11 +03:00
|
|
|
if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
|
2014-03-26 16:06:02 +04:00
|
|
|
return -EIO;
|
|
|
|
}
|
|
|
|
|
2010-05-27 18:20:31 +04:00
|
|
|
return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
|
|
|
|
nb_sectors * BDRV_SECTOR_SIZE);
|
2009-03-03 20:37:16 +03:00
|
|
|
}
|
|
|
|
|
2011-10-13 16:08:22 +04:00
|
|
|
typedef struct RwCo {
|
|
|
|
BlockDriverState *bs;
|
2013-12-05 15:09:38 +04:00
|
|
|
int64_t offset;
|
2011-10-13 16:08:22 +04:00
|
|
|
QEMUIOVector *qiov;
|
|
|
|
bool is_write;
|
|
|
|
int ret;
|
2013-07-11 16:16:22 +04:00
|
|
|
BdrvRequestFlags flags;
|
2011-10-13 16:08:22 +04:00
|
|
|
} RwCo;
|
|
|
|
|
|
|
|
static void coroutine_fn bdrv_rw_co_entry(void *opaque)
|
2003-06-30 14:03:06 +04:00
|
|
|
{
|
2011-10-13 16:08:22 +04:00
|
|
|
RwCo *rwco = opaque;
|
2004-08-02 01:59:26 +04:00
|
|
|
|
2011-10-13 16:08:22 +04:00
|
|
|
if (!rwco->is_write) {
|
2013-12-05 15:09:38 +04:00
|
|
|
rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
|
|
|
|
rwco->qiov->size, rwco->qiov,
|
2013-07-11 16:16:22 +04:00
|
|
|
rwco->flags);
|
2013-12-05 15:09:38 +04:00
|
|
|
} else {
|
|
|
|
rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
|
|
|
|
rwco->qiov->size, rwco->qiov,
|
|
|
|
rwco->flags);
|
2011-10-13 16:08:22 +04:00
|
|
|
}
|
|
|
|
}
|
2011-07-15 18:05:00 +04:00
|
|
|
|
2011-10-13 16:08:22 +04:00
|
|
|
/*
|
2013-04-05 23:27:55 +04:00
|
|
|
* Process a vectored synchronous request using coroutines
|
2011-10-13 16:08:22 +04:00
|
|
|
*/
|
2013-12-05 15:09:38 +04:00
|
|
|
static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
|
|
|
|
QEMUIOVector *qiov, bool is_write,
|
|
|
|
BdrvRequestFlags flags)
|
2011-10-13 16:08:22 +04:00
|
|
|
{
|
|
|
|
Coroutine *co;
|
|
|
|
RwCo rwco = {
|
|
|
|
.bs = bs,
|
2013-12-05 15:09:38 +04:00
|
|
|
.offset = offset,
|
2013-04-05 23:27:55 +04:00
|
|
|
.qiov = qiov,
|
2011-10-13 16:08:22 +04:00
|
|
|
.is_write = is_write,
|
|
|
|
.ret = NOT_DONE,
|
2013-07-11 16:16:22 +04:00
|
|
|
.flags = flags,
|
2011-10-13 16:08:22 +04:00
|
|
|
};
|
2011-07-15 18:05:00 +04:00
|
|
|
|
2012-04-02 14:59:34 +04:00
|
|
|
/**
|
|
|
|
* In sync call context, when the vcpu is blocked, this throttling timer
|
|
|
|
* will not fire; so the I/O throttling function has to be disabled here
|
|
|
|
* if it has been enabled.
|
|
|
|
*/
|
|
|
|
if (bs->io_limits_enabled) {
|
|
|
|
fprintf(stderr, "Disabling I/O throttling on '%s' due "
|
|
|
|
"to synchronous I/O.\n", bdrv_get_device_name(bs));
|
|
|
|
bdrv_io_limits_disable(bs);
|
|
|
|
}
|
|
|
|
|
2011-10-13 16:08:22 +04:00
|
|
|
if (qemu_in_coroutine()) {
|
|
|
|
/* Fast-path if already in coroutine context */
|
|
|
|
bdrv_rw_co_entry(&rwco);
|
|
|
|
} else {
|
2014-05-08 18:34:34 +04:00
|
|
|
AioContext *aio_context = bdrv_get_aio_context(bs);
|
|
|
|
|
2011-10-13 16:08:22 +04:00
|
|
|
co = qemu_coroutine_create(bdrv_rw_co_entry);
|
|
|
|
qemu_coroutine_enter(co, &rwco);
|
|
|
|
while (rwco.ret == NOT_DONE) {
|
2014-05-08 18:34:34 +04:00
|
|
|
aio_poll(aio_context, true);
|
2011-10-13 16:08:22 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return rwco.ret;
|
|
|
|
}
|
2004-03-15 00:38:54 +03:00
|
|
|
|
2013-04-05 23:27:55 +04:00
|
|
|
/*
|
|
|
|
* Process a synchronous request using coroutines
|
|
|
|
*/
|
|
|
|
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
|
2013-07-11 16:16:22 +04:00
|
|
|
int nb_sectors, bool is_write, BdrvRequestFlags flags)
|
2013-04-05 23:27:55 +04:00
|
|
|
{
|
|
|
|
QEMUIOVector qiov;
|
|
|
|
struct iovec iov = {
|
|
|
|
.iov_base = (void *)buf,
|
|
|
|
.iov_len = nb_sectors * BDRV_SECTOR_SIZE,
|
|
|
|
};
|
|
|
|
|
2015-02-06 13:54:11 +03:00
|
|
|
if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
|
2014-04-14 17:39:36 +04:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2013-04-05 23:27:55 +04:00
|
|
|
qemu_iovec_init_external(&qiov, &iov, 1);
|
2013-12-05 15:09:38 +04:00
|
|
|
return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
|
|
|
|
&qiov, is_write, flags);
|
2013-04-05 23:27:55 +04:00
|
|
|
}
|
|
|
|
|
2011-10-13 16:08:22 +04:00
|
|
|
/* return < 0 if error. See bdrv_write() for the return codes */
|
|
|
|
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
|
|
|
|
uint8_t *buf, int nb_sectors)
|
|
|
|
{
|
2013-07-11 16:16:22 +04:00
|
|
|
return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
|
2003-06-30 14:03:06 +04:00
|
|
|
}
|
|
|
|
|
2012-06-29 19:34:29 +04:00
|
|
|
/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
|
|
|
|
int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
|
|
|
|
uint8_t *buf, int nb_sectors)
|
|
|
|
{
|
|
|
|
bool enabled;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
enabled = bs->io_limits_enabled;
|
|
|
|
bs->io_limits_enabled = false;
|
2013-07-18 12:37:32 +04:00
|
|
|
ret = bdrv_read(bs, sector_num, buf, nb_sectors);
|
2012-06-29 19:34:29 +04:00
|
|
|
bs->io_limits_enabled = enabled;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2007-09-17 01:08:06 +04:00
|
|
|
/* Return < 0 if error. Important errors are:
|
2006-08-19 15:45:59 +04:00
|
|
|
-EIO generic I/O error (may happen for all errors)
|
|
|
|
-ENOMEDIUM No media inserted.
|
|
|
|
-EINVAL Invalid sector number or nb_sectors
|
|
|
|
-EACCES Trying to write a read-only device
|
|
|
|
*/
|
2007-09-17 01:08:06 +04:00
|
|
|
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
|
2003-06-30 14:03:06 +04:00
|
|
|
const uint8_t *buf, int nb_sectors)
|
|
|
|
{
|
2013-07-11 16:16:22 +04:00
|
|
|
return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
|
2006-08-01 20:21:11 +04:00
|
|
|
}
|
|
|
|
|
2013-10-24 14:06:51 +04:00
|
|
|
int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
|
|
|
|
int nb_sectors, BdrvRequestFlags flags)
|
2013-07-11 16:16:22 +04:00
|
|
|
{
|
|
|
|
return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
|
2013-10-24 14:06:51 +04:00
|
|
|
BDRV_REQ_ZERO_WRITE | flags);
|
2013-04-05 23:27:55 +04:00
|
|
|
}
|
|
|
|
|
2013-10-24 14:07:03 +04:00
|
|
|
/*
|
|
|
|
* Completely zero out a block device with the help of bdrv_write_zeroes.
|
|
|
|
* The operation is sped up by checking the block status and only writing
|
|
|
|
* zeroes to the device if they currently do not return zeroes. Optional
|
|
|
|
* flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
|
|
|
|
*
|
|
|
|
* Returns < 0 on error, 0 on success. For error codes see bdrv_write().
|
|
|
|
*/
|
|
|
|
int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
|
|
|
|
{
|
2014-06-26 15:23:18 +04:00
|
|
|
int64_t target_sectors, ret, nb_sectors, sector_num = 0;
|
2013-10-24 14:07:03 +04:00
|
|
|
int n;
|
|
|
|
|
2014-06-26 15:23:18 +04:00
|
|
|
target_sectors = bdrv_nb_sectors(bs);
|
|
|
|
if (target_sectors < 0) {
|
|
|
|
return target_sectors;
|
2014-04-14 19:03:34 +04:00
|
|
|
}
|
|
|
|
|
2013-10-24 14:07:03 +04:00
|
|
|
for (;;) {
|
2015-02-06 13:54:11 +03:00
|
|
|
nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
|
2013-10-24 14:07:03 +04:00
|
|
|
if (nb_sectors <= 0) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
|
2013-12-12 16:57:05 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
error_report("error getting block status at sector %" PRId64 ": %s",
|
|
|
|
sector_num, strerror(-ret));
|
|
|
|
return ret;
|
|
|
|
}
|
2013-10-24 14:07:03 +04:00
|
|
|
if (ret & BDRV_BLOCK_ZERO) {
|
|
|
|
sector_num += n;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
ret = bdrv_write_zeroes(bs, sector_num, n, flags);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_report("error writing zeroes at sector %" PRId64 ": %s",
|
|
|
|
sector_num, strerror(-ret));
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
sector_num += n;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-05 15:29:59 +04:00
|
|
|
int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
|
2006-08-01 20:21:11 +04:00
|
|
|
{
|
2013-12-05 15:29:59 +04:00
|
|
|
QEMUIOVector qiov;
|
|
|
|
struct iovec iov = {
|
|
|
|
.iov_base = (void *)buf,
|
|
|
|
.iov_len = bytes,
|
|
|
|
};
|
2010-01-20 17:03:02 +03:00
|
|
|
int ret;
|
2006-08-01 20:21:11 +04:00
|
|
|
|
2013-12-05 15:29:59 +04:00
|
|
|
if (bytes < 0) {
|
|
|
|
return -EINVAL;
|
2006-08-01 20:21:11 +04:00
|
|
|
}
|
|
|
|
|
2013-12-05 15:29:59 +04:00
|
|
|
qemu_iovec_init_external(&qiov, &iov, 1);
|
|
|
|
ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
2006-08-01 20:21:11 +04:00
|
|
|
}
|
2013-12-05 15:29:59 +04:00
|
|
|
|
|
|
|
return bytes;
|
2006-08-01 20:21:11 +04:00
|
|
|
}
|
|
|
|
|
2013-04-05 23:27:55 +04:00
|
|
|
int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
|
2006-08-01 20:21:11 +04:00
|
|
|
{
|
2010-01-20 17:03:02 +03:00
|
|
|
int ret;
|
2006-08-01 20:21:11 +04:00
|
|
|
|
2013-12-05 15:34:02 +04:00
|
|
|
ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
2006-08-01 20:21:11 +04:00
|
|
|
}
|
|
|
|
|
2013-04-05 23:27:55 +04:00
|
|
|
return qiov->size;
|
|
|
|
}
|
|
|
|
|
|
|
|
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
|
2013-12-05 15:34:02 +04:00
|
|
|
const void *buf, int bytes)
|
2013-04-05 23:27:55 +04:00
|
|
|
{
|
|
|
|
QEMUIOVector qiov;
|
|
|
|
struct iovec iov = {
|
|
|
|
.iov_base = (void *) buf,
|
2013-12-05 15:34:02 +04:00
|
|
|
.iov_len = bytes,
|
2013-04-05 23:27:55 +04:00
|
|
|
};
|
|
|
|
|
2013-12-05 15:34:02 +04:00
|
|
|
if (bytes < 0) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2013-04-05 23:27:55 +04:00
|
|
|
qemu_iovec_init_external(&qiov, &iov, 1);
|
|
|
|
return bdrv_pwritev(bs, offset, &qiov);
|
2006-08-01 20:21:11 +04:00
|
|
|
}
|
|
|
|
|
2010-06-16 18:38:15 +04:00
|
|
|
/*
|
|
|
|
* Writes to the file and ensures that no writes are reordered across this
|
|
|
|
* request (acts as a barrier)
|
|
|
|
*
|
|
|
|
* Returns 0 on success, -errno in error cases.
|
|
|
|
*/
|
|
|
|
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
|
|
|
|
const void *buf, int count)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = bdrv_pwrite(bs, offset, buf, count);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2012-06-06 02:04:49 +04:00
|
|
|
/* No flush needed for cache modes that already do it */
|
|
|
|
if (bs->enable_write_cache) {
|
2010-06-16 18:38:15 +04:00
|
|
|
bdrv_flush(bs);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-01-18 18:40:42 +04:00
|
|
|
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
|
2011-11-17 17:40:31 +04:00
|
|
|
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
|
|
|
|
{
|
|
|
|
/* Perform I/O through a temporary buffer so that users who scribble over
|
|
|
|
* their read buffer while the operation is in progress do not end up
|
|
|
|
* modifying the image file. This is critical for zero-copy guest I/O
|
|
|
|
* where anything might happen inside guest memory.
|
|
|
|
*/
|
|
|
|
void *bounce_buffer;
|
|
|
|
|
2012-02-07 17:27:26 +04:00
|
|
|
BlockDriver *drv = bs->drv;
|
2011-11-17 17:40:31 +04:00
|
|
|
struct iovec iov;
|
|
|
|
QEMUIOVector bounce_qiov;
|
|
|
|
int64_t cluster_sector_num;
|
|
|
|
int cluster_nb_sectors;
|
|
|
|
size_t skip_bytes;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* Cover entire cluster so no additional backing file I/O is required when
|
|
|
|
* allocating cluster in the image file.
|
|
|
|
*/
|
2013-01-21 20:09:42 +04:00
|
|
|
bdrv_round_to_clusters(bs, sector_num, nb_sectors,
|
|
|
|
&cluster_sector_num, &cluster_nb_sectors);
|
2011-11-17 17:40:31 +04:00
|
|
|
|
2012-01-18 18:40:42 +04:00
|
|
|
trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
|
|
|
|
cluster_sector_num, cluster_nb_sectors);
|
2011-11-17 17:40:31 +04:00
|
|
|
|
|
|
|
iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
|
2014-05-20 15:16:51 +04:00
|
|
|
iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
|
|
|
|
if (bounce_buffer == NULL) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2011-11-17 17:40:31 +04:00
|
|
|
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
|
|
|
|
|
2012-02-07 17:27:26 +04:00
|
|
|
ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
|
|
|
|
&bounce_qiov);
|
2011-11-17 17:40:31 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2012-02-07 17:27:26 +04:00
|
|
|
if (drv->bdrv_co_write_zeroes &&
|
|
|
|
buffer_is_zero(bounce_buffer, iov.iov_len)) {
|
2012-03-20 18:12:58 +04:00
|
|
|
ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
|
2013-10-24 14:06:51 +04:00
|
|
|
cluster_nb_sectors, 0);
|
2012-02-07 17:27:26 +04:00
|
|
|
} else {
|
2012-06-06 02:04:49 +04:00
|
|
|
/* This does not change the data on the disk, it is not necessary
|
|
|
|
* to flush even in cache=writethrough mode.
|
|
|
|
*/
|
2012-02-07 17:27:26 +04:00
|
|
|
ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
|
2011-11-17 17:40:31 +04:00
|
|
|
&bounce_qiov);
|
2012-02-07 17:27:26 +04:00
|
|
|
}
|
|
|
|
|
2011-11-17 17:40:31 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
/* It might be okay to ignore write errors for guest requests. If this
|
|
|
|
* is a deliberate copy-on-read then we don't want to ignore the error.
|
|
|
|
* Simply report it in all cases.
|
|
|
|
*/
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
|
allow qemu_iovec_from_buffer() to specify offset from which to start copying
Similar to
qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
int c, size_t bytes);
the new prototype is:
qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
const void *buf, size_t bytes);
The processing starts at offset bytes within qiov.
This way, we may copy a bounce buffer directly to
a middle of qiov.
This is exactly the same function as iov_from_buf() from
iov.c, so use the existing implementation and rename it
to qemu_iovec_from_buf() to be shorter and to match the
utility function.
As with utility implementation, we now assert that the
offset is inside actual iovec. Nothing changed for
current callers, because `offset' parameter is new.
While at it, stop using "bounce-qiov" in block/qcow2.c
and copy decrypted data directly from cluster_data
instead of recreating a temp qiov for doing that.
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2012-06-07 20:17:55 +04:00
|
|
|
qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
|
|
|
|
nb_sectors * BDRV_SECTOR_SIZE);
|
2011-11-17 17:40:31 +04:00
|
|
|
|
|
|
|
err:
|
|
|
|
qemu_vfree(bounce_buffer);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2011-10-05 20:17:03 +04:00
|
|
|
/*
|
2013-12-02 18:07:48 +04:00
|
|
|
* Forwards an already correctly aligned request to the BlockDriver. This
|
|
|
|
* handles copy on read and zeroing after EOF; any other features must be
|
|
|
|
* implemented by the caller.
|
2011-10-05 20:17:03 +04:00
|
|
|
*/
|
2013-12-02 18:07:48 +04:00
|
|
|
static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
|
2013-12-03 17:55:55 +04:00
|
|
|
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
|
2013-12-04 15:13:10 +04:00
|
|
|
int64_t align, QEMUIOVector *qiov, int flags)
|
2011-07-14 19:27:13 +04:00
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
2011-11-17 17:40:27 +04:00
|
|
|
int ret;
|
2011-07-14 19:27:13 +04:00
|
|
|
|
2013-12-02 18:07:48 +04:00
|
|
|
int64_t sector_num = offset >> BDRV_SECTOR_BITS;
|
|
|
|
unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
|
2011-07-14 19:27:13 +04:00
|
|
|
|
2013-12-02 18:07:48 +04:00
|
|
|
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
|
|
|
|
assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
|
2014-07-01 18:09:54 +04:00
|
|
|
assert(!qiov || bytes == qiov->size);
|
2013-12-02 18:07:48 +04:00
|
|
|
|
|
|
|
/* Handle Copy on Read and associated serialisation */
|
2012-01-18 18:40:42 +04:00
|
|
|
if (flags & BDRV_REQ_COPY_ON_READ) {
|
2013-12-04 20:08:50 +04:00
|
|
|
/* If we touch the same cluster it counts as an overlap. This
|
|
|
|
* guarantees that allocating writes will be serialized and not race
|
|
|
|
* with each other for the same cluster. For example, in copy-on-read
|
|
|
|
* it ensures that the CoR read and write operations are atomic and
|
|
|
|
* guest writes cannot interleave between them. */
|
|
|
|
mark_request_serialising(req, bdrv_get_cluster_size(bs));
|
2012-01-18 18:40:42 +04:00
|
|
|
}
|
|
|
|
|
2013-12-04 19:43:44 +04:00
|
|
|
wait_serialising_requests(req);
|
2011-11-17 17:40:29 +04:00
|
|
|
|
2012-01-18 18:40:42 +04:00
|
|
|
if (flags & BDRV_REQ_COPY_ON_READ) {
|
2011-11-17 17:40:31 +04:00
|
|
|
int pnum;
|
|
|
|
|
2013-09-04 21:00:22 +04:00
|
|
|
ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
|
2011-11-17 17:40:31 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!ret || pnum != nb_sectors) {
|
2012-01-18 18:40:42 +04:00
|
|
|
ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
|
2011-11-17 17:40:31 +04:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-02 18:07:48 +04:00
|
|
|
/* Forward the request to the BlockDriver */
|
2015-02-05 21:58:24 +03:00
|
|
|
if (!bs->zero_beyond_eof) {
|
2013-08-06 05:53:40 +04:00
|
|
|
ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
|
|
|
|
} else {
|
2015-02-05 21:58:24 +03:00
|
|
|
/* Read zeros after EOF */
|
2014-06-26 15:23:19 +04:00
|
|
|
int64_t total_sectors, max_nb_sectors;
|
2013-08-06 05:53:40 +04:00
|
|
|
|
2014-06-26 15:23:19 +04:00
|
|
|
total_sectors = bdrv_nb_sectors(bs);
|
|
|
|
if (total_sectors < 0) {
|
|
|
|
ret = total_sectors;
|
2013-08-06 05:53:40 +04:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2014-02-07 19:00:09 +04:00
|
|
|
max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
|
|
|
|
align >> BDRV_SECTOR_BITS);
|
2014-12-17 18:09:59 +03:00
|
|
|
if (nb_sectors < max_nb_sectors) {
|
|
|
|
ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
|
|
|
|
} else if (max_nb_sectors > 0) {
|
2014-07-03 15:21:24 +04:00
|
|
|
QEMUIOVector local_qiov;
|
|
|
|
|
|
|
|
qemu_iovec_init(&local_qiov, qiov->niov);
|
|
|
|
qemu_iovec_concat(&local_qiov, qiov, 0,
|
2014-12-17 18:09:59 +03:00
|
|
|
max_nb_sectors * BDRV_SECTOR_SIZE);
|
2014-07-03 15:21:24 +04:00
|
|
|
|
2014-12-17 18:09:59 +03:00
|
|
|
ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
|
2014-07-03 15:21:24 +04:00
|
|
|
&local_qiov);
|
|
|
|
|
|
|
|
qemu_iovec_destroy(&local_qiov);
|
2013-08-06 05:53:40 +04:00
|
|
|
} else {
|
|
|
|
ret = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Reading beyond end of file is supposed to produce zeroes */
|
|
|
|
if (ret == 0 && total_sectors < sector_num + nb_sectors) {
|
|
|
|
uint64_t offset = MAX(0, total_sectors - sector_num);
|
|
|
|
uint64_t bytes = (sector_num + nb_sectors - offset) *
|
|
|
|
BDRV_SECTOR_SIZE;
|
|
|
|
qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
|
|
|
|
}
|
|
|
|
}
|
2011-11-17 17:40:31 +04:00
|
|
|
|
|
|
|
out:
|
2011-11-17 17:40:27 +04:00
|
|
|
return ret;
|
2011-07-14 19:27:13 +04:00
|
|
|
}
|
|
|
|
|
2015-03-24 04:23:49 +03:00
|
|
|
static inline uint64_t bdrv_get_align(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
/* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
|
|
|
|
return MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool bdrv_req_is_aligned(BlockDriverState *bs,
|
|
|
|
int64_t offset, size_t bytes)
|
|
|
|
{
|
|
|
|
int64_t align = bdrv_get_align(bs);
|
|
|
|
return !(offset & (align - 1) || (bytes & (align - 1)));
|
|
|
|
}
|
|
|
|
|
2013-12-02 18:07:48 +04:00
|
|
|
/*
|
|
|
|
* Handle a read request in coroutine context
|
|
|
|
*/
|
2013-12-02 19:09:46 +04:00
|
|
|
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
|
|
|
|
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
|
2013-12-02 18:07:48 +04:00
|
|
|
BdrvRequestFlags flags)
|
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
2013-12-03 17:55:55 +04:00
|
|
|
BdrvTrackedRequest req;
|
|
|
|
|
2015-03-24 04:23:49 +03:00
|
|
|
uint64_t align = bdrv_get_align(bs);
|
2013-12-02 19:09:46 +04:00
|
|
|
uint8_t *head_buf = NULL;
|
|
|
|
uint8_t *tail_buf = NULL;
|
|
|
|
QEMUIOVector local_qiov;
|
|
|
|
bool use_local_qiov = false;
|
2013-12-02 18:07:48 +04:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!drv) {
|
|
|
|
return -ENOMEDIUM;
|
|
|
|
}
|
2015-02-05 21:58:25 +03:00
|
|
|
|
|
|
|
ret = bdrv_check_byte_request(bs, offset, bytes);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
2013-12-02 18:07:48 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
if (bs->copy_on_read) {
|
|
|
|
flags |= BDRV_REQ_COPY_ON_READ;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* throttling disk I/O */
|
|
|
|
if (bs->io_limits_enabled) {
|
2014-01-16 16:29:10 +04:00
|
|
|
bdrv_io_limits_intercept(bs, bytes, false);
|
2013-12-02 19:09:46 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Align read if necessary by padding qiov */
|
|
|
|
if (offset & (align - 1)) {
|
|
|
|
head_buf = qemu_blockalign(bs, align);
|
|
|
|
qemu_iovec_init(&local_qiov, qiov->niov + 2);
|
|
|
|
qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
|
|
|
|
qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
|
|
|
|
use_local_qiov = true;
|
|
|
|
|
|
|
|
bytes += offset & (align - 1);
|
|
|
|
offset = offset & ~(align - 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((offset + bytes) & (align - 1)) {
|
|
|
|
if (!use_local_qiov) {
|
|
|
|
qemu_iovec_init(&local_qiov, qiov->niov + 1);
|
|
|
|
qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
|
|
|
|
use_local_qiov = true;
|
|
|
|
}
|
|
|
|
tail_buf = qemu_blockalign(bs, align);
|
|
|
|
qemu_iovec_add(&local_qiov, tail_buf,
|
|
|
|
align - ((offset + bytes) & (align - 1)));
|
|
|
|
|
|
|
|
bytes = ROUND_UP(bytes, align);
|
|
|
|
}
|
|
|
|
|
2013-12-03 17:55:55 +04:00
|
|
|
tracked_request_begin(&req, bs, offset, bytes, false);
|
2013-12-04 15:13:10 +04:00
|
|
|
ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
|
2013-12-02 19:09:46 +04:00
|
|
|
use_local_qiov ? &local_qiov : qiov,
|
|
|
|
flags);
|
2013-12-03 17:55:55 +04:00
|
|
|
tracked_request_end(&req);
|
2013-12-02 19:09:46 +04:00
|
|
|
|
|
|
|
if (use_local_qiov) {
|
|
|
|
qemu_iovec_destroy(&local_qiov);
|
|
|
|
qemu_vfree(head_buf);
|
|
|
|
qemu_vfree(tail_buf);
|
2013-12-02 18:07:48 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2013-12-02 19:09:46 +04:00
|
|
|
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
|
|
|
|
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
|
|
|
|
BdrvRequestFlags flags)
|
|
|
|
{
|
2015-02-06 13:54:11 +03:00
|
|
|
if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
|
2013-12-02 19:09:46 +04:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
|
|
|
|
nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
|
|
|
|
}
|
|
|
|
|
2011-10-05 20:17:03 +04:00
|
|
|
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
|
2011-07-14 19:27:13 +04:00
|
|
|
int nb_sectors, QEMUIOVector *qiov)
|
|
|
|
{
|
2011-10-05 20:17:03 +04:00
|
|
|
trace_bdrv_co_readv(bs, sector_num, nb_sectors);
|
2011-07-14 19:27:13 +04:00
|
|
|
|
2012-01-18 18:40:42 +04:00
|
|
|
return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
|
|
|
|
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
|
|
|
|
{
|
|
|
|
trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
|
|
|
|
|
|
|
|
return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
|
|
|
|
BDRV_REQ_COPY_ON_READ);
|
2011-10-05 20:17:03 +04:00
|
|
|
}
|
|
|
|
|
2015-02-02 17:48:34 +03:00
|
|
|
#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
|
2013-10-24 14:06:58 +04:00
|
|
|
|
2012-02-07 17:27:25 +04:00
|
|
|
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
|
2013-10-24 14:06:51 +04:00
|
|
|
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
|
2012-02-07 17:27:25 +04:00
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
|
|
|
QEMUIOVector qiov;
|
2013-10-24 14:06:58 +04:00
|
|
|
struct iovec iov = {0};
|
|
|
|
int ret = 0;
|
2012-02-07 17:27:25 +04:00
|
|
|
|
2015-02-06 13:54:11 +03:00
|
|
|
int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
|
|
|
|
BDRV_REQUEST_MAX_SECTORS);
|
2012-03-20 18:12:58 +04:00
|
|
|
|
2013-10-24 14:06:58 +04:00
|
|
|
while (nb_sectors > 0 && !ret) {
|
|
|
|
int num = nb_sectors;
|
|
|
|
|
2013-11-22 16:39:48 +04:00
|
|
|
/* Align request. Block drivers can expect the "bulk" of the request
|
|
|
|
* to be aligned.
|
|
|
|
*/
|
|
|
|
if (bs->bl.write_zeroes_alignment
|
|
|
|
&& num > bs->bl.write_zeroes_alignment) {
|
|
|
|
if (sector_num % bs->bl.write_zeroes_alignment != 0) {
|
|
|
|
/* Make a small request up to the first aligned sector. */
|
2013-10-24 14:06:58 +04:00
|
|
|
num = bs->bl.write_zeroes_alignment;
|
2013-11-22 16:39:48 +04:00
|
|
|
num -= sector_num % bs->bl.write_zeroes_alignment;
|
|
|
|
} else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
|
|
|
|
/* Shorten the request to the last aligned sector. num cannot
|
|
|
|
* underflow because num > bs->bl.write_zeroes_alignment.
|
|
|
|
*/
|
|
|
|
num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
|
2013-10-24 14:06:58 +04:00
|
|
|
}
|
2012-03-20 18:12:58 +04:00
|
|
|
}
|
2012-02-07 17:27:25 +04:00
|
|
|
|
2013-10-24 14:06:58 +04:00
|
|
|
/* limit request size */
|
|
|
|
if (num > max_write_zeroes) {
|
|
|
|
num = max_write_zeroes;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = -ENOTSUP;
|
|
|
|
/* First try the efficient write zeroes operation */
|
|
|
|
if (drv->bdrv_co_write_zeroes) {
|
|
|
|
ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ret == -ENOTSUP) {
|
|
|
|
/* Fall back to bounce buffer if write zeroes is unsupported */
|
2015-01-05 14:29:49 +03:00
|
|
|
int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
|
2015-02-02 17:48:34 +03:00
|
|
|
MAX_WRITE_ZEROES_BOUNCE_BUFFER);
|
2015-01-05 14:29:49 +03:00
|
|
|
num = MIN(num, max_xfer_len);
|
2013-10-24 14:06:58 +04:00
|
|
|
iov.iov_len = num * BDRV_SECTOR_SIZE;
|
|
|
|
if (iov.iov_base == NULL) {
|
2014-05-20 15:16:51 +04:00
|
|
|
iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
|
|
|
|
if (iov.iov_base == NULL) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto fail;
|
|
|
|
}
|
2013-11-22 16:39:48 +04:00
|
|
|
memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
|
2013-10-24 14:06:58 +04:00
|
|
|
}
|
|
|
|
qemu_iovec_init_external(&qiov, &iov, 1);
|
2012-02-07 17:27:25 +04:00
|
|
|
|
2013-10-24 14:06:58 +04:00
|
|
|
ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
|
2013-11-22 16:39:48 +04:00
|
|
|
|
|
|
|
/* Keep bounce buffer around if it is big enough for all
|
|
|
|
* all future requests.
|
|
|
|
*/
|
2015-01-05 14:29:49 +03:00
|
|
|
if (num < max_xfer_len) {
|
2013-11-22 16:39:48 +04:00
|
|
|
qemu_vfree(iov.iov_base);
|
|
|
|
iov.iov_base = NULL;
|
|
|
|
}
|
2013-10-24 14:06:58 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
sector_num += num;
|
|
|
|
nb_sectors -= num;
|
|
|
|
}
|
2012-02-07 17:27:25 +04:00
|
|
|
|
2014-05-20 15:16:51 +04:00
|
|
|
fail:
|
2012-02-07 17:27:25 +04:00
|
|
|
qemu_vfree(iov.iov_base);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2011-10-05 20:17:03 +04:00
|
|
|
/*
|
2013-12-03 17:02:23 +04:00
|
|
|
* Forwards an already correctly aligned write request to the BlockDriver.
|
2011-10-05 20:17:03 +04:00
|
|
|
*/
|
2013-12-03 17:02:23 +04:00
|
|
|
static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
|
2013-12-03 17:55:55 +04:00
|
|
|
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
|
|
|
|
QEMUIOVector *qiov, int flags)
|
2011-10-05 20:17:03 +04:00
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
2014-01-14 14:41:35 +04:00
|
|
|
bool waited;
|
2011-10-13 16:08:24 +04:00
|
|
|
int ret;
|
2011-07-14 19:27:13 +04:00
|
|
|
|
2013-12-03 17:02:23 +04:00
|
|
|
int64_t sector_num = offset >> BDRV_SECTOR_BITS;
|
|
|
|
unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
|
2011-11-17 17:40:29 +04:00
|
|
|
|
2013-12-03 17:02:23 +04:00
|
|
|
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
|
|
|
|
assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
|
2014-07-01 18:09:54 +04:00
|
|
|
assert(!qiov || bytes == qiov->size);
|
2013-09-02 16:14:39 +04:00
|
|
|
|
2014-01-14 14:41:35 +04:00
|
|
|
waited = wait_serialising_requests(req);
|
|
|
|
assert(!waited || !req->serialising);
|
2014-02-07 18:35:56 +04:00
|
|
|
assert(req->overlap_offset <= offset);
|
|
|
|
assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
|
2013-12-03 17:30:44 +04:00
|
|
|
|
2013-12-03 17:55:55 +04:00
|
|
|
ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
|
2013-06-24 19:13:10 +04:00
|
|
|
|
2014-05-18 02:58:19 +04:00
|
|
|
if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
|
|
|
|
!(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
|
|
|
|
qemu_iovec_is_zero(qiov)) {
|
|
|
|
flags |= BDRV_REQ_ZERO_WRITE;
|
|
|
|
if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
|
|
|
|
flags |= BDRV_REQ_MAY_UNMAP;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-06-24 19:13:10 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
/* Do nothing, write notifier decided to fail this request */
|
|
|
|
} else if (flags & BDRV_REQ_ZERO_WRITE) {
|
2014-01-14 18:37:03 +04:00
|
|
|
BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
|
2013-10-24 14:06:51 +04:00
|
|
|
ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
|
2012-02-07 17:27:25 +04:00
|
|
|
} else {
|
2014-01-14 18:37:03 +04:00
|
|
|
BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
|
2012-02-07 17:27:25 +04:00
|
|
|
ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
|
|
|
|
}
|
2014-01-14 18:37:03 +04:00
|
|
|
BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
|
2011-10-13 16:08:24 +04:00
|
|
|
|
2012-06-06 02:04:49 +04:00
|
|
|
if (ret == 0 && !bs->enable_write_cache) {
|
|
|
|
ret = bdrv_co_flush(bs);
|
|
|
|
}
|
|
|
|
|
2013-11-13 14:29:43 +04:00
|
|
|
bdrv_set_dirty(bs, sector_num, nb_sectors);
|
2011-07-14 19:27:13 +04:00
|
|
|
|
2014-09-05 17:46:18 +04:00
|
|
|
block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
|
2014-09-05 17:46:16 +04:00
|
|
|
|
2015-02-05 21:58:24 +03:00
|
|
|
if (ret >= 0) {
|
2013-09-04 21:00:21 +04:00
|
|
|
bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
|
|
|
|
}
|
2011-07-14 19:27:13 +04:00
|
|
|
|
2011-10-13 16:08:24 +04:00
|
|
|
return ret;
|
2011-07-14 19:27:13 +04:00
|
|
|
}
|
|
|
|
|
2013-12-03 17:02:23 +04:00
|
|
|
/*
|
|
|
|
* Handle a write request in coroutine context
|
|
|
|
*/
|
2013-12-03 17:40:18 +04:00
|
|
|
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
|
|
|
|
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
|
2013-12-03 17:02:23 +04:00
|
|
|
BdrvRequestFlags flags)
|
|
|
|
{
|
2013-12-03 17:55:55 +04:00
|
|
|
BdrvTrackedRequest req;
|
2015-03-24 04:23:49 +03:00
|
|
|
uint64_t align = bdrv_get_align(bs);
|
2013-12-03 19:34:41 +04:00
|
|
|
uint8_t *head_buf = NULL;
|
|
|
|
uint8_t *tail_buf = NULL;
|
|
|
|
QEMUIOVector local_qiov;
|
|
|
|
bool use_local_qiov = false;
|
2013-12-03 17:02:23 +04:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!bs->drv) {
|
|
|
|
return -ENOMEDIUM;
|
|
|
|
}
|
|
|
|
if (bs->read_only) {
|
|
|
|
return -EACCES;
|
|
|
|
}
|
2015-02-05 21:58:25 +03:00
|
|
|
|
|
|
|
ret = bdrv_check_byte_request(bs, offset, bytes);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
2013-12-03 17:02:23 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* throttling disk I/O */
|
|
|
|
if (bs->io_limits_enabled) {
|
2014-01-16 16:29:10 +04:00
|
|
|
bdrv_io_limits_intercept(bs, bytes, true);
|
2013-12-03 17:02:23 +04:00
|
|
|
}
|
|
|
|
|
2013-12-03 19:34:41 +04:00
|
|
|
/*
|
|
|
|
* Align write if necessary by performing a read-modify-write cycle.
|
|
|
|
* Pad qiov with the read parts and be sure to have a tracked request not
|
|
|
|
* only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
|
|
|
|
*/
|
2013-12-03 17:55:55 +04:00
|
|
|
tracked_request_begin(&req, bs, offset, bytes, true);
|
2013-12-03 19:34:41 +04:00
|
|
|
|
|
|
|
if (offset & (align - 1)) {
|
|
|
|
QEMUIOVector head_qiov;
|
|
|
|
struct iovec head_iov;
|
|
|
|
|
|
|
|
mark_request_serialising(&req, align);
|
|
|
|
wait_serialising_requests(&req);
|
|
|
|
|
|
|
|
head_buf = qemu_blockalign(bs, align);
|
|
|
|
head_iov = (struct iovec) {
|
|
|
|
.iov_base = head_buf,
|
|
|
|
.iov_len = align,
|
|
|
|
};
|
|
|
|
qemu_iovec_init_external(&head_qiov, &head_iov, 1);
|
|
|
|
|
2014-01-14 18:37:03 +04:00
|
|
|
BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
|
2013-12-03 19:34:41 +04:00
|
|
|
ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
|
|
|
|
align, &head_qiov, 0);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
2014-01-14 18:37:03 +04:00
|
|
|
BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
|
2013-12-03 19:34:41 +04:00
|
|
|
|
|
|
|
qemu_iovec_init(&local_qiov, qiov->niov + 2);
|
|
|
|
qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
|
|
|
|
qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
|
|
|
|
use_local_qiov = true;
|
|
|
|
|
|
|
|
bytes += offset & (align - 1);
|
|
|
|
offset = offset & ~(align - 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((offset + bytes) & (align - 1)) {
|
|
|
|
QEMUIOVector tail_qiov;
|
|
|
|
struct iovec tail_iov;
|
|
|
|
size_t tail_bytes;
|
2014-01-14 14:41:35 +04:00
|
|
|
bool waited;
|
2013-12-03 19:34:41 +04:00
|
|
|
|
|
|
|
mark_request_serialising(&req, align);
|
2014-01-14 14:41:35 +04:00
|
|
|
waited = wait_serialising_requests(&req);
|
|
|
|
assert(!waited || !use_local_qiov);
|
2013-12-03 19:34:41 +04:00
|
|
|
|
|
|
|
tail_buf = qemu_blockalign(bs, align);
|
|
|
|
tail_iov = (struct iovec) {
|
|
|
|
.iov_base = tail_buf,
|
|
|
|
.iov_len = align,
|
|
|
|
};
|
|
|
|
qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
|
|
|
|
|
2014-01-14 18:37:03 +04:00
|
|
|
BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
|
2013-12-03 19:34:41 +04:00
|
|
|
ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
|
|
|
|
align, &tail_qiov, 0);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto fail;
|
|
|
|
}
|
2014-01-14 18:37:03 +04:00
|
|
|
BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
|
2013-12-03 19:34:41 +04:00
|
|
|
|
|
|
|
if (!use_local_qiov) {
|
|
|
|
qemu_iovec_init(&local_qiov, qiov->niov + 1);
|
|
|
|
qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
|
|
|
|
use_local_qiov = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
tail_bytes = (offset + bytes) & (align - 1);
|
|
|
|
qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
|
|
|
|
|
|
|
|
bytes = ROUND_UP(bytes, align);
|
|
|
|
}
|
|
|
|
|
2015-03-24 04:23:49 +03:00
|
|
|
if (use_local_qiov) {
|
|
|
|
/* Local buffer may have non-zero data. */
|
|
|
|
flags &= ~BDRV_REQ_ZERO_WRITE;
|
|
|
|
}
|
2013-12-03 19:34:41 +04:00
|
|
|
ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
|
|
|
|
use_local_qiov ? &local_qiov : qiov,
|
|
|
|
flags);
|
|
|
|
|
|
|
|
fail:
|
2013-12-03 17:55:55 +04:00
|
|
|
tracked_request_end(&req);
|
2013-12-03 17:02:23 +04:00
|
|
|
|
2013-12-03 19:34:41 +04:00
|
|
|
if (use_local_qiov) {
|
|
|
|
qemu_iovec_destroy(&local_qiov);
|
|
|
|
}
|
2014-02-07 18:29:00 +04:00
|
|
|
qemu_vfree(head_buf);
|
|
|
|
qemu_vfree(tail_buf);
|
2013-12-03 19:34:41 +04:00
|
|
|
|
2013-12-03 17:02:23 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2013-12-03 17:40:18 +04:00
|
|
|
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
|
|
|
|
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
|
|
|
|
BdrvRequestFlags flags)
|
|
|
|
{
|
2015-02-06 13:54:11 +03:00
|
|
|
if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
|
2013-12-03 17:40:18 +04:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
|
|
|
|
nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
|
|
|
|
}
|
|
|
|
|
2011-10-05 20:17:03 +04:00
|
|
|
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
|
|
|
|
int nb_sectors, QEMUIOVector *qiov)
|
|
|
|
{
|
|
|
|
trace_bdrv_co_writev(bs, sector_num, nb_sectors);
|
|
|
|
|
2012-02-07 17:27:25 +04:00
|
|
|
return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
|
2013-10-24 14:06:51 +04:00
|
|
|
int64_t sector_num, int nb_sectors,
|
|
|
|
BdrvRequestFlags flags)
|
2012-02-07 17:27:25 +04:00
|
|
|
{
|
2015-03-24 04:23:49 +03:00
|
|
|
int ret;
|
|
|
|
|
2013-11-22 16:39:45 +04:00
|
|
|
trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
|
2012-02-07 17:27:25 +04:00
|
|
|
|
2013-10-24 14:06:52 +04:00
|
|
|
if (!(bs->open_flags & BDRV_O_UNMAP)) {
|
|
|
|
flags &= ~BDRV_REQ_MAY_UNMAP;
|
|
|
|
}
|
2015-03-24 04:23:49 +03:00
|
|
|
if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS,
|
|
|
|
nb_sectors << BDRV_SECTOR_BITS)) {
|
|
|
|
ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
|
|
|
|
BDRV_REQ_ZERO_WRITE | flags);
|
|
|
|
} else {
|
|
|
|
uint8_t *buf;
|
|
|
|
QEMUIOVector local_qiov;
|
|
|
|
size_t bytes = nb_sectors << BDRV_SECTOR_BITS;
|
|
|
|
|
|
|
|
buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes);
|
|
|
|
memset(buf, 0, bytes);
|
|
|
|
qemu_iovec_init(&local_qiov, 1);
|
|
|
|
qemu_iovec_add(&local_qiov, buf, bytes);
|
2013-10-24 14:06:52 +04:00
|
|
|
|
2015-03-24 04:23:49 +03:00
|
|
|
ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov,
|
|
|
|
BDRV_REQ_ZERO_WRITE | flags);
|
|
|
|
qemu_vfree(buf);
|
|
|
|
}
|
|
|
|
return ret;
|
2011-10-05 20:17:03 +04:00
|
|
|
}
|
|
|
|
|
2006-08-01 20:21:11 +04:00
|
|
|
/**
|
|
|
|
* Truncate file to 'offset' bytes (needed only for file protocols)
|
|
|
|
*/
|
|
|
|
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
|
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
2010-04-19 19:56:41 +04:00
|
|
|
int ret;
|
2006-08-01 20:21:11 +04:00
|
|
|
if (!drv)
|
2006-08-19 15:45:59 +04:00
|
|
|
return -ENOMEDIUM;
|
2006-08-01 20:21:11 +04:00
|
|
|
if (!drv->bdrv_truncate)
|
|
|
|
return -ENOTSUP;
|
2009-10-26 17:25:16 +03:00
|
|
|
if (bs->read_only)
|
|
|
|
return -EACCES;
|
2014-06-26 00:55:30 +04:00
|
|
|
|
2010-04-19 19:56:41 +04:00
|
|
|
ret = drv->bdrv_truncate(bs, offset);
|
|
|
|
if (ret == 0) {
|
|
|
|
ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
|
2014-10-07 15:59:25 +04:00
|
|
|
if (bs->blk) {
|
|
|
|
blk_dev_resize_cb(bs->blk);
|
|
|
|
}
|
2010-04-19 19:56:41 +04:00
|
|
|
}
|
|
|
|
return ret;
|
2006-08-01 20:21:11 +04:00
|
|
|
}
|
|
|
|
|
2011-07-12 15:56:39 +04:00
|
|
|
/**
|
|
|
|
* Length of a allocated file in bytes. Sparse files are counted by actual
|
|
|
|
* allocated space. Return < 0 if error or unknown.
|
|
|
|
*/
|
|
|
|
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
|
|
|
if (!drv) {
|
|
|
|
return -ENOMEDIUM;
|
|
|
|
}
|
|
|
|
if (drv->bdrv_get_allocated_file_size) {
|
|
|
|
return drv->bdrv_get_allocated_file_size(bs);
|
|
|
|
}
|
|
|
|
if (bs->file) {
|
|
|
|
return bdrv_get_allocated_file_size(bs->file);
|
|
|
|
}
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
2006-08-01 20:21:11 +04:00
|
|
|
/**
|
2014-06-26 15:23:17 +04:00
|
|
|
* Return number of sectors on success, -errno on error.
|
2006-08-01 20:21:11 +04:00
|
|
|
*/
|
2014-06-26 15:23:17 +04:00
|
|
|
int64_t bdrv_nb_sectors(BlockDriverState *bs)
|
2006-08-01 20:21:11 +04:00
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
2014-06-26 15:23:17 +04:00
|
|
|
|
2006-08-01 20:21:11 +04:00
|
|
|
if (!drv)
|
2006-08-19 15:45:59 +04:00
|
|
|
return -ENOMEDIUM;
|
2010-04-19 19:56:41 +04:00
|
|
|
|
block: Avoid unecessary drv->bdrv_getlength() calls
The block layer generally keeps the size of an image cached in
bs->total_sectors so that it doesn't have to perform expensive
operations to get the size whenever it needs it.
This doesn't work however when using a backend that can change its size
without qemu being aware of it, i.e. passthrough of removable media like
CD-ROMs or floppy disks. For this reason, the caching is disabled when a
removable device is used.
It is obvious that checking whether the _guest_ device has removable
media isn't the right thing to do when we want to know whether the size
of the host backend can change. To make things worse, non-top-level
BlockDriverStates never have any device attached, which makes qemu
assume they are removable, so drv->bdrv_getlength() is always called on
the protocol layer. In the case of raw-posix, this causes unnecessary
lseek() system calls, which turned out to be rather expensive.
This patch completely changes the logic and disables bs->total_sectors
caching only for certain block driver types, for which a size change is
expected: host_cdrom and host_floppy on POSIX, host_device on win32; also
the raw format in case it sits on top of one of these protocols, but in
the common case the nested bdrv_getlength() call on the protocol driver
will use the cache again and avoid an expensive drv->bdrv_getlength()
call.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2013-10-29 15:18:58 +04:00
|
|
|
if (drv->has_variable_length) {
|
|
|
|
int ret = refresh_total_sectors(bs, bs->total_sectors);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
2011-03-29 23:04:41 +04:00
|
|
|
}
|
2006-08-01 20:21:11 +04:00
|
|
|
}
|
2014-06-26 15:23:17 +04:00
|
|
|
return bs->total_sectors;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Return length in bytes on success, -errno on error.
|
|
|
|
* The length is always a multiple of BDRV_SECTOR_SIZE.
|
|
|
|
*/
|
|
|
|
int64_t bdrv_getlength(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
int64_t ret = bdrv_nb_sectors(bs);
|
|
|
|
|
|
|
|
return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
|
2003-06-30 14:03:06 +04:00
|
|
|
}
|
|
|
|
|
2006-08-19 15:45:59 +04:00
|
|
|
/* return 0 as number of sectors if no device present or error */
|
2007-12-17 04:35:20 +03:00
|
|
|
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
|
2003-06-30 14:03:06 +04:00
|
|
|
{
|
2014-06-26 15:23:17 +04:00
|
|
|
int64_t nb_sectors = bdrv_nb_sectors(bs);
|
|
|
|
|
|
|
|
*nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
|
2003-06-30 14:03:06 +04:00
|
|
|
}
|
2004-02-17 00:56:36 +03:00
|
|
|
|
2012-09-28 19:22:54 +04:00
|
|
|
void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
|
|
|
|
BlockdevOnError on_write_error)
|
2010-06-02 20:55:17 +04:00
|
|
|
{
|
|
|
|
bs->on_read_error = on_read_error;
|
|
|
|
bs->on_write_error = on_write_error;
|
|
|
|
}
|
|
|
|
|
2012-09-28 19:22:56 +04:00
|
|
|
BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
|
2010-06-02 20:55:17 +04:00
|
|
|
{
|
|
|
|
return is_read ? bs->on_read_error : bs->on_write_error;
|
|
|
|
}
|
|
|
|
|
2012-09-28 19:22:57 +04:00
|
|
|
BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
|
|
|
|
{
|
|
|
|
BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
|
|
|
|
|
|
|
|
switch (on_err) {
|
|
|
|
case BLOCKDEV_ON_ERROR_ENOSPC:
|
2014-06-18 10:43:30 +04:00
|
|
|
return (error == ENOSPC) ?
|
|
|
|
BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
|
2012-09-28 19:22:57 +04:00
|
|
|
case BLOCKDEV_ON_ERROR_STOP:
|
2014-06-18 10:43:30 +04:00
|
|
|
return BLOCK_ERROR_ACTION_STOP;
|
2012-09-28 19:22:57 +04:00
|
|
|
case BLOCKDEV_ON_ERROR_REPORT:
|
2014-06-18 10:43:30 +04:00
|
|
|
return BLOCK_ERROR_ACTION_REPORT;
|
2012-09-28 19:22:57 +04:00
|
|
|
case BLOCKDEV_ON_ERROR_IGNORE:
|
2014-06-18 10:43:30 +04:00
|
|
|
return BLOCK_ERROR_ACTION_IGNORE;
|
2012-09-28 19:22:57 +04:00
|
|
|
default:
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-08-30 00:07:27 +04:00
|
|
|
static void send_qmp_error_event(BlockDriverState *bs,
|
|
|
|
BlockErrorAction action,
|
|
|
|
bool is_read, int error)
|
|
|
|
{
|
2014-10-10 23:33:03 +04:00
|
|
|
IoOperationType optype;
|
2014-08-30 00:07:27 +04:00
|
|
|
|
2014-10-10 23:33:03 +04:00
|
|
|
optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
|
|
|
|
qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
|
2014-08-30 00:07:27 +04:00
|
|
|
bdrv_iostatus_is_enabled(bs),
|
2014-09-11 18:25:48 +04:00
|
|
|
error == ENOSPC, strerror(error),
|
|
|
|
&error_abort);
|
2014-08-30 00:07:27 +04:00
|
|
|
}
|
|
|
|
|
2012-09-28 19:22:57 +04:00
|
|
|
/* This is done by device models because, while the block layer knows
|
|
|
|
* about the error, it does not know whether an operation comes from
|
|
|
|
* the device or the block layer (from a job, for example).
|
|
|
|
*/
|
|
|
|
void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
|
|
|
|
bool is_read, int error)
|
|
|
|
{
|
|
|
|
assert(error >= 0);
|
block: asynchronously stop the VM on I/O errors
With virtio-blk dataplane, I/O errors might occur while QEMU is
not in the main I/O thread. However, it's invalid to call vm_stop
when we're neither in a VCPU thread nor in the main I/O thread,
even if we were to take the iothread mutex around it.
To avoid this problem, we can raise a request to the main I/O thread,
similar to what QEMU does when vm_stop is called from a CPU thread.
We know that bdrv_error_action is called from an AIO callback, and
the moment at which the callback will fire is not well-defined; it
depends on the moment at which the disk or OS finishes the operation,
which can happen at any time. Note that QEMU is certainly not in a CPU
thread and we do not need to call cpu_stop_current() like vm_stop() does.
However, we need to ensure that any action taken by management will
result in correct detection of the error _and_ a running VM. In particular:
- the event must be raised after the iostatus has been set, so that
"info block" will return an iostatus that matches the event.
- the VM must be stopped after the iostatus has been set, so that
"info block" will return an iostatus that matches the runstate.
The ordering between the STOP and BLOCK_IO_ERROR events is preserved;
BLOCK_IO_ERROR is documented to come first.
This makes bdrv_error_action() thread safe (assuming QMP events are,
which is attacked by a separate series).
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2014-06-05 16:53:59 +04:00
|
|
|
|
2014-06-18 10:43:30 +04:00
|
|
|
if (action == BLOCK_ERROR_ACTION_STOP) {
|
block: asynchronously stop the VM on I/O errors
With virtio-blk dataplane, I/O errors might occur while QEMU is
not in the main I/O thread. However, it's invalid to call vm_stop
when we're neither in a VCPU thread nor in the main I/O thread,
even if we were to take the iothread mutex around it.
To avoid this problem, we can raise a request to the main I/O thread,
similar to what QEMU does when vm_stop is called from a CPU thread.
We know that bdrv_error_action is called from an AIO callback, and
the moment at which the callback will fire is not well-defined; it
depends on the moment at which the disk or OS finishes the operation,
which can happen at any time. Note that QEMU is certainly not in a CPU
thread and we do not need to call cpu_stop_current() like vm_stop() does.
However, we need to ensure that any action taken by management will
result in correct detection of the error _and_ a running VM. In particular:
- the event must be raised after the iostatus has been set, so that
"info block" will return an iostatus that matches the event.
- the VM must be stopped after the iostatus has been set, so that
"info block" will return an iostatus that matches the runstate.
The ordering between the STOP and BLOCK_IO_ERROR events is preserved;
BLOCK_IO_ERROR is documented to come first.
This makes bdrv_error_action() thread safe (assuming QMP events are,
which is attacked by a separate series).
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2014-06-05 16:53:59 +04:00
|
|
|
/* First set the iostatus, so that "info block" returns an iostatus
|
|
|
|
* that matches the events raised so far (an additional error iostatus
|
|
|
|
* is fine, but not a lost one).
|
|
|
|
*/
|
2012-09-28 19:22:57 +04:00
|
|
|
bdrv_iostatus_set_err(bs, error);
|
block: asynchronously stop the VM on I/O errors
With virtio-blk dataplane, I/O errors might occur while QEMU is
not in the main I/O thread. However, it's invalid to call vm_stop
when we're neither in a VCPU thread nor in the main I/O thread,
even if we were to take the iothread mutex around it.
To avoid this problem, we can raise a request to the main I/O thread,
similar to what QEMU does when vm_stop is called from a CPU thread.
We know that bdrv_error_action is called from an AIO callback, and
the moment at which the callback will fire is not well-defined; it
depends on the moment at which the disk or OS finishes the operation,
which can happen at any time. Note that QEMU is certainly not in a CPU
thread and we do not need to call cpu_stop_current() like vm_stop() does.
However, we need to ensure that any action taken by management will
result in correct detection of the error _and_ a running VM. In particular:
- the event must be raised after the iostatus has been set, so that
"info block" will return an iostatus that matches the event.
- the VM must be stopped after the iostatus has been set, so that
"info block" will return an iostatus that matches the runstate.
The ordering between the STOP and BLOCK_IO_ERROR events is preserved;
BLOCK_IO_ERROR is documented to come first.
This makes bdrv_error_action() thread safe (assuming QMP events are,
which is attacked by a separate series).
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2014-06-05 16:53:59 +04:00
|
|
|
|
|
|
|
/* Then raise the request to stop the VM and the event.
|
|
|
|
* qemu_system_vmstop_request_prepare has two effects. First,
|
|
|
|
* it ensures that the STOP event always comes after the
|
|
|
|
* BLOCK_IO_ERROR event. Second, it ensures that even if management
|
|
|
|
* can observe the STOP event and do a "cont" before the STOP
|
|
|
|
* event is issued, the VM will not stop. In this case, vm_start()
|
|
|
|
* also ensures that the STOP/RESUME pair of events is emitted.
|
|
|
|
*/
|
|
|
|
qemu_system_vmstop_request_prepare();
|
2014-08-30 00:07:27 +04:00
|
|
|
send_qmp_error_event(bs, action, is_read, error);
|
block: asynchronously stop the VM on I/O errors
With virtio-blk dataplane, I/O errors might occur while QEMU is
not in the main I/O thread. However, it's invalid to call vm_stop
when we're neither in a VCPU thread nor in the main I/O thread,
even if we were to take the iothread mutex around it.
To avoid this problem, we can raise a request to the main I/O thread,
similar to what QEMU does when vm_stop is called from a CPU thread.
We know that bdrv_error_action is called from an AIO callback, and
the moment at which the callback will fire is not well-defined; it
depends on the moment at which the disk or OS finishes the operation,
which can happen at any time. Note that QEMU is certainly not in a CPU
thread and we do not need to call cpu_stop_current() like vm_stop() does.
However, we need to ensure that any action taken by management will
result in correct detection of the error _and_ a running VM. In particular:
- the event must be raised after the iostatus has been set, so that
"info block" will return an iostatus that matches the event.
- the VM must be stopped after the iostatus has been set, so that
"info block" will return an iostatus that matches the runstate.
The ordering between the STOP and BLOCK_IO_ERROR events is preserved;
BLOCK_IO_ERROR is documented to come first.
This makes bdrv_error_action() thread safe (assuming QMP events are,
which is attacked by a separate series).
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2014-06-05 16:53:59 +04:00
|
|
|
qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
|
|
|
|
} else {
|
2014-08-30 00:07:27 +04:00
|
|
|
send_qmp_error_event(bs, action, is_read, error);
|
2012-09-28 19:22:57 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-03-15 00:38:54 +03:00
|
|
|
int bdrv_is_read_only(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
return bs->read_only;
|
|
|
|
}
|
|
|
|
|
2007-12-24 19:10:43 +03:00
|
|
|
int bdrv_is_sg(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
return bs->sg;
|
|
|
|
}
|
|
|
|
|
2009-09-04 21:01:15 +04:00
|
|
|
int bdrv_enable_write_cache(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
return bs->enable_write_cache;
|
|
|
|
}
|
|
|
|
|
2012-06-06 02:04:52 +04:00
|
|
|
void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
|
|
|
|
{
|
|
|
|
bs->enable_write_cache = wce;
|
2012-09-20 23:13:18 +04:00
|
|
|
|
|
|
|
/* so a reopen() will preserve wce */
|
|
|
|
if (wce) {
|
|
|
|
bs->open_flags |= BDRV_O_CACHE_WB;
|
|
|
|
} else {
|
|
|
|
bs->open_flags &= ~BDRV_O_CACHE_WB;
|
|
|
|
}
|
2012-06-06 02:04:52 +04:00
|
|
|
}
|
|
|
|
|
2004-08-02 01:59:26 +04:00
|
|
|
int bdrv_is_encrypted(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
if (bs->backing_hd && bs->backing_hd->encrypted)
|
|
|
|
return 1;
|
|
|
|
return bs->encrypted;
|
|
|
|
}
|
|
|
|
|
2009-03-06 02:01:01 +03:00
|
|
|
int bdrv_key_required(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
BlockDriverState *backing_hd = bs->backing_hd;
|
|
|
|
|
|
|
|
if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
|
|
|
|
return 1;
|
|
|
|
return (bs->encrypted && !bs->valid_key);
|
|
|
|
}
|
|
|
|
|
2004-08-02 01:59:26 +04:00
|
|
|
int bdrv_set_key(BlockDriverState *bs, const char *key)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
if (bs->backing_hd && bs->backing_hd->encrypted) {
|
|
|
|
ret = bdrv_set_key(bs->backing_hd, key);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
if (!bs->encrypted)
|
|
|
|
return 0;
|
|
|
|
}
|
2010-03-06 01:26:13 +03:00
|
|
|
if (!bs->encrypted) {
|
|
|
|
return -EINVAL;
|
|
|
|
} else if (!bs->drv || !bs->drv->bdrv_set_key) {
|
|
|
|
return -ENOMEDIUM;
|
|
|
|
}
|
2009-03-06 02:01:01 +03:00
|
|
|
ret = bs->drv->bdrv_set_key(bs, key);
|
2009-03-06 02:01:15 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
bs->valid_key = 0;
|
|
|
|
} else if (!bs->valid_key) {
|
|
|
|
bs->valid_key = 1;
|
2014-10-07 15:59:25 +04:00
|
|
|
if (bs->blk) {
|
|
|
|
/* call the change callback now, we skipped it on open */
|
|
|
|
blk_dev_change_media_cb(bs->blk, true);
|
|
|
|
}
|
2009-03-06 02:01:15 +03:00
|
|
|
}
|
2009-03-06 02:01:01 +03:00
|
|
|
return ret;
|
2004-08-02 01:59:26 +04:00
|
|
|
}
|
|
|
|
|
2015-01-29 12:37:00 +03:00
|
|
|
/*
|
|
|
|
* Provide an encryption key for @bs.
|
|
|
|
* If @key is non-null:
|
|
|
|
* If @bs is not encrypted, fail.
|
|
|
|
* Else if the key is invalid, fail.
|
|
|
|
* Else set @bs's key to @key, replacing the existing key, if any.
|
|
|
|
* If @key is null:
|
|
|
|
* If @bs is encrypted and still lacks a key, fail.
|
|
|
|
* Else do nothing.
|
|
|
|
* On failure, store an error object through @errp if non-null.
|
|
|
|
*/
|
|
|
|
void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
|
|
|
|
{
|
|
|
|
if (key) {
|
|
|
|
if (!bdrv_is_encrypted(bs)) {
|
2015-01-29 12:37:01 +03:00
|
|
|
error_setg(errp, "Device '%s' is not encrypted",
|
2015-01-29 12:37:00 +03:00
|
|
|
bdrv_get_device_name(bs));
|
|
|
|
} else if (bdrv_set_key(bs, key) < 0) {
|
|
|
|
error_set(errp, QERR_INVALID_PASSWORD);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (bdrv_key_required(bs)) {
|
2015-01-29 12:37:01 +03:00
|
|
|
error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
|
|
|
|
"'%s' (%s) is encrypted",
|
2015-01-29 12:37:00 +03:00
|
|
|
bdrv_get_device_name(bs),
|
|
|
|
bdrv_get_encrypted_filename(bs));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-06-13 12:11:48 +04:00
|
|
|
const char *bdrv_get_format_name(BlockDriverState *bs)
|
2004-08-02 01:59:26 +04:00
|
|
|
{
|
2012-06-13 12:11:48 +04:00
|
|
|
return bs->drv ? bs->drv->format_name : NULL;
|
2004-08-02 01:59:26 +04:00
|
|
|
}
|
|
|
|
|
2014-08-27 15:08:55 +04:00
|
|
|
static int qsort_strcmp(const void *a, const void *b)
|
|
|
|
{
|
|
|
|
return strcmp(a, b);
|
|
|
|
}
|
|
|
|
|
2007-09-17 01:08:06 +04:00
|
|
|
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
|
2004-08-02 01:59:26 +04:00
|
|
|
void *opaque)
|
|
|
|
{
|
|
|
|
BlockDriver *drv;
|
2014-04-29 02:29:54 +04:00
|
|
|
int count = 0;
|
2014-08-27 15:08:55 +04:00
|
|
|
int i;
|
2014-04-29 02:29:54 +04:00
|
|
|
const char **formats = NULL;
|
2004-08-02 01:59:26 +04:00
|
|
|
|
2010-04-13 13:29:33 +04:00
|
|
|
QLIST_FOREACH(drv, &bdrv_drivers, list) {
|
2014-04-29 02:29:54 +04:00
|
|
|
if (drv->format_name) {
|
|
|
|
bool found = false;
|
|
|
|
int i = count;
|
|
|
|
while (formats && i && !found) {
|
|
|
|
found = !strcmp(formats[--i], drv->format_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!found) {
|
block: Use g_new() & friends where that makes obvious sense
g_new(T, n) is neater than g_malloc(sizeof(T) * n). It's also safer,
for two reasons. One, it catches multiplication overflowing size_t.
Two, it returns T * rather than void *, which lets the compiler catch
more type errors.
Patch created with Coccinelle, with two manual changes on top:
* Add const to bdrv_iterate_format() to keep the types straight
* Convert the allocation in bdrv_drop_intermediate(), which Coccinelle
inexplicably misses
Coccinelle semantic patch:
@@
type T;
@@
-g_malloc(sizeof(T))
+g_new(T, 1)
@@
type T;
@@
-g_try_malloc(sizeof(T))
+g_try_new(T, 1)
@@
type T;
@@
-g_malloc0(sizeof(T))
+g_new0(T, 1)
@@
type T;
@@
-g_try_malloc0(sizeof(T))
+g_try_new0(T, 1)
@@
type T;
expression n;
@@
-g_malloc(sizeof(T) * (n))
+g_new(T, n)
@@
type T;
expression n;
@@
-g_try_malloc(sizeof(T) * (n))
+g_try_new(T, n)
@@
type T;
expression n;
@@
-g_malloc0(sizeof(T) * (n))
+g_new0(T, n)
@@
type T;
expression n;
@@
-g_try_malloc0(sizeof(T) * (n))
+g_try_new0(T, n)
@@
type T;
expression p, n;
@@
-g_realloc(p, sizeof(T) * (n))
+g_renew(T, p, n)
@@
type T;
expression p, n;
@@
-g_try_realloc(p, sizeof(T) * (n))
+g_try_renew(T, p, n)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-08-19 12:31:08 +04:00
|
|
|
formats = g_renew(const char *, formats, count + 1);
|
2014-04-29 02:29:54 +04:00
|
|
|
formats[count++] = drv->format_name;
|
|
|
|
}
|
|
|
|
}
|
2004-08-02 01:59:26 +04:00
|
|
|
}
|
2014-08-27 15:08:55 +04:00
|
|
|
|
|
|
|
qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
|
|
|
|
|
|
|
|
for (i = 0; i < count; i++) {
|
|
|
|
it(opaque, formats[i]);
|
|
|
|
}
|
|
|
|
|
2014-04-29 02:29:54 +04:00
|
|
|
g_free(formats);
|
2004-08-02 01:59:26 +04:00
|
|
|
}
|
|
|
|
|
2014-01-24 00:31:32 +04:00
|
|
|
/* This function is to find a node in the bs graph */
|
|
|
|
BlockDriverState *bdrv_find_node(const char *node_name)
|
|
|
|
{
|
|
|
|
BlockDriverState *bs;
|
|
|
|
|
|
|
|
assert(node_name);
|
|
|
|
|
|
|
|
QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
|
|
|
|
if (!strcmp(node_name, bs->node_name)) {
|
|
|
|
return bs;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2014-01-24 00:31:34 +04:00
|
|
|
/* Put this QMP function here so it can access the static graph_bdrv_states. */
|
|
|
|
BlockDeviceInfoList *bdrv_named_nodes_list(void)
|
|
|
|
{
|
|
|
|
BlockDeviceInfoList *list, *entry;
|
|
|
|
BlockDriverState *bs;
|
|
|
|
|
|
|
|
list = NULL;
|
|
|
|
QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
|
|
|
|
entry = g_malloc0(sizeof(*entry));
|
|
|
|
entry->value = bdrv_block_device_info(bs);
|
|
|
|
entry->next = list;
|
|
|
|
list = entry;
|
|
|
|
}
|
|
|
|
|
|
|
|
return list;
|
|
|
|
}
|
|
|
|
|
2014-01-24 00:31:35 +04:00
|
|
|
BlockDriverState *bdrv_lookup_bs(const char *device,
|
|
|
|
const char *node_name,
|
|
|
|
Error **errp)
|
|
|
|
{
|
2014-10-07 15:59:12 +04:00
|
|
|
BlockBackend *blk;
|
|
|
|
BlockDriverState *bs;
|
2014-01-24 00:31:35 +04:00
|
|
|
|
|
|
|
if (device) {
|
2014-10-07 15:59:12 +04:00
|
|
|
blk = blk_by_name(device);
|
2014-01-24 00:31:35 +04:00
|
|
|
|
2014-10-07 15:59:12 +04:00
|
|
|
if (blk) {
|
|
|
|
return blk_bs(blk);
|
2014-01-24 00:31:35 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-02-12 20:15:06 +04:00
|
|
|
if (node_name) {
|
|
|
|
bs = bdrv_find_node(node_name);
|
2014-01-24 00:31:35 +04:00
|
|
|
|
2014-02-12 20:15:06 +04:00
|
|
|
if (bs) {
|
|
|
|
return bs;
|
|
|
|
}
|
2014-01-24 00:31:35 +04:00
|
|
|
}
|
|
|
|
|
2014-02-12 20:15:06 +04:00
|
|
|
error_setg(errp, "Cannot find device=%s nor node_name=%s",
|
|
|
|
device ? device : "",
|
|
|
|
node_name ? node_name : "");
|
|
|
|
return NULL;
|
2014-01-24 00:31:35 +04:00
|
|
|
}
|
|
|
|
|
2014-06-25 23:40:09 +04:00
|
|
|
/* If 'base' is in the same chain as 'top', return true. Otherwise,
|
|
|
|
* return false. If either argument is NULL, return false. */
|
|
|
|
bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
|
|
|
|
{
|
|
|
|
while (top && top != base) {
|
|
|
|
top = top->backing_hd;
|
|
|
|
}
|
|
|
|
|
|
|
|
return top != NULL;
|
|
|
|
}
|
|
|
|
|
2014-10-31 06:32:54 +03:00
|
|
|
BlockDriverState *bdrv_next_node(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
if (!bs) {
|
|
|
|
return QTAILQ_FIRST(&graph_bdrv_states);
|
|
|
|
}
|
|
|
|
return QTAILQ_NEXT(bs, node_list);
|
|
|
|
}
|
|
|
|
|
2010-06-02 20:55:20 +04:00
|
|
|
BlockDriverState *bdrv_next(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
if (!bs) {
|
|
|
|
return QTAILQ_FIRST(&bdrv_states);
|
|
|
|
}
|
2014-01-24 00:31:32 +04:00
|
|
|
return QTAILQ_NEXT(bs, device_list);
|
2010-06-02 20:55:20 +04:00
|
|
|
}
|
|
|
|
|
2014-10-31 06:32:55 +03:00
|
|
|
const char *bdrv_get_node_name(const BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
return bs->node_name;
|
|
|
|
}
|
|
|
|
|
2014-10-07 15:59:12 +04:00
|
|
|
/* TODO check what callers really want: bs->node_name or blk_name() */
|
2014-10-07 15:59:11 +04:00
|
|
|
const char *bdrv_get_device_name(const BlockDriverState *bs)
|
2004-08-02 01:59:26 +04:00
|
|
|
{
|
2014-10-07 15:59:11 +04:00
|
|
|
return bs->blk ? blk_name(bs->blk) : "";
|
2004-08-02 01:59:26 +04:00
|
|
|
}
|
|
|
|
|
2012-06-05 18:49:24 +04:00
|
|
|
int bdrv_get_flags(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
return bs->open_flags;
|
|
|
|
}
|
|
|
|
|
2013-07-05 15:48:01 +04:00
|
|
|
int bdrv_flush_all(void)
|
2008-10-06 17:55:43 +04:00
|
|
|
{
|
|
|
|
BlockDriverState *bs;
|
2013-07-05 15:48:01 +04:00
|
|
|
int result = 0;
|
2008-10-06 17:55:43 +04:00
|
|
|
|
2014-01-24 00:31:32 +04:00
|
|
|
QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
|
2014-05-08 18:34:35 +04:00
|
|
|
AioContext *aio_context = bdrv_get_aio_context(bs);
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
aio_context_acquire(aio_context);
|
|
|
|
ret = bdrv_flush(bs);
|
2013-07-05 15:48:01 +04:00
|
|
|
if (ret < 0 && !result) {
|
|
|
|
result = ret;
|
|
|
|
}
|
2014-05-08 18:34:35 +04:00
|
|
|
aio_context_release(aio_context);
|
2010-04-10 10:02:42 +04:00
|
|
|
}
|
2013-07-05 15:48:01 +04:00
|
|
|
|
|
|
|
return result;
|
2008-10-06 17:55:43 +04:00
|
|
|
}
|
|
|
|
|
2013-06-28 14:47:42 +04:00
|
|
|
int bdrv_has_zero_init_1(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2010-04-14 19:30:35 +04:00
|
|
|
int bdrv_has_zero_init(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
assert(bs->drv);
|
|
|
|
|
2013-09-04 21:00:27 +04:00
|
|
|
/* If BS is a copy on write image, it is initialized to
|
|
|
|
the contents of the base image, which may not be zeroes. */
|
|
|
|
if (bs->backing_hd) {
|
|
|
|
return 0;
|
|
|
|
}
|
2010-07-28 13:26:29 +04:00
|
|
|
if (bs->drv->bdrv_has_zero_init) {
|
|
|
|
return bs->drv->bdrv_has_zero_init(bs);
|
2010-04-14 19:30:35 +04:00
|
|
|
}
|
|
|
|
|
2013-06-28 14:47:42 +04:00
|
|
|
/* safe default */
|
|
|
|
return 0;
|
2010-04-14 19:30:35 +04:00
|
|
|
}
|
|
|
|
|
2013-10-24 14:06:54 +04:00
|
|
|
bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
BlockDriverInfo bdi;
|
|
|
|
|
|
|
|
if (bs->backing_hd) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bdrv_get_info(bs, &bdi) == 0) {
|
|
|
|
return bdi.unallocated_blocks_are_zero;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
BlockDriverInfo bdi;
|
|
|
|
|
|
|
|
if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bdrv_get_info(bs, &bdi) == 0) {
|
|
|
|
return bdi.can_write_zeroes_with_unmap;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2013-09-04 21:00:28 +04:00
|
|
|
typedef struct BdrvCoGetBlockStatusData {
|
2011-11-14 16:44:19 +04:00
|
|
|
BlockDriverState *bs;
|
2013-02-13 12:09:39 +04:00
|
|
|
BlockDriverState *base;
|
2011-11-14 16:44:19 +04:00
|
|
|
int64_t sector_num;
|
|
|
|
int nb_sectors;
|
|
|
|
int *pnum;
|
2013-09-04 21:00:28 +04:00
|
|
|
int64_t ret;
|
2011-11-14 16:44:19 +04:00
|
|
|
bool done;
|
2013-09-04 21:00:28 +04:00
|
|
|
} BdrvCoGetBlockStatusData;
|
2011-11-14 16:44:19 +04:00
|
|
|
|
2008-06-06 01:53:49 +04:00
|
|
|
/*
|
2014-11-10 12:10:38 +03:00
|
|
|
* Returns the allocation status of the specified sectors.
|
|
|
|
* Drivers not implementing the functionality are assumed to not support
|
|
|
|
* backing files, hence all their sectors are reported as allocated.
|
2008-06-06 01:53:49 +04:00
|
|
|
*
|
2011-11-29 17:49:51 +04:00
|
|
|
* If 'sector_num' is beyond the end of the disk image the return value is 0
|
|
|
|
* and 'pnum' is set to 0.
|
|
|
|
*
|
2008-06-06 01:53:49 +04:00
|
|
|
* 'pnum' is set to the number of sectors (including and immediately following
|
|
|
|
* the specified sector) that are known to be in the same
|
|
|
|
* allocated/unallocated state.
|
|
|
|
*
|
2011-11-29 17:49:51 +04:00
|
|
|
* 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
|
|
|
|
* beyond the end of the disk image it will be clamped.
|
2008-06-06 01:53:49 +04:00
|
|
|
*/
|
2013-09-04 21:00:28 +04:00
|
|
|
static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
|
|
|
|
int64_t sector_num,
|
|
|
|
int nb_sectors, int *pnum)
|
2008-06-06 01:53:49 +04:00
|
|
|
{
|
2014-06-26 15:23:20 +04:00
|
|
|
int64_t total_sectors;
|
2011-11-29 17:49:51 +04:00
|
|
|
int64_t n;
|
2013-09-04 21:00:38 +04:00
|
|
|
int64_t ret, ret2;
|
2011-11-29 17:49:51 +04:00
|
|
|
|
2014-06-26 15:23:20 +04:00
|
|
|
total_sectors = bdrv_nb_sectors(bs);
|
|
|
|
if (total_sectors < 0) {
|
|
|
|
return total_sectors;
|
2013-09-04 21:00:23 +04:00
|
|
|
}
|
|
|
|
|
2014-06-26 15:23:20 +04:00
|
|
|
if (sector_num >= total_sectors) {
|
2011-11-29 17:49:51 +04:00
|
|
|
*pnum = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-06-26 15:23:20 +04:00
|
|
|
n = total_sectors - sector_num;
|
2011-11-29 17:49:51 +04:00
|
|
|
if (n < nb_sectors) {
|
|
|
|
nb_sectors = n;
|
|
|
|
}
|
|
|
|
|
2013-09-04 21:00:28 +04:00
|
|
|
if (!bs->drv->bdrv_co_get_block_status) {
|
2011-11-29 17:49:51 +04:00
|
|
|
*pnum = nb_sectors;
|
2014-05-06 17:25:36 +04:00
|
|
|
ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
|
2013-09-04 21:00:37 +04:00
|
|
|
if (bs->drv->protocol_name) {
|
|
|
|
ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
|
|
|
|
}
|
|
|
|
return ret;
|
2008-06-06 01:53:49 +04:00
|
|
|
}
|
2011-11-14 16:44:25 +04:00
|
|
|
|
2013-09-04 21:00:31 +04:00
|
|
|
ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
|
|
|
|
if (ret < 0) {
|
2013-09-24 17:35:08 +04:00
|
|
|
*pnum = 0;
|
2013-09-04 21:00:31 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2013-10-08 16:43:14 +04:00
|
|
|
if (ret & BDRV_BLOCK_RAW) {
|
|
|
|
assert(ret & BDRV_BLOCK_OFFSET_VALID);
|
|
|
|
return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
|
|
|
|
*pnum, pnum);
|
|
|
|
}
|
|
|
|
|
2014-05-06 17:25:36 +04:00
|
|
|
if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
|
|
|
|
ret |= BDRV_BLOCK_ALLOCATED;
|
|
|
|
}
|
|
|
|
|
2013-10-24 14:07:04 +04:00
|
|
|
if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
|
|
|
|
if (bdrv_unallocated_blocks_are_zero(bs)) {
|
2013-09-04 21:00:32 +04:00
|
|
|
ret |= BDRV_BLOCK_ZERO;
|
2013-09-24 17:35:09 +04:00
|
|
|
} else if (bs->backing_hd) {
|
2013-09-04 21:00:32 +04:00
|
|
|
BlockDriverState *bs2 = bs->backing_hd;
|
2014-06-26 15:23:20 +04:00
|
|
|
int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
|
|
|
|
if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
|
2013-09-04 21:00:32 +04:00
|
|
|
ret |= BDRV_BLOCK_ZERO;
|
|
|
|
}
|
|
|
|
}
|
2013-09-04 21:00:31 +04:00
|
|
|
}
|
2013-09-04 21:00:38 +04:00
|
|
|
|
|
|
|
if (bs->file &&
|
|
|
|
(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
|
|
|
|
(ret & BDRV_BLOCK_OFFSET_VALID)) {
|
2014-10-22 19:00:15 +04:00
|
|
|
int file_pnum;
|
|
|
|
|
2013-09-04 21:00:38 +04:00
|
|
|
ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
|
2014-10-22 19:00:15 +04:00
|
|
|
*pnum, &file_pnum);
|
2013-09-04 21:00:38 +04:00
|
|
|
if (ret2 >= 0) {
|
|
|
|
/* Ignore errors. This is just providing extra information, it
|
|
|
|
* is useful but not necessary.
|
|
|
|
*/
|
2014-10-22 19:00:15 +04:00
|
|
|
if (!file_pnum) {
|
|
|
|
/* !file_pnum indicates an offset at or beyond the EOF; it is
|
|
|
|
* perfectly valid for the format block driver to point to such
|
|
|
|
* offsets, so catch it and mark everything as zero */
|
|
|
|
ret |= BDRV_BLOCK_ZERO;
|
|
|
|
} else {
|
|
|
|
/* Limit request to the range reported by the protocol driver */
|
|
|
|
*pnum = file_pnum;
|
|
|
|
ret |= (ret2 & BDRV_BLOCK_ZERO);
|
|
|
|
}
|
2013-09-04 21:00:38 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-09-04 21:00:31 +04:00
|
|
|
return ret;
|
2011-11-14 16:44:26 +04:00
|
|
|
}
|
|
|
|
|
2013-09-04 21:00:28 +04:00
|
|
|
/* Coroutine wrapper for bdrv_get_block_status() */
|
|
|
|
static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
|
2011-11-14 16:44:26 +04:00
|
|
|
{
|
2013-09-04 21:00:28 +04:00
|
|
|
BdrvCoGetBlockStatusData *data = opaque;
|
2011-11-14 16:44:26 +04:00
|
|
|
BlockDriverState *bs = data->bs;
|
|
|
|
|
2013-09-04 21:00:28 +04:00
|
|
|
data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
|
|
|
|
data->pnum);
|
2011-11-14 16:44:26 +04:00
|
|
|
data->done = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2013-09-04 21:00:28 +04:00
|
|
|
* Synchronous wrapper around bdrv_co_get_block_status().
|
2011-11-14 16:44:26 +04:00
|
|
|
*
|
2013-09-04 21:00:28 +04:00
|
|
|
* See bdrv_co_get_block_status() for details.
|
2011-11-14 16:44:26 +04:00
|
|
|
*/
|
2013-09-04 21:00:28 +04:00
|
|
|
int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
|
|
|
|
int nb_sectors, int *pnum)
|
2011-11-14 16:44:26 +04:00
|
|
|
{
|
2011-11-14 16:44:25 +04:00
|
|
|
Coroutine *co;
|
2013-09-04 21:00:28 +04:00
|
|
|
BdrvCoGetBlockStatusData data = {
|
2011-11-14 16:44:25 +04:00
|
|
|
.bs = bs,
|
|
|
|
.sector_num = sector_num,
|
|
|
|
.nb_sectors = nb_sectors,
|
|
|
|
.pnum = pnum,
|
|
|
|
.done = false,
|
|
|
|
};
|
|
|
|
|
2013-09-04 21:00:22 +04:00
|
|
|
if (qemu_in_coroutine()) {
|
|
|
|
/* Fast-path if already in coroutine context */
|
2013-09-04 21:00:28 +04:00
|
|
|
bdrv_get_block_status_co_entry(&data);
|
2013-09-04 21:00:22 +04:00
|
|
|
} else {
|
2014-05-08 18:34:34 +04:00
|
|
|
AioContext *aio_context = bdrv_get_aio_context(bs);
|
|
|
|
|
2013-09-04 21:00:28 +04:00
|
|
|
co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
|
2013-09-04 21:00:22 +04:00
|
|
|
qemu_coroutine_enter(co, &data);
|
|
|
|
while (!data.done) {
|
2014-05-08 18:34:34 +04:00
|
|
|
aio_poll(aio_context, true);
|
2013-09-04 21:00:22 +04:00
|
|
|
}
|
2011-11-14 16:44:25 +04:00
|
|
|
}
|
|
|
|
return data.ret;
|
2008-06-06 01:53:49 +04:00
|
|
|
}
|
|
|
|
|
2013-09-04 21:00:28 +04:00
|
|
|
int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
|
|
|
|
int nb_sectors, int *pnum)
|
|
|
|
{
|
2013-09-04 21:00:29 +04:00
|
|
|
int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
2014-07-07 19:00:37 +04:00
|
|
|
return !!(ret & BDRV_BLOCK_ALLOCATED);
|
2013-09-04 21:00:28 +04:00
|
|
|
}
|
|
|
|
|
2012-05-08 18:52:01 +04:00
|
|
|
/*
|
|
|
|
* Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
|
|
|
|
*
|
|
|
|
* Return true if the given sector is allocated in any image between
|
|
|
|
* BASE and TOP (inclusive). BASE can be NULL to check if the given
|
|
|
|
* sector is allocated in any image of the chain. Return false otherwise.
|
|
|
|
*
|
|
|
|
* 'pnum' is set to the number of sectors (including and immediately following
|
|
|
|
* the specified sector) that are known to be in the same
|
|
|
|
* allocated/unallocated state.
|
|
|
|
*
|
|
|
|
*/
|
2013-09-04 21:00:24 +04:00
|
|
|
int bdrv_is_allocated_above(BlockDriverState *top,
|
|
|
|
BlockDriverState *base,
|
|
|
|
int64_t sector_num,
|
|
|
|
int nb_sectors, int *pnum)
|
2012-05-08 18:52:01 +04:00
|
|
|
{
|
|
|
|
BlockDriverState *intermediate;
|
|
|
|
int ret, n = nb_sectors;
|
|
|
|
|
|
|
|
intermediate = top;
|
|
|
|
while (intermediate && intermediate != base) {
|
|
|
|
int pnum_inter;
|
2013-09-04 21:00:22 +04:00
|
|
|
ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
|
|
|
|
&pnum_inter);
|
2012-05-08 18:52:01 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
} else if (ret) {
|
|
|
|
*pnum = pnum_inter;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* [sector_num, nb_sectors] is unallocated on top but intermediate
|
|
|
|
* might have
|
|
|
|
*
|
|
|
|
* [sector_num+x, nr_sectors] allocated.
|
|
|
|
*/
|
2013-01-24 22:02:08 +04:00
|
|
|
if (n > pnum_inter &&
|
|
|
|
(intermediate == top ||
|
|
|
|
sector_num + pnum_inter < intermediate->total_sectors)) {
|
2012-05-08 18:52:01 +04:00
|
|
|
n = pnum_inter;
|
|
|
|
}
|
|
|
|
|
|
|
|
intermediate = intermediate->backing_hd;
|
|
|
|
}
|
|
|
|
|
|
|
|
*pnum = n;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-03-06 02:00:48 +03:00
|
|
|
const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
if (bs->backing_hd && bs->backing_hd->encrypted)
|
|
|
|
return bs->backing_file;
|
|
|
|
else if (bs->encrypted)
|
|
|
|
return bs->filename;
|
|
|
|
else
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2007-09-17 01:08:06 +04:00
|
|
|
void bdrv_get_backing_filename(BlockDriverState *bs,
|
2006-08-01 20:21:11 +04:00
|
|
|
char *filename, int filename_size)
|
|
|
|
{
|
2011-10-26 13:02:11 +04:00
|
|
|
pstrcpy(filename, filename_size, bs->backing_file);
|
2006-08-01 20:21:11 +04:00
|
|
|
}
|
|
|
|
|
2007-09-17 01:08:06 +04:00
|
|
|
int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
|
2006-08-06 01:31:00 +04:00
|
|
|
const uint8_t *buf, int nb_sectors)
|
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
2015-02-05 21:58:25 +03:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!drv) {
|
2006-08-19 15:45:59 +04:00
|
|
|
return -ENOMEDIUM;
|
2015-02-05 21:58:25 +03:00
|
|
|
}
|
|
|
|
if (!drv->bdrv_write_compressed) {
|
2006-08-06 01:31:00 +04:00
|
|
|
return -ENOTSUP;
|
2015-02-05 21:58:25 +03:00
|
|
|
}
|
|
|
|
ret = bdrv_check_request(bs, sector_num, nb_sectors);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
2009-11-30 20:21:19 +03:00
|
|
|
|
2013-11-13 14:29:43 +04:00
|
|
|
assert(QLIST_EMPTY(&bs->dirty_bitmaps));
|
2009-11-30 20:21:19 +03:00
|
|
|
|
2006-08-06 01:31:00 +04:00
|
|
|
return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
|
|
|
|
}
|
2007-09-17 12:09:54 +04:00
|
|
|
|
2006-08-06 01:31:00 +04:00
|
|
|
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
|
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
|
|
|
if (!drv)
|
2006-08-19 15:45:59 +04:00
|
|
|
return -ENOMEDIUM;
|
2006-08-06 01:31:00 +04:00
|
|
|
if (!drv->bdrv_get_info)
|
|
|
|
return -ENOTSUP;
|
|
|
|
memset(bdi, 0, sizeof(*bdi));
|
|
|
|
return drv->bdrv_get_info(bs, bdi);
|
|
|
|
}
|
|
|
|
|
2013-10-09 12:46:16 +04:00
|
|
|
ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
|
|
|
if (drv && drv->bdrv_get_specific_info) {
|
|
|
|
return drv->bdrv_get_specific_info(bs);
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2009-07-11 01:11:57 +04:00
|
|
|
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
|
|
|
|
int64_t pos, int size)
|
2013-04-05 23:27:53 +04:00
|
|
|
{
|
|
|
|
QEMUIOVector qiov;
|
|
|
|
struct iovec iov = {
|
|
|
|
.iov_base = (void *) buf,
|
|
|
|
.iov_len = size,
|
|
|
|
};
|
|
|
|
|
|
|
|
qemu_iovec_init_external(&qiov, &iov, 1);
|
|
|
|
return bdrv_writev_vmstate(bs, &qiov, pos);
|
|
|
|
}
|
|
|
|
|
|
|
|
int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
|
2009-04-05 23:10:55 +04:00
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
2013-04-05 23:27:53 +04:00
|
|
|
|
|
|
|
if (!drv) {
|
2009-04-05 23:10:55 +04:00
|
|
|
return -ENOMEDIUM;
|
2013-04-05 23:27:53 +04:00
|
|
|
} else if (drv->bdrv_save_vmstate) {
|
|
|
|
return drv->bdrv_save_vmstate(bs, qiov, pos);
|
|
|
|
} else if (bs->file) {
|
|
|
|
return bdrv_writev_vmstate(bs->file, qiov, pos);
|
|
|
|
}
|
|
|
|
|
2010-05-28 06:44:58 +04:00
|
|
|
return -ENOTSUP;
|
2009-04-05 23:10:55 +04:00
|
|
|
}
|
|
|
|
|
2009-07-11 01:11:57 +04:00
|
|
|
int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
|
|
|
|
int64_t pos, int size)
|
2009-04-05 23:10:55 +04:00
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
|
|
|
if (!drv)
|
|
|
|
return -ENOMEDIUM;
|
2010-05-28 06:44:58 +04:00
|
|
|
if (drv->bdrv_load_vmstate)
|
|
|
|
return drv->bdrv_load_vmstate(bs, buf, pos, size);
|
|
|
|
if (bs->file)
|
|
|
|
return bdrv_load_vmstate(bs->file, buf, pos, size);
|
|
|
|
return -ENOTSUP;
|
2009-04-05 23:10:55 +04:00
|
|
|
}
|
|
|
|
|
2010-03-15 19:27:00 +03:00
|
|
|
void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
|
|
|
|
{
|
2013-06-05 17:17:55 +04:00
|
|
|
if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
|
2010-03-15 19:27:00 +03:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2013-06-05 17:17:55 +04:00
|
|
|
bs->drv->bdrv_debug_event(bs, event);
|
2012-12-06 17:32:58 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
|
|
|
|
const char *tag)
|
|
|
|
{
|
|
|
|
while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
|
|
|
|
bs = bs->file;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
|
|
|
|
return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
|
|
|
|
}
|
|
|
|
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
2013-11-20 06:01:54 +04:00
|
|
|
int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
|
|
|
|
{
|
|
|
|
while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
|
|
|
|
bs = bs->file;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
|
|
|
|
return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
|
|
|
|
}
|
|
|
|
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
2012-12-06 17:32:58 +04:00
|
|
|
int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
|
|
|
|
{
|
2014-03-11 02:44:08 +04:00
|
|
|
while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
|
2012-12-06 17:32:58 +04:00
|
|
|
bs = bs->file;
|
|
|
|
}
|
2010-03-15 19:27:00 +03:00
|
|
|
|
2012-12-06 17:32:58 +04:00
|
|
|
if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
|
|
|
|
return bs->drv->bdrv_debug_resume(bs, tag);
|
|
|
|
}
|
|
|
|
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
|
|
|
|
{
|
|
|
|
while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
|
|
|
|
bs = bs->file;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
|
|
|
|
return bs->drv->bdrv_debug_is_suspended(bs, tag);
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
2010-03-15 19:27:00 +03:00
|
|
|
}
|
|
|
|
|
2010-07-26 00:49:34 +04:00
|
|
|
int bdrv_is_snapshot(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
return !!(bs->open_flags & BDRV_O_SNAPSHOT);
|
|
|
|
}
|
|
|
|
|
2012-10-16 23:49:09 +04:00
|
|
|
/* backing_file can either be relative, or absolute, or a protocol. If it is
|
|
|
|
* relative, it must be relative to the chain. So, passing in bs->filename
|
|
|
|
* from a BDS as backing_file should not be done, as that may be relative to
|
|
|
|
* the CWD rather than the chain. */
|
2012-01-18 18:40:51 +04:00
|
|
|
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
|
|
|
|
const char *backing_file)
|
|
|
|
{
|
2012-10-16 23:49:09 +04:00
|
|
|
char *filename_full = NULL;
|
|
|
|
char *backing_file_full = NULL;
|
|
|
|
char *filename_tmp = NULL;
|
|
|
|
int is_protocol = 0;
|
|
|
|
BlockDriverState *curr_bs = NULL;
|
|
|
|
BlockDriverState *retval = NULL;
|
|
|
|
|
|
|
|
if (!bs || !bs->drv || !backing_file) {
|
2012-01-18 18:40:51 +04:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2012-10-16 23:49:09 +04:00
|
|
|
filename_full = g_malloc(PATH_MAX);
|
|
|
|
backing_file_full = g_malloc(PATH_MAX);
|
|
|
|
filename_tmp = g_malloc(PATH_MAX);
|
|
|
|
|
|
|
|
is_protocol = path_has_protocol(backing_file);
|
|
|
|
|
|
|
|
for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
|
|
|
|
|
|
|
|
/* If either of the filename paths is actually a protocol, then
|
|
|
|
* compare unmodified paths; otherwise make paths relative */
|
|
|
|
if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
|
|
|
|
if (strcmp(backing_file, curr_bs->backing_file) == 0) {
|
|
|
|
retval = curr_bs->backing_hd;
|
|
|
|
break;
|
|
|
|
}
|
2012-01-18 18:40:51 +04:00
|
|
|
} else {
|
2012-10-16 23:49:09 +04:00
|
|
|
/* If not an absolute filename path, make it relative to the current
|
|
|
|
* image's filename path */
|
|
|
|
path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
|
|
|
|
backing_file);
|
|
|
|
|
|
|
|
/* We are going to compare absolute pathnames */
|
|
|
|
if (!realpath(filename_tmp, filename_full)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We need to make sure the backing filename we are comparing against
|
|
|
|
* is relative to the current image filename (or absolute) */
|
|
|
|
path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
|
|
|
|
curr_bs->backing_file);
|
|
|
|
|
|
|
|
if (!realpath(filename_tmp, backing_file_full)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (strcmp(backing_file_full, filename_full) == 0) {
|
|
|
|
retval = curr_bs->backing_hd;
|
|
|
|
break;
|
|
|
|
}
|
2012-01-18 18:40:51 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-10-16 23:49:09 +04:00
|
|
|
g_free(filename_full);
|
|
|
|
g_free(backing_file_full);
|
|
|
|
g_free(filename_tmp);
|
|
|
|
return retval;
|
2012-01-18 18:40:51 +04:00
|
|
|
}
|
|
|
|
|
2012-08-02 12:22:47 +04:00
|
|
|
int bdrv_get_backing_file_depth(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
if (!bs->drv) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!bs->backing_hd) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
|
|
|
|
}
|
|
|
|
|
2004-08-02 01:59:26 +04:00
|
|
|
/**************************************************************/
|
2006-08-01 20:21:11 +04:00
|
|
|
/* async I/Os */
|
2004-08-02 01:59:26 +04:00
|
|
|
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
|
|
|
|
QEMUIOVector *qiov, int nb_sectors,
|
2014-10-07 15:59:15 +04:00
|
|
|
BlockCompletionFunc *cb, void *opaque)
|
2006-08-01 20:21:11 +04:00
|
|
|
{
|
2010-10-05 17:28:53 +04:00
|
|
|
trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
|
|
|
|
|
2013-11-22 16:39:44 +04:00
|
|
|
return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
|
2011-10-14 00:09:28 +04:00
|
|
|
cb, opaque, false);
|
2004-08-02 01:59:26 +04:00
|
|
|
}
|
|
|
|
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
|
|
|
|
QEMUIOVector *qiov, int nb_sectors,
|
2014-10-07 15:59:15 +04:00
|
|
|
BlockCompletionFunc *cb, void *opaque)
|
2004-08-02 01:59:26 +04:00
|
|
|
{
|
2010-10-05 17:28:53 +04:00
|
|
|
trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
|
|
|
|
|
2013-11-22 16:39:44 +04:00
|
|
|
return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
|
2011-10-14 00:09:28 +04:00
|
|
|
cb, opaque, true);
|
2006-08-01 20:21:11 +04:00
|
|
|
}
|
|
|
|
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
|
2013-11-22 16:39:46 +04:00
|
|
|
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
|
2014-10-07 15:59:15 +04:00
|
|
|
BlockCompletionFunc *cb, void *opaque)
|
2013-11-22 16:39:46 +04:00
|
|
|
{
|
|
|
|
trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
|
|
|
|
|
|
|
|
return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
|
|
|
|
BDRV_REQ_ZERO_WRITE | flags,
|
|
|
|
cb, opaque, true);
|
|
|
|
}
|
|
|
|
|
2009-09-09 19:53:37 +04:00
|
|
|
|
|
|
|
typedef struct MultiwriteCB {
|
|
|
|
int error;
|
|
|
|
int num_requests;
|
|
|
|
int num_callbacks;
|
|
|
|
struct {
|
2014-10-07 15:59:15 +04:00
|
|
|
BlockCompletionFunc *cb;
|
2009-09-09 19:53:37 +04:00
|
|
|
void *opaque;
|
|
|
|
QEMUIOVector *free_qiov;
|
|
|
|
} callbacks[];
|
|
|
|
} MultiwriteCB;
|
|
|
|
|
|
|
|
static void multiwrite_user_cb(MultiwriteCB *mcb)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < mcb->num_callbacks; i++) {
|
|
|
|
mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
|
2010-04-21 23:35:45 +04:00
|
|
|
if (mcb->callbacks[i].free_qiov) {
|
|
|
|
qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
|
|
|
|
}
|
2011-08-21 07:09:37 +04:00
|
|
|
g_free(mcb->callbacks[i].free_qiov);
|
2009-09-09 19:53:37 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void multiwrite_cb(void *opaque, int ret)
|
|
|
|
{
|
|
|
|
MultiwriteCB *mcb = opaque;
|
|
|
|
|
2010-05-22 21:15:08 +04:00
|
|
|
trace_multiwrite_cb(mcb, ret);
|
|
|
|
|
2010-04-02 00:48:44 +04:00
|
|
|
if (ret < 0 && !mcb->error) {
|
2009-09-09 19:53:37 +04:00
|
|
|
mcb->error = ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
mcb->num_requests--;
|
|
|
|
if (mcb->num_requests == 0) {
|
2010-07-01 18:08:51 +04:00
|
|
|
multiwrite_user_cb(mcb);
|
2011-08-21 07:09:37 +04:00
|
|
|
g_free(mcb);
|
2009-09-09 19:53:37 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int multiwrite_req_compare(const void *a, const void *b)
|
|
|
|
{
|
2010-05-19 22:53:10 +04:00
|
|
|
const BlockRequest *req1 = a, *req2 = b;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Note that we can't simply subtract req2->sector from req1->sector
|
|
|
|
* here as that could overflow the return value.
|
|
|
|
*/
|
|
|
|
if (req1->sector > req2->sector) {
|
|
|
|
return 1;
|
|
|
|
} else if (req1->sector < req2->sector) {
|
|
|
|
return -1;
|
|
|
|
} else {
|
|
|
|
return 0;
|
|
|
|
}
|
2009-09-09 19:53:37 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Takes a bunch of requests and tries to merge them. Returns the number of
|
|
|
|
* requests that remain after merging.
|
|
|
|
*/
|
|
|
|
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
|
|
|
|
int num_reqs, MultiwriteCB *mcb)
|
|
|
|
{
|
|
|
|
int i, outidx;
|
|
|
|
|
|
|
|
// Sort requests by start sector
|
|
|
|
qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
|
|
|
|
|
|
|
|
// Check if adjacent requests touch the same clusters. If so, combine them,
|
|
|
|
// filling up gaps with zero sectors.
|
|
|
|
outidx = 0;
|
|
|
|
for (i = 1; i < num_reqs; i++) {
|
|
|
|
int merge = 0;
|
|
|
|
int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
|
|
|
|
|
2012-02-21 19:43:52 +04:00
|
|
|
// Handle exactly sequential writes and overlapping writes.
|
2009-09-09 19:53:37 +04:00
|
|
|
if (reqs[i].sector <= oldreq_last) {
|
|
|
|
merge = 1;
|
|
|
|
}
|
|
|
|
|
2010-01-26 16:49:08 +03:00
|
|
|
if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
|
|
|
|
merge = 0;
|
|
|
|
}
|
|
|
|
|
2014-10-27 12:18:46 +03:00
|
|
|
if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
|
|
|
|
reqs[i].nb_sectors > bs->bl.max_transfer_length) {
|
|
|
|
merge = 0;
|
|
|
|
}
|
|
|
|
|
2009-09-09 19:53:37 +04:00
|
|
|
if (merge) {
|
|
|
|
size_t size;
|
2011-08-21 07:09:37 +04:00
|
|
|
QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
|
2009-09-09 19:53:37 +04:00
|
|
|
qemu_iovec_init(qiov,
|
|
|
|
reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
|
|
|
|
|
|
|
|
// Add the first request to the merged one. If the requests are
|
|
|
|
// overlapping, drop the last sectors of the first request.
|
|
|
|
size = (reqs[i].sector - reqs[outidx].sector) << 9;
|
consolidate qemu_iovec_copy() and qemu_iovec_concat() and make them consistent
qemu_iovec_concat() is currently a wrapper for
qemu_iovec_copy(), use the former (with extra
"0" arg) in a few places where it is used.
Change skip argument of qemu_iovec_copy() from
uint64_t to size_t, since size of qiov itself
is size_t, so there's no way to skip larger
sizes. Rename it to soffset, to make it clear
that the offset is applied to src.
Also change the only usage of uint64_t in
hw/9pfs/virtio-9p.c, in v9fs_init_qiov_from_pdu() -
all callers of it actually uses size_t too,
not uint64_t.
One added restriction: as for all other iovec-related
functions, soffset must point inside src.
Order of argumens is already good:
qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
int c, size_t bytes)
vs:
qemu_iovec_concat(QEMUIOVector *dst,
QEMUIOVector *src,
size_t soffset, size_t sbytes)
(note soffset is after _src_ not dst, since it applies to src;
for memset it applies to qiov).
Note that in many places where this function is used,
the previous call is qemu_iovec_reset(), which means
many callers actually want copy (replacing dst content),
not concat. So we may want to add a wrapper like
qemu_iovec_copy() with the same arguments but which
calls qemu_iovec_reset() before _concat().
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2012-03-12 21:28:06 +04:00
|
|
|
qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
|
2009-09-09 19:53:37 +04:00
|
|
|
|
2012-02-21 19:43:52 +04:00
|
|
|
// We should need to add any zeros between the two requests
|
|
|
|
assert (reqs[i].sector <= oldreq_last);
|
2009-09-09 19:53:37 +04:00
|
|
|
|
|
|
|
// Add the second request
|
consolidate qemu_iovec_copy() and qemu_iovec_concat() and make them consistent
qemu_iovec_concat() is currently a wrapper for
qemu_iovec_copy(), use the former (with extra
"0" arg) in a few places where it is used.
Change skip argument of qemu_iovec_copy() from
uint64_t to size_t, since size of qiov itself
is size_t, so there's no way to skip larger
sizes. Rename it to soffset, to make it clear
that the offset is applied to src.
Also change the only usage of uint64_t in
hw/9pfs/virtio-9p.c, in v9fs_init_qiov_from_pdu() -
all callers of it actually uses size_t too,
not uint64_t.
One added restriction: as for all other iovec-related
functions, soffset must point inside src.
Order of argumens is already good:
qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
int c, size_t bytes)
vs:
qemu_iovec_concat(QEMUIOVector *dst,
QEMUIOVector *src,
size_t soffset, size_t sbytes)
(note soffset is after _src_ not dst, since it applies to src;
for memset it applies to qiov).
Note that in many places where this function is used,
the previous call is qemu_iovec_reset(), which means
many callers actually want copy (replacing dst content),
not concat. So we may want to add a wrapper like
qemu_iovec_copy() with the same arguments but which
calls qemu_iovec_reset() before _concat().
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2012-03-12 21:28:06 +04:00
|
|
|
qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
|
2009-09-09 19:53:37 +04:00
|
|
|
|
2014-07-30 12:53:30 +04:00
|
|
|
// Add tail of first request, if necessary
|
|
|
|
if (qiov->size < reqs[outidx].qiov->size) {
|
|
|
|
qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
|
|
|
|
reqs[outidx].qiov->size - qiov->size);
|
|
|
|
}
|
|
|
|
|
2010-05-21 13:09:42 +04:00
|
|
|
reqs[outidx].nb_sectors = qiov->size >> 9;
|
2009-09-09 19:53:37 +04:00
|
|
|
reqs[outidx].qiov = qiov;
|
|
|
|
|
|
|
|
mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
|
|
|
|
} else {
|
|
|
|
outidx++;
|
|
|
|
reqs[outidx].sector = reqs[i].sector;
|
|
|
|
reqs[outidx].nb_sectors = reqs[i].nb_sectors;
|
|
|
|
reqs[outidx].qiov = reqs[i].qiov;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-02-02 16:52:18 +03:00
|
|
|
block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
|
|
|
|
|
2009-09-09 19:53:37 +04:00
|
|
|
return outidx + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Submit multiple AIO write requests at once.
|
|
|
|
*
|
|
|
|
* On success, the function returns 0 and all requests in the reqs array have
|
|
|
|
* been submitted. In error case this function returns -1, and any of the
|
|
|
|
* requests may or may not be submitted yet. In particular, this means that the
|
|
|
|
* callback will be called for some of the requests, for others it won't. The
|
|
|
|
* caller must check the error field of the BlockRequest to wait for the right
|
|
|
|
* callbacks (if error != 0, no callback will be called).
|
|
|
|
*
|
|
|
|
* The implementation may modify the contents of the reqs array, e.g. to merge
|
|
|
|
* requests. However, the fields opaque and error are left unmodified as they
|
|
|
|
* are used to signal failure for a single request to the caller.
|
|
|
|
*/
|
|
|
|
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
|
|
|
|
{
|
|
|
|
MultiwriteCB *mcb;
|
|
|
|
int i;
|
|
|
|
|
2011-03-07 19:01:04 +03:00
|
|
|
/* don't submit writes if we don't have a medium */
|
|
|
|
if (bs->drv == NULL) {
|
|
|
|
for (i = 0; i < num_reqs; i++) {
|
|
|
|
reqs[i].error = -ENOMEDIUM;
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2009-09-09 19:53:37 +04:00
|
|
|
if (num_reqs == 0) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create MultiwriteCB structure
|
2011-08-21 07:09:37 +04:00
|
|
|
mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
|
2009-09-09 19:53:37 +04:00
|
|
|
mcb->num_requests = 0;
|
|
|
|
mcb->num_callbacks = num_reqs;
|
|
|
|
|
|
|
|
for (i = 0; i < num_reqs; i++) {
|
|
|
|
mcb->callbacks[i].cb = reqs[i].cb;
|
|
|
|
mcb->callbacks[i].opaque = reqs[i].opaque;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check for mergable requests
|
|
|
|
num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
|
|
|
|
|
2010-05-22 21:15:08 +04:00
|
|
|
trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
|
|
|
|
|
2011-11-14 20:50:50 +04:00
|
|
|
/* Run the aio requests. */
|
|
|
|
mcb->num_requests = num_reqs;
|
2009-09-09 19:53:37 +04:00
|
|
|
for (i = 0; i < num_reqs; i++) {
|
2013-11-22 16:39:44 +04:00
|
|
|
bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
|
|
|
|
reqs[i].nb_sectors, reqs[i].flags,
|
|
|
|
multiwrite_cb, mcb,
|
|
|
|
true);
|
2009-09-09 19:53:37 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-10-07 15:59:14 +04:00
|
|
|
void bdrv_aio_cancel(BlockAIOCB *acb)
|
2006-08-01 20:21:11 +04:00
|
|
|
{
|
2014-09-11 09:41:27 +04:00
|
|
|
qemu_aio_ref(acb);
|
|
|
|
bdrv_aio_cancel_async(acb);
|
|
|
|
while (acb->refcnt > 1) {
|
|
|
|
if (acb->aiocb_info->get_aio_context) {
|
|
|
|
aio_poll(acb->aiocb_info->get_aio_context(acb), true);
|
|
|
|
} else if (acb->bs) {
|
|
|
|
aio_poll(bdrv_get_aio_context(acb->bs), true);
|
|
|
|
} else {
|
|
|
|
abort();
|
2014-09-11 09:41:09 +04:00
|
|
|
}
|
|
|
|
}
|
2014-09-11 09:41:28 +04:00
|
|
|
qemu_aio_unref(acb);
|
2014-09-11 09:41:09 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Async version of aio cancel. The caller is not blocked if the acb implements
|
|
|
|
* cancel_async, otherwise we do nothing and let the request normally complete.
|
|
|
|
* In either case the completion callback must be called. */
|
2014-10-07 15:59:14 +04:00
|
|
|
void bdrv_aio_cancel_async(BlockAIOCB *acb)
|
2014-09-11 09:41:09 +04:00
|
|
|
{
|
|
|
|
if (acb->aiocb_info->cancel_async) {
|
|
|
|
acb->aiocb_info->cancel_async(acb);
|
|
|
|
}
|
2006-08-01 20:21:11 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**************************************************************/
|
|
|
|
/* async block device emulation */
|
|
|
|
|
2014-10-07 15:59:14 +04:00
|
|
|
typedef struct BlockAIOCBSync {
|
|
|
|
BlockAIOCB common;
|
2009-05-25 14:37:32 +04:00
|
|
|
QEMUBH *bh;
|
|
|
|
int ret;
|
|
|
|
/* vector translation state */
|
|
|
|
QEMUIOVector *qiov;
|
|
|
|
uint8_t *bounce;
|
|
|
|
int is_write;
|
2014-10-07 15:59:14 +04:00
|
|
|
} BlockAIOCBSync;
|
2009-05-25 14:37:32 +04:00
|
|
|
|
2012-10-31 19:34:37 +04:00
|
|
|
static const AIOCBInfo bdrv_em_aiocb_info = {
|
2014-10-07 15:59:14 +04:00
|
|
|
.aiocb_size = sizeof(BlockAIOCBSync),
|
2009-05-25 14:37:32 +04:00
|
|
|
};
|
|
|
|
|
2006-08-07 06:38:06 +04:00
|
|
|
static void bdrv_aio_bh_cb(void *opaque)
|
2006-08-01 20:21:11 +04:00
|
|
|
{
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCBSync *acb = opaque;
|
2009-04-07 22:43:24 +04:00
|
|
|
|
2014-05-20 15:16:51 +04:00
|
|
|
if (!acb->is_write && acb->ret >= 0) {
|
allow qemu_iovec_from_buffer() to specify offset from which to start copying
Similar to
qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
int c, size_t bytes);
the new prototype is:
qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
const void *buf, size_t bytes);
The processing starts at offset bytes within qiov.
This way, we may copy a bounce buffer directly to
a middle of qiov.
This is exactly the same function as iov_from_buf() from
iov.c, so use the existing implementation and rename it
to qemu_iovec_from_buf() to be shorter and to match the
utility function.
As with utility implementation, we now assert that the
offset is inside actual iovec. Nothing changed for
current callers, because `offset' parameter is new.
While at it, stop using "bounce-qiov" in block/qcow2.c
and copy decrypted data directly from cluster_data
instead of recreating a temp qiov for doing that.
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2012-06-07 20:17:55 +04:00
|
|
|
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
|
2014-05-20 15:16:51 +04:00
|
|
|
}
|
2009-04-07 22:43:28 +04:00
|
|
|
qemu_vfree(acb->bounce);
|
2006-08-07 06:38:06 +04:00
|
|
|
acb->common.cb(acb->common.opaque, acb->ret);
|
2009-06-01 13:07:23 +04:00
|
|
|
qemu_bh_delete(acb->bh);
|
2009-06-23 17:20:36 +04:00
|
|
|
acb->bh = NULL;
|
2014-09-11 09:41:28 +04:00
|
|
|
qemu_aio_unref(acb);
|
2006-08-01 20:21:11 +04:00
|
|
|
}
|
2006-06-27 00:08:57 +04:00
|
|
|
|
2014-10-07 15:59:14 +04:00
|
|
|
static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
|
|
|
|
int64_t sector_num,
|
|
|
|
QEMUIOVector *qiov,
|
|
|
|
int nb_sectors,
|
2014-10-07 15:59:15 +04:00
|
|
|
BlockCompletionFunc *cb,
|
2014-10-07 15:59:14 +04:00
|
|
|
void *opaque,
|
|
|
|
int is_write)
|
2009-04-07 22:43:24 +04:00
|
|
|
|
2006-08-01 20:21:11 +04:00
|
|
|
{
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCBSync *acb;
|
2006-08-07 06:38:06 +04:00
|
|
|
|
2012-10-31 19:34:37 +04:00
|
|
|
acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
|
2009-04-07 22:43:24 +04:00
|
|
|
acb->is_write = is_write;
|
|
|
|
acb->qiov = qiov;
|
2014-05-20 15:16:51 +04:00
|
|
|
acb->bounce = qemu_try_blockalign(bs, qiov->size);
|
2014-05-08 18:34:34 +04:00
|
|
|
acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
|
2009-04-07 22:43:24 +04:00
|
|
|
|
2014-05-20 15:16:51 +04:00
|
|
|
if (acb->bounce == NULL) {
|
|
|
|
acb->ret = -ENOMEM;
|
|
|
|
} else if (is_write) {
|
2012-06-07 20:21:06 +04:00
|
|
|
qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
|
2011-10-13 16:08:21 +04:00
|
|
|
acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
|
2009-04-07 22:43:24 +04:00
|
|
|
} else {
|
2011-10-13 16:08:21 +04:00
|
|
|
acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
|
2009-04-07 22:43:24 +04:00
|
|
|
}
|
|
|
|
|
2006-08-07 06:38:06 +04:00
|
|
|
qemu_bh_schedule(acb->bh);
|
2009-04-07 22:43:24 +04:00
|
|
|
|
2006-08-07 06:38:06 +04:00
|
|
|
return &acb->common;
|
2006-06-27 00:08:57 +04:00
|
|
|
}
|
|
|
|
|
2014-10-07 15:59:14 +04:00
|
|
|
static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
|
2009-04-07 22:43:24 +04:00
|
|
|
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
2014-10-07 15:59:15 +04:00
|
|
|
BlockCompletionFunc *cb, void *opaque)
|
2006-06-27 00:08:57 +04:00
|
|
|
{
|
2009-04-07 22:43:24 +04:00
|
|
|
return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
|
|
|
|
}
|
2006-08-01 20:21:11 +04:00
|
|
|
|
2014-10-07 15:59:14 +04:00
|
|
|
static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
|
2009-04-07 22:43:24 +04:00
|
|
|
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
2014-10-07 15:59:15 +04:00
|
|
|
BlockCompletionFunc *cb, void *opaque)
|
2009-04-07 22:43:24 +04:00
|
|
|
{
|
|
|
|
return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
|
2006-06-27 00:08:57 +04:00
|
|
|
}
|
|
|
|
|
2011-06-30 12:05:46 +04:00
|
|
|
|
2014-10-07 15:59:14 +04:00
|
|
|
typedef struct BlockAIOCBCoroutine {
|
|
|
|
BlockAIOCB common;
|
2011-06-30 12:05:46 +04:00
|
|
|
BlockRequest req;
|
|
|
|
bool is_write;
|
2012-11-13 19:35:08 +04:00
|
|
|
bool *done;
|
2011-06-30 12:05:46 +04:00
|
|
|
QEMUBH* bh;
|
2014-10-07 15:59:14 +04:00
|
|
|
} BlockAIOCBCoroutine;
|
2011-06-30 12:05:46 +04:00
|
|
|
|
2012-10-31 19:34:37 +04:00
|
|
|
static const AIOCBInfo bdrv_em_co_aiocb_info = {
|
2014-10-07 15:59:14 +04:00
|
|
|
.aiocb_size = sizeof(BlockAIOCBCoroutine),
|
2011-06-30 12:05:46 +04:00
|
|
|
};
|
|
|
|
|
2011-10-14 12:41:29 +04:00
|
|
|
static void bdrv_co_em_bh(void *opaque)
|
2011-06-30 12:05:46 +04:00
|
|
|
{
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCBCoroutine *acb = opaque;
|
2011-06-30 12:05:46 +04:00
|
|
|
|
|
|
|
acb->common.cb(acb->common.opaque, acb->req.error);
|
2012-11-13 19:35:08 +04:00
|
|
|
|
2011-06-30 12:05:46 +04:00
|
|
|
qemu_bh_delete(acb->bh);
|
2014-09-11 09:41:28 +04:00
|
|
|
qemu_aio_unref(acb);
|
2011-06-30 12:05:46 +04:00
|
|
|
}
|
|
|
|
|
2011-10-13 16:08:23 +04:00
|
|
|
/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
|
|
|
|
static void coroutine_fn bdrv_co_do_rw(void *opaque)
|
|
|
|
{
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCBCoroutine *acb = opaque;
|
2011-10-13 16:08:23 +04:00
|
|
|
BlockDriverState *bs = acb->common.bs;
|
|
|
|
|
|
|
|
if (!acb->is_write) {
|
|
|
|
acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
|
2013-11-22 16:39:44 +04:00
|
|
|
acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
|
2011-10-13 16:08:23 +04:00
|
|
|
} else {
|
|
|
|
acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
|
2013-11-22 16:39:44 +04:00
|
|
|
acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
|
2011-10-13 16:08:23 +04:00
|
|
|
}
|
|
|
|
|
2014-05-08 18:34:34 +04:00
|
|
|
acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
|
2011-10-13 16:08:23 +04:00
|
|
|
qemu_bh_schedule(acb->bh);
|
|
|
|
}
|
|
|
|
|
2014-10-07 15:59:14 +04:00
|
|
|
static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
|
|
|
|
int64_t sector_num,
|
|
|
|
QEMUIOVector *qiov,
|
|
|
|
int nb_sectors,
|
|
|
|
BdrvRequestFlags flags,
|
2014-10-07 15:59:15 +04:00
|
|
|
BlockCompletionFunc *cb,
|
2014-10-07 15:59:14 +04:00
|
|
|
void *opaque,
|
|
|
|
bool is_write)
|
2011-06-30 12:05:46 +04:00
|
|
|
{
|
|
|
|
Coroutine *co;
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCBCoroutine *acb;
|
2011-06-30 12:05:46 +04:00
|
|
|
|
2012-10-31 19:34:37 +04:00
|
|
|
acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
|
2011-06-30 12:05:46 +04:00
|
|
|
acb->req.sector = sector_num;
|
|
|
|
acb->req.nb_sectors = nb_sectors;
|
|
|
|
acb->req.qiov = qiov;
|
2013-11-22 16:39:44 +04:00
|
|
|
acb->req.flags = flags;
|
2011-06-30 12:05:46 +04:00
|
|
|
acb->is_write = is_write;
|
|
|
|
|
2011-10-14 00:09:28 +04:00
|
|
|
co = qemu_coroutine_create(bdrv_co_do_rw);
|
2011-06-30 12:05:46 +04:00
|
|
|
qemu_coroutine_enter(co, acb);
|
|
|
|
|
|
|
|
return &acb->common;
|
|
|
|
}
|
|
|
|
|
2011-10-17 14:32:12 +04:00
|
|
|
static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
|
2009-09-04 21:01:49 +04:00
|
|
|
{
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCBCoroutine *acb = opaque;
|
2011-10-17 14:32:12 +04:00
|
|
|
BlockDriverState *bs = acb->common.bs;
|
2009-09-04 21:01:49 +04:00
|
|
|
|
2011-10-17 14:32:12 +04:00
|
|
|
acb->req.error = bdrv_co_flush(bs);
|
2014-05-08 18:34:34 +04:00
|
|
|
acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
|
2009-09-04 21:01:49 +04:00
|
|
|
qemu_bh_schedule(acb->bh);
|
|
|
|
}
|
|
|
|
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
|
2014-10-07 15:59:15 +04:00
|
|
|
BlockCompletionFunc *cb, void *opaque)
|
2010-05-26 19:51:49 +04:00
|
|
|
{
|
2011-10-17 14:32:12 +04:00
|
|
|
trace_bdrv_aio_flush(bs, opaque);
|
2010-05-26 19:51:49 +04:00
|
|
|
|
2011-10-17 14:32:12 +04:00
|
|
|
Coroutine *co;
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCBCoroutine *acb;
|
2010-05-26 19:51:49 +04:00
|
|
|
|
2012-10-31 19:34:37 +04:00
|
|
|
acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
|
2012-11-13 19:35:08 +04:00
|
|
|
|
2011-10-17 14:32:12 +04:00
|
|
|
co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
|
|
|
|
qemu_coroutine_enter(co, acb);
|
2010-05-26 19:51:49 +04:00
|
|
|
|
|
|
|
return &acb->common;
|
|
|
|
}
|
|
|
|
|
2011-10-17 14:32:14 +04:00
|
|
|
static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
|
|
|
|
{
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCBCoroutine *acb = opaque;
|
2011-10-17 14:32:14 +04:00
|
|
|
BlockDriverState *bs = acb->common.bs;
|
|
|
|
|
|
|
|
acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
|
2014-05-08 18:34:34 +04:00
|
|
|
acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
|
2011-10-17 14:32:14 +04:00
|
|
|
qemu_bh_schedule(acb->bh);
|
|
|
|
}
|
|
|
|
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
|
2011-10-17 14:32:14 +04:00
|
|
|
int64_t sector_num, int nb_sectors,
|
2014-10-07 15:59:15 +04:00
|
|
|
BlockCompletionFunc *cb, void *opaque)
|
2011-10-17 14:32:14 +04:00
|
|
|
{
|
|
|
|
Coroutine *co;
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCBCoroutine *acb;
|
2011-10-17 14:32:14 +04:00
|
|
|
|
|
|
|
trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
|
|
|
|
|
2012-10-31 19:34:37 +04:00
|
|
|
acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
|
2011-10-17 14:32:14 +04:00
|
|
|
acb->req.sector = sector_num;
|
|
|
|
acb->req.nb_sectors = nb_sectors;
|
|
|
|
co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
|
|
|
|
qemu_coroutine_enter(co, acb);
|
|
|
|
|
|
|
|
return &acb->common;
|
|
|
|
}
|
|
|
|
|
2004-08-02 01:59:26 +04:00
|
|
|
void bdrv_init(void)
|
|
|
|
{
|
2009-05-10 02:03:42 +04:00
|
|
|
module_call_init(MODULE_INIT_BLOCK);
|
2004-08-02 01:59:26 +04:00
|
|
|
}
|
2006-08-07 06:38:06 +04:00
|
|
|
|
2009-10-27 20:41:44 +03:00
|
|
|
void bdrv_init_with_whitelist(void)
|
|
|
|
{
|
|
|
|
use_bdrv_whitelist = 1;
|
|
|
|
bdrv_init();
|
|
|
|
}
|
|
|
|
|
2012-10-31 19:34:37 +04:00
|
|
|
void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
|
2014-10-07 15:59:15 +04:00
|
|
|
BlockCompletionFunc *cb, void *opaque)
|
2006-08-07 06:38:06 +04:00
|
|
|
{
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCB *acb;
|
2006-08-07 06:38:06 +04:00
|
|
|
|
2012-10-31 19:34:37 +04:00
|
|
|
acb = g_slice_alloc(aiocb_info->aiocb_size);
|
|
|
|
acb->aiocb_info = aiocb_info;
|
2006-08-07 06:38:06 +04:00
|
|
|
acb->bs = bs;
|
|
|
|
acb->cb = cb;
|
|
|
|
acb->opaque = opaque;
|
2014-09-11 09:41:08 +04:00
|
|
|
acb->refcnt = 1;
|
2006-08-07 06:38:06 +04:00
|
|
|
return acb;
|
|
|
|
}
|
|
|
|
|
2014-09-11 09:41:08 +04:00
|
|
|
void qemu_aio_ref(void *p)
|
|
|
|
{
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCB *acb = p;
|
2014-09-11 09:41:08 +04:00
|
|
|
acb->refcnt++;
|
|
|
|
}
|
|
|
|
|
2014-09-11 09:41:28 +04:00
|
|
|
void qemu_aio_unref(void *p)
|
2006-08-07 06:38:06 +04:00
|
|
|
{
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCB *acb = p;
|
2014-09-11 09:41:08 +04:00
|
|
|
assert(acb->refcnt > 0);
|
|
|
|
if (--acb->refcnt == 0) {
|
|
|
|
g_slice_free1(acb->aiocb_info->aiocb_size, acb);
|
|
|
|
}
|
2006-08-07 06:38:06 +04:00
|
|
|
}
|
2006-08-19 15:45:59 +04:00
|
|
|
|
2011-07-15 15:50:26 +04:00
|
|
|
/**************************************************************/
|
|
|
|
/* Coroutine block device emulation */
|
|
|
|
|
|
|
|
typedef struct CoroutineIOCompletion {
|
|
|
|
Coroutine *coroutine;
|
|
|
|
int ret;
|
|
|
|
} CoroutineIOCompletion;
|
|
|
|
|
|
|
|
static void bdrv_co_io_em_complete(void *opaque, int ret)
|
|
|
|
{
|
|
|
|
CoroutineIOCompletion *co = opaque;
|
|
|
|
|
|
|
|
co->ret = ret;
|
|
|
|
qemu_coroutine_enter(co->coroutine, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
|
|
|
|
int nb_sectors, QEMUIOVector *iov,
|
|
|
|
bool is_write)
|
|
|
|
{
|
|
|
|
CoroutineIOCompletion co = {
|
|
|
|
.coroutine = qemu_coroutine_self(),
|
|
|
|
};
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCB *acb;
|
2011-07-15 15:50:26 +04:00
|
|
|
|
|
|
|
if (is_write) {
|
2011-10-05 20:17:02 +04:00
|
|
|
acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
|
|
|
|
bdrv_co_io_em_complete, &co);
|
2011-07-15 15:50:26 +04:00
|
|
|
} else {
|
2011-10-05 20:17:02 +04:00
|
|
|
acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
|
|
|
|
bdrv_co_io_em_complete, &co);
|
2011-07-15 15:50:26 +04:00
|
|
|
}
|
|
|
|
|
2011-09-30 20:34:58 +04:00
|
|
|
trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
|
2011-07-15 15:50:26 +04:00
|
|
|
if (!acb) {
|
|
|
|
return -EIO;
|
|
|
|
}
|
|
|
|
qemu_coroutine_yield();
|
|
|
|
|
|
|
|
return co.ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
|
|
|
|
int64_t sector_num, int nb_sectors,
|
|
|
|
QEMUIOVector *iov)
|
|
|
|
{
|
|
|
|
return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
|
|
|
|
int64_t sector_num, int nb_sectors,
|
|
|
|
QEMUIOVector *iov)
|
|
|
|
{
|
|
|
|
return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
|
|
|
|
}
|
|
|
|
|
2011-10-17 14:32:12 +04:00
|
|
|
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
|
2011-07-15 18:05:00 +04:00
|
|
|
{
|
2011-10-17 14:32:12 +04:00
|
|
|
RwCo *rwco = opaque;
|
|
|
|
|
|
|
|
rwco->ret = bdrv_co_flush(rwco->bs);
|
|
|
|
}
|
|
|
|
|
|
|
|
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
|
|
|
|
{
|
2011-11-10 21:10:11 +04:00
|
|
|
int ret;
|
|
|
|
|
2012-03-12 21:26:01 +04:00
|
|
|
if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
|
2011-10-17 14:32:12 +04:00
|
|
|
return 0;
|
2011-11-10 21:10:11 +04:00
|
|
|
}
|
|
|
|
|
2011-11-10 21:13:59 +04:00
|
|
|
/* Write back cached data to the OS even with cache=unsafe */
|
2013-06-05 17:17:55 +04:00
|
|
|
BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
|
2011-11-10 21:10:11 +04:00
|
|
|
if (bs->drv->bdrv_co_flush_to_os) {
|
|
|
|
ret = bs->drv->bdrv_co_flush_to_os(bs);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-11-10 21:13:59 +04:00
|
|
|
/* But don't actually force it to the disk with cache=unsafe */
|
|
|
|
if (bs->open_flags & BDRV_O_NO_FLUSH) {
|
2012-08-15 14:52:45 +04:00
|
|
|
goto flush_parent;
|
2011-11-10 21:13:59 +04:00
|
|
|
}
|
|
|
|
|
2013-06-05 17:17:55 +04:00
|
|
|
BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
|
2011-11-10 21:10:11 +04:00
|
|
|
if (bs->drv->bdrv_co_flush_to_disk) {
|
2012-03-12 21:26:01 +04:00
|
|
|
ret = bs->drv->bdrv_co_flush_to_disk(bs);
|
2011-10-17 14:32:12 +04:00
|
|
|
} else if (bs->drv->bdrv_aio_flush) {
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCB *acb;
|
2011-10-17 14:32:12 +04:00
|
|
|
CoroutineIOCompletion co = {
|
|
|
|
.coroutine = qemu_coroutine_self(),
|
|
|
|
};
|
|
|
|
|
|
|
|
acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
|
|
|
|
if (acb == NULL) {
|
2012-03-12 21:26:01 +04:00
|
|
|
ret = -EIO;
|
2011-10-17 14:32:12 +04:00
|
|
|
} else {
|
|
|
|
qemu_coroutine_yield();
|
2012-03-12 21:26:01 +04:00
|
|
|
ret = co.ret;
|
2011-10-17 14:32:12 +04:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Some block drivers always operate in either writethrough or unsafe
|
|
|
|
* mode and don't support bdrv_flush therefore. Usually qemu doesn't
|
|
|
|
* know how the server works (because the behaviour is hardcoded or
|
|
|
|
* depends on server-side configuration), so we can't ensure that
|
|
|
|
* everything is safe on disk. Returning an error doesn't work because
|
|
|
|
* that would break guests even if the server operates in writethrough
|
|
|
|
* mode.
|
|
|
|
*
|
|
|
|
* Let's hope the user knows what he's doing.
|
|
|
|
*/
|
2012-03-12 21:26:01 +04:00
|
|
|
ret = 0;
|
2011-10-17 14:32:12 +04:00
|
|
|
}
|
2012-03-12 21:26:01 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
|
|
|
|
* in the case of cache=unsafe, so there are no useless flushes.
|
|
|
|
*/
|
2012-08-15 14:52:45 +04:00
|
|
|
flush_parent:
|
2012-03-12 21:26:01 +04:00
|
|
|
return bdrv_co_flush(bs->file);
|
2011-10-17 14:32:12 +04:00
|
|
|
}
|
|
|
|
|
2014-03-12 18:59:16 +04:00
|
|
|
void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
|
2011-11-15 01:09:45 +04:00
|
|
|
{
|
2014-03-12 18:59:16 +04:00
|
|
|
Error *local_err = NULL;
|
|
|
|
int ret;
|
|
|
|
|
2014-03-11 13:58:39 +04:00
|
|
|
if (!bs->drv) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-10-09 06:50:46 +04:00
|
|
|
if (!(bs->open_flags & BDRV_O_INCOMING)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
bs->open_flags &= ~BDRV_O_INCOMING;
|
|
|
|
|
2014-03-11 13:58:39 +04:00
|
|
|
if (bs->drv->bdrv_invalidate_cache) {
|
2014-03-12 18:59:16 +04:00
|
|
|
bs->drv->bdrv_invalidate_cache(bs, &local_err);
|
2014-03-11 13:58:39 +04:00
|
|
|
} else if (bs->file) {
|
2014-03-12 18:59:16 +04:00
|
|
|
bdrv_invalidate_cache(bs->file, &local_err);
|
|
|
|
}
|
|
|
|
if (local_err) {
|
|
|
|
error_propagate(errp, local_err);
|
|
|
|
return;
|
2011-11-15 01:09:45 +04:00
|
|
|
}
|
2014-03-11 13:58:39 +04:00
|
|
|
|
2014-03-12 18:59:16 +04:00
|
|
|
ret = refresh_total_sectors(bs, bs->total_sectors);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_setg_errno(errp, -ret, "Could not refresh total sector count");
|
|
|
|
return;
|
|
|
|
}
|
2011-11-15 01:09:45 +04:00
|
|
|
}
|
|
|
|
|
2014-03-12 18:59:16 +04:00
|
|
|
void bdrv_invalidate_cache_all(Error **errp)
|
2011-11-15 01:09:45 +04:00
|
|
|
{
|
|
|
|
BlockDriverState *bs;
|
2014-03-12 18:59:16 +04:00
|
|
|
Error *local_err = NULL;
|
2011-11-15 01:09:45 +04:00
|
|
|
|
2014-01-24 00:31:32 +04:00
|
|
|
QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
|
2014-05-08 18:34:35 +04:00
|
|
|
AioContext *aio_context = bdrv_get_aio_context(bs);
|
|
|
|
|
|
|
|
aio_context_acquire(aio_context);
|
2014-03-12 18:59:16 +04:00
|
|
|
bdrv_invalidate_cache(bs, &local_err);
|
2014-05-08 18:34:35 +04:00
|
|
|
aio_context_release(aio_context);
|
2014-03-12 18:59:16 +04:00
|
|
|
if (local_err) {
|
|
|
|
error_propagate(errp, local_err);
|
|
|
|
return;
|
|
|
|
}
|
2011-11-15 01:09:45 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-10-17 14:32:12 +04:00
|
|
|
int bdrv_flush(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
Coroutine *co;
|
|
|
|
RwCo rwco = {
|
|
|
|
.bs = bs,
|
|
|
|
.ret = NOT_DONE,
|
2011-07-15 18:05:00 +04:00
|
|
|
};
|
|
|
|
|
2011-10-17 14:32:12 +04:00
|
|
|
if (qemu_in_coroutine()) {
|
|
|
|
/* Fast-path if already in coroutine context */
|
|
|
|
bdrv_flush_co_entry(&rwco);
|
|
|
|
} else {
|
2014-05-08 18:34:34 +04:00
|
|
|
AioContext *aio_context = bdrv_get_aio_context(bs);
|
|
|
|
|
2011-10-17 14:32:12 +04:00
|
|
|
co = qemu_coroutine_create(bdrv_flush_co_entry);
|
|
|
|
qemu_coroutine_enter(co, &rwco);
|
|
|
|
while (rwco.ret == NOT_DONE) {
|
2014-05-08 18:34:34 +04:00
|
|
|
aio_poll(aio_context, true);
|
2011-10-17 14:32:12 +04:00
|
|
|
}
|
2011-07-15 18:05:00 +04:00
|
|
|
}
|
2011-10-17 14:32:12 +04:00
|
|
|
|
|
|
|
return rwco.ret;
|
2011-07-15 18:05:00 +04:00
|
|
|
}
|
|
|
|
|
2013-12-05 15:09:38 +04:00
|
|
|
typedef struct DiscardCo {
|
|
|
|
BlockDriverState *bs;
|
|
|
|
int64_t sector_num;
|
|
|
|
int nb_sectors;
|
|
|
|
int ret;
|
|
|
|
} DiscardCo;
|
2011-10-17 14:32:14 +04:00
|
|
|
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
|
|
|
|
{
|
2013-12-05 15:09:38 +04:00
|
|
|
DiscardCo *rwco = opaque;
|
2011-10-17 14:32:14 +04:00
|
|
|
|
|
|
|
rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
|
|
|
|
}
|
|
|
|
|
|
|
|
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
|
|
|
|
int nb_sectors)
|
|
|
|
{
|
2015-02-05 21:58:25 +03:00
|
|
|
int max_discard, ret;
|
2013-11-22 16:39:43 +04:00
|
|
|
|
2011-10-17 14:32:14 +04:00
|
|
|
if (!bs->drv) {
|
|
|
|
return -ENOMEDIUM;
|
2015-02-05 21:58:25 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
ret = bdrv_check_request(bs, sector_num, nb_sectors);
|
|
|
|
if (ret < 0) {
|
|
|
|
return ret;
|
2011-10-17 14:32:14 +04:00
|
|
|
} else if (bs->read_only) {
|
|
|
|
return -EROFS;
|
2013-01-14 19:26:58 +04:00
|
|
|
}
|
|
|
|
|
2013-11-13 14:29:43 +04:00
|
|
|
bdrv_reset_dirty(bs, sector_num, nb_sectors);
|
2013-01-14 19:26:58 +04:00
|
|
|
|
2013-02-08 17:06:11 +04:00
|
|
|
/* Do nothing if disabled. */
|
|
|
|
if (!(bs->open_flags & BDRV_O_UNMAP)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-11-22 16:39:43 +04:00
|
|
|
if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
|
|
|
|
return 0;
|
|
|
|
}
|
2013-10-24 14:06:59 +04:00
|
|
|
|
2015-02-06 13:54:11 +03:00
|
|
|
max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
|
2013-11-22 16:39:43 +04:00
|
|
|
while (nb_sectors > 0) {
|
|
|
|
int ret;
|
|
|
|
int num = nb_sectors;
|
2013-10-24 14:06:59 +04:00
|
|
|
|
2013-11-22 16:39:43 +04:00
|
|
|
/* align request */
|
|
|
|
if (bs->bl.discard_alignment &&
|
|
|
|
num >= bs->bl.discard_alignment &&
|
|
|
|
sector_num % bs->bl.discard_alignment) {
|
|
|
|
if (num > bs->bl.discard_alignment) {
|
|
|
|
num = bs->bl.discard_alignment;
|
2013-10-24 14:06:59 +04:00
|
|
|
}
|
2013-11-22 16:39:43 +04:00
|
|
|
num -= sector_num % bs->bl.discard_alignment;
|
|
|
|
}
|
2013-10-24 14:06:59 +04:00
|
|
|
|
2013-11-22 16:39:43 +04:00
|
|
|
/* limit request size */
|
|
|
|
if (num > max_discard) {
|
|
|
|
num = max_discard;
|
|
|
|
}
|
2013-10-24 14:06:59 +04:00
|
|
|
|
2013-11-22 16:39:43 +04:00
|
|
|
if (bs->drv->bdrv_co_discard) {
|
2013-10-24 14:06:59 +04:00
|
|
|
ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
|
2013-11-22 16:39:43 +04:00
|
|
|
} else {
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCB *acb;
|
2013-11-22 16:39:43 +04:00
|
|
|
CoroutineIOCompletion co = {
|
|
|
|
.coroutine = qemu_coroutine_self(),
|
|
|
|
};
|
|
|
|
|
|
|
|
acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
|
|
|
|
bdrv_co_io_em_complete, &co);
|
|
|
|
if (acb == NULL) {
|
|
|
|
return -EIO;
|
|
|
|
} else {
|
|
|
|
qemu_coroutine_yield();
|
|
|
|
ret = co.ret;
|
2013-10-24 14:06:59 +04:00
|
|
|
}
|
|
|
|
}
|
2013-11-22 16:39:47 +04:00
|
|
|
if (ret && ret != -ENOTSUP) {
|
2013-11-22 16:39:43 +04:00
|
|
|
return ret;
|
2011-10-17 14:32:14 +04:00
|
|
|
}
|
2013-11-22 16:39:43 +04:00
|
|
|
|
|
|
|
sector_num += num;
|
|
|
|
nb_sectors -= num;
|
2011-10-17 14:32:14 +04:00
|
|
|
}
|
2013-11-22 16:39:43 +04:00
|
|
|
return 0;
|
2011-10-17 14:32:14 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
|
|
|
|
{
|
|
|
|
Coroutine *co;
|
2013-12-05 15:09:38 +04:00
|
|
|
DiscardCo rwco = {
|
2011-10-17 14:32:14 +04:00
|
|
|
.bs = bs,
|
|
|
|
.sector_num = sector_num,
|
|
|
|
.nb_sectors = nb_sectors,
|
|
|
|
.ret = NOT_DONE,
|
|
|
|
};
|
|
|
|
|
|
|
|
if (qemu_in_coroutine()) {
|
|
|
|
/* Fast-path if already in coroutine context */
|
|
|
|
bdrv_discard_co_entry(&rwco);
|
|
|
|
} else {
|
2014-05-08 18:34:34 +04:00
|
|
|
AioContext *aio_context = bdrv_get_aio_context(bs);
|
|
|
|
|
2011-10-17 14:32:14 +04:00
|
|
|
co = qemu_coroutine_create(bdrv_discard_co_entry);
|
|
|
|
qemu_coroutine_enter(co, &rwco);
|
|
|
|
while (rwco.ret == NOT_DONE) {
|
2014-05-08 18:34:34 +04:00
|
|
|
aio_poll(aio_context, true);
|
2011-10-17 14:32:14 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return rwco.ret;
|
|
|
|
}
|
|
|
|
|
2006-08-19 15:45:59 +04:00
|
|
|
/**************************************************************/
|
|
|
|
/* removable device support */
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Return TRUE if the media is present
|
|
|
|
*/
|
|
|
|
int bdrv_is_inserted(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
2011-09-06 20:58:41 +04:00
|
|
|
|
2006-08-19 15:45:59 +04:00
|
|
|
if (!drv)
|
|
|
|
return 0;
|
|
|
|
if (!drv->bdrv_is_inserted)
|
2011-09-06 20:58:41 +04:00
|
|
|
return 1;
|
|
|
|
return drv->bdrv_is_inserted(bs);
|
2006-08-19 15:45:59 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2011-08-03 17:08:08 +04:00
|
|
|
* Return whether the media changed since the last call to this
|
|
|
|
* function, or -ENOTSUP if we don't know. Most drivers don't know.
|
2006-08-19 15:45:59 +04:00
|
|
|
*/
|
|
|
|
int bdrv_media_changed(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
|
|
|
|
2011-08-03 17:08:08 +04:00
|
|
|
if (drv && drv->bdrv_media_changed) {
|
|
|
|
return drv->bdrv_media_changed(bs);
|
|
|
|
}
|
|
|
|
return -ENOTSUP;
|
2006-08-19 15:45:59 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* If eject_flag is TRUE, eject the media. Otherwise, close the tray
|
|
|
|
*/
|
2012-02-03 22:24:53 +04:00
|
|
|
void bdrv_eject(BlockDriverState *bs, bool eject_flag)
|
2006-08-19 15:45:59 +04:00
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
2014-10-07 15:59:11 +04:00
|
|
|
const char *device_name;
|
2006-08-19 15:45:59 +04:00
|
|
|
|
2011-07-20 20:23:42 +04:00
|
|
|
if (drv && drv->bdrv_eject) {
|
|
|
|
drv->bdrv_eject(bs, eject_flag);
|
2006-08-19 15:45:59 +04:00
|
|
|
}
|
2012-02-14 19:41:13 +04:00
|
|
|
|
2014-10-07 15:59:11 +04:00
|
|
|
device_name = bdrv_get_device_name(bs);
|
|
|
|
if (device_name[0] != '\0') {
|
|
|
|
qapi_event_send_device_tray_moved(device_name,
|
2014-06-18 10:43:44 +04:00
|
|
|
eject_flag, &error_abort);
|
2012-02-14 19:41:13 +04:00
|
|
|
}
|
2006-08-19 15:45:59 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Lock or unlock the media (if it is locked, the user won't be able
|
|
|
|
* to eject it manually).
|
|
|
|
*/
|
2011-09-06 20:58:47 +04:00
|
|
|
void bdrv_lock_medium(BlockDriverState *bs, bool locked)
|
2006-08-19 15:45:59 +04:00
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
|
|
|
|
2011-09-06 20:58:47 +04:00
|
|
|
trace_bdrv_lock_medium(bs, locked);
|
2011-03-29 23:04:40 +04:00
|
|
|
|
2011-09-06 20:58:47 +04:00
|
|
|
if (drv && drv->bdrv_lock_medium) {
|
|
|
|
drv->bdrv_lock_medium(bs, locked);
|
2006-08-19 15:45:59 +04:00
|
|
|
}
|
|
|
|
}
|
2007-12-24 19:10:43 +03:00
|
|
|
|
|
|
|
/* needed for generic scsi interface */
|
|
|
|
|
|
|
|
int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
|
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
|
|
|
|
|
|
|
if (drv && drv->bdrv_ioctl)
|
|
|
|
return drv->bdrv_ioctl(bs, req, buf);
|
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
2009-03-12 22:57:08 +03:00
|
|
|
|
2014-10-07 15:59:14 +04:00
|
|
|
BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
|
2009-03-28 20:28:41 +03:00
|
|
|
unsigned long int req, void *buf,
|
2014-10-07 15:59:15 +04:00
|
|
|
BlockCompletionFunc *cb, void *opaque)
|
2009-03-12 22:57:08 +03:00
|
|
|
{
|
2009-03-28 20:28:41 +03:00
|
|
|
BlockDriver *drv = bs->drv;
|
2009-03-12 22:57:08 +03:00
|
|
|
|
2009-03-28 20:28:41 +03:00
|
|
|
if (drv && drv->bdrv_aio_ioctl)
|
|
|
|
return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
|
|
|
|
return NULL;
|
2009-03-12 22:57:08 +03:00
|
|
|
}
|
2009-04-23 00:20:00 +04:00
|
|
|
|
2011-11-29 14:35:47 +04:00
|
|
|
void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
|
2011-09-06 20:58:56 +04:00
|
|
|
{
|
2011-11-29 14:35:47 +04:00
|
|
|
bs->guest_block_size = align;
|
2011-09-06 20:58:56 +04:00
|
|
|
}
|
2009-11-02 16:40:41 +03:00
|
|
|
|
2009-04-23 00:20:00 +04:00
|
|
|
void *qemu_blockalign(BlockDriverState *bs, size_t size)
|
|
|
|
{
|
2013-11-28 13:23:32 +04:00
|
|
|
return qemu_memalign(bdrv_opt_mem_align(bs), size);
|
2009-04-23 00:20:00 +04:00
|
|
|
}
|
2009-11-02 16:40:41 +03:00
|
|
|
|
2014-10-22 16:09:27 +04:00
|
|
|
void *qemu_blockalign0(BlockDriverState *bs, size_t size)
|
|
|
|
{
|
|
|
|
return memset(qemu_blockalign(bs, size), 0, size);
|
|
|
|
}
|
|
|
|
|
2014-05-20 14:24:05 +04:00
|
|
|
void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
|
|
|
|
{
|
|
|
|
size_t align = bdrv_opt_mem_align(bs);
|
|
|
|
|
|
|
|
/* Ensure that NULL is never returned on success */
|
|
|
|
assert(align > 0);
|
|
|
|
if (size == 0) {
|
|
|
|
size = align;
|
|
|
|
}
|
|
|
|
|
|
|
|
return qemu_try_memalign(align, size);
|
|
|
|
}
|
|
|
|
|
2014-10-22 16:09:27 +04:00
|
|
|
void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
|
|
|
|
{
|
|
|
|
void *mem = qemu_try_blockalign(bs, size);
|
|
|
|
|
|
|
|
if (mem) {
|
|
|
|
memset(mem, 0, size);
|
|
|
|
}
|
|
|
|
|
|
|
|
return mem;
|
|
|
|
}
|
|
|
|
|
2013-01-11 19:41:27 +04:00
|
|
|
/*
|
|
|
|
* Check if all memory in this vector is sector aligned.
|
|
|
|
*/
|
|
|
|
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
|
|
|
|
{
|
|
|
|
int i;
|
2013-11-28 13:23:32 +04:00
|
|
|
size_t alignment = bdrv_opt_mem_align(bs);
|
2013-01-11 19:41:27 +04:00
|
|
|
|
|
|
|
for (i = 0; i < qiov->niov; i++) {
|
2013-11-28 13:23:32 +04:00
|
|
|
if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
|
2013-01-11 19:41:27 +04:00
|
|
|
return false;
|
2013-12-05 16:01:46 +04:00
|
|
|
}
|
2013-11-28 13:23:32 +04:00
|
|
|
if (qiov->iov[i].iov_len % alignment) {
|
2013-12-05 16:01:46 +04:00
|
|
|
return false;
|
2013-01-11 19:41:27 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-04-16 05:34:30 +04:00
|
|
|
BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
|
|
|
|
Error **errp)
|
2009-11-02 16:40:41 +03:00
|
|
|
{
|
|
|
|
int64_t bitmap_size;
|
2013-11-13 14:29:43 +04:00
|
|
|
BdrvDirtyBitmap *bitmap;
|
2009-11-30 20:21:19 +03:00
|
|
|
|
2013-01-21 20:09:45 +04:00
|
|
|
assert((granularity & (granularity - 1)) == 0);
|
|
|
|
|
2013-11-13 14:29:43 +04:00
|
|
|
granularity >>= BDRV_SECTOR_BITS;
|
|
|
|
assert(granularity);
|
2014-06-26 15:23:22 +04:00
|
|
|
bitmap_size = bdrv_nb_sectors(bs);
|
2014-04-16 05:34:30 +04:00
|
|
|
if (bitmap_size < 0) {
|
|
|
|
error_setg_errno(errp, -bitmap_size, "could not get length of device");
|
|
|
|
errno = -bitmap_size;
|
|
|
|
return NULL;
|
|
|
|
}
|
block: Use g_new() & friends where that makes obvious sense
g_new(T, n) is neater than g_malloc(sizeof(T) * n). It's also safer,
for two reasons. One, it catches multiplication overflowing size_t.
Two, it returns T * rather than void *, which lets the compiler catch
more type errors.
Patch created with Coccinelle, with two manual changes on top:
* Add const to bdrv_iterate_format() to keep the types straight
* Convert the allocation in bdrv_drop_intermediate(), which Coccinelle
inexplicably misses
Coccinelle semantic patch:
@@
type T;
@@
-g_malloc(sizeof(T))
+g_new(T, 1)
@@
type T;
@@
-g_try_malloc(sizeof(T))
+g_try_new(T, 1)
@@
type T;
@@
-g_malloc0(sizeof(T))
+g_new0(T, 1)
@@
type T;
@@
-g_try_malloc0(sizeof(T))
+g_try_new0(T, 1)
@@
type T;
expression n;
@@
-g_malloc(sizeof(T) * (n))
+g_new(T, n)
@@
type T;
expression n;
@@
-g_try_malloc(sizeof(T) * (n))
+g_try_new(T, n)
@@
type T;
expression n;
@@
-g_malloc0(sizeof(T) * (n))
+g_new0(T, n)
@@
type T;
expression n;
@@
-g_try_malloc0(sizeof(T) * (n))
+g_try_new0(T, n)
@@
type T;
expression p, n;
@@
-g_realloc(p, sizeof(T) * (n))
+g_renew(T, p, n)
@@
type T;
expression p, n;
@@
-g_try_realloc(p, sizeof(T) * (n))
+g_try_renew(T, p, n)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-08-19 12:31:08 +04:00
|
|
|
bitmap = g_new0(BdrvDirtyBitmap, 1);
|
2013-11-13 14:29:43 +04:00
|
|
|
bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
|
|
|
|
QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
|
|
|
|
return bitmap;
|
|
|
|
}
|
|
|
|
|
|
|
|
void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
|
|
|
|
{
|
|
|
|
BdrvDirtyBitmap *bm, *next;
|
|
|
|
QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
|
|
|
|
if (bm == bitmap) {
|
|
|
|
QLIST_REMOVE(bitmap, list);
|
|
|
|
hbitmap_free(bitmap->bitmap);
|
|
|
|
g_free(bitmap);
|
|
|
|
return;
|
2009-11-30 20:21:19 +03:00
|
|
|
}
|
2009-11-02 16:40:41 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-11-13 14:29:44 +04:00
|
|
|
BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
BdrvDirtyBitmap *bm;
|
|
|
|
BlockDirtyInfoList *list = NULL;
|
|
|
|
BlockDirtyInfoList **plist = &list;
|
|
|
|
|
|
|
|
QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
|
block: Use g_new() & friends where that makes obvious sense
g_new(T, n) is neater than g_malloc(sizeof(T) * n). It's also safer,
for two reasons. One, it catches multiplication overflowing size_t.
Two, it returns T * rather than void *, which lets the compiler catch
more type errors.
Patch created with Coccinelle, with two manual changes on top:
* Add const to bdrv_iterate_format() to keep the types straight
* Convert the allocation in bdrv_drop_intermediate(), which Coccinelle
inexplicably misses
Coccinelle semantic patch:
@@
type T;
@@
-g_malloc(sizeof(T))
+g_new(T, 1)
@@
type T;
@@
-g_try_malloc(sizeof(T))
+g_try_new(T, 1)
@@
type T;
@@
-g_malloc0(sizeof(T))
+g_new0(T, 1)
@@
type T;
@@
-g_try_malloc0(sizeof(T))
+g_try_new0(T, 1)
@@
type T;
expression n;
@@
-g_malloc(sizeof(T) * (n))
+g_new(T, n)
@@
type T;
expression n;
@@
-g_try_malloc(sizeof(T) * (n))
+g_try_new(T, n)
@@
type T;
expression n;
@@
-g_malloc0(sizeof(T) * (n))
+g_new0(T, n)
@@
type T;
expression n;
@@
-g_try_malloc0(sizeof(T) * (n))
+g_try_new0(T, n)
@@
type T;
expression p, n;
@@
-g_realloc(p, sizeof(T) * (n))
+g_renew(T, p, n)
@@
type T;
expression p, n;
@@
-g_try_realloc(p, sizeof(T) * (n))
+g_try_renew(T, p, n)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-08-19 12:31:08 +04:00
|
|
|
BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
|
|
|
|
BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
|
2013-11-13 14:29:44 +04:00
|
|
|
info->count = bdrv_get_dirty_count(bs, bm);
|
|
|
|
info->granularity =
|
|
|
|
((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
|
|
|
|
entry->value = info;
|
|
|
|
*plist = entry;
|
|
|
|
plist = &entry->next;
|
|
|
|
}
|
|
|
|
|
|
|
|
return list;
|
|
|
|
}
|
|
|
|
|
2013-11-13 14:29:43 +04:00
|
|
|
int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
|
2009-11-02 16:40:41 +03:00
|
|
|
{
|
2013-11-13 14:29:43 +04:00
|
|
|
if (bitmap) {
|
|
|
|
return hbitmap_get(bitmap->bitmap, sector);
|
2009-11-02 16:40:41 +03:00
|
|
|
} else {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-11-13 14:29:43 +04:00
|
|
|
void bdrv_dirty_iter_init(BlockDriverState *bs,
|
|
|
|
BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
|
2012-10-18 18:49:18 +04:00
|
|
|
{
|
2013-11-13 14:29:43 +04:00
|
|
|
hbitmap_iter_init(hbi, bitmap->bitmap, 0);
|
2012-10-18 18:49:18 +04:00
|
|
|
}
|
|
|
|
|
2014-11-27 12:40:46 +03:00
|
|
|
void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
|
|
|
|
int64_t cur_sector, int nr_sectors)
|
|
|
|
{
|
|
|
|
hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
|
|
|
|
}
|
|
|
|
|
|
|
|
void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
|
|
|
|
int64_t cur_sector, int nr_sectors)
|
|
|
|
{
|
|
|
|
hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
|
|
|
|
int nr_sectors)
|
2012-10-18 18:49:18 +04:00
|
|
|
{
|
2013-11-13 14:29:43 +04:00
|
|
|
BdrvDirtyBitmap *bitmap;
|
|
|
|
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
|
|
|
|
hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
|
|
|
|
}
|
2012-10-18 18:49:18 +04:00
|
|
|
}
|
|
|
|
|
2014-11-27 12:40:46 +03:00
|
|
|
static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
|
|
|
|
int nr_sectors)
|
2009-11-02 16:40:41 +03:00
|
|
|
{
|
2013-11-13 14:29:43 +04:00
|
|
|
BdrvDirtyBitmap *bitmap;
|
|
|
|
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
|
|
|
|
hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
|
|
|
|
}
|
2009-11-02 16:40:41 +03:00
|
|
|
}
|
2010-01-26 11:31:48 +03:00
|
|
|
|
2013-11-13 14:29:43 +04:00
|
|
|
int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
|
2010-01-26 11:31:48 +03:00
|
|
|
{
|
2013-11-13 14:29:43 +04:00
|
|
|
return hbitmap_count(bitmap->bitmap);
|
2010-01-26 11:31:48 +03:00
|
|
|
}
|
2010-12-16 15:52:15 +03:00
|
|
|
|
2013-08-23 05:14:46 +04:00
|
|
|
/* Get a reference to bs */
|
|
|
|
void bdrv_ref(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
bs->refcnt++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Release a previously grabbed reference to bs.
|
|
|
|
* If after releasing, reference count is zero, the BlockDriverState is
|
|
|
|
* deleted. */
|
|
|
|
void bdrv_unref(BlockDriverState *bs)
|
|
|
|
{
|
2014-07-24 01:22:57 +04:00
|
|
|
if (!bs) {
|
|
|
|
return;
|
|
|
|
}
|
2013-08-23 05:14:46 +04:00
|
|
|
assert(bs->refcnt > 0);
|
|
|
|
if (--bs->refcnt == 0) {
|
|
|
|
bdrv_delete(bs);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-05-23 17:29:42 +04:00
|
|
|
struct BdrvOpBlocker {
|
|
|
|
Error *reason;
|
|
|
|
QLIST_ENTRY(BdrvOpBlocker) list;
|
|
|
|
};
|
|
|
|
|
|
|
|
bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
|
|
|
|
{
|
|
|
|
BdrvOpBlocker *blocker;
|
|
|
|
assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
|
|
|
|
if (!QLIST_EMPTY(&bs->op_blockers[op])) {
|
|
|
|
blocker = QLIST_FIRST(&bs->op_blockers[op]);
|
|
|
|
if (errp) {
|
|
|
|
error_setg(errp, "Device '%s' is busy: %s",
|
2014-10-07 15:59:11 +04:00
|
|
|
bdrv_get_device_name(bs),
|
|
|
|
error_get_pretty(blocker->reason));
|
2014-05-23 17:29:42 +04:00
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
|
|
|
|
{
|
|
|
|
BdrvOpBlocker *blocker;
|
|
|
|
assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
|
|
|
|
|
block: Use g_new() & friends where that makes obvious sense
g_new(T, n) is neater than g_malloc(sizeof(T) * n). It's also safer,
for two reasons. One, it catches multiplication overflowing size_t.
Two, it returns T * rather than void *, which lets the compiler catch
more type errors.
Patch created with Coccinelle, with two manual changes on top:
* Add const to bdrv_iterate_format() to keep the types straight
* Convert the allocation in bdrv_drop_intermediate(), which Coccinelle
inexplicably misses
Coccinelle semantic patch:
@@
type T;
@@
-g_malloc(sizeof(T))
+g_new(T, 1)
@@
type T;
@@
-g_try_malloc(sizeof(T))
+g_try_new(T, 1)
@@
type T;
@@
-g_malloc0(sizeof(T))
+g_new0(T, 1)
@@
type T;
@@
-g_try_malloc0(sizeof(T))
+g_try_new0(T, 1)
@@
type T;
expression n;
@@
-g_malloc(sizeof(T) * (n))
+g_new(T, n)
@@
type T;
expression n;
@@
-g_try_malloc(sizeof(T) * (n))
+g_try_new(T, n)
@@
type T;
expression n;
@@
-g_malloc0(sizeof(T) * (n))
+g_new0(T, n)
@@
type T;
expression n;
@@
-g_try_malloc0(sizeof(T) * (n))
+g_try_new0(T, n)
@@
type T;
expression p, n;
@@
-g_realloc(p, sizeof(T) * (n))
+g_renew(T, p, n)
@@
type T;
expression p, n;
@@
-g_try_realloc(p, sizeof(T) * (n))
+g_try_renew(T, p, n)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-08-19 12:31:08 +04:00
|
|
|
blocker = g_new0(BdrvOpBlocker, 1);
|
2014-05-23 17:29:42 +04:00
|
|
|
blocker->reason = reason;
|
|
|
|
QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
|
|
|
|
}
|
|
|
|
|
|
|
|
void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
|
|
|
|
{
|
|
|
|
BdrvOpBlocker *blocker, *next;
|
|
|
|
assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
|
|
|
|
QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
|
|
|
|
if (blocker->reason == reason) {
|
|
|
|
QLIST_REMOVE(blocker, list);
|
|
|
|
g_free(blocker);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
|
|
|
|
bdrv_op_block(bs, i, reason);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
|
|
|
|
bdrv_op_unblock(bs, i, reason);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
|
|
|
|
if (!QLIST_EMPTY(&bs->op_blockers[i])) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2011-09-27 00:43:50 +04:00
|
|
|
void bdrv_iostatus_enable(BlockDriverState *bs)
|
|
|
|
{
|
2011-10-15 00:11:23 +04:00
|
|
|
bs->iostatus_enabled = true;
|
2011-10-15 00:22:24 +04:00
|
|
|
bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
|
2011-09-27 00:43:50 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* The I/O status is only enabled if the drive explicitly
|
|
|
|
* enables it _and_ the VM is configured to stop on errors */
|
|
|
|
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
|
|
|
|
{
|
2011-10-15 00:11:23 +04:00
|
|
|
return (bs->iostatus_enabled &&
|
2012-09-28 19:22:55 +04:00
|
|
|
(bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
|
|
|
|
bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
|
|
|
|
bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
|
2011-09-27 00:43:50 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
void bdrv_iostatus_disable(BlockDriverState *bs)
|
|
|
|
{
|
2011-10-15 00:11:23 +04:00
|
|
|
bs->iostatus_enabled = false;
|
2011-09-27 00:43:50 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
void bdrv_iostatus_reset(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
if (bdrv_iostatus_is_enabled(bs)) {
|
2011-10-15 00:22:24 +04:00
|
|
|
bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
|
2012-10-18 18:49:27 +04:00
|
|
|
if (bs->job) {
|
|
|
|
block_job_iostatus_reset(bs->job);
|
|
|
|
}
|
2011-09-27 00:43:50 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
|
|
|
|
{
|
2012-09-28 19:22:57 +04:00
|
|
|
assert(bdrv_iostatus_is_enabled(bs));
|
|
|
|
if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
|
2011-10-15 00:22:24 +04:00
|
|
|
bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
|
|
|
|
BLOCK_DEVICE_IO_STATUS_FAILED;
|
2011-09-27 00:43:50 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-11-30 16:52:09 +04:00
|
|
|
void bdrv_img_create(const char *filename, const char *fmt,
|
|
|
|
const char *base_filename, const char *base_fmt,
|
2013-02-13 12:09:40 +04:00
|
|
|
char *options, uint64_t img_size, int flags,
|
|
|
|
Error **errp, bool quiet)
|
2010-12-16 15:52:15 +03:00
|
|
|
{
|
2014-06-05 13:20:51 +04:00
|
|
|
QemuOptsList *create_opts = NULL;
|
|
|
|
QemuOpts *opts = NULL;
|
|
|
|
const char *backing_fmt, *backing_file;
|
|
|
|
int64_t size;
|
2010-12-16 15:52:15 +03:00
|
|
|
BlockDriver *drv, *proto_drv;
|
2011-01-24 12:32:20 +03:00
|
|
|
BlockDriver *backing_drv = NULL;
|
2013-09-06 19:14:26 +04:00
|
|
|
Error *local_err = NULL;
|
2010-12-16 15:52:15 +03:00
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
/* Find driver and parse its options */
|
|
|
|
drv = bdrv_find_format(fmt);
|
|
|
|
if (!drv) {
|
2012-11-30 16:52:04 +04:00
|
|
|
error_setg(errp, "Unknown file format '%s'", fmt);
|
2012-11-30 16:52:09 +04:00
|
|
|
return;
|
2010-12-16 15:52:15 +03:00
|
|
|
}
|
|
|
|
|
2015-02-05 21:58:12 +03:00
|
|
|
proto_drv = bdrv_find_protocol(filename, true, errp);
|
2010-12-16 15:52:15 +03:00
|
|
|
if (!proto_drv) {
|
2012-11-30 16:52:09 +04:00
|
|
|
return;
|
2010-12-16 15:52:15 +03:00
|
|
|
}
|
|
|
|
|
2014-12-02 20:32:45 +03:00
|
|
|
if (!drv->create_opts) {
|
|
|
|
error_setg(errp, "Format driver '%s' does not support image creation",
|
|
|
|
drv->format_name);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!proto_drv->create_opts) {
|
|
|
|
error_setg(errp, "Protocol driver '%s' does not support image creation",
|
|
|
|
proto_drv->format_name);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-06-05 13:21:11 +04:00
|
|
|
create_opts = qemu_opts_append(create_opts, drv->create_opts);
|
|
|
|
create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
|
2010-12-16 15:52:15 +03:00
|
|
|
|
|
|
|
/* Create parameter list with default values */
|
2014-06-05 13:20:51 +04:00
|
|
|
opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
|
2015-02-12 18:46:36 +03:00
|
|
|
qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
|
2010-12-16 15:52:15 +03:00
|
|
|
|
|
|
|
/* Parse -o options */
|
|
|
|
if (options) {
|
2015-02-12 20:37:11 +03:00
|
|
|
qemu_opts_do_parse(opts, options, NULL, &local_err);
|
|
|
|
if (local_err) {
|
|
|
|
error_report_err(local_err);
|
|
|
|
local_err = NULL;
|
2014-06-05 13:20:51 +04:00
|
|
|
error_setg(errp, "Invalid options for file format '%s'", fmt);
|
2010-12-16 15:52:15 +03:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (base_filename) {
|
2015-02-12 19:52:20 +03:00
|
|
|
qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
|
2015-02-12 19:49:02 +03:00
|
|
|
if (local_err) {
|
2012-11-30 16:52:04 +04:00
|
|
|
error_setg(errp, "Backing file not supported for file format '%s'",
|
|
|
|
fmt);
|
2010-12-16 15:52:15 +03:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (base_fmt) {
|
2015-02-12 19:52:20 +03:00
|
|
|
qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
|
2015-02-12 19:49:02 +03:00
|
|
|
if (local_err) {
|
2012-11-30 16:52:04 +04:00
|
|
|
error_setg(errp, "Backing file format not supported for file "
|
|
|
|
"format '%s'", fmt);
|
2010-12-16 15:52:15 +03:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-06-05 13:20:51 +04:00
|
|
|
backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
|
|
|
|
if (backing_file) {
|
|
|
|
if (!strcmp(filename, backing_file)) {
|
2012-11-30 16:52:04 +04:00
|
|
|
error_setg(errp, "Error: Trying to create an image with the "
|
|
|
|
"same filename as the backing file");
|
2010-12-16 15:52:17 +03:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-06-05 13:20:51 +04:00
|
|
|
backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
|
|
|
|
if (backing_fmt) {
|
|
|
|
backing_drv = bdrv_find_format(backing_fmt);
|
2011-01-24 12:32:20 +03:00
|
|
|
if (!backing_drv) {
|
2012-11-30 16:52:04 +04:00
|
|
|
error_setg(errp, "Unknown backing file format '%s'",
|
2014-06-05 13:20:51 +04:00
|
|
|
backing_fmt);
|
2010-12-16 15:52:15 +03:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// The size for the image must always be specified, with one exception:
|
|
|
|
// If we are using a backing file, we can obtain the size from there
|
2014-06-05 13:20:51 +04:00
|
|
|
size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
|
|
|
|
if (size == -1) {
|
|
|
|
if (backing_file) {
|
2013-12-03 17:57:52 +04:00
|
|
|
BlockDriverState *bs;
|
2014-11-26 19:20:27 +03:00
|
|
|
char *full_backing = g_new0(char, PATH_MAX);
|
2014-06-26 15:23:25 +04:00
|
|
|
int64_t size;
|
2012-04-12 16:01:03 +04:00
|
|
|
int back_flags;
|
|
|
|
|
2014-11-26 19:20:27 +03:00
|
|
|
bdrv_get_full_backing_filename_from_filename(filename, backing_file,
|
|
|
|
full_backing, PATH_MAX,
|
|
|
|
&local_err);
|
|
|
|
if (local_err) {
|
|
|
|
g_free(full_backing);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2012-04-12 16:01:03 +04:00
|
|
|
/* backing files always opened read-only */
|
|
|
|
back_flags =
|
|
|
|
flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
|
2010-12-16 15:52:15 +03:00
|
|
|
|
2014-02-18 21:33:05 +04:00
|
|
|
bs = NULL;
|
2014-11-26 19:20:27 +03:00
|
|
|
ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
|
2013-09-06 19:14:26 +04:00
|
|
|
backing_drv, &local_err);
|
2014-11-26 19:20:27 +03:00
|
|
|
g_free(full_backing);
|
2010-12-16 15:52:15 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
goto out;
|
|
|
|
}
|
2014-06-26 15:23:25 +04:00
|
|
|
size = bdrv_getlength(bs);
|
|
|
|
if (size < 0) {
|
|
|
|
error_setg_errno(errp, -size, "Could not get size of '%s'",
|
|
|
|
backing_file);
|
|
|
|
bdrv_unref(bs);
|
|
|
|
goto out;
|
|
|
|
}
|
2010-12-16 15:52:15 +03:00
|
|
|
|
2015-02-12 18:46:36 +03:00
|
|
|
qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
|
2013-12-03 17:57:52 +04:00
|
|
|
|
|
|
|
bdrv_unref(bs);
|
2010-12-16 15:52:15 +03:00
|
|
|
} else {
|
2012-11-30 16:52:04 +04:00
|
|
|
error_setg(errp, "Image creation needs a size parameter");
|
2010-12-16 15:52:15 +03:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-02-13 12:09:40 +04:00
|
|
|
if (!quiet) {
|
2014-12-09 10:38:04 +03:00
|
|
|
printf("Formatting '%s', fmt=%s", filename, fmt);
|
|
|
|
qemu_opts_print(opts, " ");
|
2013-02-13 12:09:40 +04:00
|
|
|
puts("");
|
|
|
|
}
|
2014-06-05 13:20:51 +04:00
|
|
|
|
2014-06-05 13:21:11 +04:00
|
|
|
ret = bdrv_create(drv, filename, opts, &local_err);
|
2014-06-05 13:20:51 +04:00
|
|
|
|
2013-09-06 19:14:26 +04:00
|
|
|
if (ret == -EFBIG) {
|
|
|
|
/* This is generally a better message than whatever the driver would
|
|
|
|
* deliver (especially because of the cluster_size_hint), since that
|
|
|
|
* is most probably not much different from "image too large". */
|
|
|
|
const char *cluster_size_hint = "";
|
2014-06-05 13:20:51 +04:00
|
|
|
if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
|
2013-09-06 19:14:26 +04:00
|
|
|
cluster_size_hint = " (try using a larger cluster size)";
|
2010-12-16 15:52:15 +03:00
|
|
|
}
|
2013-09-06 19:14:26 +04:00
|
|
|
error_setg(errp, "The image size is too large for file format '%s'"
|
|
|
|
"%s", fmt, cluster_size_hint);
|
|
|
|
error_free(local_err);
|
|
|
|
local_err = NULL;
|
2010-12-16 15:52:15 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
2014-06-05 13:20:51 +04:00
|
|
|
qemu_opts_del(opts);
|
|
|
|
qemu_opts_free(create_opts);
|
2014-01-30 18:07:28 +04:00
|
|
|
if (local_err) {
|
2013-09-06 19:14:26 +04:00
|
|
|
error_propagate(errp, local_err);
|
|
|
|
}
|
2010-12-16 15:52:15 +03:00
|
|
|
}
|
2013-03-07 16:41:48 +04:00
|
|
|
|
|
|
|
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
|
|
|
|
{
|
2014-05-08 18:34:37 +04:00
|
|
|
return bs->aio_context;
|
|
|
|
}
|
|
|
|
|
|
|
|
void bdrv_detach_aio_context(BlockDriverState *bs)
|
|
|
|
{
|
2014-06-20 23:57:33 +04:00
|
|
|
BdrvAioNotifier *baf;
|
|
|
|
|
2014-05-08 18:34:37 +04:00
|
|
|
if (!bs->drv) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-06-20 23:57:33 +04:00
|
|
|
QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
|
|
|
|
baf->detach_aio_context(baf->opaque);
|
|
|
|
}
|
|
|
|
|
2014-05-14 18:22:45 +04:00
|
|
|
if (bs->io_limits_enabled) {
|
|
|
|
throttle_detach_aio_context(&bs->throttle_state);
|
|
|
|
}
|
2014-05-08 18:34:37 +04:00
|
|
|
if (bs->drv->bdrv_detach_aio_context) {
|
|
|
|
bs->drv->bdrv_detach_aio_context(bs);
|
|
|
|
}
|
|
|
|
if (bs->file) {
|
|
|
|
bdrv_detach_aio_context(bs->file);
|
|
|
|
}
|
|
|
|
if (bs->backing_hd) {
|
|
|
|
bdrv_detach_aio_context(bs->backing_hd);
|
|
|
|
}
|
|
|
|
|
|
|
|
bs->aio_context = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
void bdrv_attach_aio_context(BlockDriverState *bs,
|
|
|
|
AioContext *new_context)
|
|
|
|
{
|
2014-06-20 23:57:33 +04:00
|
|
|
BdrvAioNotifier *ban;
|
|
|
|
|
2014-05-08 18:34:37 +04:00
|
|
|
if (!bs->drv) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
bs->aio_context = new_context;
|
|
|
|
|
|
|
|
if (bs->backing_hd) {
|
|
|
|
bdrv_attach_aio_context(bs->backing_hd, new_context);
|
|
|
|
}
|
|
|
|
if (bs->file) {
|
|
|
|
bdrv_attach_aio_context(bs->file, new_context);
|
|
|
|
}
|
|
|
|
if (bs->drv->bdrv_attach_aio_context) {
|
|
|
|
bs->drv->bdrv_attach_aio_context(bs, new_context);
|
|
|
|
}
|
2014-05-14 18:22:45 +04:00
|
|
|
if (bs->io_limits_enabled) {
|
|
|
|
throttle_attach_aio_context(&bs->throttle_state, new_context);
|
|
|
|
}
|
2014-06-20 23:57:33 +04:00
|
|
|
|
|
|
|
QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
|
|
|
|
ban->attached_aio_context(new_context, ban->opaque);
|
|
|
|
}
|
2014-05-08 18:34:37 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
|
|
|
|
{
|
|
|
|
bdrv_drain_all(); /* ensure there are no in-flight requests */
|
|
|
|
|
|
|
|
bdrv_detach_aio_context(bs);
|
|
|
|
|
|
|
|
/* This function executes in the old AioContext so acquire the new one in
|
|
|
|
* case it runs in a different thread.
|
|
|
|
*/
|
|
|
|
aio_context_acquire(new_context);
|
|
|
|
bdrv_attach_aio_context(bs, new_context);
|
|
|
|
aio_context_release(new_context);
|
2013-03-07 16:41:48 +04:00
|
|
|
}
|
2013-06-24 19:13:10 +04:00
|
|
|
|
2014-06-20 23:57:33 +04:00
|
|
|
void bdrv_add_aio_context_notifier(BlockDriverState *bs,
|
|
|
|
void (*attached_aio_context)(AioContext *new_context, void *opaque),
|
|
|
|
void (*detach_aio_context)(void *opaque), void *opaque)
|
|
|
|
{
|
|
|
|
BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
|
|
|
|
*ban = (BdrvAioNotifier){
|
|
|
|
.attached_aio_context = attached_aio_context,
|
|
|
|
.detach_aio_context = detach_aio_context,
|
|
|
|
.opaque = opaque
|
|
|
|
};
|
|
|
|
|
|
|
|
QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
|
|
|
|
}
|
|
|
|
|
|
|
|
void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
|
|
|
|
void (*attached_aio_context)(AioContext *,
|
|
|
|
void *),
|
|
|
|
void (*detach_aio_context)(void *),
|
|
|
|
void *opaque)
|
|
|
|
{
|
|
|
|
BdrvAioNotifier *ban, *ban_next;
|
|
|
|
|
|
|
|
QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
|
|
|
|
if (ban->attached_aio_context == attached_aio_context &&
|
|
|
|
ban->detach_aio_context == detach_aio_context &&
|
|
|
|
ban->opaque == opaque)
|
|
|
|
{
|
|
|
|
QLIST_REMOVE(ban, list);
|
|
|
|
g_free(ban);
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
2013-06-24 19:13:10 +04:00
|
|
|
void bdrv_add_before_write_notifier(BlockDriverState *bs,
|
|
|
|
NotifierWithReturn *notifier)
|
|
|
|
{
|
|
|
|
notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
|
|
|
|
}
|
2013-09-03 12:09:50 +04:00
|
|
|
|
2014-10-27 13:12:50 +03:00
|
|
|
int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
|
|
|
|
BlockDriverAmendStatusCB *status_cb)
|
2013-09-03 12:09:50 +04:00
|
|
|
{
|
2014-06-05 13:21:11 +04:00
|
|
|
if (!bs->drv->bdrv_amend_options) {
|
2013-09-03 12:09:50 +04:00
|
|
|
return -ENOTSUP;
|
|
|
|
}
|
2014-10-27 13:12:50 +03:00
|
|
|
return bs->drv->bdrv_amend_options(bs, opts, status_cb);
|
2013-09-03 12:09:50 +04:00
|
|
|
}
|
2013-10-02 16:33:48 +04:00
|
|
|
|
2014-03-03 22:11:34 +04:00
|
|
|
/* This function will be called by the bdrv_recurse_is_first_non_filter method
|
|
|
|
* of block filter and by bdrv_is_first_non_filter.
|
|
|
|
* It is used to test if the given bs is the candidate or recurse more in the
|
|
|
|
* node graph.
|
2014-01-24 00:31:36 +04:00
|
|
|
*/
|
2014-03-03 22:11:34 +04:00
|
|
|
bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
|
2014-01-24 00:31:36 +04:00
|
|
|
BlockDriverState *candidate)
|
2013-10-02 16:33:48 +04:00
|
|
|
{
|
2014-03-03 22:11:34 +04:00
|
|
|
/* return false if basic checks fails */
|
|
|
|
if (!bs || !bs->drv) {
|
2014-01-24 00:31:36 +04:00
|
|
|
return false;
|
2013-10-02 16:33:48 +04:00
|
|
|
}
|
|
|
|
|
2014-03-03 22:11:34 +04:00
|
|
|
/* the code reached a non block filter driver -> check if the bs is
|
|
|
|
* the same as the candidate. It's the recursion termination condition.
|
|
|
|
*/
|
|
|
|
if (!bs->drv->is_filter) {
|
|
|
|
return bs == candidate;
|
2014-01-24 00:31:36 +04:00
|
|
|
}
|
2014-03-03 22:11:34 +04:00
|
|
|
/* Down this path the driver is a block filter driver */
|
2014-01-24 00:31:36 +04:00
|
|
|
|
2014-03-03 22:11:34 +04:00
|
|
|
/* If the block filter recursion method is defined use it to recurse down
|
|
|
|
* the node graph.
|
|
|
|
*/
|
|
|
|
if (bs->drv->bdrv_recurse_is_first_non_filter) {
|
2014-01-24 00:31:36 +04:00
|
|
|
return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
|
2013-10-02 16:33:48 +04:00
|
|
|
}
|
|
|
|
|
2014-03-03 22:11:34 +04:00
|
|
|
/* the driver is a block filter but don't allow to recurse -> return false
|
|
|
|
*/
|
|
|
|
return false;
|
2013-10-02 16:33:48 +04:00
|
|
|
}
|
|
|
|
|
2014-01-24 00:31:36 +04:00
|
|
|
/* This function checks if the candidate is the first non filter bs down it's
|
|
|
|
* bs chain. Since we don't have pointers to parents it explore all bs chains
|
|
|
|
* from the top. Some filters can choose not to pass down the recursion.
|
|
|
|
*/
|
|
|
|
bool bdrv_is_first_non_filter(BlockDriverState *candidate)
|
2013-10-02 16:33:48 +04:00
|
|
|
{
|
2014-01-24 00:31:36 +04:00
|
|
|
BlockDriverState *bs;
|
|
|
|
|
|
|
|
/* walk down the bs forest recursively */
|
|
|
|
QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
|
|
|
|
bool perm;
|
|
|
|
|
2014-03-03 22:11:34 +04:00
|
|
|
/* try to recurse in this top level bs */
|
2014-02-04 14:45:31 +04:00
|
|
|
perm = bdrv_recurse_is_first_non_filter(bs, candidate);
|
2014-01-24 00:31:36 +04:00
|
|
|
|
|
|
|
/* candidate is the first non filter */
|
|
|
|
if (perm) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
2013-10-02 16:33:48 +04:00
|
|
|
}
|
2014-06-27 20:25:25 +04:00
|
|
|
|
|
|
|
BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
|
|
|
|
{
|
|
|
|
BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
|
2014-10-21 15:03:58 +04:00
|
|
|
AioContext *aio_context;
|
|
|
|
|
2014-06-27 20:25:25 +04:00
|
|
|
if (!to_replace_bs) {
|
|
|
|
error_setg(errp, "Node name '%s' not found", node_name);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2014-10-21 15:03:58 +04:00
|
|
|
aio_context = bdrv_get_aio_context(to_replace_bs);
|
|
|
|
aio_context_acquire(aio_context);
|
|
|
|
|
2014-06-27 20:25:25 +04:00
|
|
|
if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
|
2014-10-21 15:03:58 +04:00
|
|
|
to_replace_bs = NULL;
|
|
|
|
goto out;
|
2014-06-27 20:25:25 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* We don't want arbitrary node of the BDS chain to be replaced only the top
|
|
|
|
* most non filter in order to prevent data corruption.
|
|
|
|
* Another benefit is that this tests exclude backing files which are
|
|
|
|
* blocked by the backing blockers.
|
|
|
|
*/
|
|
|
|
if (!bdrv_is_first_non_filter(to_replace_bs)) {
|
|
|
|
error_setg(errp, "Only top most non filter can be replaced");
|
2014-10-21 15:03:58 +04:00
|
|
|
to_replace_bs = NULL;
|
|
|
|
goto out;
|
2014-06-27 20:25:25 +04:00
|
|
|
}
|
|
|
|
|
2014-10-21 15:03:58 +04:00
|
|
|
out:
|
|
|
|
aio_context_release(aio_context);
|
2014-06-27 20:25:25 +04:00
|
|
|
return to_replace_bs;
|
|
|
|
}
|
2014-07-04 14:04:33 +04:00
|
|
|
|
|
|
|
void bdrv_io_plug(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
|
|
|
if (drv && drv->bdrv_io_plug) {
|
|
|
|
drv->bdrv_io_plug(bs);
|
|
|
|
} else if (bs->file) {
|
|
|
|
bdrv_io_plug(bs->file);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void bdrv_io_unplug(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
|
|
|
if (drv && drv->bdrv_io_unplug) {
|
|
|
|
drv->bdrv_io_unplug(bs);
|
|
|
|
} else if (bs->file) {
|
|
|
|
bdrv_io_unplug(bs->file);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void bdrv_flush_io_queue(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
|
|
|
if (drv && drv->bdrv_flush_io_queue) {
|
|
|
|
drv->bdrv_flush_io_queue(bs);
|
|
|
|
} else if (bs->file) {
|
|
|
|
bdrv_flush_io_queue(bs->file);
|
|
|
|
}
|
|
|
|
}
|
2014-07-18 22:24:56 +04:00
|
|
|
|
|
|
|
static bool append_open_options(QDict *d, BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
const QDictEntry *entry;
|
|
|
|
bool found_any = false;
|
|
|
|
|
|
|
|
for (entry = qdict_first(bs->options); entry;
|
|
|
|
entry = qdict_next(bs->options, entry))
|
|
|
|
{
|
|
|
|
/* Only take options for this level and exclude all non-driver-specific
|
|
|
|
* options */
|
|
|
|
if (!strchr(qdict_entry_key(entry), '.') &&
|
|
|
|
strcmp(qdict_entry_key(entry), "node-name"))
|
|
|
|
{
|
|
|
|
qobject_incref(qdict_entry_value(entry));
|
|
|
|
qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
|
|
|
|
found_any = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return found_any;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Updates the following BDS fields:
|
|
|
|
* - exact_filename: A filename which may be used for opening a block device
|
|
|
|
* which (mostly) equals the given BDS (even without any
|
|
|
|
* other options; so reading and writing must return the same
|
|
|
|
* results, but caching etc. may be different)
|
|
|
|
* - full_open_options: Options which, when given when opening a block device
|
|
|
|
* (without a filename), result in a BDS (mostly)
|
|
|
|
* equalling the given one
|
|
|
|
* - filename: If exact_filename is set, it is copied here. Otherwise,
|
|
|
|
* full_open_options is converted to a JSON object, prefixed with
|
|
|
|
* "json:" (for use through the JSON pseudo protocol) and put here.
|
|
|
|
*/
|
|
|
|
void bdrv_refresh_filename(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
BlockDriver *drv = bs->drv;
|
|
|
|
QDict *opts;
|
|
|
|
|
|
|
|
if (!drv) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* This BDS's file name will most probably depend on its file's name, so
|
|
|
|
* refresh that first */
|
|
|
|
if (bs->file) {
|
|
|
|
bdrv_refresh_filename(bs->file);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (drv->bdrv_refresh_filename) {
|
|
|
|
/* Obsolete information is of no use here, so drop the old file name
|
|
|
|
* information before refreshing it */
|
|
|
|
bs->exact_filename[0] = '\0';
|
|
|
|
if (bs->full_open_options) {
|
|
|
|
QDECREF(bs->full_open_options);
|
|
|
|
bs->full_open_options = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
drv->bdrv_refresh_filename(bs);
|
|
|
|
} else if (bs->file) {
|
|
|
|
/* Try to reconstruct valid information from the underlying file */
|
|
|
|
bool has_open_options;
|
|
|
|
|
|
|
|
bs->exact_filename[0] = '\0';
|
|
|
|
if (bs->full_open_options) {
|
|
|
|
QDECREF(bs->full_open_options);
|
|
|
|
bs->full_open_options = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
opts = qdict_new();
|
|
|
|
has_open_options = append_open_options(opts, bs);
|
|
|
|
|
|
|
|
/* If no specific options have been given for this BDS, the filename of
|
|
|
|
* the underlying file should suffice for this one as well */
|
|
|
|
if (bs->file->exact_filename[0] && !has_open_options) {
|
|
|
|
strcpy(bs->exact_filename, bs->file->exact_filename);
|
|
|
|
}
|
|
|
|
/* Reconstructing the full options QDict is simple for most format block
|
|
|
|
* drivers, as long as the full options are known for the underlying
|
|
|
|
* file BDS. The full options QDict of that file BDS should somehow
|
|
|
|
* contain a representation of the filename, therefore the following
|
|
|
|
* suffices without querying the (exact_)filename of this BDS. */
|
|
|
|
if (bs->file->full_open_options) {
|
|
|
|
qdict_put_obj(opts, "driver",
|
|
|
|
QOBJECT(qstring_from_str(drv->format_name)));
|
|
|
|
QINCREF(bs->file->full_open_options);
|
|
|
|
qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
|
|
|
|
|
|
|
|
bs->full_open_options = opts;
|
|
|
|
} else {
|
|
|
|
QDECREF(opts);
|
|
|
|
}
|
|
|
|
} else if (!bs->full_open_options && qdict_size(bs->options)) {
|
|
|
|
/* There is no underlying file BDS (at least referenced by BDS.file),
|
|
|
|
* so the full options QDict should be equal to the options given
|
|
|
|
* specifically for this block device when it was opened (plus the
|
|
|
|
* driver specification).
|
|
|
|
* Because those options don't change, there is no need to update
|
|
|
|
* full_open_options when it's already set. */
|
|
|
|
|
|
|
|
opts = qdict_new();
|
|
|
|
append_open_options(opts, bs);
|
|
|
|
qdict_put_obj(opts, "driver",
|
|
|
|
QOBJECT(qstring_from_str(drv->format_name)));
|
|
|
|
|
|
|
|
if (bs->exact_filename[0]) {
|
|
|
|
/* This may not work for all block protocol drivers (some may
|
|
|
|
* require this filename to be parsed), but we have to find some
|
|
|
|
* default solution here, so just include it. If some block driver
|
|
|
|
* does not support pure options without any filename at all or
|
|
|
|
* needs some special format of the options QDict, it needs to
|
|
|
|
* implement the driver-specific bdrv_refresh_filename() function.
|
|
|
|
*/
|
|
|
|
qdict_put_obj(opts, "filename",
|
|
|
|
QOBJECT(qstring_from_str(bs->exact_filename)));
|
|
|
|
}
|
|
|
|
|
|
|
|
bs->full_open_options = opts;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bs->exact_filename[0]) {
|
|
|
|
pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
|
|
|
|
} else if (bs->full_open_options) {
|
|
|
|
QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
|
|
|
|
snprintf(bs->filename, sizeof(bs->filename), "json:%s",
|
|
|
|
qstring_get_str(json));
|
|
|
|
QDECREF(json);
|
|
|
|
}
|
|
|
|
}
|
2014-09-05 17:46:18 +04:00
|
|
|
|
|
|
|
/* This accessor function purpose is to allow the device models to access the
|
|
|
|
* BlockAcctStats structure embedded inside a BlockDriverState without being
|
|
|
|
* aware of the BlockDriverState structure layout.
|
|
|
|
* It will go away when the BlockAcctStats structure will be moved inside
|
|
|
|
* the device models.
|
|
|
|
*/
|
|
|
|
BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
|
|
|
|
{
|
|
|
|
return &bs->stats;
|
|
|
|
}
|