2004-08-02 01:59:26 +04:00
|
|
|
/*
|
|
|
|
* QEMU System Emulator block driver
|
2007-09-17 01:08:06 +04:00
|
|
|
*
|
2004-08-02 01:59:26 +04:00
|
|
|
* Copyright (c) 2003 Fabrice Bellard
|
2007-09-17 01:08:06 +04:00
|
|
|
*
|
2004-08-02 01:59:26 +04:00
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
|
|
* in the Software without restriction, including without limitation the rights
|
|
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
|
|
* furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice shall be included in
|
|
|
|
* all copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
* THE SOFTWARE.
|
|
|
|
*/
|
|
|
|
#ifndef BLOCK_INT_H
|
|
|
|
#define BLOCK_INT_H
|
|
|
|
|
2012-12-17 21:19:44 +04:00
|
|
|
#include "block/block.h"
|
2012-12-17 21:20:00 +04:00
|
|
|
#include "qemu/option.h"
|
|
|
|
#include "qemu/queue.h"
|
2012-12-17 21:19:44 +04:00
|
|
|
#include "block/coroutine.h"
|
2012-12-17 21:20:00 +04:00
|
|
|
#include "qemu/timer.h"
|
2011-09-22 00:16:47 +04:00
|
|
|
#include "qapi-types.h"
|
2012-12-17 21:19:43 +04:00
|
|
|
#include "qapi/qmp/qerror.h"
|
2012-12-17 21:19:49 +04:00
|
|
|
#include "monitor/monitor.h"
|
2013-01-21 20:09:41 +04:00
|
|
|
#include "qemu/hbitmap.h"
|
2013-05-25 07:09:44 +04:00
|
|
|
#include "block/snapshot.h"
|
2013-08-21 19:02:47 +04:00
|
|
|
#include "qemu/main-loop.h"
|
2013-09-02 16:14:39 +04:00
|
|
|
#include "qemu/throttle.h"
|
2007-11-11 05:51:17 +03:00
|
|
|
|
2012-07-27 12:05:22 +04:00
|
|
|
/*
 * Block driver flag values.  Each value is a distinct bit (1, 4, 8); the
 * value 2 is not defined in this header.
 */
#define BLOCK_FLAG_ENCRYPT          1
#define BLOCK_FLAG_COMPAT6          4
#define BLOCK_FLAG_LAZY_REFCOUNTS   8
|
2007-09-17 01:59:02 +04:00
|
|
|
|
2012-07-27 12:05:22 +04:00
|
|
|
/*
 * Option name strings.
 * NOTE(review): presumably these are the names used in a driver's
 * create_options list -- confirm against the drivers.
 */
#define BLOCK_OPT_SIZE              "size"
#define BLOCK_OPT_ENCRYPT           "encryption"
#define BLOCK_OPT_COMPAT6           "compat6"
#define BLOCK_OPT_BACKING_FILE      "backing_file"
#define BLOCK_OPT_BACKING_FMT       "backing_fmt"
#define BLOCK_OPT_CLUSTER_SIZE      "cluster_size"
#define BLOCK_OPT_TABLE_SIZE        "table_size"
#define BLOCK_OPT_PREALLOC          "preallocation"
#define BLOCK_OPT_SUBFMT            "subformat"
#define BLOCK_OPT_COMPAT_LEVEL      "compat"
#define BLOCK_OPT_LAZY_REFCOUNTS    "lazy_refcounts"
#define BLOCK_OPT_ADAPTER_TYPE      "adapter_type"
#define BLOCK_OPT_REDUNDANCY        "redundancy"
|
2009-05-18 18:42:10 +04:00
|
|
|
|
2013-06-24 19:13:10 +04:00
|
|
|
typedef struct BdrvTrackedRequest {
|
|
|
|
BlockDriverState *bs;
|
|
|
|
int64_t sector_num;
|
|
|
|
int nb_sectors;
|
|
|
|
bool is_write;
|
|
|
|
QLIST_ENTRY(BdrvTrackedRequest) list;
|
|
|
|
Coroutine *co; /* owner, used for deadlock detection */
|
|
|
|
CoQueue wait_queue; /* coroutines blocked on this request */
|
|
|
|
} BdrvTrackedRequest;
|
|
|
|
|
2004-08-02 01:59:26 +04:00
|
|
|
struct BlockDriver {
|
|
|
|
const char *format_name;
|
|
|
|
int instance_size;
|
2013-10-02 16:33:48 +04:00
|
|
|
|
2014-01-24 00:31:36 +04:00
|
|
|
/* this table of boolean contains authorizations for the block operations */
|
|
|
|
bool authorizations[BS_AUTHORIZATION_COUNT];
|
|
|
|
/* for snapshots complex block filter like Quorum can implement the
|
|
|
|
* following recursive callback instead of BS_IS_A_FILTER.
|
|
|
|
* It's purpose is to recurse on the filter children while calling
|
|
|
|
* bdrv_recurse_is_first_non_filter on them.
|
|
|
|
* For a sample implementation look in the future Quorum block filter.
|
2013-10-02 16:33:48 +04:00
|
|
|
*/
|
2014-01-24 00:31:36 +04:00
|
|
|
bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs,
|
|
|
|
BlockDriverState *candidate);
|
2013-10-02 16:33:48 +04:00
|
|
|
|
2004-08-02 01:59:26 +04:00
|
|
|
int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
|
2009-06-15 16:04:22 +04:00
|
|
|
int (*bdrv_probe_device)(const char *filename);
|
2013-03-18 19:40:51 +04:00
|
|
|
|
|
|
|
/* Any driver implementing this callback is expected to be able to handle
|
|
|
|
* NULL file names in its .bdrv_open() implementation */
|
2013-03-15 21:47:22 +04:00
|
|
|
void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp);
|
2013-09-24 19:07:04 +04:00
|
|
|
/* Drivers not implementing bdrv_parse_filename nor bdrv_open should have
|
|
|
|
* this field set to true, except ones that are defined only by their
|
|
|
|
* child's bs.
|
|
|
|
* An example of the last type will be the quorum block driver.
|
|
|
|
*/
|
|
|
|
bool bdrv_needs_filename;
|
2012-09-20 23:13:19 +04:00
|
|
|
|
|
|
|
/* For handling image reopen for split or non-split files */
|
|
|
|
int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
|
|
|
|
BlockReopenQueue *queue, Error **errp);
|
|
|
|
void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
|
|
|
|
void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
|
|
|
|
|
2013-09-05 16:22:29 +04:00
|
|
|
int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags,
|
|
|
|
Error **errp);
|
|
|
|
int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
|
|
|
|
Error **errp);
|
2007-09-17 01:08:06 +04:00
|
|
|
int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
|
2004-08-02 01:59:26 +04:00
|
|
|
uint8_t *buf, int nb_sectors);
|
2007-09-17 01:08:06 +04:00
|
|
|
int (*bdrv_write)(BlockDriverState *bs, int64_t sector_num,
|
2004-08-02 01:59:26 +04:00
|
|
|
const uint8_t *buf, int nb_sectors);
|
2004-09-18 23:32:11 +04:00
|
|
|
void (*bdrv_close)(BlockDriverState *bs);
|
2012-05-08 18:51:41 +04:00
|
|
|
void (*bdrv_rebind)(BlockDriverState *bs);
|
2013-09-05 16:26:05 +04:00
|
|
|
int (*bdrv_create)(const char *filename, QEMUOptionParameter *options,
|
|
|
|
Error **errp);
|
2004-08-02 01:59:26 +04:00
|
|
|
int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
|
2005-12-18 21:28:15 +03:00
|
|
|
int (*bdrv_make_empty)(BlockDriverState *bs);
|
2006-08-01 20:21:11 +04:00
|
|
|
/* aio */
|
2009-04-07 22:43:24 +04:00
|
|
|
BlockDriverAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
|
|
|
|
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
2006-08-07 06:38:06 +04:00
|
|
|
BlockDriverCompletionFunc *cb, void *opaque);
|
2009-04-07 22:43:24 +04:00
|
|
|
BlockDriverAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
|
|
|
|
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
2006-08-07 06:38:06 +04:00
|
|
|
BlockDriverCompletionFunc *cb, void *opaque);
|
2009-09-04 21:01:49 +04:00
|
|
|
BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
|
|
|
|
BlockDriverCompletionFunc *cb, void *opaque);
|
2011-10-17 14:32:14 +04:00
|
|
|
BlockDriverAIOCB *(*bdrv_aio_discard)(BlockDriverState *bs,
|
|
|
|
int64_t sector_num, int nb_sectors,
|
|
|
|
BlockDriverCompletionFunc *cb, void *opaque);
|
2006-08-01 20:21:11 +04:00
|
|
|
|
2011-07-14 19:27:13 +04:00
|
|
|
int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
|
|
|
|
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
|
|
|
|
int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
|
|
|
|
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
|
2012-02-07 17:27:25 +04:00
|
|
|
/*
|
|
|
|
* Efficiently zero a region of the disk image. Typically an image format
|
|
|
|
* would use a compact metadata representation to implement this. This
|
|
|
|
* function pointer may be NULL and .bdrv_co_writev() will be called
|
|
|
|
* instead.
|
|
|
|
*/
|
|
|
|
int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs,
|
2013-10-24 14:06:51 +04:00
|
|
|
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
|
2011-10-17 14:32:14 +04:00
|
|
|
int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
|
|
|
|
int64_t sector_num, int nb_sectors);
|
2013-09-04 21:00:28 +04:00
|
|
|
int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
|
2011-11-14 16:44:19 +04:00
|
|
|
int64_t sector_num, int nb_sectors, int *pnum);
|
2011-07-14 19:27:13 +04:00
|
|
|
|
2011-11-15 01:09:45 +04:00
|
|
|
/*
|
|
|
|
* Invalidate any cached meta-data.
|
|
|
|
*/
|
|
|
|
void (*bdrv_invalidate_cache)(BlockDriverState *bs);
|
|
|
|
|
2011-11-10 20:25:44 +04:00
|
|
|
/*
|
|
|
|
* Flushes all data that was already written to the OS all the way down to
|
|
|
|
* the disk (for example raw-posix calls fsync()).
|
|
|
|
*/
|
|
|
|
int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
|
|
|
|
|
2011-11-10 21:10:11 +04:00
|
|
|
/*
|
|
|
|
* Flushes all internal caches to the OS. The data may still sit in a
|
|
|
|
* writeback cache of the host OS, but it will survive a crash of the qemu
|
|
|
|
* process.
|
|
|
|
*/
|
|
|
|
int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
|
|
|
|
|
2006-08-01 20:21:11 +04:00
|
|
|
const char *protocol_name;
|
|
|
|
int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset);
|
block: Avoid unecessary drv->bdrv_getlength() calls
The block layer generally keeps the size of an image cached in
bs->total_sectors so that it doesn't have to perform expensive
operations to get the size whenever it needs it.
This doesn't work however when using a backend that can change its size
without qemu being aware of it, i.e. passthrough of removable media like
CD-ROMs or floppy disks. For this reason, the caching is disabled when a
removable device is used.
It is obvious that checking whether the _guest_ device has removable
media isn't the right thing to do when we want to know whether the size
of the host backend can change. To make things worse, non-top-level
BlockDriverStates never have any device attached, which makes qemu
assume they are removable, so drv->bdrv_getlength() is always called on
the protocol layer. In the case of raw-posix, this causes unnecessary
lseek() system calls, which turned out to be rather expensive.
This patch completely changes the logic and disables bs->total_sectors
caching only for certain block driver types, for which a size change is
expected: host_cdrom and host_floppy on POSIX, host_device on win32; also
the raw format in case it sits on top of one of these protocols, but in
the common case the nested bdrv_getlength() call on the protocol driver
will use the cache again and avoid an expensive drv->bdrv_getlength()
call.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2013-10-29 15:18:58 +04:00
|
|
|
|
2006-08-01 20:21:11 +04:00
|
|
|
int64_t (*bdrv_getlength)(BlockDriverState *bs);
|
block: Avoid unecessary drv->bdrv_getlength() calls
The block layer generally keeps the size of an image cached in
bs->total_sectors so that it doesn't have to perform expensive
operations to get the size whenever it needs it.
This doesn't work however when using a backend that can change its size
without qemu being aware of it, i.e. passthrough of removable media like
CD-ROMs or floppy disks. For this reason, the caching is disabled when a
removable device is used.
It is obvious that checking whether the _guest_ device has removable
media isn't the right thing to do when we want to know whether the size
of the host backend can change. To make things worse, non-top-level
BlockDriverStates never have any device attached, which makes qemu
assume they are removable, so drv->bdrv_getlength() is always called on
the protocol layer. In the case of raw-posix, this causes unnecessary
lseek() system calls, which turned out to be rather expensive.
This patch completely changes the logic and disables bs->total_sectors
caching only for certain block driver types, for which a size change is
expected: host_cdrom and host_floppy on POSIX, host_device on win32; also
the raw format in case it sits on top of one of these protocols, but in
the common case the nested bdrv_getlength() call on the protocol driver
will use the cache again and avoid an expensive drv->bdrv_getlength()
call.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2013-10-29 15:18:58 +04:00
|
|
|
bool has_variable_length;
|
2011-07-12 15:56:39 +04:00
|
|
|
int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
|
block: Avoid unecessary drv->bdrv_getlength() calls
The block layer generally keeps the size of an image cached in
bs->total_sectors so that it doesn't have to perform expensive
operations to get the size whenever it needs it.
This doesn't work however when using a backend that can change its size
without qemu being aware of it, i.e. passthrough of removable media like
CD-ROMs or floppy disks. For this reason, the caching is disabled when a
removable device is used.
It is obvious that checking whether the _guest_ device has removable
media isn't the right thing to do when we want to know whether the size
of the host backend can change. To make things worse, non-top-level
BlockDriverStates never have any device attached, which makes qemu
assume they are removable, so drv->bdrv_getlength() is always called on
the protocol layer. In the case of raw-posix, this causes unnecessary
lseek() system calls, which turned out to be rather expensive.
This patch completely changes the logic and disables bs->total_sectors
caching only for certain block driver types, for which a size change is
expected: host_cdrom and host_floppy on POSIX, host_device on win32; also
the raw format in case it sits on top of one of these protocols, but in
the common case the nested bdrv_getlength() call on the protocol driver
will use the cache again and avoid an expensive drv->bdrv_getlength()
call.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2013-10-29 15:18:58 +04:00
|
|
|
|
2007-09-17 01:08:06 +04:00
|
|
|
int (*bdrv_write_compressed)(BlockDriverState *bs, int64_t sector_num,
|
2006-08-06 01:31:00 +04:00
|
|
|
const uint8_t *buf, int nb_sectors);
|
|
|
|
|
2007-09-17 01:08:06 +04:00
|
|
|
int (*bdrv_snapshot_create)(BlockDriverState *bs,
|
2006-08-06 01:31:00 +04:00
|
|
|
QEMUSnapshotInfo *sn_info);
|
2007-09-17 01:08:06 +04:00
|
|
|
int (*bdrv_snapshot_goto)(BlockDriverState *bs,
|
2006-08-06 01:31:00 +04:00
|
|
|
const char *snapshot_id);
|
snapshot: distinguish id and name in snapshot delete
Snapshot creation actually already distinguish id and name since it take
a structured parameter *sn, but delete can't. Later an accurate delete
is needed in qmp_transaction abort and blockdev-snapshot-delete-sync,
so change its prototype. Also *errp is added to tip error, but return
value is kepted to let caller check what kind of error happens. Existing
caller for it are savevm, delvm and qemu-img, they are not impacted by
introducing a new function bdrv_snapshot_delete_by_id_or_name(), which
check the return value and do the operation again.
Before this patch:
For qcow2, it search id first then name to find the one to delete.
For rbd, it search name.
For sheepdog, it does nothing.
After this patch:
For qcow2, logic is the same by call it twice in caller.
For rbd, it always fails in delete with id, but still search for name
in second try, no change to user.
Some code for *errp is based on Pavel's patch.
Signed-off-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>
Signed-off-by: Pavel Hrdina <phrdina@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2013-09-11 10:04:33 +04:00
|
|
|
int (*bdrv_snapshot_delete)(BlockDriverState *bs,
|
|
|
|
const char *snapshot_id,
|
|
|
|
const char *name,
|
|
|
|
Error **errp);
|
2007-09-17 01:08:06 +04:00
|
|
|
int (*bdrv_snapshot_list)(BlockDriverState *bs,
|
2006-08-06 01:31:00 +04:00
|
|
|
QEMUSnapshotInfo **psn_info);
|
2010-09-22 06:58:41 +04:00
|
|
|
int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
|
2013-12-04 13:10:54 +04:00
|
|
|
const char *snapshot_id,
|
|
|
|
const char *name,
|
|
|
|
Error **errp);
|
2006-08-06 01:31:00 +04:00
|
|
|
int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
|
2013-10-09 12:46:16 +04:00
|
|
|
ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs);
|
2006-08-01 20:21:11 +04:00
|
|
|
|
2013-04-05 23:27:53 +04:00
|
|
|
int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov,
|
|
|
|
int64_t pos);
|
2009-07-11 01:11:57 +04:00
|
|
|
int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf,
|
|
|
|
int64_t pos, int size);
|
2009-04-05 23:10:55 +04:00
|
|
|
|
2010-01-12 14:55:17 +03:00
|
|
|
int (*bdrv_change_backing_file)(BlockDriverState *bs,
|
|
|
|
const char *backing_file, const char *backing_fmt);
|
|
|
|
|
2006-08-19 15:45:59 +04:00
|
|
|
/* removable device specific */
|
|
|
|
int (*bdrv_is_inserted)(BlockDriverState *bs);
|
|
|
|
int (*bdrv_media_changed)(BlockDriverState *bs);
|
2012-02-03 22:24:53 +04:00
|
|
|
void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
|
2011-09-06 20:58:47 +04:00
|
|
|
void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
|
2007-09-17 12:09:54 +04:00
|
|
|
|
2007-12-24 19:10:43 +03:00
|
|
|
/* to control generic scsi devices */
|
|
|
|
int (*bdrv_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf);
|
2009-03-28 20:28:41 +03:00
|
|
|
BlockDriverAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
|
|
|
|
unsigned long int req, void *buf,
|
|
|
|
BlockDriverCompletionFunc *cb, void *opaque);
|
2007-12-24 19:10:43 +03:00
|
|
|
|
2009-05-18 18:42:10 +04:00
|
|
|
/* List of options for creating images, terminated by name == NULL */
|
|
|
|
QEMUOptionParameter *create_options;
|
|
|
|
|
2009-03-28 20:55:10 +03:00
|
|
|
|
2010-06-29 14:37:54 +04:00
|
|
|
/*
|
|
|
|
* Returns 0 for completed check, -errno for internal errors.
|
|
|
|
* The check results are stored in result.
|
|
|
|
*/
|
2012-05-11 18:07:02 +04:00
|
|
|
int (*bdrv_check)(BlockDriverState* bs, BdrvCheckResult *result,
|
|
|
|
BdrvCheckMode fix);
|
2009-04-22 03:11:50 +04:00
|
|
|
|
2013-09-03 12:09:50 +04:00
|
|
|
int (*bdrv_amend_options)(BlockDriverState *bs,
|
|
|
|
QEMUOptionParameter *options);
|
|
|
|
|
2010-03-15 19:27:00 +03:00
|
|
|
void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event);
|
|
|
|
|
2012-12-06 17:32:58 +04:00
|
|
|
/* TODO Better pass a option string/QDict/QemuOpts to add any rule? */
|
|
|
|
int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
|
|
|
|
const char *tag);
|
2013-11-20 06:01:54 +04:00
|
|
|
int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs,
|
|
|
|
const char *tag);
|
2012-12-06 17:32:58 +04:00
|
|
|
int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
|
|
|
|
bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
|
|
|
|
|
2013-12-11 22:26:16 +04:00
|
|
|
int (*bdrv_refresh_limits)(BlockDriverState *bs);
|
|
|
|
|
2010-07-28 13:26:29 +04:00
|
|
|
/*
|
|
|
|
* Returns 1 if newly created images are guaranteed to contain only
|
|
|
|
* zeros, 0 otherwise.
|
|
|
|
*/
|
|
|
|
int (*bdrv_has_zero_init)(BlockDriverState *bs);
|
2009-11-30 18:54:15 +03:00
|
|
|
|
2010-04-13 13:29:33 +04:00
|
|
|
QLIST_ENTRY(BlockDriver) list;
|
2004-08-02 01:59:26 +04:00
|
|
|
};
|
|
|
|
|
2013-10-24 14:06:56 +04:00
|
|
|
/*
 * I/O limits and alignment hints of a block device.  All quantities are
 * expressed in sectors.
 */
typedef struct BlockLimits {
    /* maximum number of sectors that can be discarded at once */
    int max_discard;

    /* optimal alignment for discard requests in sectors */
    int64_t discard_alignment;

    /* maximum number of sectors that can be zeroized at once */
    int max_write_zeroes;

    /* optimal alignment for write zeroes requests in sectors */
    int64_t write_zeroes_alignment;

    /* optimal transfer length in sectors */
    int opt_transfer_length;
} BlockLimits;
|
|
|
|
|
2012-02-29 00:54:06 +04:00
|
|
|
/*
|
|
|
|
* Note: the function bdrv_append() copies and swaps contents of
|
|
|
|
* BlockDriverStates, so if you add new fields to this struct, please
|
|
|
|
* inspect bdrv_append() to determine if the new fields need to be
|
|
|
|
* copied as well.
|
|
|
|
*/
|
2004-08-02 01:59:26 +04:00
|
|
|
struct BlockDriverState {
|
2006-08-06 17:35:09 +04:00
|
|
|
int64_t total_sectors; /* if we are reading a disk image, give its
|
|
|
|
size in sectors */
|
2004-08-02 01:59:26 +04:00
|
|
|
int read_only; /* if true, the media is read only */
|
2010-02-14 14:39:18 +03:00
|
|
|
int open_flags; /* flags used to open the file, re-used for re-open */
|
2004-08-02 01:59:26 +04:00
|
|
|
int encrypted; /* if true, the media is encrypted */
|
2009-03-06 02:01:01 +03:00
|
|
|
int valid_key; /* if true, a valid encryption key has been set */
|
2007-12-24 19:10:43 +03:00
|
|
|
int sg; /* if true, the device is a /dev/sg* */
|
2011-11-28 20:08:47 +04:00
|
|
|
int copy_on_read; /* if true, copy read backing sectors into image
|
|
|
|
note this is a reference count */
|
2004-08-02 01:59:26 +04:00
|
|
|
|
2006-08-19 15:45:59 +04:00
|
|
|
BlockDriver *drv; /* NULL means no media */
|
2004-08-02 01:59:26 +04:00
|
|
|
void *opaque;
|
|
|
|
|
2011-08-03 17:07:40 +04:00
|
|
|
void *dev; /* attached device model, if any */
|
|
|
|
/* TODO change to DeviceState when all users are qdevified */
|
2011-08-03 17:07:41 +04:00
|
|
|
const BlockDevOps *dev_ops;
|
|
|
|
void *dev_opaque;
|
2010-06-29 18:58:30 +04:00
|
|
|
|
2004-08-02 01:59:26 +04:00
|
|
|
char filename[1024];
|
|
|
|
char backing_file[1024]; /* if non zero, the image is a diff of
|
|
|
|
this file image */
|
2009-03-28 20:55:10 +03:00
|
|
|
char backing_format[16]; /* if non-zero and backing_file exists */
|
2004-08-02 01:59:26 +04:00
|
|
|
int is_temporary;
|
2006-08-19 15:45:59 +04:00
|
|
|
|
2004-08-02 01:59:26 +04:00
|
|
|
BlockDriverState *backing_hd;
|
2010-04-14 16:17:38 +04:00
|
|
|
BlockDriverState *file;
|
|
|
|
|
2012-08-23 13:20:36 +04:00
|
|
|
NotifierList close_notifiers;
|
|
|
|
|
2013-06-24 19:13:10 +04:00
|
|
|
/* Callback before write request is processed */
|
|
|
|
NotifierWithReturnList before_write_notifiers;
|
|
|
|
|
2012-01-18 18:40:42 +04:00
|
|
|
/* number of in-flight copy-on-read requests */
|
|
|
|
unsigned int copy_on_read_in_flight;
|
|
|
|
|
2013-09-02 16:14:39 +04:00
|
|
|
/* I/O throttling */
|
|
|
|
ThrottleState throttle_state;
|
|
|
|
CoQueue throttled_reqs[2];
|
2011-11-03 12:57:25 +04:00
|
|
|
bool io_limits_enabled;
|
|
|
|
|
2007-12-02 08:18:19 +03:00
|
|
|
/* I/O stats (display with "info blockstats"). */
|
2011-08-25 10:26:01 +04:00
|
|
|
uint64_t nr_bytes[BDRV_MAX_IOTYPE];
|
|
|
|
uint64_t nr_ops[BDRV_MAX_IOTYPE];
|
2011-08-25 10:26:10 +04:00
|
|
|
uint64_t total_time_ns[BDRV_MAX_IOTYPE];
|
2010-04-28 16:34:01 +04:00
|
|
|
uint64_t wr_highest_sector;
|
2007-12-02 08:18:19 +03:00
|
|
|
|
2013-10-24 14:06:56 +04:00
|
|
|
/* I/O Limits */
|
|
|
|
BlockLimits bl;
|
|
|
|
|
2009-03-03 20:37:16 +03:00
|
|
|
/* Whether the disk can expand beyond total_sectors */
|
|
|
|
int growable;
|
|
|
|
|
2013-08-22 11:24:14 +04:00
|
|
|
/* Whether produces zeros when read beyond eof */
|
|
|
|
bool zero_beyond_eof;
|
|
|
|
|
2009-04-23 00:20:00 +04:00
|
|
|
/* the memory alignment required for the buffers handled by this driver */
|
|
|
|
int buffer_alignment;
|
|
|
|
|
2009-09-04 21:01:15 +04:00
|
|
|
/* do we need to tell the quest if we have a volatile write cache? */
|
|
|
|
int enable_write_cache;
|
|
|
|
|
2004-08-02 01:59:26 +04:00
|
|
|
/* NOTE: the following infos are only hints for real hardware
|
|
|
|
drivers. They are not used by the block driver */
|
2012-09-28 19:22:54 +04:00
|
|
|
BlockdevOnError on_read_error, on_write_error;
|
2011-10-15 00:11:23 +04:00
|
|
|
bool iostatus_enabled;
|
2011-09-22 00:16:47 +04:00
|
|
|
BlockDeviceIoStatus iostatus;
|
2014-01-24 00:31:32 +04:00
|
|
|
|
|
|
|
/* the following member gives a name to every node on the bs graph. */
|
|
|
|
char node_name[32];
|
|
|
|
/* element of the list of named nodes building the graph */
|
|
|
|
QTAILQ_ENTRY(BlockDriverState) node_list;
|
|
|
|
/* Device name is the name associated with the "drive" the guest sees */
|
2004-08-02 01:59:26 +04:00
|
|
|
char device_name[32];
|
2014-01-24 00:31:32 +04:00
|
|
|
/* element of the list of "drives" the guest sees */
|
|
|
|
QTAILQ_ENTRY(BlockDriverState) device_list;
|
2013-11-13 14:29:43 +04:00
|
|
|
QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
|
2013-08-23 05:14:46 +04:00
|
|
|
int refcnt;
|
2011-01-26 17:12:34 +03:00
|
|
|
int in_use; /* users other than guest access, eg. block migration */
|
2011-11-17 17:40:27 +04:00
|
|
|
|
|
|
|
QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
|
2012-01-18 18:40:43 +04:00
|
|
|
|
|
|
|
/* long-running background operation */
|
|
|
|
BlockJob *job;
|
2012-09-20 23:13:19 +04:00
|
|
|
|
2013-03-15 13:35:02 +04:00
|
|
|
QDict *options;
|
2004-08-02 01:59:26 +04:00
|
|
|
};
|
|
|
|
|
2012-05-28 11:27:54 +04:00
|
|
|
/*
 * get_tmp_filename:
 * @filename: caller-provided character buffer.
 * @size: size argument accompanying @filename.
 *
 * NOTE(review): only the prototype is visible here.  Presumably this writes
 * the name of a temporary file into @filename (capacity @size bytes) and
 * returns 0 or a negative errno value -- confirm against the definition.
 */
int get_tmp_filename(char *filename, int size);
|
2005-12-18 21:28:15 +03:00
|
|
|
|
2011-11-03 12:57:25 +04:00
|
|
|
void bdrv_set_io_limits(BlockDriverState *bs,
|
2013-09-02 16:14:39 +04:00
|
|
|
ThrottleConfig *cfg);
|
|
|
|
|
2011-11-03 12:57:25 +04:00
|
|
|
|
2013-06-24 19:13:10 +04:00
|
|
|
/**
|
|
|
|
* bdrv_add_before_write_notifier:
|
|
|
|
*
|
|
|
|
* Register a callback that is invoked before write requests are processed but
|
|
|
|
* after any throttling or waiting for overlapping requests.
|
|
|
|
*/
|
|
|
|
void bdrv_add_before_write_notifier(BlockDriverState *bs,
|
|
|
|
NotifierWithReturn *notifier);
|
|
|
|
|
2013-03-07 16:41:48 +04:00
|
|
|
/**
|
|
|
|
* bdrv_get_aio_context:
|
|
|
|
*
|
|
|
|
* Returns: the currently bound #AioContext
|
|
|
|
*/
|
|
|
|
AioContext *bdrv_get_aio_context(BlockDriverState *bs);
|
|
|
|
|
2009-06-15 16:04:22 +04:00
|
|
|
#ifdef _WIN32
/*
 * Declared for Windows builds only.
 * NOTE(review): presumably returns non-zero when @filename names a Windows
 * drive -- confirm against the definition.
 */
int is_windows_drive(const char *filename);
#endif
|
block: introduce block job error
The following behaviors are possible:
'report': The behavior is the same as in 1.1. An I/O error,
respectively during a read or a write, will complete the job immediately
with an error code.
'ignore': An I/O error, respectively during a read or a write, will be
ignored. For streaming, the job will complete with an error and the
backing file will be left in place. For mirroring, the sector will be
marked again as dirty and re-examined later.
'stop': The job will be paused and the job iostatus will be set to
failed or nospace, while the VM will keep running. This can only be
specified if the block device has rerror=stop and werror=stop or enospc.
'enospc': Behaves as 'stop' for ENOSPC errors, 'report' for others.
In all cases, even for 'report', the I/O error is reported as a QMP
event BLOCK_JOB_ERROR, with the same arguments as BLOCK_IO_ERROR.
It is possible that while stopping the VM a BLOCK_IO_ERROR event will be
reported and will clobber the event from BLOCK_JOB_ERROR, or vice versa.
This is not really avoidable since stopping the VM completes all pending
I/O requests. In fact, it is already possible now that a series of
BLOCK_IO_ERROR events are reported with rerror=stop, because vm_stop
calls bdrv_drain_all and this can generate further errors.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2012-09-28 19:22:58 +04:00
|
|
|
void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
|
|
|
|
enum MonitorEvent ev,
|
|
|
|
BlockErrorAction action, bool is_read);
|
2009-06-15 16:04:22 +04:00
|
|
|
|
2012-03-30 15:17:13 +04:00
|
|
|
/**
|
|
|
|
* stream_start:
|
|
|
|
* @bs: Block device to operate on.
|
|
|
|
* @base: Block device that will become the new base, or %NULL to
|
|
|
|
* flatten the whole backing file chain onto @bs.
|
|
|
|
* @base_id: The file name that will be written to @bs as the new
|
|
|
|
* backing file if the job completes. Ignored if @base is %NULL.
|
2012-04-25 19:51:03 +04:00
|
|
|
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
|
2012-09-28 19:22:59 +04:00
|
|
|
* @on_error: The action to take upon error.
|
2012-03-30 15:17:13 +04:00
|
|
|
* @cb: Completion function for the job.
|
|
|
|
* @opaque: Opaque pointer value passed to @cb.
|
2012-04-25 19:51:00 +04:00
|
|
|
* @errp: Error object.
|
2012-03-30 15:17:13 +04:00
|
|
|
*
|
|
|
|
* Start a streaming operation on @bs. Clusters that are unallocated
|
|
|
|
* in @bs, but allocated in any image between @base and @bs (both
|
|
|
|
* exclusive) will be written to @bs. At the end of a successful
|
|
|
|
* streaming job, the backing file of @bs will be changed to
|
|
|
|
* @base_id in the written image and to @base in the live BlockDriverState.
|
|
|
|
*/
|
2012-04-25 19:51:00 +04:00
|
|
|
void stream_start(BlockDriverState *bs, BlockDriverState *base,
|
2012-09-28 19:22:59 +04:00
|
|
|
const char *base_id, int64_t speed, BlockdevOnError on_error,
|
2012-04-25 19:51:03 +04:00
|
|
|
BlockDriverCompletionFunc *cb,
|
2012-04-25 19:51:00 +04:00
|
|
|
void *opaque, Error **errp);
|
2012-01-18 18:40:44 +04:00
|
|
|
|
2012-09-27 21:29:13 +04:00
|
|
|
/**
|
|
|
|
* commit_start:
|
2013-12-16 10:45:30 +04:00
|
|
|
* @bs: Active block device.
|
|
|
|
* @top: Top block device to be committed.
|
|
|
|
* @base: Block device that will be written into, and become the new top.
|
2012-09-27 21:29:13 +04:00
|
|
|
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
|
|
|
|
* @on_error: The action to take upon error.
|
|
|
|
* @cb: Completion function for the job.
|
|
|
|
* @opaque: Opaque pointer value passed to @cb.
|
|
|
|
* @errp: Error object.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
void commit_start(BlockDriverState *bs, BlockDriverState *base,
|
|
|
|
BlockDriverState *top, int64_t speed,
|
2012-09-28 19:22:55 +04:00
|
|
|
BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
|
2012-09-27 21:29:13 +04:00
|
|
|
void *opaque, Error **errp);
|
2013-12-16 10:45:30 +04:00
|
|
|
/**
|
|
|
|
* commit_active_start:
|
|
|
|
* @bs: Active block device to be committed.
|
|
|
|
* @base: Block device that will be written into, and become the new top.
|
|
|
|
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
|
|
|
|
* @on_error: The action to take upon error.
|
|
|
|
* @cb: Completion function for the job.
|
|
|
|
* @opaque: Opaque pointer value passed to @cb.
|
|
|
|
* @errp: Error object.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
|
|
|
|
int64_t speed,
|
|
|
|
BlockdevOnError on_error,
|
|
|
|
BlockDriverCompletionFunc *cb,
|
|
|
|
void *opaque, Error **errp);
|
2012-10-18 18:49:23 +04:00
|
|
|
/*
|
|
|
|
* mirror_start:
|
|
|
|
* @bs: Block device to operate on.
|
|
|
|
* @target: Block device to write to.
|
|
|
|
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
|
2013-01-21 20:09:46 +04:00
|
|
|
* @granularity: The chosen granularity for the dirty bitmap.
|
2013-01-22 12:03:13 +04:00
|
|
|
* @buf_size: The amount of data that can be in flight at one time.
|
2012-10-18 18:49:23 +04:00
|
|
|
* @mode: Whether to collapse all images in the chain to the target.
|
2012-10-18 18:49:28 +04:00
|
|
|
* @on_source_error: The action to take upon error reading from the source.
|
|
|
|
* @on_target_error: The action to take upon error writing to the target.
|
2012-10-18 18:49:23 +04:00
|
|
|
* @cb: Completion function for the job.
|
|
|
|
* @opaque: Opaque pointer value passed to @cb.
|
|
|
|
* @errp: Error object.
|
|
|
|
*
|
|
|
|
* Start a mirroring operation on @bs. Clusters that are allocated
|
|
|
|
* in @bs will be written to @bs until the job is cancelled or
|
|
|
|
* manually completed. At the end of a successful mirroring job,
|
|
|
|
* @bs will be switched to read from @target.
|
|
|
|
*/
|
|
|
|
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
|
2013-01-22 12:03:13 +04:00
|
|
|
int64_t speed, int64_t granularity, int64_t buf_size,
|
|
|
|
MirrorSyncMode mode, BlockdevOnError on_source_error,
|
2012-10-18 18:49:28 +04:00
|
|
|
BlockdevOnError on_target_error,
|
2012-10-18 18:49:23 +04:00
|
|
|
BlockDriverCompletionFunc *cb,
|
|
|
|
void *opaque, Error **errp);
|
|
|
|
|
2013-06-24 19:13:11 +04:00
|
|
|
/*
|
|
|
|
* backup_start:
|
|
|
|
* @bs: Block device to operate on.
|
|
|
|
* @target: Block device to write to.
|
|
|
|
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
|
2013-07-26 22:39:04 +04:00
|
|
|
* @sync_mode: What parts of the disk image should be copied to the destination.
|
2013-06-24 19:13:11 +04:00
|
|
|
* @on_source_error: The action to take upon error reading from the source.
|
|
|
|
* @on_target_error: The action to take upon error writing to the target.
|
|
|
|
* @cb: Completion function for the job.
|
|
|
|
* @opaque: Opaque pointer value passed to @cb.
|
|
|
|
*
|
|
|
|
* Start a backup operation on @bs. Clusters in @bs are written to @target
|
|
|
|
* until the job is cancelled or manually completed.
|
|
|
|
*/
|
|
|
|
void backup_start(BlockDriverState *bs, BlockDriverState *target,
|
2013-07-26 22:39:04 +04:00
|
|
|
int64_t speed, MirrorSyncMode sync_mode,
|
|
|
|
BlockdevOnError on_source_error,
|
2013-06-24 19:13:11 +04:00
|
|
|
BlockdevOnError on_target_error,
|
|
|
|
BlockDriverCompletionFunc *cb, void *opaque,
|
|
|
|
Error **errp);
|
|
|
|
|
2004-08-02 01:59:26 +04:00
|
|
|
#endif /* BLOCK_INT_H */
|