2007-11-11 05:51:17 +03:00
|
|
|
#ifndef BLOCK_H
|
|
|
|
#define BLOCK_H
|
|
|
|
|
2012-12-17 21:19:44 +04:00
|
|
|
#include "block/aio.h"
|
2018-02-01 14:18:39 +03:00
|
|
|
#include "qapi-types.h"
|
2016-03-09 12:52:44 +03:00
|
|
|
#include "qemu/iov.h"
|
2015-09-01 16:48:02 +03:00
|
|
|
#include "qemu/coroutine.h"
|
2014-09-05 17:46:18 +04:00
|
|
|
#include "block/accounting.h"
|
2016-03-08 07:44:55 +03:00
|
|
|
#include "block/dirty-bitmap.h"
|
2016-10-27 19:07:00 +03:00
|
|
|
#include "block/blockjob.h"
|
2016-03-08 07:44:53 +03:00
|
|
|
#include "qemu/hbitmap.h"
|
2008-09-22 23:17:18 +04:00
|
|
|
|
2007-11-11 05:51:17 +03:00
|
|
|
/* block.c */
|
|
|
|
/* Forward declarations; the struct bodies are not defined in this header. */
typedef struct BlockDriver BlockDriver;
typedef struct BdrvChild BdrvChild;
typedef struct BdrvChildRole BdrvChildRole;
|
2007-11-11 05:51:17 +03:00
|
|
|
|
|
|
|
/*
 * Image properties reported by a block driver.
 * NOTE(review): presumably filled in by the driver on request; the producer
 * is not visible in this header.
 */
typedef struct BlockDriverInfo {
    /* in bytes, 0 if irrelevant */
    int cluster_size;
    /* offset at which the VM state can be saved (0 if not possible) */
    int64_t vm_state_offset;
    bool is_dirty;
    /*
     * True if unallocated blocks read back as zeroes. This is equivalent
     * to the LBPRZ flag in the SCSI logical block provisioning page.
     */
    bool unallocated_blocks_are_zero;
    /*
     * True if this block driver only supports compressed writes
     */
    bool needs_compressed_writes;
} BlockDriverInfo;
|
|
|
|
|
2012-03-15 16:13:31 +04:00
|
|
|
/* Cluster counters describing allocation and fragmentation of an image. */
typedef struct BlockFragInfo {
    uint64_t allocated_clusters;
    uint64_t total_clusters;
    uint64_t fragmented_clusters;
    uint64_t compressed_clusters;
} BlockFragInfo;
|
|
|
|
|
2013-10-24 14:06:50 +04:00
|
|
|
/*
 * Per-request flags.  Each flag is a distinct bit so that flags can be
 * combined; BDRV_REQ_MASK is the union of all flags defined here.
 */
typedef enum {
    BDRV_REQ_COPY_ON_READ       = 0x1,
    BDRV_REQ_ZERO_WRITE         = 0x2,
    /* The BDRV_REQ_MAY_UNMAP flag is used to indicate that the block driver
     * is allowed to optimize a write zeroes request by unmapping (discarding)
     * blocks if it is guaranteed that the result will read back as
     * zeroes. The flag is only passed to the driver if the block device is
     * opened with BDRV_O_UNMAP.
     */
    BDRV_REQ_MAY_UNMAP          = 0x4,
    BDRV_REQ_NO_SERIALISING     = 0x8,
    BDRV_REQ_FUA                = 0x10,
    BDRV_REQ_WRITE_COMPRESSED   = 0x20,

    /* Mask of valid flags */
    BDRV_REQ_MASK               = 0x3f,
} BdrvRequestFlags;
|
|
|
|
|
2015-02-16 14:47:54 +03:00
|
|
|
/*
 * Pair of block sizes.
 * NOTE(review): presumably the physical and logical block size of a device,
 * in bytes -- confirm units against the users of this struct.
 */
typedef struct BlockSizes {
    uint32_t phys;
    uint32_t log;
} BlockSizes;
|
|
|
|
|
|
|
|
/* Disk geometry expressed as heads / sectors / cylinders counts. */
typedef struct HDGeometry {
    uint32_t heads;
    uint32_t sectors;
    uint32_t cylinders;
} HDGeometry;
|
|
|
|
|
2007-11-11 05:51:17 +03:00
|
|
|
/*
 * Open flags.  Each BDRV_O_* value is a distinct bit; they are OR-ed together
 * into the int "flags" arguments of the open/reopen functions declared in
 * this header.  Bits 0x0001 and 0x0040 are not assigned here.
 */
#define BDRV_O_RDWR        0x0002
#define BDRV_O_RESIZE      0x0004 /* request permission for resizing the node */
#define BDRV_O_SNAPSHOT    0x0008 /* open the file read only and save writes in a snapshot */
#define BDRV_O_TEMPORARY   0x0010 /* delete the file after use */
#define BDRV_O_NOCACHE     0x0020 /* do not use the host page cache */
#define BDRV_O_NATIVE_AIO  0x0080 /* use native AIO instead of the thread pool */
#define BDRV_O_NO_BACKING  0x0100 /* don't open the backing file */
#define BDRV_O_NO_FLUSH    0x0200 /* disable flushing on this disk */
#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
#define BDRV_O_INACTIVE    0x0800 /* consistency hint for migration handoff */
#define BDRV_O_CHECK       0x1000 /* open solely for consistency check */
#define BDRV_O_ALLOW_RDWR  0x2000 /* allow reopen to change from r/o to r/w */
#define BDRV_O_UNMAP       0x4000 /* execute guest UNMAP/TRIM operations */
#define BDRV_O_PROTOCOL    0x8000 /* if no block driver is explicitly given:
                                      select an appropriate protocol driver,
                                      ignoring the format layer */
#define BDRV_O_NO_IO       0x10000 /* don't initialize for I/O */

/* The subset of open flags that control caching behaviour. */
#define BDRV_O_CACHE_MASK  (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
|
2007-11-11 05:51:17 +03:00
|
|
|
|
2015-04-07 17:55:00 +03:00
|
|
|
|
|
|
|
/* Option names of options parsed by the block layer */

#define BDRV_OPT_CACHE_WB       "cache.writeback"
#define BDRV_OPT_CACHE_DIRECT   "cache.direct"
#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
#define BDRV_OPT_READ_ONLY      "read-only"
#define BDRV_OPT_DISCARD        "discard"
#define BDRV_OPT_FORCE_SHARE    "force-share"
|
2015-04-07 17:55:00 +03:00
|
|
|
|
|
|
|
|
2009-11-30 20:21:19 +03:00
|
|
|
/*
 * Sector geometry used by the sector-based interfaces: a sector is
 * 1 << BDRV_SECTOR_BITS bytes.  BDRV_SECTOR_MASK clears the sub-sector bits
 * of a byte offset; its replacement list is fully parenthesized so that it
 * is safe in any expression context.
 */
#define BDRV_SECTOR_BITS   9
#define BDRV_SECTOR_SIZE   (1ULL << BDRV_SECTOR_BITS)
#define BDRV_SECTOR_MASK   (~(BDRV_SECTOR_SIZE - 1))

/*
 * Largest request size, in sectors and in bytes: the smaller of SIZE_MAX and
 * INT_MAX, each scaled down to whole sectors.  MIN() is not defined in this
 * header; it must be in scope wherever these macros are expanded.
 */
#define BDRV_REQUEST_MAX_SECTORS MIN(SIZE_MAX >> BDRV_SECTOR_BITS, \
                                     INT_MAX >> BDRV_SECTOR_BITS)
#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS)
|
2015-02-06 13:54:11 +03:00
|
|
|
|
2014-11-10 12:10:38 +03:00
|
|
|
/*
 * Allocation status flags for bdrv_block_status() and friends.
 *
 * Public flags:
 * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer
 * BDRV_BLOCK_ZERO: offset reads as zero
 * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
 * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
 *                       layer (short for DATA || ZERO), set by block layer
 * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this layer
 *
 * Internal flag:
 * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
 *                 that the block layer recompute the answer from the returned
 *                 BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
 *
 * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK) of
 * the return value (old interface) or the entire map parameter (new
 * interface) represent the offset in the returned BDS that is allocated for
 * the corresponding raw data.  However, whether that offset actually
 * contains data also depends on BDRV_BLOCK_DATA, as follows:
 *
 * DATA ZERO OFFSET_VALID
 *  t    t        t       sectors read as zero, returned file is zero at offset
 *  t    f        t       sectors read as valid from file at offset
 *  f    t        t       sectors preallocated, read as zero, returned file not
 *                        necessarily zero at offset
 *  f    f        t       sectors preallocated but read from backing_hd,
 *                        returned file contains garbage at offset
 *  t    t        f       sectors preallocated, read as zero, unknown offset
 *  t    f        f       sectors read from unknown file or offset
 *  f    t        f       not allocated or unknown offset, read as zero
 *  f    f        f       not allocated or unknown offset, read from backing_hd
 */
#define BDRV_BLOCK_DATA         0x01
#define BDRV_BLOCK_ZERO         0x02
#define BDRV_BLOCK_OFFSET_VALID 0x04
#define BDRV_BLOCK_RAW          0x08
#define BDRV_BLOCK_ALLOCATED    0x10
#define BDRV_BLOCK_EOF          0x20
#define BDRV_BLOCK_OFFSET_MASK  BDRV_SECTOR_MASK
|
|
|
|
|
2012-09-20 23:13:19 +04:00
|
|
|
/*
 * Head type for the queue of BlockReopenQueueEntry elements that the reopen
 * functions declared in this header build and consume.
 */
typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
|
|
|
|
|
|
|
|
typedef struct BDRVReopenState {
|
|
|
|
BlockDriverState *bs;
|
|
|
|
int flags;
|
2017-07-03 18:07:35 +03:00
|
|
|
uint64_t perm, shared_perm;
|
2015-04-10 18:50:50 +03:00
|
|
|
QDict *options;
|
2015-05-08 17:15:03 +03:00
|
|
|
QDict *explicit_options;
|
2012-09-20 23:13:19 +04:00
|
|
|
void *opaque;
|
|
|
|
} BDRVReopenState;
|
|
|
|
|
2014-05-23 17:29:41 +04:00
|
|
|
/*
 * Block operation types
 *
 * Values are assigned implicitly starting from 0; BLOCK_OP_TYPE_MAX is the
 * last enumerator and therefore equals the number of operation types.
 */
typedef enum BlockOpType {
    BLOCK_OP_TYPE_BACKUP_SOURCE,
    BLOCK_OP_TYPE_BACKUP_TARGET,
    BLOCK_OP_TYPE_CHANGE,
    BLOCK_OP_TYPE_COMMIT_SOURCE,
    BLOCK_OP_TYPE_COMMIT_TARGET,
    BLOCK_OP_TYPE_DATAPLANE,
    BLOCK_OP_TYPE_DRIVE_DEL,
    BLOCK_OP_TYPE_EJECT,
    BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
    BLOCK_OP_TYPE_INTERNAL_SNAPSHOT,
    BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE,
    BLOCK_OP_TYPE_MIRROR_SOURCE,
    BLOCK_OP_TYPE_MIRROR_TARGET,
    BLOCK_OP_TYPE_RESIZE,
    BLOCK_OP_TYPE_STREAM,
    BLOCK_OP_TYPE_REPLACE,
    BLOCK_OP_TYPE_MAX,
} BlockOpType;
|
2012-09-20 23:13:19 +04:00
|
|
|
|
2016-12-20 18:52:41 +03:00
|
|
|
/* Block node permission constants */
enum {
    /**
     * A user that has the "permission" of consistent reads is guaranteed that
     * their view of the contents of the block device is complete and
     * self-consistent, representing the contents of a disk at a specific
     * point.
     *
     * For most block devices (including their backing files) this is true, but
     * the property cannot be maintained in a few situations like for
     * intermediate nodes of a commit block job.
     */
    BLK_PERM_CONSISTENT_READ    = 0x01,

    /** This permission is required to change the visible disk contents. */
    BLK_PERM_WRITE              = 0x02,

    /**
     * This permission (which is weaker than BLK_PERM_WRITE) is both enough and
     * required for writes to the block node when the caller promises that
     * the visible disk content doesn't change.
     */
    BLK_PERM_WRITE_UNCHANGED    = 0x04,

    /** This permission is required to change the size of a block node. */
    BLK_PERM_RESIZE             = 0x08,

    /**
     * This permission is required to change the node that this BdrvChild
     * points to.
     */
    BLK_PERM_GRAPH_MOD          = 0x10,

    /** Union of all permission bits defined above. */
    BLK_PERM_ALL                = 0x1f,
};
|
|
|
|
|
2017-05-02 19:35:36 +03:00
|
|
|
/*
 * Takes a permission bit mask (@perm) and returns a string.
 * NOTE(review): presumably a human-readable list of the BLK_PERM_* names set
 * in @perm, heap-allocated and owned by the caller (the return type is a
 * non-const char *) -- confirm against the definition.
 */
char *bdrv_perm_names(uint64_t perm);
|
|
|
|
|
2011-11-03 12:57:25 +04:00
|
|
|
/* disk I/O throttling */
|
2007-11-11 05:51:17 +03:00
|
|
|
void bdrv_init(void);
|
2009-10-27 20:41:44 +03:00
|
|
|
void bdrv_init_with_whitelist(void);
|
2016-03-21 17:11:48 +03:00
|
|
|
bool bdrv_uses_whitelist(void);
|
2013-07-10 17:47:39 +04:00
|
|
|
BlockDriver *bdrv_find_protocol(const char *filename,
|
2015-02-05 21:58:12 +03:00
|
|
|
bool allow_protocol_prefix,
|
|
|
|
Error **errp);
|
2007-11-11 05:51:17 +03:00
|
|
|
BlockDriver *bdrv_find_format(const char *format_name);
|
2009-05-18 18:42:10 +04:00
|
|
|
int bdrv_create(BlockDriver *drv, const char* filename,
|
2014-06-05 13:21:11 +04:00
|
|
|
QemuOpts *opts, Error **errp);
|
|
|
|
int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
|
2014-10-07 15:59:03 +04:00
|
|
|
BlockDriverState *bdrv_new(void);
|
2017-02-20 14:46:42 +03:00
|
|
|
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
|
|
|
|
Error **errp);
|
2017-03-06 18:20:51 +03:00
|
|
|
void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
|
|
|
|
Error **errp);
|
2015-09-15 12:58:23 +03:00
|
|
|
|
2016-03-14 13:40:23 +03:00
|
|
|
int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
|
2013-02-08 17:06:11 +04:00
|
|
|
int bdrv_parse_discard_flags(const char *mode, int *flags);
|
2015-06-15 14:24:19 +03:00
|
|
|
BdrvChild *bdrv_open_child(const char *filename,
|
|
|
|
QDict *options, const char *bdref_key,
|
|
|
|
BlockDriverState* parent,
|
|
|
|
const BdrvChildRole *child_role,
|
|
|
|
bool allow_none, Error **errp);
|
2017-02-17 22:42:32 +03:00
|
|
|
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
|
|
|
|
Error **errp);
|
2015-01-16 20:23:41 +03:00
|
|
|
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
|
|
|
|
const char *bdref_key, Error **errp);
|
2016-05-17 17:41:31 +03:00
|
|
|
BlockDriverState *bdrv_open(const char *filename, const char *reference,
|
|
|
|
QDict *options, int flags, Error **errp);
|
2017-01-18 19:16:41 +03:00
|
|
|
BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
|
|
|
|
int flags, Error **errp);
|
2012-09-20 23:13:19 +04:00
|
|
|
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
|
2015-04-10 18:50:50 +03:00
|
|
|
BlockDriverState *bs,
|
|
|
|
QDict *options, int flags);
|
2016-10-27 13:49:02 +03:00
|
|
|
int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp);
|
2012-09-20 23:13:19 +04:00
|
|
|
int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp);
|
|
|
|
int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
|
|
|
|
BlockReopenQueue *queue, Error **errp);
|
|
|
|
void bdrv_reopen_commit(BDRVReopenState *reopen_state);
|
|
|
|
void bdrv_reopen_abort(BDRVReopenState *reopen_state);
|
2016-05-30 17:48:35 +03:00
|
|
|
int bdrv_read(BdrvChild *child, int64_t sector_num,
|
2007-11-11 05:51:17 +03:00
|
|
|
uint8_t *buf, int nb_sectors);
|
2016-05-31 15:42:08 +03:00
|
|
|
int bdrv_write(BdrvChild *child, int64_t sector_num,
|
2007-11-11 05:51:17 +03:00
|
|
|
const uint8_t *buf, int nb_sectors);
|
2016-06-16 16:13:15 +03:00
|
|
|
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
|
2017-06-09 13:18:08 +03:00
|
|
|
int bytes, BdrvRequestFlags flags);
|
2016-06-16 16:13:15 +03:00
|
|
|
int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
|
2016-06-20 19:24:02 +03:00
|
|
|
int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes);
|
|
|
|
int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov);
|
2016-06-20 21:09:15 +03:00
|
|
|
int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes);
|
|
|
|
int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov);
|
|
|
|
int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
|
|
|
|
const void *buf, int count);
|
2016-05-24 18:21:22 +03:00
|
|
|
int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num,
|
|
|
|
int nb_sectors, QEMUIOVector *qiov);
|
2016-05-24 18:21:22 +03:00
|
|
|
int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num,
|
|
|
|
int nb_sectors, QEMUIOVector *qiov);
|
2012-02-07 17:27:25 +04:00
|
|
|
/*
|
|
|
|
* Efficiently zero a region of the disk image. Note that this is a regular
|
|
|
|
* I/O request like read or write and should have a reasonable size. This
|
|
|
|
* function is not suitable for zeroing the entire image in a single request
|
|
|
|
* because it may allocate memory for the entire region.
|
|
|
|
*/
|
2016-06-20 22:31:46 +03:00
|
|
|
int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
|
2017-06-09 13:18:08 +03:00
|
|
|
int bytes, BdrvRequestFlags flags);
|
2012-01-18 18:40:51 +04:00
|
|
|
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
|
|
|
|
const char *backing_file);
|
2014-07-18 22:24:56 +04:00
|
|
|
void bdrv_refresh_filename(BlockDriverState *bs);
|
2017-06-13 23:20:53 +03:00
|
|
|
int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
|
|
|
|
Error **errp);
|
2014-06-26 15:23:17 +04:00
|
|
|
int64_t bdrv_nb_sectors(BlockDriverState *bs);
|
2007-11-11 05:51:17 +03:00
|
|
|
int64_t bdrv_getlength(BlockDriverState *bs);
|
2011-07-12 15:56:39 +04:00
|
|
|
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
|
2017-07-05 15:57:30 +03:00
|
|
|
BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
|
|
|
|
BlockDriverState *in_bs, Error **errp);
|
2007-12-17 04:35:20 +03:00
|
|
|
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
|
2014-07-16 19:48:16 +04:00
|
|
|
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp);
|
2007-11-11 05:51:17 +03:00
|
|
|
int bdrv_commit(BlockDriverState *bs);
|
2010-01-12 14:55:17 +03:00
|
|
|
int bdrv_change_backing_file(BlockDriverState *bs,
|
|
|
|
const char *backing_file, const char *backing_fmt);
|
2009-05-10 02:03:42 +04:00
|
|
|
void bdrv_register(BlockDriver *bdrv);
|
2017-06-27 21:36:18 +03:00
|
|
|
int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
|
block: extend block-commit to accept a string for the backing file
On some image chains, QEMU may not always be able to resolve the
filenames properly, when updating the backing file of an image
after a block commit.
For instance, certain relative pathnames may fail, or drives may
have been specified originally by file descriptor (e.g. /dev/fd/???),
or a relative protocol pathname may have been used.
In these instances, QEMU may lack the information to be able to make
the correct choice, but the user or management layer most likely does
have that knowledge.
With this extension to the block-commit api, the user is able to change
the backing file of the overlay image as part of the block-commit
operation.
This allows the change to be 'safe', in the sense that if the attempt
to write the overlay image metadata fails, then the block-commit
operation returns failure, without disrupting the guest.
If the commit top is the active layer, then specifying the backing
file string will be treated as an error (there is no overlay image
to modify in that case).
If a backing file string is not specified in the command, the backing
file string to use is determined in the same manner as it was
previously.
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2014-06-25 23:40:10 +04:00
|
|
|
const char *backing_file_str);
|
2012-09-27 21:29:12 +04:00
|
|
|
BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
|
|
|
|
BlockDriverState *bs);
|
2012-09-27 21:29:15 +04:00
|
|
|
BlockDriverState *bdrv_find_base(BlockDriverState *bs);
|
2009-05-10 02:03:42 +04:00
|
|
|
|
2010-06-29 13:43:13 +04:00
|
|
|
|
|
|
|
typedef struct BdrvCheckResult {
|
|
|
|
int corruptions;
|
|
|
|
int leaks;
|
|
|
|
int check_errors;
|
2012-05-11 20:16:54 +04:00
|
|
|
int corruptions_fixed;
|
|
|
|
int leaks_fixed;
|
2013-01-28 15:59:46 +04:00
|
|
|
int64_t image_end_offset;
|
2012-03-15 16:13:31 +04:00
|
|
|
BlockFragInfo bfi;
|
2010-06-29 13:43:13 +04:00
|
|
|
} BdrvCheckResult;
|
|
|
|
|
2012-05-11 18:07:02 +04:00
|
|
|
/*
 * Repair modes for an image check.  The two values are distinct bits, so
 * they can be OR-ed together.
 */
typedef enum {
    BDRV_FIX_LEAKS    = 1,
    BDRV_FIX_ERRORS   = 2,
} BdrvCheckMode;
|
|
|
|
|
|
|
|
/*
 * Takes a node (@bs), a result structure (@res) and a BdrvCheckMode (@fix).
 * NOTE(review): presumably runs the driver's consistency check, fills @res,
 * and repairs what @fix allows -- confirm against the definition.
 */
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
|
2010-06-29 13:43:13 +04:00
|
|
|
|
2014-10-27 13:12:50 +03:00
|
|
|
/* The units of offset and total_work_size may be chosen arbitrarily by the
|
|
|
|
* block driver; total_work_size may change during the course of the amendment
|
|
|
|
* operation */
|
|
|
|
typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset,
|
2015-07-27 18:51:32 +03:00
|
|
|
int64_t total_work_size, void *opaque);
|
2014-10-27 13:12:50 +03:00
|
|
|
int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
|
2015-07-27 18:51:32 +03:00
|
|
|
BlockDriverAmendStatusCB *status_cb, void *cb_opaque);
|
2013-09-03 12:09:50 +04:00
|
|
|
|
2013-10-02 16:33:48 +04:00
|
|
|
/* external snapshots */
|
2014-01-24 00:31:36 +04:00
|
|
|
bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
|
|
|
|
BlockDriverState *candidate);
|
|
|
|
bool bdrv_is_first_non_filter(BlockDriverState *candidate);
|
2013-10-02 16:33:48 +04:00
|
|
|
|
2014-06-27 20:25:25 +04:00
|
|
|
/* check if a named node can be replaced when doing drive-mirror */
|
2015-07-17 05:12:22 +03:00
|
|
|
BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
|
|
|
|
const char *node_name, Error **errp);
|
2014-06-27 20:25:25 +04:00
|
|
|
|
2007-11-11 05:51:17 +03:00
|
|
|
/* async block I/O */
|
2014-10-07 15:59:14 +04:00
|
|
|
void bdrv_aio_cancel(BlockAIOCB *acb);
|
|
|
|
void bdrv_aio_cancel_async(BlockAIOCB *acb);
|
2007-11-11 05:51:17 +03:00
|
|
|
|
2009-03-12 22:57:08 +03:00
|
|
|
/* sg packet commands */
|
2016-10-20 13:56:14 +03:00
|
|
|
int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
|
2009-03-12 22:57:08 +03:00
|
|
|
|
2011-11-15 01:09:45 +04:00
|
|
|
/* Invalidate any cached metadata used by image formats */
|
2014-03-12 18:59:16 +04:00
|
|
|
void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp);
|
|
|
|
void bdrv_invalidate_cache_all(Error **errp);
|
2015-12-22 16:07:08 +03:00
|
|
|
int bdrv_inactivate_all(void);
|
2011-11-15 01:09:45 +04:00
|
|
|
|
2007-11-11 05:51:17 +03:00
|
|
|
/* Ensure contents are flushed to disk. */
|
2010-10-21 18:43:43 +04:00
|
|
|
int bdrv_flush(BlockDriverState *bs);
|
2011-10-17 14:32:12 +04:00
|
|
|
int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
|
2016-09-23 04:45:50 +03:00
|
|
|
int bdrv_flush_all(void);
|
2010-05-28 06:44:57 +04:00
|
|
|
void bdrv_close_all(void);
|
2014-10-21 15:03:55 +04:00
|
|
|
void bdrv_drain(BlockDriverState *bs);
|
2016-04-05 14:20:52 +03:00
|
|
|
void coroutine_fn bdrv_co_drain(BlockDriverState *bs);
|
2016-10-28 10:08:02 +03:00
|
|
|
void bdrv_drain_all_begin(void);
|
|
|
|
void bdrv_drain_all_end(void);
|
2011-11-30 16:23:43 +04:00
|
|
|
void bdrv_drain_all(void);
|
2008-10-06 17:55:43 +04:00
|
|
|
|
2016-10-27 13:48:55 +03:00
|
|
|
#define BDRV_POLL_WHILE(bs, cond) ({ \
|
|
|
|
bool waited_ = false; \
|
block: Drain BH in bdrv_drained_begin
During block job completion, nothing is preventing
block_job_defer_to_main_loop_bh from being called in a nested
aio_poll(), which is a trouble, such as in this code path:
qmp_block_commit
commit_active_start
bdrv_reopen
bdrv_reopen_multiple
bdrv_reopen_prepare
bdrv_flush
aio_poll
aio_bh_poll
aio_bh_call
block_job_defer_to_main_loop_bh
stream_complete
bdrv_reopen
block_job_defer_to_main_loop_bh is the last step of the stream job,
which should have been "paused" by the bdrv_drained_begin/end in
bdrv_reopen_multiple, but it is not done because it's in the form of a
main loop BH.
Similar to why block jobs should be paused between drained_begin and
drained_end, BHs they schedule must be excluded as well. To achieve
this, this patch forces draining the BH in BDRV_POLL_WHILE.
As a side effect this fixes a hang in block_job_detach_aio_context
during system_reset when a block job is ready:
#0 0x0000555555aa79f3 in bdrv_drain_recurse
#1 0x0000555555aa825d in bdrv_drained_begin
#2 0x0000555555aa8449 in bdrv_drain
#3 0x0000555555a9c356 in blk_drain
#4 0x0000555555aa3cfd in mirror_drain
#5 0x0000555555a66e11 in block_job_detach_aio_context
#6 0x0000555555a62f4d in bdrv_detach_aio_context
#7 0x0000555555a63116 in bdrv_set_aio_context
#8 0x0000555555a9d326 in blk_set_aio_context
#9 0x00005555557e38da in virtio_blk_data_plane_stop
#10 0x00005555559f9d5f in virtio_bus_stop_ioeventfd
#11 0x00005555559fa49b in virtio_bus_stop_ioeventfd
#12 0x00005555559f6a18 in virtio_pci_stop_ioeventfd
#13 0x00005555559f6a18 in virtio_pci_reset
#14 0x00005555559139a9 in qdev_reset_one
#15 0x0000555555916738 in qbus_walk_children
#16 0x0000555555913318 in qdev_walk_children
#17 0x0000555555916738 in qbus_walk_children
#18 0x00005555559168ca in qemu_devices_reset
#19 0x000055555581fcbb in pc_machine_reset
#20 0x00005555558a4d96 in qemu_system_reset
#21 0x000055555577157a in main_loop_should_exit
#22 0x000055555577157a in main_loop
#23 0x000055555577157a in main
The rationale is that the loop in block_job_detach_aio_context cannot
make any progress in pausing/completing the job, because bs->in_flight
is 0, so bdrv_drain doesn't process the block_job_defer_to_main_loop
BH. With this patch, it does.
Reported-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170418143044.12187-3-famz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Tested-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-04-18 17:30:44 +03:00
|
|
|
bool busy_ = true; \
|
2016-10-27 13:48:55 +03:00
|
|
|
BlockDriverState *bs_ = (bs); \
|
2016-10-27 13:49:05 +03:00
|
|
|
AioContext *ctx_ = bdrv_get_aio_context(bs_); \
|
|
|
|
if (aio_context_in_iothread(ctx_)) { \
|
block: Drain BH in bdrv_drained_begin
During block job completion, nothing is preventing
block_job_defer_to_main_loop_bh from being called in a nested
aio_poll(), which is a trouble, such as in this code path:
qmp_block_commit
commit_active_start
bdrv_reopen
bdrv_reopen_multiple
bdrv_reopen_prepare
bdrv_flush
aio_poll
aio_bh_poll
aio_bh_call
block_job_defer_to_main_loop_bh
stream_complete
bdrv_reopen
block_job_defer_to_main_loop_bh is the last step of the stream job,
which should have been "paused" by the bdrv_drained_begin/end in
bdrv_reopen_multiple, but it is not done because it's in the form of a
main loop BH.
Similar to why block jobs should be paused between drained_begin and
drained_end, BHs they schedule must be excluded as well. To achieve
this, this patch forces draining the BH in BDRV_POLL_WHILE.
As a side effect this fixes a hang in block_job_detach_aio_context
during system_reset when a block job is ready:
#0 0x0000555555aa79f3 in bdrv_drain_recurse
#1 0x0000555555aa825d in bdrv_drained_begin
#2 0x0000555555aa8449 in bdrv_drain
#3 0x0000555555a9c356 in blk_drain
#4 0x0000555555aa3cfd in mirror_drain
#5 0x0000555555a66e11 in block_job_detach_aio_context
#6 0x0000555555a62f4d in bdrv_detach_aio_context
#7 0x0000555555a63116 in bdrv_set_aio_context
#8 0x0000555555a9d326 in blk_set_aio_context
#9 0x00005555557e38da in virtio_blk_data_plane_stop
#10 0x00005555559f9d5f in virtio_bus_stop_ioeventfd
#11 0x00005555559fa49b in virtio_bus_stop_ioeventfd
#12 0x00005555559f6a18 in virtio_pci_stop_ioeventfd
#13 0x00005555559f6a18 in virtio_pci_reset
#14 0x00005555559139a9 in qdev_reset_one
#15 0x0000555555916738 in qbus_walk_children
#16 0x0000555555913318 in qdev_walk_children
#17 0x0000555555916738 in qbus_walk_children
#18 0x00005555559168ca in qemu_devices_reset
#19 0x000055555581fcbb in pc_machine_reset
#20 0x00005555558a4d96 in qemu_system_reset
#21 0x000055555577157a in main_loop_should_exit
#22 0x000055555577157a in main_loop
#23 0x000055555577157a in main
The rationale is that the loop in block_job_detach_aio_context cannot
make any progress in pausing/completing the job, because bs->in_flight
is 0, so bdrv_drain doesn't process the block_job_defer_to_main_loop
BH. With this patch, it does.
Reported-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170418143044.12187-3-famz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Tested-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-04-18 17:30:44 +03:00
|
|
|
while ((cond) || busy_) { \
|
|
|
|
busy_ = aio_poll(ctx_, (cond)); \
|
|
|
|
waited_ |= !!(cond) | busy_; \
|
2016-10-27 13:49:05 +03:00
|
|
|
} \
|
|
|
|
} else { \
|
|
|
|
assert(qemu_get_current_aio_context() == \
|
|
|
|
qemu_get_aio_context()); \
|
|
|
|
/* Ask bdrv_dec_in_flight to wake up the main \
|
|
|
|
* QEMU AioContext. Extra I/O threads never take \
|
|
|
|
* other I/O threads' AioContexts (see for example \
|
|
|
|
* block_job_defer_to_main_loop for how to do it). \
|
|
|
|
*/ \
|
|
|
|
assert(!bs_->wakeup); \
|
2017-06-05 15:38:54 +03:00
|
|
|
/* Set bs->wakeup before evaluating cond. */ \
|
|
|
|
atomic_mb_set(&bs_->wakeup, true); \
|
block: Drain BH in bdrv_drained_begin
During block job completion, nothing is preventing
block_job_defer_to_main_loop_bh from being called in a nested
aio_poll(), which causes trouble, such as in this code path:
qmp_block_commit
commit_active_start
bdrv_reopen
bdrv_reopen_multiple
bdrv_reopen_prepare
bdrv_flush
aio_poll
aio_bh_poll
aio_bh_call
block_job_defer_to_main_loop_bh
stream_complete
bdrv_reopen
block_job_defer_to_main_loop_bh is the last step of the stream job,
which should have been "paused" by the bdrv_drained_begin/end in
bdrv_reopen_multiple, but it is not done because it's in the form of a
main loop BH.
Similar to why block jobs should be paused between drained_begin and
drained_end, BHs they schedule must be excluded as well. To achieve
this, this patch forces draining the BH in BDRV_POLL_WHILE.
As a side effect this fixes a hang in block_job_detach_aio_context
during system_reset when a block job is ready:
#0 0x0000555555aa79f3 in bdrv_drain_recurse
#1 0x0000555555aa825d in bdrv_drained_begin
#2 0x0000555555aa8449 in bdrv_drain
#3 0x0000555555a9c356 in blk_drain
#4 0x0000555555aa3cfd in mirror_drain
#5 0x0000555555a66e11 in block_job_detach_aio_context
#6 0x0000555555a62f4d in bdrv_detach_aio_context
#7 0x0000555555a63116 in bdrv_set_aio_context
#8 0x0000555555a9d326 in blk_set_aio_context
#9 0x00005555557e38da in virtio_blk_data_plane_stop
#10 0x00005555559f9d5f in virtio_bus_stop_ioeventfd
#11 0x00005555559fa49b in virtio_bus_stop_ioeventfd
#12 0x00005555559f6a18 in virtio_pci_stop_ioeventfd
#13 0x00005555559f6a18 in virtio_pci_reset
#14 0x00005555559139a9 in qdev_reset_one
#15 0x0000555555916738 in qbus_walk_children
#16 0x0000555555913318 in qdev_walk_children
#17 0x0000555555916738 in qbus_walk_children
#18 0x00005555559168ca in qemu_devices_reset
#19 0x000055555581fcbb in pc_machine_reset
#20 0x00005555558a4d96 in qemu_system_reset
#21 0x000055555577157a in main_loop_should_exit
#22 0x000055555577157a in main_loop
#23 0x000055555577157a in main
The rationale is that the loop in block_job_detach_aio_context cannot
make any progress in pausing/completing the job, because bs->in_flight
is 0, so bdrv_drain doesn't process the block_job_defer_to_main_loop
BH. With this patch, it does.
Reported-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170418143044.12187-3-famz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Tested-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-04-18 17:30:44 +03:00
|
|
|
while (busy_) { \
|
|
|
|
if ((cond)) { \
|
|
|
|
waited_ = busy_ = true; \
|
|
|
|
aio_context_release(ctx_); \
|
|
|
|
aio_poll(qemu_get_aio_context(), true); \
|
|
|
|
aio_context_acquire(ctx_); \
|
|
|
|
} else { \
|
|
|
|
busy_ = aio_poll(ctx_, false); \
|
|
|
|
waited_ |= busy_; \
|
|
|
|
} \
|
2016-10-27 13:49:05 +03:00
|
|
|
} \
|
2017-06-05 15:38:54 +03:00
|
|
|
atomic_set(&bs_->wakeup, false); \
|
2016-10-27 13:48:55 +03:00
|
|
|
} \
|
|
|
|
waited_; })
|
|
|
|
|
2017-06-09 13:18:08 +03:00
|
|
|
int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
|
|
|
|
int bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
|
2013-06-28 14:47:42 +04:00
|
|
|
int bdrv_has_zero_init_1(BlockDriverState *bs);
|
2010-04-14 19:30:35 +04:00
|
|
|
int bdrv_has_zero_init(BlockDriverState *bs);
|
2013-10-24 14:06:54 +04:00
|
|
|
bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
|
|
|
|
bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
|
block: Convert bdrv_get_block_status() to bytes
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the name of the function from bdrv_get_block_status() to
bdrv_block_status() ensures that the compiler enforces that all
callers are updated. For now, the io.c layer still assert()s that
all callers are sector-aligned, but that can be relaxed when a later
patch implements byte-based block status in the drivers.
There was an inherent limitation in returning the offset via the
return value: we only have room for BDRV_BLOCK_OFFSET_MASK bits, which
means an offset can only be mapped for sector-aligned queries (or,
if we declare that non-aligned input is at the same relative position
modulo 512 of the answer), so the new interface also changes things to
return the offset via output through a parameter by reference rather
than mashed into the return value. We'll have some glue code that
munges between the two styles until we finish converting all uses.
For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_block_status(), coupled
with the tweak in calling convention. But some code, particularly
bdrv_is_allocated(), gets a lot simpler because it no longer has to
mess with sectors.
For ease of review, bdrv_get_block_status_above() will be tackled
separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-10-12 06:47:03 +03:00
|
|
|
int bdrv_block_status(BlockDriverState *bs, int64_t offset,
|
|
|
|
int64_t bytes, int64_t *pnum, int64_t *map,
|
|
|
|
BlockDriverState **file);
|
block: Convert bdrv_get_block_status_above() to bytes
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the name of the function from bdrv_get_block_status_above()
to bdrv_block_status_above() ensures that the compiler enforces that
all callers are updated. Likewise, since a byte interface allows
an offset mapping that might not be sector aligned, split the mapping
out of the return value and into a pass-by-reference parameter. For
now, the io.c layer still assert()s that all uses are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status in the drivers.
For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_block_status(), plus
updates for the new split return interface. But some code,
particularly bdrv_block_status(), gets a lot simpler because it no
longer has to mess with sectors. Likewise, mirror code no longer
computes s->granularity >> BDRV_SECTOR_BITS, and can therefore drop
an assertion about alignment because the loop no longer depends on
alignment (never mind that we don't really have a driver that
reports sub-sector alignments, so it's not really possible to test
the effect of sub-sector mirroring). Fix a neighboring assertion to
use is_power_of_2 while there.
For ease of review, bdrv_get_block_status() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-10-12 06:47:08 +03:00
|
|
|
int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
|
|
|
|
int64_t offset, int64_t bytes, int64_t *pnum,
|
|
|
|
int64_t *map, BlockDriverState **file);
|
block: Make bdrv_is_allocated() byte-based
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the signature of the function to use int64_t *pnum ensures
that the compiler enforces that all callers are updated. For now,
the io.c layer still assert()s that all callers are sector-aligned
on input and that *pnum is sector-aligned on return to the caller,
but that can be relaxed when a later patch implements byte-based
block status. Therefore, this code adds usages like
DIV_ROUND_UP(,BDRV_SECTOR_SIZE) to callers that still want aligned
values, where the call might reasonably give non-aligned results
in the future; on the other hand, no rounding is needed for callers
that should just continue to work with byte alignment.
For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_is_allocated(). But
some code, particularly bdrv_commit(), gets a lot simpler because it
no longer has to mess with sectors; also, it is now possible to pass
NULL if the caller does not care how much of the image is allocated
beyond the initial offset. Leave comments where we can further
simplify once a later patch eliminates the need for sector-aligned
requests through bdrv_is_allocated().
For ease of review, bdrv_is_allocated_above() will be tackled
separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-07 15:44:57 +03:00
|
|
|
int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
|
|
|
|
int64_t *pnum);
|
2013-02-13 12:09:39 +04:00
|
|
|
int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
|
block: Make bdrv_is_allocated_above() byte-based
We are gradually moving away from sector-based interfaces, towards
byte-based. In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.
Changing the signature of the function to use int64_t *pnum ensures
that the compiler enforces that all callers are updated. For now,
the io.c layer still assert()s that all callers are sector-aligned,
but that can be relaxed when a later patch implements byte-based
block status. Therefore, for the most part this patch is just the
addition of scaling at the callers followed by inverse scaling at
bdrv_is_allocated(). But some code, particularly stream_run(),
gets a lot simpler because it no longer has to mess with sectors.
Leave comments where we can further simplify by switching to
byte-based iterations, once later patches eliminate the need for
sector-aligned operations.
For ease of review, bdrv_is_allocated() was tackled separately.
Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-07-07 15:44:59 +03:00
|
|
|
int64_t offset, int64_t bytes, int64_t *pnum);
|
2007-11-11 05:51:17 +03:00
|
|
|
|
2016-06-24 01:37:26 +03:00
|
|
|
bool bdrv_is_read_only(BlockDriverState *bs);
|
2017-08-03 18:02:58 +03:00
|
|
|
int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
|
|
|
|
bool ignore_allow_rdw, Error **errp);
|
block: do not set BDS read_only if copy_on_read enabled
A few block drivers will set the BDS read_only flag from their
.bdrv_open() function. This means the bs->read_only flag could
be set after we enable copy_on_read, as the BDRV_O_COPY_ON_READ
flag check occurs prior to the call to bdrv->bdrv_open().
This adds an error return to bdrv_set_read_only(), and an error will be
returned if we try to set the BDS to read_only while copy_on_read is
enabled.
This patch also changes the behavior of vvfat. Before, vvfat could
override the drive 'readonly' flag with its own, internal 'rw' flag.
For instance, this -drive parameter would result in a writable image:
"-drive format=vvfat,dir=/tmp/vvfat,rw,if=virtio,readonly=on"
This is not correct. Now, attempting to use the above -drive parameter
will result in an error (i.e., 'rw' is incompatible with 'readonly=on').
Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 0c5b4c1cc2c651471b131f21376dfd5ea24d2196.1491597120.git.jcody@redhat.com
2017-04-07 23:55:26 +03:00
|
|
|
int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp);
|
2016-06-24 01:37:26 +03:00
|
|
|
bool bdrv_is_sg(BlockDriverState *bs);
|
2015-10-19 18:53:11 +03:00
|
|
|
bool bdrv_is_inserted(BlockDriverState *bs);
|
2011-09-06 20:58:47 +04:00
|
|
|
void bdrv_lock_medium(BlockDriverState *bs, bool locked);
|
2012-02-03 22:24:53 +04:00
|
|
|
void bdrv_eject(BlockDriverState *bs, bool eject_flag);
|
2012-06-13 12:11:48 +04:00
|
|
|
const char *bdrv_get_format_name(BlockDriverState *bs);
|
2014-01-24 00:31:32 +04:00
|
|
|
BlockDriverState *bdrv_find_node(const char *node_name);
|
2015-04-17 14:52:43 +03:00
|
|
|
BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp);
|
2014-01-24 00:31:35 +04:00
|
|
|
BlockDriverState *bdrv_lookup_bs(const char *device,
|
|
|
|
const char *node_name,
|
|
|
|
Error **errp);
|
2014-06-25 23:40:09 +04:00
|
|
|
bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
|
2014-10-31 06:32:54 +03:00
|
|
|
BlockDriverState *bdrv_next_node(BlockDriverState *bs);
|
2016-05-20 19:49:07 +03:00
|
|
|
|
|
|
|
typedef struct BdrvNextIterator {
|
|
|
|
enum {
|
|
|
|
BDRV_NEXT_BACKEND_ROOTS,
|
|
|
|
BDRV_NEXT_MONITOR_OWNED,
|
|
|
|
} phase;
|
|
|
|
BlockBackend *blk;
|
|
|
|
BlockDriverState *bs;
|
|
|
|
} BdrvNextIterator;
|
|
|
|
|
|
|
|
BlockDriverState *bdrv_first(BdrvNextIterator *it);
|
|
|
|
BlockDriverState *bdrv_next(BdrvNextIterator *it);
|
2017-11-10 20:25:45 +03:00
|
|
|
void bdrv_next_cleanup(BdrvNextIterator *it);
|
2016-05-20 19:49:07 +03:00
|
|
|
|
2016-03-16 21:54:41 +03:00
|
|
|
BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
|
2016-06-24 01:37:26 +03:00
|
|
|
bool bdrv_is_encrypted(BlockDriverState *bs);
|
2007-11-11 05:51:17 +03:00
|
|
|
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
|
|
|
|
void *opaque);
|
2014-10-31 06:32:55 +03:00
|
|
|
const char *bdrv_get_node_name(const BlockDriverState *bs);
|
2014-10-07 15:59:11 +04:00
|
|
|
const char *bdrv_get_device_name(const BlockDriverState *bs);
|
2015-04-08 12:29:18 +03:00
|
|
|
const char *bdrv_get_device_or_node_name(const BlockDriverState *bs);
|
2012-06-05 18:49:24 +04:00
|
|
|
int bdrv_get_flags(BlockDriverState *bs);
|
2007-11-11 05:51:17 +03:00
|
|
|
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
|
2013-10-09 12:46:16 +04:00
|
|
|
ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs);
|
2013-01-21 20:09:42 +04:00
|
|
|
void bdrv_round_to_clusters(BlockDriverState *bs,
|
2017-10-12 06:46:59 +03:00
|
|
|
int64_t offset, int64_t bytes,
|
2016-06-02 12:41:52 +03:00
|
|
|
int64_t *cluster_offset,
|
2017-10-12 06:46:59 +03:00
|
|
|
int64_t *cluster_bytes);
|
2007-11-11 05:51:17 +03:00
|
|
|
|
2009-03-06 02:00:48 +03:00
|
|
|
const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
|
2007-11-11 05:51:17 +03:00
|
|
|
void bdrv_get_backing_filename(BlockDriverState *bs,
|
|
|
|
char *filename, int filename_size);
|
2012-05-08 18:51:50 +04:00
|
|
|
void bdrv_get_full_backing_filename(BlockDriverState *bs,
|
2014-11-26 19:20:26 +03:00
|
|
|
char *dest, size_t sz, Error **errp);
|
2014-11-26 19:20:25 +03:00
|
|
|
void bdrv_get_full_backing_filename_from_filename(const char *backed,
|
|
|
|
const char *backing,
|
2014-11-26 19:20:26 +03:00
|
|
|
char *dest, size_t sz,
|
|
|
|
Error **errp);
|
2007-11-11 05:51:17 +03:00
|
|
|
|
2014-12-03 16:57:22 +03:00
|
|
|
int path_has_protocol(const char *path);
|
2007-11-11 05:51:17 +03:00
|
|
|
int path_is_absolute(const char *path);
|
|
|
|
void path_combine(char *dest, int dest_size,
|
|
|
|
const char *base_path,
|
|
|
|
const char *filename);
|
|
|
|
|
2016-06-09 17:50:16 +03:00
|
|
|
int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
|
2013-04-05 23:27:53 +04:00
|
|
|
int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
|
2009-07-11 01:11:57 +04:00
|
|
|
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
|
|
|
|
int64_t pos, int size);
|
2009-04-05 23:10:55 +04:00
|
|
|
|
2009-07-11 01:11:57 +04:00
|
|
|
int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
|
|
|
|
int64_t pos, int size);
|
2009-04-05 23:10:55 +04:00
|
|
|
|
2012-11-30 16:52:09 +04:00
|
|
|
void bdrv_img_create(const char *filename, const char *fmt,
|
|
|
|
const char *base_filename, const char *base_fmt,
|
2013-02-13 12:09:40 +04:00
|
|
|
char *options, uint64_t img_size, int flags,
|
2017-04-21 15:27:01 +03:00
|
|
|
bool quiet, Error **errp);
|
2010-12-16 15:52:15 +03:00
|
|
|
|
2013-11-28 13:23:32 +04:00
|
|
|
/* Returns the alignment in bytes that is required so that no bounce buffer
|
|
|
|
* is required throughout the stack */
|
2015-05-12 17:30:55 +03:00
|
|
|
size_t bdrv_min_mem_align(BlockDriverState *bs);
|
|
|
|
/* Returns optimal alignment in bytes for bounce buffer */
|
2013-11-28 13:23:32 +04:00
|
|
|
size_t bdrv_opt_mem_align(BlockDriverState *bs);
|
2011-08-03 17:08:19 +04:00
|
|
|
void *qemu_blockalign(BlockDriverState *bs, size_t size);
|
2014-10-22 16:09:27 +04:00
|
|
|
void *qemu_blockalign0(BlockDriverState *bs, size_t size);
|
2014-05-20 14:24:05 +04:00
|
|
|
void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
|
2014-10-22 16:09:27 +04:00
|
|
|
void *qemu_try_blockalign0(BlockDriverState *bs, size_t size);
|
2013-01-11 19:41:27 +04:00
|
|
|
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
|
2011-08-03 17:08:19 +04:00
|
|
|
|
2011-11-28 20:08:47 +04:00
|
|
|
void bdrv_enable_copy_on_read(BlockDriverState *bs);
|
|
|
|
void bdrv_disable_copy_on_read(BlockDriverState *bs);
|
|
|
|
|
2013-08-23 05:14:46 +04:00
|
|
|
void bdrv_ref(BlockDriverState *bs);
|
|
|
|
void bdrv_unref(BlockDriverState *bs);
|
2015-06-15 14:51:04 +03:00
|
|
|
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
|
2016-05-10 10:36:38 +03:00
|
|
|
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
|
|
|
|
BlockDriverState *child_bs,
|
|
|
|
const char *child_name,
|
2016-12-21 00:21:17 +03:00
|
|
|
const BdrvChildRole *child_role,
|
|
|
|
Error **errp);
|
2010-03-15 19:27:00 +03:00
|
|
|
|
2014-05-23 17:29:42 +04:00
|
|
|
bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
|
|
|
|
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
|
|
|
|
void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason);
|
|
|
|
void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
|
|
|
|
void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
|
|
|
|
bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
|
|
|
|
|
2015-06-16 15:19:22 +03:00
|
|
|
/*
 * BLKDBG_EVENT:
 * @child: pointer expression whose target has a 'bs' member; may be NULL
 * @evt: event value forwarded to bdrv_debug_event()
 *
 * Calls bdrv_debug_event() with @child's bs and @evt when @child is
 * non-NULL; does nothing otherwise.
 *
 * Both arguments are parenthesized in the expansion so that expression
 * arguments (for example a conditional expression) bind as a unit.
 * Note that @child is evaluated twice when it is non-NULL, so it must not
 * have side effects.
 */
#define BLKDBG_EVENT(child, evt) \
    do { \
        if (child) { \
            bdrv_debug_event((child)->bs, (evt)); \
        } \
    } while (0)
|
|
|
|
|
2015-11-18 11:52:54 +03:00
|
|
|
void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event);
|
2010-03-15 19:27:00 +03:00
|
|
|
|
2012-12-06 17:32:58 +04:00
|
|
|
int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
|
|
|
|
const char *tag);
|
2013-11-20 06:01:54 +04:00
|
|
|
int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
|
2012-12-06 17:32:58 +04:00
|
|
|
int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
|
|
|
|
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
|
|
|
|
|
2014-05-15 15:22:05 +04:00
|
|
|
/**
|
|
|
|
* bdrv_get_aio_context:
|
|
|
|
*
|
|
|
|
* Returns: the currently bound #AioContext
|
|
|
|
*/
|
|
|
|
AioContext *bdrv_get_aio_context(BlockDriverState *bs);
|
|
|
|
|
2017-04-10 15:09:25 +03:00
|
|
|
/**
|
|
|
|
* Transfer control to @co in the aio context of @bs
|
|
|
|
*/
|
|
|
|
void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co);
|
|
|
|
|
2014-05-08 18:34:37 +04:00
|
|
|
/**
|
|
|
|
* bdrv_set_aio_context:
|
|
|
|
*
|
|
|
|
* Changes the #AioContext used for fd handlers, timers, and BHs by this
|
|
|
|
* BlockDriverState and all its children.
|
|
|
|
*
|
block: Forbid bdrv_set_aio_context outside BQL
Even if the caller has both the old and the new AioContexts, there can
be a deadlock, due to the leading bdrv_drain_all.
Suppose there are four io threads (A, B, A0, B0) with A and B owning a
BDS for each (bs_a, bs_b); Now A wants to move bs_a to iothread A0, and
B wants to move bs_b to B0, at the same time:
iothread A iothread B
--------------------------------------------------------------------------
aio_context_acquire(A0) /* OK */ aio_context_acquire(B0) /* OK */
bdrv_set_aio_context(bs_a, A0) bdrv_set_aio_context(bs_b, B0)
-> bdrv_drain_all() -> bdrv_drain_all()
-> acquire A /* OK */ -> acquire A /* blocked */
-> acquire B /* blocked */ -> acquire B
... ...
Deadlock happens because A is waiting for B, and B is waiting for A.
Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1423969591-23646-2-git-send-email-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-02-15 06:06:30 +03:00
|
|
|
* This function must be called with iothread lock held.
|
2014-05-08 18:34:37 +04:00
|
|
|
*/
|
|
|
|
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context);
|
2015-02-16 14:47:54 +03:00
|
|
|
int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
|
|
|
|
int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
|
2014-05-08 18:34:37 +04:00
|
|
|
|
2014-07-04 14:04:33 +04:00
|
|
|
void bdrv_io_plug(BlockDriverState *bs);
|
|
|
|
void bdrv_io_unplug(BlockDriverState *bs);
|
|
|
|
|
2017-04-08 06:34:45 +03:00
|
|
|
/**
|
|
|
|
* bdrv_parent_drained_begin:
|
|
|
|
*
|
|
|
|
* Begin a quiesced section of all users of @bs. This is part of
|
|
|
|
* bdrv_drained_begin.
|
|
|
|
*/
|
2017-12-07 15:03:13 +03:00
|
|
|
void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore);
|
2017-04-08 06:34:45 +03:00
|
|
|
|
|
|
|
/**
|
|
|
|
* bdrv_parent_drained_end:
|
|
|
|
*
|
|
|
|
* End a quiesced section of all users of @bs. This is part of
|
|
|
|
* bdrv_drained_end.
|
|
|
|
*/
|
2017-12-07 15:03:13 +03:00
|
|
|
void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore);
|
2017-04-08 06:34:45 +03:00
|
|
|
|
2015-10-23 06:08:09 +03:00
|
|
|
/**
|
|
|
|
* bdrv_drained_begin:
|
|
|
|
*
|
|
|
|
* Begin a quiesced section for exclusive access to the BDS, by disabling
|
|
|
|
* external request sources including NBD server and device model. Note that
|
|
|
|
* this doesn't block timers or coroutines from submitting more requests, which
|
|
|
|
* means block_job_pause is still necessary.
|
|
|
|
*
|
|
|
|
* This function can be recursive.
|
|
|
|
*/
|
|
|
|
void bdrv_drained_begin(BlockDriverState *bs);
|
|
|
|
|
2017-12-06 19:05:44 +03:00
|
|
|
/**
|
|
|
|
* Like bdrv_drained_begin, but recursively begins a quiesced section for
|
|
|
|
* exclusive access to all child nodes as well.
|
|
|
|
*/
|
|
|
|
void bdrv_subtree_drained_begin(BlockDriverState *bs);
|
|
|
|
|
2015-10-23 06:08:09 +03:00
|
|
|
/**
|
|
|
|
* bdrv_drained_end:
|
|
|
|
*
|
|
|
|
* End a quiescent section started by bdrv_drained_begin().
|
|
|
|
*/
|
|
|
|
void bdrv_drained_end(BlockDriverState *bs);
|
|
|
|
|
2017-12-06 19:05:44 +03:00
|
|
|
/**
|
|
|
|
* End a quiescent section started by bdrv_subtree_drained_begin().
|
|
|
|
*/
|
|
|
|
void bdrv_subtree_drained_end(BlockDriverState *bs);
|
|
|
|
|
2016-05-10 10:36:37 +03:00
|
|
|
void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child,
|
|
|
|
Error **errp);
|
|
|
|
void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
|
|
|
|
|
2017-06-28 15:05:21 +03:00
|
|
|
bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
|
|
|
|
uint32_t granularity, Error **errp);
|
2018-01-16 09:08:56 +03:00
|
|
|
/**
|
|
|
|
*
|
|
|
|
* bdrv_register_buf/bdrv_unregister_buf:
|
|
|
|
*
|
|
|
|
* Register/unregister a buffer for I/O. For example, VFIO drivers are
|
|
|
|
* interested to know the memory areas that would later be used for I/O, so
|
|
|
|
* that they can prepare IOMMU mapping etc., to get better performance.
|
|
|
|
*/
|
|
|
|
void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
|
|
|
|
void bdrv_unregister_buf(BlockDriverState *bs, void *host);
|
2012-07-10 13:12:40 +04:00
|
|
|
#endif
|