Merge remote-tracking branch 'kwolf/for-anthony' into staging
# By Paolo Bonzini (14) and others
# Via Kevin Wolf
* kwolf/for-anthony: (24 commits)
  ide: Add fall through annotations
  block: Create proper size file for disk mirror
  ahci: Add migration support
  ahci: Change data types in preparation for migration
  ahci: Remove unused AHCIDevice fields
  hbitmap: add assertion on hbitmap_iter_init
  mirror: do nothing on zero-sized disk
  block/vdi: Check for bad signature
  block/vdi: Improved return values from vdi_open
  block/vdi: Improve debug output for signature
  block: Use error code EMEDIUMTYPE for wrong format in some block drivers
  block: Add special error code for wrong format
  mirror: support arbitrarily-sized iterations
  mirror: support more than one in-flight AIO operation
  mirror: add buf-size argument to drive-mirror
  mirror: switch mirror_iteration to AIO
  mirror: allow customizing the granularity
  block: allow customizing the granularity of the dirty bitmap
  block: return count of dirty sectors, not chunks
  mirror: perform COW if the cluster size is bigger than the granularity
  ...
commit 503cb22e05
block-migration.c

@@ -23,7 +23,8 @@
 #include "sysemu/blockdev.h"
 #include <assert.h>
 
-#define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)
+#define BLOCK_SIZE (1 << 20)
+#define BDRV_SECTORS_PER_DIRTY_CHUNK (BLOCK_SIZE >> BDRV_SECTOR_BITS)
 
 #define BLK_MIG_FLAG_DEVICE_BLOCK 0x01
 #define BLK_MIG_FLAG_EOS 0x02
@@ -254,7 +255,7 @@ static void set_dirty_tracking(int enable)
     BlkMigDevState *bmds;
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        bdrv_set_dirty_tracking(bmds->bs, enable);
+        bdrv_set_dirty_tracking(bmds->bs, enable ? BLOCK_SIZE : 0);
     }
 }
 
@@ -478,7 +479,7 @@ static int64_t get_remaining_dirty(void)
         dirty += bdrv_get_dirty_count(bmds->bs);
     }
 
-    return dirty * BLOCK_SIZE;
+    return dirty << BDRV_SECTOR_BITS;
 }
 
 static void blk_mig_cleanup(void)
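The unit change above (dirty counts are now sectors, not chunks) is easy to misread, so here is a minimal self-contained sketch of the same arithmetic, assuming QEMU's 512-byte sectors; the program is illustrative only and not part of the patch:

    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    #define BDRV_SECTOR_BITS 9                              /* 512-byte sectors */
    #define BLOCK_SIZE (1 << 20)                            /* 1 MiB migration chunk */
    #define BDRV_SECTORS_PER_DIRTY_CHUNK (BLOCK_SIZE >> BDRV_SECTOR_BITS)

    int main(void)
    {
        /* One migration chunk covers 2048 sectors of 512 bytes each. */
        printf("sectors per chunk: %d\n", BDRV_SECTORS_PER_DIRTY_CHUNK);

        /* bdrv_get_dirty_count() now returns sectors, so the remaining
         * dirty amount in bytes is a plain shift, not a chunk multiply. */
        int64_t dirty_sectors = 4096;
        printf("remaining dirty bytes: %" PRId64 "\n",
               dirty_sectors << BDRV_SECTOR_BITS);
        return 0;
    }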
block.c (124 changed lines)
@@ -1286,7 +1286,6 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
     bs_dest->iostatus = bs_src->iostatus;
 
     /* dirty bitmap */
-    bs_dest->dirty_count = bs_src->dirty_count;
     bs_dest->dirty_bitmap = bs_src->dirty_bitmap;
 
     /* job */
@@ -1674,10 +1673,10 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
 /**
  * Round a region to cluster boundaries
  */
-static void round_to_clusters(BlockDriverState *bs,
-                              int64_t sector_num, int nb_sectors,
-                              int64_t *cluster_sector_num,
-                              int *cluster_nb_sectors)
+void bdrv_round_to_clusters(BlockDriverState *bs,
+                            int64_t sector_num, int nb_sectors,
+                            int64_t *cluster_sector_num,
+                            int *cluster_nb_sectors)
 {
     BlockDriverInfo bdi;
 
@@ -1719,8 +1718,8 @@ static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
      * CoR read and write operations are atomic and guest writes cannot
      * interleave between them.
      */
-    round_to_clusters(bs, sector_num, nb_sectors,
-                      &cluster_sector_num, &cluster_nb_sectors);
+    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
+                           &cluster_sector_num, &cluster_nb_sectors);
 
     do {
         retry = false;
@@ -2035,36 +2034,6 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
     return ret;
 }
 
-#define BITS_PER_LONG (sizeof(unsigned long) * 8)
-
-static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
-                             int nb_sectors, int dirty)
-{
-    int64_t start, end;
-    unsigned long val, idx, bit;
-
-    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
-    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
-
-    for (; start <= end; start++) {
-        idx = start / BITS_PER_LONG;
-        bit = start % BITS_PER_LONG;
-        val = bs->dirty_bitmap[idx];
-        if (dirty) {
-            if (!(val & (1UL << bit))) {
-                bs->dirty_count++;
-                val |= 1UL << bit;
-            }
-        } else {
-            if (val & (1UL << bit)) {
-                bs->dirty_count--;
-                val &= ~(1UL << bit);
-            }
-        }
-        bs->dirty_bitmap[idx] = val;
-    }
-}
-
 /* Return < 0 if error. Important errors are:
   -EIO generic I/O error (may happen for all errors)
   -ENOMEDIUM No media inserted.
@@ -2216,8 +2185,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
     /* Cover entire cluster so no additional backing file I/O is required when
      * allocating cluster in the image file.
      */
-    round_to_clusters(bs, sector_num, nb_sectors,
-                      &cluster_sector_num, &cluster_nb_sectors);
+    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
+                           &cluster_sector_num, &cluster_nb_sectors);
 
     trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
                                    cluster_sector_num, cluster_nb_sectors);
@@ -2863,8 +2832,9 @@ BlockInfo *bdrv_query_info(BlockDriverState *bs)
     if (bs->dirty_bitmap) {
         info->has_dirty = true;
         info->dirty = g_malloc0(sizeof(*info->dirty));
-        info->dirty->count = bdrv_get_dirty_count(bs) *
-            BDRV_SECTORS_PER_DIRTY_CHUNK * BDRV_SECTOR_SIZE;
+        info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE;
+        info->dirty->granularity =
+            ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap));
     }
 
     if (bs->drv) {
@@ -4173,7 +4143,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
     }
 
     if (bs->dirty_bitmap) {
-        set_dirty_bitmap(bs, sector_num, nb_sectors, 0);
+        bdrv_reset_dirty(bs, sector_num, nb_sectors);
     }
 
     if (bs->drv->bdrv_co_discard) {
@@ -4331,22 +4301,20 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
     return true;
 }
 
-void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
+void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity)
 {
     int64_t bitmap_size;
 
-    bs->dirty_count = 0;
-    if (enable) {
-        if (!bs->dirty_bitmap) {
-            bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
-                    BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG - 1;
-            bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;
+    assert((granularity & (granularity - 1)) == 0);
 
-            bs->dirty_bitmap = g_new0(unsigned long, bitmap_size);
-        }
+    if (granularity) {
+        granularity >>= BDRV_SECTOR_BITS;
+        assert(!bs->dirty_bitmap);
+        bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
+        bs->dirty_bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
     } else {
         if (bs->dirty_bitmap) {
-            g_free(bs->dirty_bitmap);
+            hbitmap_free(bs->dirty_bitmap);
             bs->dirty_bitmap = NULL;
         }
     }
@@ -4354,67 +4322,37 @@ void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
 
 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
 {
-    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
-
-    if (bs->dirty_bitmap &&
-        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
-        return !!(bs->dirty_bitmap[chunk / BITS_PER_LONG] &
-            (1UL << (chunk % BITS_PER_LONG)));
+    if (bs->dirty_bitmap) {
+        return hbitmap_get(bs->dirty_bitmap, sector);
     } else {
         return 0;
     }
 }
 
-int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector)
+void bdrv_dirty_iter_init(BlockDriverState *bs, HBitmapIter *hbi)
 {
-    int64_t chunk;
-    int bit, elem;
-
-    /* Avoid an infinite loop. */
-    assert(bs->dirty_count > 0);
-
-    sector = (sector | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
-    chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
-
-    QEMU_BUILD_BUG_ON(sizeof(bs->dirty_bitmap[0]) * 8 != BITS_PER_LONG);
-    elem = chunk / BITS_PER_LONG;
-    bit = chunk % BITS_PER_LONG;
-    for (;;) {
-        if (sector >= bs->total_sectors) {
-            sector = 0;
-            bit = elem = 0;
-        }
-        if (bit == 0 && bs->dirty_bitmap[elem] == 0) {
-            sector += BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;
-            elem++;
-        } else {
-            if (bs->dirty_bitmap[elem] & (1UL << bit)) {
-                return sector;
-            }
-            sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
-            if (++bit == BITS_PER_LONG) {
-                bit = 0;
-                elem++;
-            }
-        }
-    }
+    hbitmap_iter_init(hbi, bs->dirty_bitmap, 0);
 }
 
 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                     int nr_sectors)
 {
-    set_dirty_bitmap(bs, cur_sector, nr_sectors, 1);
+    hbitmap_set(bs->dirty_bitmap, cur_sector, nr_sectors);
 }
 
 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                       int nr_sectors)
 {
-    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
+    hbitmap_reset(bs->dirty_bitmap, cur_sector, nr_sectors);
 }
 
 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
 {
-    return bs->dirty_count;
+    if (bs->dirty_bitmap) {
+        return hbitmap_count(bs->dirty_bitmap);
+    } else {
+        return 0;
+    }
 }
 
 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
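The new bdrv_set_dirty_tracking() converts its byte granularity into the log2-of-sectors form that hbitmap_alloc() expects via ffs(granularity) - 1. A small self-contained sketch of that conversion; the 64 KiB value is just an example:

    #include <stdio.h>
    #include <strings.h>   /* ffs() */

    #define BDRV_SECTOR_BITS 9

    int main(void)
    {
        int granularity = 1 << 16;              /* 64 KiB, in bytes */
        granularity >>= BDRV_SECTOR_BITS;       /* now 128 sectors per bit */
        /* For a power of two, ffs(x) - 1 == log2(x); hbitmap_alloc() takes
         * the granularity as that log2 value. */
        printf("hbitmap granularity (log2 sectors): %d\n", ffs(granularity) - 1);
        return 0;
    }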
block/bochs.c

@@ -126,7 +126,7 @@ static int bochs_open(BlockDriverState *bs, int flags)
         strcmp(bochs.subtype, GROWING_TYPE) ||
         ((le32_to_cpu(bochs.version) != HEADER_VERSION) &&
         (le32_to_cpu(bochs.version) != HEADER_V1))) {
-        goto fail;
+        return -EMEDIUMTYPE;
     }
 
     if (le32_to_cpu(bochs.version) == HEADER_V1) {
block/cow.c

@@ -73,7 +73,7 @@ static int cow_open(BlockDriverState *bs, int flags)
     }
 
     if (be32_to_cpu(cow_header.magic) != COW_MAGIC) {
-        ret = -EINVAL;
+        ret = -EMEDIUMTYPE;
        goto fail;
     }
 
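The bochs and cow hunks above (and the qcow, qcow2, qed and vmdk hunks further down) all adopt the same convention: a mismatched container magic now returns -EMEDIUMTYPE, so callers can tell "valid medium, wrong format" apart from a real I/O failure. A minimal self-contained sketch of the pattern; the DEMO magic and the probe function are invented purely for illustration:

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #ifndef EMEDIUMTYPE
    #define EMEDIUMTYPE 4098   /* fallback value, mirroring qemu's osdep.h */
    #endif

    /* Hypothetical prober: accept only a specific 4-byte magic. */
    static int demo_open(const uint8_t *header, size_t len)
    {
        static const uint8_t magic[4] = { 'D', 'E', 'M', 'O' };

        if (len < sizeof(magic)) {
            return -EIO;               /* short read: a real I/O problem */
        }
        if (memcmp(header, magic, sizeof(magic)) != 0) {
            return -EMEDIUMTYPE;       /* valid medium, wrong format */
        }
        return 0;
    }

    int main(void)
    {
        int ret = demo_open((const uint8_t *)"QCOW", 4);
        if (ret == -EMEDIUMTYPE) {
            printf("not in DEMO format\n");   /* caller can word this nicely */
        } else if (ret < 0) {
            printf("error: %s\n", strerror(-ret));
        }
        return 0;
    }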
block/mirror.c (384 changed lines)
@@ -15,17 +15,17 @@
 #include "block/blockjob.h"
 #include "block/block_int.h"
 #include "qemu/ratelimit.h"
+#include "qemu/bitmap.h"
 
-enum {
-    /*
-     * Size of data buffer for populating the image file. This should be large
-     * enough to process multiple clusters in a single call, so that populating
-     * contiguous regions of the image is efficient.
-     */
-    BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */
-};
+#define SLICE_TIME 100000000ULL /* ns */
+#define MAX_IN_FLIGHT 16
 
-#define SLICE_TIME 100000000ULL /* ns */
+/* The mirroring buffer is a list of granularity-sized chunks.
+ * Free chunks are organized in a list.
+ */
+typedef struct MirrorBuffer {
+    QSIMPLEQ_ENTRY(MirrorBuffer) next;
+} MirrorBuffer;
 
 typedef struct MirrorBlockJob {
     BlockJob common;
@@ -36,9 +36,26 @@ typedef struct MirrorBlockJob {
     bool synced;
     bool should_complete;
     int64_t sector_num;
+    int64_t granularity;
+    size_t buf_size;
+    unsigned long *cow_bitmap;
+    HBitmapIter hbi;
     uint8_t *buf;
+    QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
+    int buf_free_count;
+
+    unsigned long *in_flight_bitmap;
+    int in_flight;
+    int ret;
 } MirrorBlockJob;
 
+typedef struct MirrorOp {
+    MirrorBlockJob *s;
+    QEMUIOVector qiov;
+    int64_t sector_num;
+    int nb_sectors;
+} MirrorOp;
+
 static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
                                             int error)
 {
@@ -52,51 +69,234 @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
     }
 }
 
-static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
-                                         BlockErrorAction *p_action)
-{
-    BlockDriverState *source = s->common.bs;
-    BlockDriverState *target = s->target;
-    QEMUIOVector qiov;
-    int ret, nb_sectors;
-    int64_t end;
-    struct iovec iov;
-
-    end = s->common.len >> BDRV_SECTOR_BITS;
-    s->sector_num = bdrv_get_next_dirty(source, s->sector_num);
-    nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num);
-    bdrv_reset_dirty(source, s->sector_num, nb_sectors);
-
-    /* Copy the dirty cluster. */
-    iov.iov_base = s->buf;
-    iov.iov_len = nb_sectors * 512;
-    qemu_iovec_init_external(&qiov, &iov, 1);
-
-    trace_mirror_one_iteration(s, s->sector_num, nb_sectors);
-    ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov);
-    if (ret < 0) {
-        *p_action = mirror_error_action(s, true, -ret);
-        goto fail;
-    }
-    ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov);
-    if (ret < 0) {
-        *p_action = mirror_error_action(s, false, -ret);
-        s->synced = false;
-        goto fail;
-    }
-    return 0;
-
-fail:
-    /* Try again later. */
-    bdrv_set_dirty(source, s->sector_num, nb_sectors);
-    return ret;
-}
+static void mirror_iteration_done(MirrorOp *op, int ret)
+{
+    MirrorBlockJob *s = op->s;
+    struct iovec *iov;
+    int64_t chunk_num;
+    int i, nb_chunks, sectors_per_chunk;
+
+    trace_mirror_iteration_done(s, op->sector_num, op->nb_sectors, ret);
+
+    s->in_flight--;
+    iov = op->qiov.iov;
+    for (i = 0; i < op->qiov.niov; i++) {
+        MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base;
+        QSIMPLEQ_INSERT_TAIL(&s->buf_free, buf, next);
+        s->buf_free_count++;
+    }
+
+    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+    chunk_num = op->sector_num / sectors_per_chunk;
+    nb_chunks = op->nb_sectors / sectors_per_chunk;
+    bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
+    if (s->cow_bitmap && ret >= 0) {
+        bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
+    }
+
+    g_slice_free(MirrorOp, op);
+    qemu_coroutine_enter(s->common.co, NULL);
+}
+
+static void mirror_write_complete(void *opaque, int ret)
+{
+    MirrorOp *op = opaque;
+    MirrorBlockJob *s = op->s;
+    if (ret < 0) {
+        BlockDriverState *source = s->common.bs;
+        BlockErrorAction action;
+
+        bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
+        action = mirror_error_action(s, false, -ret);
+        if (action == BDRV_ACTION_REPORT && s->ret >= 0) {
+            s->ret = ret;
+        }
+    }
+    mirror_iteration_done(op, ret);
+}
+
+static void mirror_read_complete(void *opaque, int ret)
+{
+    MirrorOp *op = opaque;
+    MirrorBlockJob *s = op->s;
+    if (ret < 0) {
+        BlockDriverState *source = s->common.bs;
+        BlockErrorAction action;
+
+        bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
+        action = mirror_error_action(s, true, -ret);
+        if (action == BDRV_ACTION_REPORT && s->ret >= 0) {
+            s->ret = ret;
+        }
+
+        mirror_iteration_done(op, ret);
+        return;
+    }
+    bdrv_aio_writev(s->target, op->sector_num, &op->qiov, op->nb_sectors,
+                    mirror_write_complete, op);
+}
+
+static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
+{
+    BlockDriverState *source = s->common.bs;
+    int nb_sectors, sectors_per_chunk, nb_chunks;
+    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
+    MirrorOp *op;
+
+    s->sector_num = hbitmap_iter_next(&s->hbi);
+    if (s->sector_num < 0) {
+        bdrv_dirty_iter_init(source, &s->hbi);
+        s->sector_num = hbitmap_iter_next(&s->hbi);
+        trace_mirror_restart_iter(s, bdrv_get_dirty_count(source));
+        assert(s->sector_num >= 0);
+    }
+
+    hbitmap_next_sector = s->sector_num;
+    sector_num = s->sector_num;
+    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+    end = s->common.len >> BDRV_SECTOR_BITS;
+
+    /* Extend the QEMUIOVector to include all adjacent blocks that will
+     * be copied in this operation.
+     *
+     * We have to do this if we have no backing file yet in the destination,
+     * and the cluster size is very large. Then we need to do COW ourselves.
+     * The first time a cluster is copied, copy it entirely. Note that,
+     * because both the granularity and the cluster size are powers of two,
+     * the number of sectors to copy cannot exceed one cluster.
+     *
+     * We also want to extend the QEMUIOVector to include more adjacent
+     * dirty blocks if possible, to limit the number of I/O operations and
+     * run efficiently even with a small granularity.
+     */
+    nb_chunks = 0;
+    nb_sectors = 0;
+    next_sector = sector_num;
+    next_chunk = sector_num / sectors_per_chunk;
+
+    /* Wait for I/O to this cluster (from a previous iteration) to be done. */
+    while (test_bit(next_chunk, s->in_flight_bitmap)) {
+        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
+        qemu_coroutine_yield();
+    }
+
+    do {
+        int added_sectors, added_chunks;
+
+        if (!bdrv_get_dirty(source, next_sector) ||
+            test_bit(next_chunk, s->in_flight_bitmap)) {
+            assert(nb_sectors > 0);
+            break;
+        }
+
+        added_sectors = sectors_per_chunk;
+        if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
+            bdrv_round_to_clusters(s->target,
+                                   next_sector, added_sectors,
+                                   &next_sector, &added_sectors);
+
+            /* On the first iteration, the rounding may make us copy
+             * sectors before the first dirty one.
+             */
+            if (next_sector < sector_num) {
+                assert(nb_sectors == 0);
+                sector_num = next_sector;
+                next_chunk = next_sector / sectors_per_chunk;
+            }
+        }
+
+        added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
+        added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;
+
+        /* When doing COW, it may happen that there is not enough space for
+         * a full cluster. Wait if that is the case.
+         */
+        while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
+            trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
+            qemu_coroutine_yield();
+        }
+        if (s->buf_free_count < nb_chunks + added_chunks) {
+            trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
+            break;
+        }
+
+        /* We have enough free space to copy these sectors. */
+        bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);
+
+        nb_sectors += added_sectors;
+        nb_chunks += added_chunks;
+        next_sector += added_sectors;
+        next_chunk += added_chunks;
+    } while (next_sector < end);
+
+    /* Allocate a MirrorOp that is used as an AIO callback. */
+    op = g_slice_new(MirrorOp);
+    op->s = s;
+    op->sector_num = sector_num;
+    op->nb_sectors = nb_sectors;
+
+    /* Now make a QEMUIOVector taking enough granularity-sized chunks
+     * from s->buf_free.
+     */
+    qemu_iovec_init(&op->qiov, nb_chunks);
+    next_sector = sector_num;
+    while (nb_chunks-- > 0) {
+        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
+        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
+        s->buf_free_count--;
+        qemu_iovec_add(&op->qiov, buf, s->granularity);
+
+        /* Advance the HBitmapIter in parallel, so that we do not examine
+         * the same sector twice.
+         */
+        if (next_sector > hbitmap_next_sector && bdrv_get_dirty(source, next_sector)) {
+            hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
+        }
+
+        next_sector += sectors_per_chunk;
+    }
+
+    bdrv_reset_dirty(source, sector_num, nb_sectors);
+
+    /* Copy the dirty cluster. */
+    s->in_flight++;
+    trace_mirror_one_iteration(s, sector_num, nb_sectors);
+    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
+                   mirror_read_complete, op);
+}
+
+static void mirror_free_init(MirrorBlockJob *s)
+{
+    int granularity = s->granularity;
+    size_t buf_size = s->buf_size;
+    uint8_t *buf = s->buf;
+
+    assert(s->buf_free_count == 0);
+    QSIMPLEQ_INIT(&s->buf_free);
+    while (buf_size != 0) {
+        MirrorBuffer *cur = (MirrorBuffer *)buf;
+        QSIMPLEQ_INSERT_TAIL(&s->buf_free, cur, next);
+        s->buf_free_count++;
+        buf_size -= granularity;
+        buf += granularity;
+    }
+}
+
+static void mirror_drain(MirrorBlockJob *s)
+{
+    while (s->in_flight > 0) {
+        qemu_coroutine_yield();
+    }
+}
 
 static void coroutine_fn mirror_run(void *opaque)
 {
     MirrorBlockJob *s = opaque;
     BlockDriverState *bs = s->common.bs;
-    int64_t sector_num, end;
+    int64_t sector_num, end, sectors_per_chunk, length;
+    uint64_t last_pause_ns;
+    BlockDriverInfo bdi;
+    char backing_filename[1024];
     int ret = 0;
     int n;
 
@@ -105,20 +305,39 @@ static void coroutine_fn mirror_run(void *opaque)
     }
 
     s->common.len = bdrv_getlength(bs);
-    if (s->common.len < 0) {
+    if (s->common.len <= 0) {
         block_job_completed(&s->common, s->common.len);
         return;
     }
 
+    length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
+    s->in_flight_bitmap = bitmap_new(length);
+
+    /* If we have no backing file yet in the destination, we cannot let
+     * the destination do COW. Instead, we copy sectors around the
+     * dirty data if needed. We need a bitmap to do that.
+     */
+    bdrv_get_backing_filename(s->target, backing_filename,
+                              sizeof(backing_filename));
+    if (backing_filename[0] && !s->target->backing_hd) {
+        bdrv_get_info(s->target, &bdi);
+        if (s->granularity < bdi.cluster_size) {
+            s->buf_size = MAX(s->buf_size, bdi.cluster_size);
+            s->cow_bitmap = bitmap_new(length);
+        }
+    }
+
     end = s->common.len >> BDRV_SECTOR_BITS;
-    s->buf = qemu_blockalign(bs, BLOCK_SIZE);
+    s->buf = qemu_blockalign(bs, s->buf_size);
+    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+    mirror_free_init(s);
 
     if (s->mode != MIRROR_SYNC_MODE_NONE) {
         /* First part, loop on the sectors and initialize the dirty bitmap. */
         BlockDriverState *base;
         base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
         for (sector_num = 0; sector_num < end; ) {
-            int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
+            int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
             ret = bdrv_co_is_allocated_above(bs, base,
                                              sector_num, next - sector_num, &n);
 
@@ -136,24 +355,40 @@ static void coroutine_fn mirror_run(void *opaque)
         }
     }
 
-    s->sector_num = -1;
+    bdrv_dirty_iter_init(bs, &s->hbi);
+    last_pause_ns = qemu_get_clock_ns(rt_clock);
     for (;;) {
         uint64_t delay_ns;
         int64_t cnt;
         bool should_complete;
 
+        if (s->ret < 0) {
+            ret = s->ret;
+            goto immediate_exit;
+        }
+
         cnt = bdrv_get_dirty_count(bs);
-        if (cnt != 0) {
-            BlockErrorAction action = BDRV_ACTION_REPORT;
-            ret = mirror_iteration(s, &action);
-            if (ret < 0 && action == BDRV_ACTION_REPORT) {
-                goto immediate_exit;
+
+        /* Note that even when no rate limit is applied we need to yield
+         * periodically with no pending I/O so that qemu_aio_flush() returns.
+         * We do so every SLICE_TIME nanoseconds, or when there is an error,
+         * or when the source is clean, whichever comes first.
+         */
+        if (qemu_get_clock_ns(rt_clock) - last_pause_ns < SLICE_TIME &&
+            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
+            if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
+                (cnt == 0 && s->in_flight > 0)) {
+                trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
+                qemu_coroutine_yield();
+                continue;
+            } else if (cnt != 0) {
+                mirror_iteration(s);
+                continue;
             }
-            cnt = bdrv_get_dirty_count(bs);
         }
 
         should_complete = false;
-        if (cnt == 0) {
+        if (s->in_flight == 0 && cnt == 0) {
             trace_mirror_before_flush(s);
             ret = bdrv_flush(s->target);
             if (ret < 0) {
@@ -196,23 +431,20 @@ static void coroutine_fn mirror_run(void *opaque)
         trace_mirror_before_sleep(s, cnt, s->synced);
         if (!s->synced) {
             /* Publish progress */
-            s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE;
+            s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;
 
             if (s->common.speed) {
-                delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK);
+                delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk);
             } else {
                 delay_ns = 0;
             }
 
-            /* Note that even when no rate limit is applied we need to yield
-             * with no pending I/O here so that bdrv_drain_all() returns.
-             */
             block_job_sleep_ns(&s->common, rt_clock, delay_ns);
             if (block_job_is_cancelled(&s->common)) {
                 break;
             }
         } else if (!should_complete) {
-            delay_ns = (cnt == 0 ? SLICE_TIME : 0);
+            delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
             block_job_sleep_ns(&s->common, rt_clock, delay_ns);
         } else if (cnt == 0) {
             /* The two disks are in sync. Exit and report successful
@@ -222,11 +454,24 @@ static void coroutine_fn mirror_run(void *opaque)
             s->common.cancelled = false;
             break;
         }
+        last_pause_ns = qemu_get_clock_ns(rt_clock);
     }
 
 immediate_exit:
+    if (s->in_flight > 0) {
+        /* We get here only if something went wrong. Either the job failed,
+         * or it was cancelled prematurely so that we do not guarantee that
+         * the target is a copy of the source.
+         */
+        assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common)));
+        mirror_drain(s);
+    }
+
+    assert(s->in_flight == 0);
     qemu_vfree(s->buf);
-    bdrv_set_dirty_tracking(bs, false);
+    g_free(s->cow_bitmap);
+    g_free(s->in_flight_bitmap);
+    bdrv_set_dirty_tracking(bs, 0);
     bdrv_iostatus_disable(s->target);
     if (s->should_complete && ret == 0) {
         if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
@@ -288,14 +533,28 @@ static BlockJobType mirror_job_type = {
 };
 
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
-                  int64_t speed, MirrorSyncMode mode,
-                  BlockdevOnError on_source_error,
+                  int64_t speed, int64_t granularity, int64_t buf_size,
+                  MirrorSyncMode mode, BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockDriverCompletionFunc *cb,
                   void *opaque, Error **errp)
 {
     MirrorBlockJob *s;
 
+    if (granularity == 0) {
+        /* Choose the default granularity based on the target file's cluster
+         * size, clamped between 4k and 64k. */
+        BlockDriverInfo bdi;
+        if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size != 0) {
+            granularity = MAX(4096, bdi.cluster_size);
+            granularity = MIN(65536, granularity);
+        } else {
+            granularity = 65536;
+        }
+    }
+
+    assert ((granularity & (granularity - 1)) == 0);
+
     if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
          on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
         !bdrv_iostatus_is_enabled(bs)) {
@@ -312,7 +571,10 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
     s->on_target_error = on_target_error;
     s->target = target;
     s->mode = mode;
-    bdrv_set_dirty_tracking(bs, true);
+    s->granularity = granularity;
+    s->buf_size = MAX(buf_size, granularity);
+
+    bdrv_set_dirty_tracking(bs, granularity);
     bdrv_set_enable_write_cache(s->target, true);
     bdrv_set_on_error(s->target, on_target_error, on_target_error);
     bdrv_iostatus_enable(s->target);
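mirror_free_init() carves the single big mirroring buffer into granularity-sized chunks and threads an intrusive free list through them (QSIMPLEQ in QEMU), so completed AIO operations can return their chunks cheaply. A self-contained sketch of the same trick with a plain next pointer; layout and sizes are illustrative, not the job's actual values:

    #include <stdio.h>
    #include <stdlib.h>

    /* Intrusive free-list node stored in the first bytes of each chunk,
     * standing in for QEMU's MirrorBuffer/QSIMPLEQ. */
    typedef struct Chunk {
        struct Chunk *next;
    } Chunk;

    int main(void)
    {
        size_t granularity = 64 * 1024;          /* chunk size */
        size_t buf_size = 16 * granularity;      /* whole mirroring buffer */
        unsigned char *buf = malloc(buf_size);
        Chunk *free_list = NULL;
        int free_count = 0;

        /* Walk the buffer and push each granularity-sized chunk. */
        for (size_t off = 0; off < buf_size; off += granularity) {
            Chunk *c = (Chunk *)(buf + off);
            c->next = free_list;
            free_list = c;
            free_count++;
        }
        printf("chunks available: %d\n", free_count);   /* 16 */

        /* An I/O operation pops as many chunks as it needs... */
        Chunk *c = free_list;
        free_list = c->next;
        free_count--;

        /* ...and its completion callback pushes them back, much as
         * mirror_iteration_done() does above. */
        c->next = free_list;
        free_list = c;
        free_count++;

        free(buf);
        return 0;
    }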
block/qcow.c

@@ -112,7 +112,7 @@ static int qcow_open(BlockDriverState *bs, int flags)
     be64_to_cpus(&header.l1_table_offset);
 
     if (header.magic != QCOW_MAGIC) {
-        ret = -EINVAL;
+        ret = -EMEDIUMTYPE;
         goto fail;
     }
     if (header.version != QCOW_VERSION) {
block/qcow2.c

@@ -311,7 +311,7 @@ static int qcow2_open(BlockDriverState *bs, int flags)
     be32_to_cpus(&header.nb_snapshots);
 
     if (header.magic != QCOW_MAGIC) {
-        ret = -EINVAL;
+        ret = -EMEDIUMTYPE;
         goto fail;
     }
     if (header.version < 2 || header.version > 3) {
block/qed.c

@@ -390,7 +390,7 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
     qed_header_le_to_cpu(&le_header, &s->header);
 
     if (s->header.magic != QED_MAGIC) {
-        return -EINVAL;
+        return -EMEDIUMTYPE;
     }
     if (s->header.features & ~QED_FEATURE_MASK) {
         /* image uses unsupported feature bits */
block/vdi.c (25 changed lines)
@@ -246,7 +246,7 @@ static void vdi_header_print(VdiHeader *header)
 {
     char uuid[37];
     logout("text %s", header->text);
-    logout("signature 0x%04x\n", header->signature);
+    logout("signature 0x%08x\n", header->signature);
     logout("header size 0x%04x\n", header->header_size);
     logout("image type 0x%04x\n", header->image_type);
     logout("image flags 0x%04x\n", header->image_flags);
@@ -369,10 +369,12 @@ static int vdi_open(BlockDriverState *bs, int flags)
     BDRVVdiState *s = bs->opaque;
     VdiHeader header;
     size_t bmap_size;
+    int ret;
 
     logout("\n");
 
-    if (bdrv_read(bs->file, 0, (uint8_t *)&header, 1) < 0) {
+    ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1);
+    if (ret < 0) {
         goto fail;
     }
 
@@ -390,33 +392,45 @@ static int vdi_open(BlockDriverState *bs, int flags)
         header.disk_size &= ~(SECTOR_SIZE - 1);
     }
 
-    if (header.version != VDI_VERSION_1_1) {
+    if (header.signature != VDI_SIGNATURE) {
+        logout("bad vdi signature %08x\n", header.signature);
+        ret = -EMEDIUMTYPE;
+        goto fail;
+    } else if (header.version != VDI_VERSION_1_1) {
         logout("unsupported version %u.%u\n",
                header.version >> 16, header.version & 0xffff);
+        ret = -ENOTSUP;
         goto fail;
     } else if (header.offset_bmap % SECTOR_SIZE != 0) {
         /* We only support block maps which start on a sector boundary. */
         logout("unsupported block map offset 0x%x B\n", header.offset_bmap);
+        ret = -ENOTSUP;
         goto fail;
     } else if (header.offset_data % SECTOR_SIZE != 0) {
         /* We only support data blocks which start on a sector boundary. */
         logout("unsupported data offset 0x%x B\n", header.offset_data);
+        ret = -ENOTSUP;
         goto fail;
     } else if (header.sector_size != SECTOR_SIZE) {
         logout("unsupported sector size %u B\n", header.sector_size);
+        ret = -ENOTSUP;
         goto fail;
     } else if (header.block_size != 1 * MiB) {
         logout("unsupported block size %u B\n", header.block_size);
+        ret = -ENOTSUP;
         goto fail;
    } else if (header.disk_size >
               (uint64_t)header.blocks_in_image * header.block_size) {
         logout("unsupported disk size %" PRIu64 " B\n", header.disk_size);
+        ret = -ENOTSUP;
         goto fail;
     } else if (!uuid_is_null(header.uuid_link)) {
         logout("link uuid != 0, unsupported\n");
+        ret = -ENOTSUP;
         goto fail;
     } else if (!uuid_is_null(header.uuid_parent)) {
         logout("parent uuid != 0, unsupported\n");
+        ret = -ENOTSUP;
         goto fail;
     }
 
@@ -432,7 +446,8 @@ static int vdi_open(BlockDriverState *bs, int flags)
     if (bmap_size > 0) {
         s->bmap = g_malloc(bmap_size * SECTOR_SIZE);
     }
-    if (bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size) < 0) {
+    ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size);
+    if (ret < 0) {
         goto fail_free_bmap;
     }
 
@@ -448,7 +463,7 @@ static int vdi_open(BlockDriverState *bs, int flags)
     g_free(s->bmap);
 
 fail:
-    return -1;
+    return ret;
 }
 
 static int vdi_reopen_prepare(BDRVReopenState *state,
block/vmdk.c

@@ -616,7 +616,7 @@ static int vmdk_open_sparse(BlockDriverState *bs,
         return vmdk_open_vmdk4(bs, file, flags);
         break;
     default:
-        return -EINVAL;
+        return -EMEDIUMTYPE;
         break;
     }
 }
@@ -718,7 +718,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
     }
     buf[2047] = '\0';
     if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
-        return -EINVAL;
+        return -EMEDIUMTYPE;
     }
     if (strcmp(ct, "monolithicFlat") &&
         strcmp(ct, "twoGbMaxExtentSparse") &&
blockdev.c (52 changed lines)
@@ -617,8 +617,13 @@ DriveInfo *drive_init(QemuOpts *opts, BlockInterfaceType block_default_type)
 
     ret = bdrv_open(dinfo->bdrv, file, bdrv_flags, drv);
     if (ret < 0) {
-        error_report("could not open disk image %s: %s",
-                     file, strerror(-ret));
+        if (ret == -EMEDIUMTYPE) {
+            error_report("could not open disk image %s: not in %s format",
+                         file, drv->format_name);
+        } else {
+            error_report("could not open disk image %s: %s",
+                         file, strerror(-ret));
+        }
         goto err;
     }
 
@@ -1184,16 +1189,19 @@ void qmp_block_commit(const char *device,
     drive_get_ref(drive_get_by_blockdev(bs));
 }
 
+#define DEFAULT_MIRROR_BUF_SIZE (10 << 20)
+
 void qmp_drive_mirror(const char *device, const char *target,
                       bool has_format, const char *format,
                       enum MirrorSyncMode sync,
                       bool has_mode, enum NewImageMode mode,
                       bool has_speed, int64_t speed,
+                      bool has_granularity, uint32_t granularity,
+                      bool has_buf_size, int64_t buf_size,
                       bool has_on_source_error, BlockdevOnError on_source_error,
                       bool has_on_target_error, BlockdevOnError on_target_error,
                       Error **errp)
 {
-    BlockDriverInfo bdi;
     BlockDriverState *bs;
     BlockDriverState *source, *target_bs;
     BlockDriver *proto_drv;
@@ -1215,6 +1223,21 @@ void qmp_drive_mirror(const char *device, const char *target,
     if (!has_mode) {
         mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
     }
+    if (!has_granularity) {
+        granularity = 0;
+    }
+    if (!has_buf_size) {
+        buf_size = DEFAULT_MIRROR_BUF_SIZE;
+    }
+
+    if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
+        error_set(errp, QERR_INVALID_PARAMETER, device);
+        return;
+    }
+    if (granularity & (granularity - 1)) {
+        error_set(errp, QERR_INVALID_PARAMETER, device);
+        return;
+    }
 
     bs = bdrv_find(device);
     if (!bs) {
@@ -1255,11 +1278,11 @@ void qmp_drive_mirror(const char *device, const char *target,
         return;
     }
 
+    bdrv_get_geometry(bs, &size);
+    size *= 512;
     if (sync == MIRROR_SYNC_MODE_FULL && mode != NEW_IMAGE_MODE_EXISTING) {
         /* create new image w/o backing file */
         assert(format && drv);
-        bdrv_get_geometry(bs, &size);
-        size *= 512;
         bdrv_img_create(target, format,
                         NULL, NULL, NULL, size, flags, &local_err);
     } else {
@@ -1272,7 +1295,7 @@ void qmp_drive_mirror(const char *device, const char *target,
         bdrv_img_create(target, format,
                         source->filename,
                         source->drv->format_name,
-                        NULL, -1, flags, &local_err);
+                        NULL, size, flags, &local_err);
         break;
     default:
         abort();
@@ -1284,6 +1307,9 @@ void qmp_drive_mirror(const char *device, const char *target,
         return;
     }
 
+    /* Mirroring takes care of copy-on-write using the source's backing
+     * file.
+     */
     target_bs = bdrv_new("");
     ret = bdrv_open(target_bs, target, flags | BDRV_O_NO_BACKING, drv);
 
@@ -1293,18 +1319,8 @@ void qmp_drive_mirror(const char *device, const char *target,
         return;
     }
 
-    /* We need a backing file if we will copy parts of a cluster. */
-    if (bdrv_get_info(target_bs, &bdi) >= 0 && bdi.cluster_size != 0 &&
-        bdi.cluster_size >= BDRV_SECTORS_PER_DIRTY_CHUNK * 512) {
-        ret = bdrv_open_backing_file(target_bs);
-        if (ret < 0) {
-            bdrv_delete(target_bs);
-            error_set(errp, QERR_OPEN_FILE_FAILED, target);
-            return;
-        }
-    }
-
-    mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
+    mirror_start(bs, target_bs, speed, granularity, buf_size, sync,
+                 on_source_error, on_target_error,
                  block_job_cb, bs, &local_err);
     if (local_err != NULL) {
         bdrv_delete(target_bs);
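The validation added to qmp_drive_mirror() for the new QMP arguments (granularity and buf-size, per the has_granularity/has_buf_size parameters above) accepts zero as "pick a default" and otherwise requires a power of two between 512 bytes and 64 MiB. A self-contained sketch of the same checks, with invented test values:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors the checks in qmp_drive_mirror(): zero means "use a default",
     * anything else must be a power of two between 512 bytes and 64 MiB. */
    static bool granularity_is_valid(uint32_t granularity)
    {
        if (granularity == 0) {
            return true;
        }
        if (granularity < 512 || granularity > 64 * 1024 * 1024) {
            return false;
        }
        /* Power-of-two test: exactly one bit set. */
        return (granularity & (granularity - 1)) == 0;
    }

    int main(void)
    {
        printf("%d %d %d\n",
               granularity_is_valid(65536),    /* 1: fine */
               granularity_is_valid(3000),     /* 0: not a power of two */
               granularity_is_valid(256));     /* 0: below 512 */
        return 0;
    }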
hmp.c (2 changed lines)
@@ -808,7 +808,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
 
     qmp_drive_mirror(device, filename, !!format, format,
                      full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
-                     true, mode, false, 0,
+                     true, mode, false, 0, false, 0, false, 0,
                      false, 0, false, 0, &errp);
     hmp_handle_error(mon, &errp);
 }
hw/ide/ahci.c

@@ -241,7 +241,7 @@ static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val)
         if ((pr->cmd & PORT_CMD_FIS_ON) &&
             !s->dev[port].init_d2h_sent) {
             ahci_init_d2h(&s->dev[port]);
-            s->dev[port].init_d2h_sent = 1;
+            s->dev[port].init_d2h_sent = true;
         }
 
         check_cmd(s, port);
@@ -494,7 +494,7 @@ static void ahci_reset_port(AHCIState *s, int port)
     pr->scr_err = 0;
     pr->scr_act = 0;
     d->busy_slot = -1;
-    d->init_d2h_sent = 0;
+    d->init_d2h_sent = false;
 
     ide_state = &s->dev[port].port.ifs[0];
     if (!ide_state->bs) {
@@ -946,7 +946,7 @@ static int handle_cmd(AHCIState *s, int port, int slot)
             ide_state->hcyl = 0xeb;
             debug_print_fis(ide_state->io_buffer, 0x10);
             ide_state->feature = IDE_FEATURE_DMA;
-            s->dev[port].done_atapi_packet = 0;
+            s->dev[port].done_atapi_packet = false;
             /* XXX send PIO setup FIS */
         }
 
@@ -991,7 +991,7 @@ static int ahci_start_transfer(IDEDMA *dma)
 
     if (is_atapi && !ad->done_atapi_packet) {
         /* already prepopulated iobuffer */
-        ad->done_atapi_packet = 1;
+        ad->done_atapi_packet = true;
         goto out;
     }
 
@@ -1035,11 +1035,10 @@ out:
 static void ahci_start_dma(IDEDMA *dma, IDEState *s,
                            BlockDriverCompletionFunc *dma_cb)
 {
+#ifdef DEBUG_AHCI
     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
-
+#endif
     DPRINTF(ad->port_no, "\n");
-    ad->dma_cb = dma_cb;
-    ad->dma_status |= BM_STATUS_DMAING;
     s->io_buffer_offset = 0;
     dma_cb(s, 0);
 }
@@ -1095,7 +1094,6 @@ static int ahci_dma_set_unit(IDEDMA *dma, int unit)
 static int ahci_dma_add_status(IDEDMA *dma, int status)
 {
     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
-    ad->dma_status |= status;
     DPRINTF(ad->port_no, "set status: %x\n", status);
 
     if (status & BM_STATUS_INT) {
@@ -1114,8 +1112,6 @@ static int ahci_dma_set_inactive(IDEDMA *dma)
     /* update d2h status */
     ahci_write_fis_d2h(ad, NULL);
 
-    ad->dma_cb = NULL;
-
     if (!ad->check_bh) {
         /* maybe we still have something to process, check later */
         ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad);
@@ -1203,6 +1199,82 @@ void ahci_reset(AHCIState *s)
     }
 }
 
+static const VMStateDescription vmstate_ahci_device = {
+    .name = "ahci port",
+    .version_id = 1,
+    .fields = (VMStateField []) {
+        VMSTATE_IDE_BUS(port, AHCIDevice),
+        VMSTATE_UINT32(port_state, AHCIDevice),
+        VMSTATE_UINT32(finished, AHCIDevice),
+        VMSTATE_UINT32(port_regs.lst_addr, AHCIDevice),
+        VMSTATE_UINT32(port_regs.lst_addr_hi, AHCIDevice),
+        VMSTATE_UINT32(port_regs.fis_addr, AHCIDevice),
+        VMSTATE_UINT32(port_regs.fis_addr_hi, AHCIDevice),
+        VMSTATE_UINT32(port_regs.irq_stat, AHCIDevice),
+        VMSTATE_UINT32(port_regs.irq_mask, AHCIDevice),
+        VMSTATE_UINT32(port_regs.cmd, AHCIDevice),
+        VMSTATE_UINT32(port_regs.tfdata, AHCIDevice),
+        VMSTATE_UINT32(port_regs.sig, AHCIDevice),
+        VMSTATE_UINT32(port_regs.scr_stat, AHCIDevice),
+        VMSTATE_UINT32(port_regs.scr_ctl, AHCIDevice),
+        VMSTATE_UINT32(port_regs.scr_err, AHCIDevice),
+        VMSTATE_UINT32(port_regs.scr_act, AHCIDevice),
+        VMSTATE_UINT32(port_regs.cmd_issue, AHCIDevice),
+        VMSTATE_BOOL(done_atapi_packet, AHCIDevice),
+        VMSTATE_INT32(busy_slot, AHCIDevice),
+        VMSTATE_BOOL(init_d2h_sent, AHCIDevice),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static int ahci_state_post_load(void *opaque, int version_id)
+{
+    int i;
+    struct AHCIDevice *ad;
+    AHCIState *s = opaque;
+
+    for (i = 0; i < s->ports; i++) {
+        ad = &s->dev[i];
+        AHCIPortRegs *pr = &ad->port_regs;
+
+        map_page(&ad->lst,
+                 ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024);
+        map_page(&ad->res_fis,
+                 ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256);
+        /*
+         * All pending i/o should be flushed out on a migrate. However,
+         * we might not have cleared the busy_slot since this is done
+         * in a bh. Also, issue i/o against any slots that are pending.
+         */
+        if ((ad->busy_slot != -1) &&
+            !(ad->port.ifs[0].status & (BUSY_STAT|DRQ_STAT))) {
+            pr->cmd_issue &= ~(1 << ad->busy_slot);
+            ad->busy_slot = -1;
+        }
+        check_cmd(s, i);
+    }
+
+    return 0;
+}
+
+const VMStateDescription vmstate_ahci = {
+    .name = "ahci",
+    .version_id = 1,
+    .post_load = ahci_state_post_load,
+    .fields = (VMStateField []) {
+        VMSTATE_STRUCT_VARRAY_POINTER_INT32(dev, AHCIState, ports,
+                     vmstate_ahci_device, AHCIDevice),
+        VMSTATE_UINT32(control_regs.cap, AHCIState),
+        VMSTATE_UINT32(control_regs.ghc, AHCIState),
+        VMSTATE_UINT32(control_regs.irqstatus, AHCIState),
+        VMSTATE_UINT32(control_regs.impl, AHCIState),
+        VMSTATE_UINT32(control_regs.version, AHCIState),
+        VMSTATE_UINT32(idp_index, AHCIState),
+        VMSTATE_INT32(ports, AHCIState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 typedef struct SysbusAHCIState {
     SysBusDevice busdev;
     AHCIState ahci;
@@ -1211,7 +1283,11 @@ typedef struct SysbusAHCIState {
 
 static const VMStateDescription vmstate_sysbus_ahci = {
     .name = "sysbus-ahci",
-    .unmigratable = 1,
+    .unmigratable = 1, /* Still buggy under I/O load */
+    .fields = (VMStateField []) {
+        VMSTATE_AHCI(ahci, AHCIPCIState),
+        VMSTATE_END_OF_LIST()
+    },
 };
 
 static void sysbus_ahci_reset(DeviceState *dev)
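The vmstate_ahci_device table above follows QEMU's standard migration pattern: declare which fields travel in the migration stream, in a fixed order, using fixed-width types so the wire format is host-independent (hence the int to int32_t change elsewhere in this series). A hedged sketch of the same pattern for a made-up device; DemoDevice and its fields are hypothetical, the VMSTATE_* macros are QEMU's, and this compiles only against QEMU headers, not standalone:

    /* Sketch only -- assumes QEMU's migration/vmstate machinery. */
    typedef struct DemoDevice {
        uint32_t ctrl;           /* guest-visible control register */
        int32_t active_slot;     /* fixed-width, unlike a host 'int' */
        bool irq_pending;
    } DemoDevice;

    static const VMStateDescription vmstate_demo_device = {
        .name = "demo-device",
        .version_id = 1,
        .fields = (VMStateField []) {
            VMSTATE_UINT32(ctrl, DemoDevice),
            VMSTATE_INT32(active_slot, DemoDevice),
            VMSTATE_BOOL(irq_pending, DemoDevice),
            VMSTATE_END_OF_LIST()
        },
    };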
hw/ide/ahci.h

@@ -281,11 +281,9 @@ struct AHCIDevice {
     QEMUBH *check_bh;
     uint8_t *lst;
     uint8_t *res_fis;
-    int dma_status;
-    int done_atapi_packet;
-    int busy_slot;
-    int init_d2h_sent;
-    BlockDriverCompletionFunc *dma_cb;
+    bool done_atapi_packet;
+    int32_t busy_slot;
+    bool init_d2h_sent;
     AHCICmdHdr *cur_cmd;
     NCQTransferState ncq_tfs[AHCI_MAX_CMDS];
 };
@@ -297,7 +295,7 @@ typedef struct AHCIState {
     MemoryRegion idp; /* Index-Data Pair I/O port space */
     unsigned idp_offset; /* Offset of index in I/O port space */
     uint32_t idp_index; /* Current IDP index */
-    int ports;
+    int32_t ports;
     qemu_irq irq;
     DMAContext *dma;
 } AHCIState;
@@ -307,6 +305,16 @@ typedef struct AHCIPCIState {
     AHCIState ahci;
 } AHCIPCIState;
 
+extern const VMStateDescription vmstate_ahci;
+
+#define VMSTATE_AHCI(_field, _state) {                               \
+    .name   = (stringify(_field)),                                  \
+    .size   = sizeof(AHCIState),                                    \
+    .vmsd   = &vmstate_ahci,                                        \
+    .flags  = VMS_STRUCT,                                           \
+    .offset = vmstate_offset_value(_state, _field, AHCIState),      \
+}
+
 typedef struct NCQFrame {
     uint8_t fis_type;
     uint8_t c;
hw/ide/core.c

@@ -1149,8 +1149,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
         }
         ide_set_irq(s->bus);
         break;
+
     case WIN_VERIFY_EXT:
-	lba48 = 1;
+        lba48 = 1;
+        /* fall through */
     case WIN_VERIFY:
     case WIN_VERIFY_ONCE:
         /* do sector number check ? */
@@ -1158,8 +1160,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
         s->status = READY_STAT | SEEK_STAT;
         ide_set_irq(s->bus);
         break;
+
     case WIN_READ_EXT:
-	lba48 = 1;
+        lba48 = 1;
+        /* fall through */
     case WIN_READ:
     case WIN_READ_ONCE:
         if (s->drive_kind == IDE_CD) {
@@ -1173,8 +1177,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
         s->req_nb_sectors = 1;
         ide_sector_read(s);
         break;
+
     case WIN_WRITE_EXT:
-	lba48 = 1;
+        lba48 = 1;
+        /* fall through */
     case WIN_WRITE:
     case WIN_WRITE_ONCE:
     case CFA_WRITE_SECT_WO_ERASE:
@@ -1189,8 +1195,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
         ide_transfer_start(s, s->io_buffer, 512, ide_sector_write);
         s->media_changed = 1;
         break;
+
     case WIN_MULTREAD_EXT:
-	lba48 = 1;
+        lba48 = 1;
+        /* fall through */
     case WIN_MULTREAD:
         if (!s->bs) {
             goto abort_cmd;
@@ -1202,8 +1210,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
         s->req_nb_sectors = s->mult_sectors;
         ide_sector_read(s);
         break;
+
     case WIN_MULTWRITE_EXT:
-	lba48 = 1;
+        lba48 = 1;
+        /* fall through */
     case WIN_MULTWRITE:
     case CFA_WRITE_MULTI_WO_ERASE:
         if (!s->bs) {
@@ -1222,8 +1232,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
         ide_transfer_start(s, s->io_buffer, 512 * n, ide_sector_write);
         s->media_changed = 1;
         break;
+
     case WIN_READDMA_EXT:
-	lba48 = 1;
+        lba48 = 1;
+        /* fall through */
     case WIN_READDMA:
     case WIN_READDMA_ONCE:
         if (!s->bs) {
@@ -1232,8 +1244,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
         ide_cmd_lba48_transform(s, lba48);
         ide_sector_start_dma(s, IDE_DMA_READ);
         break;
+
     case WIN_WRITEDMA_EXT:
-	lba48 = 1;
+        lba48 = 1;
+        /* fall through */
     case WIN_WRITEDMA:
     case WIN_WRITEDMA_ONCE:
         if (!s->bs) {
@@ -1243,14 +1257,17 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
         ide_sector_start_dma(s, IDE_DMA_WRITE);
         s->media_changed = 1;
         break;
+
     case WIN_READ_NATIVE_MAX_EXT:
-	lba48 = 1;
+        lba48 = 1;
+        /* fall through */
     case WIN_READ_NATIVE_MAX:
         ide_cmd_lba48_transform(s, lba48);
         ide_set_sector(s, s->nb_sectors - 1);
         s->status = READY_STAT | SEEK_STAT;
         ide_set_irq(s->bus);
         break;
+
     case WIN_CHECKPOWERMODE1:
     case WIN_CHECKPOWERMODE2:
         s->error = 0;
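The /* fall through */ comments above add no behavior; they document that the missing break is intentional (the _EXT command sets lba48 and then shares the non-EXT handler), which also silences static checkers that warn on implicit fall-through. A tiny self-contained analogue with invented command numbers:

    #include <stdio.h>

    static void exec_cmd(int cmd)
    {
        int lba48 = 0;

        switch (cmd) {
        case 1:                 /* stands in for WIN_READ_EXT */
            lba48 = 1;
            /* fall through */
        case 0:                 /* stands in for WIN_READ */
            printf("read, lba48=%d\n", lba48);
            break;
        }
    }

    int main(void)
    {
        exec_cmd(0);   /* read, lba48=0 */
        exec_cmd(1);   /* read, lba48=1 */
        return 0;
    }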
hw/ide/ich.c (14 changed lines)
@@ -79,9 +79,15 @@
 #define ICH9_IDP_INDEX 0x10
 #define ICH9_IDP_INDEX_LOG2 0x04
 
-static const VMStateDescription vmstate_ahci = {
-    .name = "ahci",
-    .unmigratable = 1,
+static const VMStateDescription vmstate_ich9_ahci = {
+    .name = "ich9_ahci",
+    .unmigratable = 1, /* Still buggy under I/O load */
+    .version_id = 1,
+    .fields = (VMStateField []) {
+        VMSTATE_PCI_DEVICE(card, AHCIPCIState),
+        VMSTATE_AHCI(ahci, AHCIPCIState),
+        VMSTATE_END_OF_LIST()
+    },
 };
 
 static void pci_ich9_reset(DeviceState *dev)
@@ -152,7 +158,7 @@ static void ich_ahci_class_init(ObjectClass *klass, void *data)
     k->device_id = PCI_DEVICE_ID_INTEL_82801IR;
     k->revision = 0x02;
     k->class_id = PCI_CLASS_STORAGE_SATA;
-    dc->vmsd = &vmstate_ahci;
+    dc->vmsd = &vmstate_ich9_ahci;
     dc->reset = pci_ich9_reset;
 }
 
include/block/block.h

@@ -309,6 +309,10 @@ int bdrv_get_flags(BlockDriverState *bs);
 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                           const uint8_t *buf, int nb_sectors);
 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
+void bdrv_round_to_clusters(BlockDriverState *bs,
+                            int64_t sector_num, int nb_sectors,
+                            int64_t *cluster_sector_num,
+                            int *cluster_nb_sectors);
 
 const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
 void bdrv_get_backing_filename(BlockDriverState *bs,
@@ -351,13 +355,12 @@ void bdrv_set_buffer_alignment(BlockDriverState *bs, int align);
 void *qemu_blockalign(BlockDriverState *bs, size_t size);
 bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
 
-#define BDRV_SECTORS_PER_DIRTY_CHUNK 2048
-
-void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable);
+struct HBitmapIter;
+void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity);
 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
-int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector);
+void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter *hbi);
 int64_t bdrv_get_dirty_count(BlockDriverState *bs);
 
 void bdrv_enable_copy_on_read(BlockDriverState *bs);
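bdrv_round_to_clusters(), now exported above, just widens a sector range outward to cluster boundaries so COW always copies whole clusters. An illustrative standalone version; QEMU's real one reads the cluster size from BlockDriverInfo, and the numbers here are arbitrary:

    #include <stdio.h>
    #include <stdint.h>

    #define BDRV_SECTOR_SIZE 512

    /* Widen [sector_num, sector_num + nb_sectors) so that both ends sit on
     * a cluster boundary.  cluster_size is in bytes, a power of two. */
    static void round_to_clusters(int cluster_size,
                                  int64_t sector_num, int nb_sectors,
                                  int64_t *cluster_sector_num,
                                  int *cluster_nb_sectors)
    {
        int64_t c = cluster_size / BDRV_SECTOR_SIZE;   /* sectors per cluster */

        *cluster_sector_num = sector_num / c * c;                    /* round down */
        *cluster_nb_sectors = (int)((sector_num - *cluster_sector_num
                                     + nb_sectors + c - 1) / c * c); /* round up */
    }

    int main(void)
    {
        int64_t start;
        int count;

        round_to_clusters(65536, 100, 10, &start, &count);
        printf("%lld %d\n", (long long)start, count);   /* 0 128 */
        return 0;
    }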
include/block/block_int.h

@@ -32,6 +32,7 @@
 #include "qapi-types.h"
 #include "qapi/qmp/qerror.h"
 #include "monitor/monitor.h"
+#include "qemu/hbitmap.h"
 
 #define BLOCK_FLAG_ENCRYPT 1
 #define BLOCK_FLAG_COMPAT6 4
@@ -275,8 +276,7 @@ struct BlockDriverState {
     bool iostatus_enabled;
     BlockDeviceIoStatus iostatus;
     char device_name[32];
-    unsigned long *dirty_bitmap;
-    int64_t dirty_count;
+    HBitmap *dirty_bitmap;
     int in_use; /* users other than guest access, eg. block migration */
     QTAILQ_ENTRY(BlockDriverState) list;
 
@@ -344,6 +344,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
  * @bs: Block device to operate on.
  * @target: Block device to write to.
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @granularity: The chosen granularity for the dirty bitmap.
+ * @buf_size: The amount of data that can be in flight at one time.
  * @mode: Whether to collapse all images in the chain to the target.
 * @on_source_error: The action to take upon error reading from the source.
 * @on_target_error: The action to take upon error writing to the target.
@@ -357,8 +359,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
 * @bs will be switched to read from @target.
 */
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
-                  int64_t speed, MirrorSyncMode mode,
-                  BlockdevOnError on_source_error,
+                  int64_t speed, int64_t granularity, int64_t buf_size,
+                  MirrorSyncMode mode, BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockDriverCompletionFunc *cb,
                   void *opaque, Error **errp);
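Replacing the flat unsigned long * bitmap and hand-maintained dirty_count with an HBitmap is what turns "find the next dirty sector" from a linear scan (the deleted bdrv_get_next_dirty()) into a walk down a tree of summary words. A toy two-level version shows the idea; the real HBitmap generalizes this to HBITMAP_LEVELS levels and adds granularity, and the bit positions below are arbitrary:

    #include <stdio.h>
    #include <strings.h>   /* ffsl() */

    #define BITS 64        /* bits per word on a 64-bit long */

    /* Two-level toy: top bit i says whether bottom[i] has any bit set. */
    static unsigned long top;
    static unsigned long bottom[BITS];

    static void set_bit_2l(unsigned idx)
    {
        bottom[idx / BITS] |= 1UL << (idx % BITS);
        top |= 1UL << (idx / BITS);
    }

    /* Find the first set bit without scanning empty words one by one. */
    static long first_set_2l(void)
    {
        if (top == 0) {
            return -1;
        }
        unsigned word = ffsl((long)top) - 1;            /* non-empty leaf word */
        return (long)word * BITS + ffsl((long)bottom[word]) - 1;
    }

    int main(void)
    {
        set_bit_2l(4000);
        set_bit_2l(130);
        printf("first dirty: %ld\n", first_set_2l());   /* 130 */
        return 0;
    }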
include/qemu/osdep.h

@@ -68,6 +68,9 @@
 #if !defined(ECANCELED)
 #define ECANCELED 4097
 #endif
+#if !defined(EMEDIUMTYPE)
+#define EMEDIUMTYPE 4098
+#endif
 #ifndef TIME_MAX
 #define TIME_MAX LONG_MAX
 #endif
include/qemu/hbitmap.h (new file, 208 lines)
@ -0,0 +1,208 @@
|
||||
/*
|
||||
* Hierarchical Bitmap Data Type
|
||||
*
|
||||
* Copyright Red Hat, Inc., 2012
|
||||
*
|
||||
* Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or
|
||||
* later. See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#ifndef HBITMAP_H
|
||||
#define HBITMAP_H 1
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include "bitops.h"
|
||||
|
||||
typedef struct HBitmap HBitmap;
|
||||
typedef struct HBitmapIter HBitmapIter;
|
||||
|
||||
#define BITS_PER_LEVEL (BITS_PER_LONG == 32 ? 5 : 6)
|
||||
|
||||
/* For 32-bit, the largest that fits in a 4 GiB address space.
|
||||
* For 64-bit, the number of sectors in 1 PiB. Good luck, in
|
||||
* either case... :)
|
||||
*/
|
||||
#define HBITMAP_LOG_MAX_SIZE (BITS_PER_LONG == 32 ? 34 : 41)
|
/* We need to place a sentinel in level 0 to speed up iteration.  Thus,
 * we do this instead of HBITMAP_LOG_MAX_SIZE / BITS_PER_LEVEL.  The
 * difference is that it allocates an extra level when HBITMAP_LOG_MAX_SIZE
 * is an exact multiple of BITS_PER_LEVEL.
 */
#define HBITMAP_LEVELS ((HBITMAP_LOG_MAX_SIZE / BITS_PER_LEVEL) + 1)

struct HBitmapIter {
    const HBitmap *hb;

    /* Copied from hb for access in the inline functions (hb is opaque). */
    int granularity;

    /* Entry offset into the last-level array of longs. */
    size_t pos;

    /* The currently-active path in the tree.  Each item of cur[i] stores
     * the bits (i.e. the subtrees) yet to be processed under that node.
     */
    unsigned long cur[HBITMAP_LEVELS];
};

/**
 * hbitmap_alloc:
 * @size: Number of bits in the bitmap.
 * @granularity: Granularity of the bitmap.  Aligned groups of 2^@granularity
 * bits will be represented by a single bit.  Each operation on a
 * range of bits first rounds the bits to determine which group they land
 * in, and then affects the entire group; iteration will only visit the first
 * bit of each group.
 *
 * Allocate a new HBitmap.
 */
HBitmap *hbitmap_alloc(uint64_t size, int granularity);

/**
 * hbitmap_empty:
 * @hb: HBitmap to operate on.
 *
 * Return whether the bitmap is empty.
 */
bool hbitmap_empty(const HBitmap *hb);

/**
 * hbitmap_granularity:
 * @hb: HBitmap to operate on.
 *
 * Return the granularity of the HBitmap.
 */
int hbitmap_granularity(const HBitmap *hb);

/**
 * hbitmap_count:
 * @hb: HBitmap to operate on.
 *
 * Return the number of bits set in the HBitmap.
 */
uint64_t hbitmap_count(const HBitmap *hb);

/**
 * hbitmap_set:
 * @hb: HBitmap to operate on.
 * @start: First bit to set (0-based).
 * @count: Number of bits to set.
 *
 * Set a consecutive range of bits in an HBitmap.
 */
void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count);

/**
 * hbitmap_reset:
 * @hb: HBitmap to operate on.
 * @start: First bit to reset (0-based).
 * @count: Number of bits to reset.
 *
 * Reset a consecutive range of bits in an HBitmap.
 */
void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count);

/**
 * hbitmap_get:
 * @hb: HBitmap to operate on.
 * @item: Bit to query (0-based).
 *
 * Return whether the @item-th bit in an HBitmap is set.
 */
bool hbitmap_get(const HBitmap *hb, uint64_t item);

/**
 * hbitmap_free:
 * @hb: HBitmap to operate on.
 *
 * Free an HBitmap and all of its associated memory.
 */
void hbitmap_free(HBitmap *hb);

/**
 * hbitmap_iter_init:
 * @hbi: HBitmapIter to initialize.
 * @hb: HBitmap to iterate on.
 * @first: First bit to visit (0-based, must be strictly less than the
 * size of the bitmap).
 *
 * Set up @hbi to iterate on the HBitmap @hb.  hbitmap_iter_next will return
 * the lowest-numbered bit that is set in @hb, starting at @first.
 *
 * Concurrent setting of bits is acceptable, and will at worst cause the
 * iteration to miss some of those bits.  Resetting bits before the current
 * position of the iterator is also okay.  However, concurrent resetting of
 * bits can lead to unexpected behavior if the iterator has not yet reached
 * those bits.
 */
void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first);

/* hbitmap_iter_skip_words:
 * @hbi: HBitmapIter to operate on.
 *
 * Internal function used by hbitmap_iter_next and hbitmap_iter_next_word.
 */
unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi);

/**
 * hbitmap_iter_next:
 * @hbi: HBitmapIter to operate on.
 *
 * Return the next bit that is set in @hbi's associated HBitmap,
 * or -1 if all remaining bits are zero.
 */
static inline int64_t hbitmap_iter_next(HBitmapIter *hbi)
{
    unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1];
    int64_t item;

    if (cur == 0) {
        cur = hbitmap_iter_skip_words(hbi);
        if (cur == 0) {
            return -1;
        }
    }

    /* The next call will resume work from the next bit. */
    hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1);
    item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ffsl(cur) - 1;

    return item << hbi->granularity;
}

/**
 * hbitmap_iter_next_word:
 * @hbi: HBitmapIter to operate on.
 * @p_cur: Location where to store the next non-zero word.
 *
 * Return the index of the next nonzero word that is set in @hbi's
 * associated HBitmap, and set *p_cur to the content of that word
 * (bits before the index that was passed to hbitmap_iter_init are
 * trimmed on the first call).  Return -1, and set *p_cur to zero,
 * if all remaining words are zero.
 */
static inline size_t hbitmap_iter_next_word(HBitmapIter *hbi, unsigned long *p_cur)
{
    unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1];

    if (cur == 0) {
        cur = hbitmap_iter_skip_words(hbi);
        if (cur == 0) {
            *p_cur = 0;
            return -1;
        }
    }

    /* The next call will resume work from the next word. */
    hbi->cur[HBITMAP_LEVELS - 1] = 0;
    *p_cur = cur;
    return hbi->pos;
}

#endif
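The API above is small enough to show end to end.  The following sketch is illustrative only and not part of the patch; the function name and sizes are invented for the example.  It allocates a bitmap with an 8-bit granularity, dirties a range, and walks the set bits:

#include <assert.h>
#include "qemu/hbitmap.h"

static void hbitmap_usage_sketch(void)
{
    HBitmap *hb = hbitmap_alloc(1 << 20, 8);  /* 1M items, 2^8 items per bit */
    HBitmapIter hbi;
    int64_t next;

    hbitmap_set(hb, 300, 1000);               /* items 300..1299 -> groups 1..5 */
    assert(hbitmap_count(hb) == 5 << 8);      /* count is scaled by granularity */

    hbitmap_iter_init(&hbi, hb, 0);
    while ((next = hbitmap_iter_next(&hbi)) >= 0) {
        /* visits the first item of each dirty group: 256, 512, ..., 1280 */
    }

    hbitmap_reset(hb, 0, 1 << 20);
    assert(hbitmap_empty(hb));
    hbitmap_free(hb);
}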
@ -26,6 +26,7 @@
#define HOST_UTILS_H 1

#include "qemu/compiler.h"   /* QEMU_GNUC_PREREQ */
#include <string.h> /* ffsl */

#if defined(__x86_64__)
#define __HAVE_FAST_MULU64__
@ -237,4 +238,29 @@ static inline int ctpop64(uint64_t val)
#endif
}

/* glibc does not provide an inline version of ffsl, so always define
 * ours.  We need to give it a different name, however.
 */
#ifdef __GLIBC__
#define ffsl qemu_ffsl
#endif
static inline int ffsl(long val)
{
    if (!val) {
        return 0;
    }

#if QEMU_GNUC_PREREQ(3, 4)
    return __builtin_ctzl(val) + 1;
#else
    if (sizeof(long) == 4) {
        return ctz32(val) + 1;
    } else if (sizeof(long) == 8) {
        return ctz64(val) + 1;
    } else {
        abort();
    }
#endif
}

#endif
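For reference, the intended semantics are those of POSIX ffs() generalized to long: the result is the 1-based index of the least significant set bit, or 0 when no bit is set.  A few illustrative cases (not part of the patch):

assert(ffsl(0) == 0);   /* no bit set */
assert(ffsl(1) == 1);   /* bit 0 */
assert(ffsl(8) == 4);   /* bit 3 */
assert(ffsl(12) == 3);  /* lowest set bit of 0b1100 is bit 2 */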
@ -741,10 +741,12 @@
#
# @count: number of dirty bytes according to the dirty bitmap
#
# @granularity: granularity of the dirty bitmap in bytes (since 1.4)
#
# Since: 1.3
##
{ 'type': 'BlockDirtyInfo',
  'data': {'count': 'int'} }
  'data': {'count': 'int', 'granularity': 'int'} }

##
# @BlockInfo:
@ -1690,6 +1692,14 @@
# (all the disk, only the sectors allocated in the topmost image, or
# only new I/O).
#
# @granularity: #optional granularity of the dirty bitmap, default is 64K
#               if the image format doesn't have clusters, 4K if the clusters
#               are smaller than that, else the cluster size.  Must be a
#               power of 2 between 512 and 64M (since 1.4).
#
# @buf-size: #optional maximum amount of data in flight from source to
#            target (since 1.4).
#
# @on-source-error: #optional the action to take on an error on the source,
#                   default 'report'.  'stop' and 'enospc' can only be used
#                   if the block device supports io-status (see BlockInfo).
@ -1706,7 +1716,8 @@
{ 'command': 'drive-mirror',
  'data': { 'device': 'str', 'target': 'str', '*format': 'str',
            'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
            '*speed': 'int', '*on-source-error': 'BlockdevOnError',
            '*speed': 'int', '*granularity': 'uint32',
            '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
            '*on-target-error': 'BlockdevOnError' } }

##
@ -1004,7 +1004,8 @@ EQMP
    {
        .name       = "drive-mirror",
        .args_type  = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
                      "on-source-error:s?,on-target-error:s?",
                      "on-source-error:s?,on-target-error:s?,"
                      "granularity:i?,buf-size:i?",
        .mhandler.cmd_new = qmp_marshal_input_drive_mirror,
    },

@ -1028,6 +1029,9 @@ Arguments:
  file/device (NewImageMode, optional, default 'absolute-paths')
- "speed": maximum speed of the streaming job, in bytes per second
  (json-int)
- "granularity": granularity of the dirty bitmap, in bytes (json-int, optional)
- "buf-size": maximum amount of data in flight from source to target, in bytes
  (json-int, default 10M)
- "sync": what parts of the disk image should be copied to the destination;
  possibilities include "full" for all the disk, "top" for only the sectors
  allocated in the topmost image, or "none" to only replicate new I/O
@ -1037,6 +1041,10 @@ Arguments:
- "on-target-error": the action to take on an error on the target
  (BlockdevOnError, default 'report')

The default value of the granularity is the image cluster size clamped
between 4096 and 65536, if the image format defines one.  If the format
does not define a cluster size, the default value of the granularity
is 65536.

Example:
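The clamping rule in the paragraph above is easy to state in code.  This is a sketch of the rule only; the helper name is invented and this is not the patch's block/mirror.c implementation:

static uint32_t default_granularity(uint32_t cluster_size)
{
    if (cluster_size == 0) {     /* format defines no cluster size */
        return 65536;
    }
    if (cluster_size < 4096) {
        return 4096;
    }
    if (cluster_size > 65536) {
        return 65536;
    }
    return cluster_size;
}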
@ -45,6 +45,8 @@ gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c
gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c
check-unit-y += tests/test-thread-pool$(EXESUF)
gcov-files-test-thread-pool-y = thread-pool.c
gcov-files-test-hbitmap-y = util/hbitmap.c
check-unit-y += tests/test-hbitmap$(EXESUF)

check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh

@ -88,6 +90,7 @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) libqemuutil
tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a libqemustub.a
tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(block-obj-y) libqemuutil.a libqemustub.a
tests/test-iov$(EXESUF): tests/test-iov.o libqemuutil.a
tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o libqemuutil.a libqemustub.a

tests/test-qapi-types.c tests/test-qapi-types.h :\
$(SRC_PATH)/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-types.py
@ -207,6 +207,37 @@ class TestSingleDrive(ImageMirroringTestCase):
        self.assertTrue(self.compare_images(test_img, target_img),
                        'target image does not match source after mirroring')

    def test_small_buffer(self):
        self.assert_no_active_mirrors()

        # A small buffer is rounded up automatically
        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
                             buf_size=4096, target=target_img)
        self.assert_qmp(result, 'return', {})

        self.complete_and_wait()
        result = self.vm.qmp('query-block')
        self.assert_qmp(result, 'return[0]/inserted/file', target_img)
        self.vm.shutdown()
        self.assertTrue(self.compare_images(test_img, target_img),
                        'target image does not match source after mirroring')

    def test_small_buffer2(self):
        self.assert_no_active_mirrors()

        qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,size=%d'
                 % (TestSingleDrive.image_len, TestSingleDrive.image_len), target_img)
        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
                             buf_size=65536, mode='existing', target=target_img)
        self.assert_qmp(result, 'return', {})

        self.complete_and_wait()
        result = self.vm.qmp('query-block')
        self.assert_qmp(result, 'return[0]/inserted/file', target_img)
        self.vm.shutdown()
        self.assertTrue(self.compare_images(test_img, target_img),
                        'target image does not match source after mirroring')

    def test_large_cluster(self):
        self.assert_no_active_mirrors()

@ -292,6 +323,27 @@ class TestMirrorNoBacking(ImageMirroringTestCase):
        self.assertTrue(self.compare_images(test_img, target_img),
                        'target image does not match source after mirroring')

    def test_large_cluster(self):
        self.assert_no_active_mirrors()

        # qemu-img create fails if the image is not there
        qemu_img('create', '-f', iotests.imgfmt, '-o', 'size=%d'
                 % (TestMirrorNoBacking.image_len), target_backing_img)
        qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,backing_file=%s'
                 % (TestMirrorNoBacking.image_len, target_backing_img), target_img)
        os.remove(target_backing_img)

        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
                             mode='existing', target=target_img)
        self.assert_qmp(result, 'return', {})

        self.complete_and_wait()
        result = self.vm.qmp('query-block')
        self.assert_qmp(result, 'return[0]/inserted/file', target_img)
        self.vm.shutdown()
        self.assertTrue(self.compare_images(test_img, target_img),
                        'target image does not match source after mirroring')

class TestReadErrors(ImageMirroringTestCase):
    image_len = 2 * 1024 * 1024 # MB

@ -330,6 +382,9 @@ new_state = "1"
                 '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw'
                      % (self.blkdebug_file, backing_img),
                 test_img)
        # Write something for tests that use sync='top'
        qemu_io('-c', 'write %d 512' % (self.MIRROR_GRANULARITY + 65536),
                test_img)
        self.vm = iotests.VM().add_drive(test_img)
        self.vm.launch()

@ -383,6 +438,32 @@ new_state = "1"
        self.complete_and_wait()
        self.vm.shutdown()

    def test_large_cluster(self):
        self.assert_no_active_mirrors()

        # Test COW into the target image.  The first half of the
        # cluster at MIRROR_GRANULARITY has to be copied from
        # backing_img, even though sync='top'.
        qemu_img('create', '-f', iotests.imgfmt,
                 '-ocluster_size=131072,backing_file=%s' % (backing_img), target_img)
        result = self.vm.qmp('drive-mirror', device='drive0', sync='top',
                             on_source_error='ignore',
                             mode='existing', target=target_img)
        self.assert_qmp(result, 'return', {})

        event = self.vm.get_qmp_event(wait=True)
        self.assertEquals(event['event'], 'BLOCK_JOB_ERROR')
        self.assert_qmp(event, 'data/device', 'drive0')
        self.assert_qmp(event, 'data/operation', 'read')
        result = self.vm.qmp('query-block-jobs')
        self.assert_qmp(result, 'return[0]/paused', False)
        self.complete_and_wait()
        self.vm.shutdown()

        # Detach blkdebug to compare images successfully
        qemu_img('rebase', '-f', iotests.imgfmt, '-u', '-b', backing_img, test_img)
        self.assertTrue(self.compare_images(test_img, target_img),
                        'target image does not match source after mirroring')

    def test_stop_read(self):
        self.assert_no_active_mirrors()
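test_small_buffer above relies on an undersized buffer being rounded up rather than rejected.  A plausible sketch of such rounding, under the assumption that the buffer must hold a whole number of granularity-sized chunks (invented helper, not the actual block/mirror.c code):

static int64_t round_up_buf_size(int64_t buf_size, int64_t granularity)
{
    /* Round up to a whole number of granularity-sized chunks. */
    return ((buf_size + granularity - 1) / granularity) * granularity;
}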
@ -1,5 +1,5 @@
..................
......................
----------------------------------------------------------------------
Ran 18 tests
Ran 22 tests

OK
401
tests/test-hbitmap.c
Normal file
@ -0,0 +1,401 @@
/*
 * Hierarchical bitmap unit-tests.
 *
 * Copyright (C) 2012 Red Hat Inc.
 *
 * Author: Paolo Bonzini <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include <glib.h>
#include <stdarg.h>
#include "qemu/hbitmap.h"

#define LOG_BITS_PER_LONG (BITS_PER_LONG == 32 ? 5 : 6)

#define L1 BITS_PER_LONG
#define L2 (BITS_PER_LONG * L1)
#define L3 (BITS_PER_LONG * L2)

typedef struct TestHBitmapData {
    HBitmap       *hb;
    unsigned long *bits;
    size_t         size;
    int            granularity;
} TestHBitmapData;


/* Check that the HBitmap and the shadow bitmap contain the same data,
 * ignoring the same "first" bits.
 */
static void hbitmap_test_check(TestHBitmapData *data,
                               uint64_t first)
{
    uint64_t count = 0;
    size_t pos;
    int bit;
    HBitmapIter hbi;
    int64_t i, next;

    hbitmap_iter_init(&hbi, data->hb, first);

    i = first;
    for (;;) {
        next = hbitmap_iter_next(&hbi);
        if (next < 0) {
            next = data->size;
        }

        while (i < next) {
            pos = i >> LOG_BITS_PER_LONG;
            bit = i & (BITS_PER_LONG - 1);
            i++;
            g_assert_cmpint(data->bits[pos] & (1UL << bit), ==, 0);
        }

        if (next == data->size) {
            break;
        }

        pos = i >> LOG_BITS_PER_LONG;
        bit = i & (BITS_PER_LONG - 1);
        i++;
        count++;
        g_assert_cmpint(data->bits[pos] & (1UL << bit), !=, 0);
    }

    if (first == 0) {
        g_assert_cmpint(count << data->granularity, ==, hbitmap_count(data->hb));
    }
}

/* This is provided instead of a test setup function so that the sizes
   are kept in the test functions (and not in main()) */
static void hbitmap_test_init(TestHBitmapData *data,
                              uint64_t size, int granularity)
{
    size_t n;
    data->hb = hbitmap_alloc(size, granularity);

    n = (size + BITS_PER_LONG - 1) / BITS_PER_LONG;
    if (n == 0) {
        n = 1;
    }
    data->bits = g_new0(unsigned long, n);
    data->size = size;
    data->granularity = granularity;
    if (size) {
        hbitmap_test_check(data, 0);
    }
}

static void hbitmap_test_teardown(TestHBitmapData *data,
                                  const void *unused)
{
    if (data->hb) {
        hbitmap_free(data->hb);
        data->hb = NULL;
    }
    if (data->bits) {
        g_free(data->bits);
        data->bits = NULL;
    }
}

/* Set a range in the HBitmap and in the shadow "simple" bitmap.
 * The two bitmaps are then tested against each other.
 */
static void hbitmap_test_set(TestHBitmapData *data,
                             uint64_t first, uint64_t count)
{
    hbitmap_set(data->hb, first, count);
    while (count-- != 0) {
        size_t pos = first >> LOG_BITS_PER_LONG;
        int bit = first & (BITS_PER_LONG - 1);
        first++;

        data->bits[pos] |= 1UL << bit;
    }

    if (data->granularity == 0) {
        hbitmap_test_check(data, 0);
    }
}

/* Reset a range in the HBitmap and in the shadow "simple" bitmap.
 */
static void hbitmap_test_reset(TestHBitmapData *data,
                               uint64_t first, uint64_t count)
{
    hbitmap_reset(data->hb, first, count);
    while (count-- != 0) {
        size_t pos = first >> LOG_BITS_PER_LONG;
        int bit = first & (BITS_PER_LONG - 1);
        first++;

        data->bits[pos] &= ~(1UL << bit);
    }

    if (data->granularity == 0) {
        hbitmap_test_check(data, 0);
    }
}

static void hbitmap_test_check_get(TestHBitmapData *data)
{
    uint64_t count = 0;
    uint64_t i;

    for (i = 0; i < data->size; i++) {
        size_t pos = i >> LOG_BITS_PER_LONG;
        int bit = i & (BITS_PER_LONG - 1);
        unsigned long val = data->bits[pos] & (1UL << bit);
        count += hbitmap_get(data->hb, i);
        g_assert_cmpint(hbitmap_get(data->hb, i), ==, val != 0);
    }
    g_assert_cmpint(count, ==, hbitmap_count(data->hb));
}

static void test_hbitmap_zero(TestHBitmapData *data,
                              const void *unused)
{
    hbitmap_test_init(data, 0, 0);
}

static void test_hbitmap_unaligned(TestHBitmapData *data,
                                   const void *unused)
{
    hbitmap_test_init(data, L3 + 23, 0);
    hbitmap_test_set(data, 0, 1);
    hbitmap_test_set(data, L3 + 22, 1);
}

static void test_hbitmap_iter_empty(TestHBitmapData *data,
                                    const void *unused)
{
    hbitmap_test_init(data, L1, 0);
}

static void test_hbitmap_iter_partial(TestHBitmapData *data,
                                      const void *unused)
{
    hbitmap_test_init(data, L3, 0);
    hbitmap_test_set(data, 0, L3);
    hbitmap_test_check(data, 1);
    hbitmap_test_check(data, L1 - 1);
    hbitmap_test_check(data, L1);
    hbitmap_test_check(data, L1 * 2 - 1);
    hbitmap_test_check(data, L2 - 1);
    hbitmap_test_check(data, L2);
    hbitmap_test_check(data, L2 + 1);
    hbitmap_test_check(data, L2 + L1);
    hbitmap_test_check(data, L2 + L1 * 2 - 1);
    hbitmap_test_check(data, L2 * 2 - 1);
    hbitmap_test_check(data, L2 * 2);
    hbitmap_test_check(data, L2 * 2 + 1);
    hbitmap_test_check(data, L2 * 2 + L1);
    hbitmap_test_check(data, L2 * 2 + L1 * 2 - 1);
    hbitmap_test_check(data, L3 / 2);
}

static void test_hbitmap_set_all(TestHBitmapData *data,
                                 const void *unused)
{
    hbitmap_test_init(data, L3, 0);
    hbitmap_test_set(data, 0, L3);
}

static void test_hbitmap_get_all(TestHBitmapData *data,
                                 const void *unused)
{
    hbitmap_test_init(data, L3, 0);
    hbitmap_test_set(data, 0, L3);
    hbitmap_test_check_get(data);
}

static void test_hbitmap_get_some(TestHBitmapData *data,
                                  const void *unused)
{
    hbitmap_test_init(data, 2 * L2, 0);
    hbitmap_test_set(data, 10, 1);
    hbitmap_test_check_get(data);
    hbitmap_test_set(data, L1 - 1, 1);
    hbitmap_test_check_get(data);
    hbitmap_test_set(data, L1, 1);
    hbitmap_test_check_get(data);
    hbitmap_test_set(data, L2 - 1, 1);
    hbitmap_test_check_get(data);
    hbitmap_test_set(data, L2, 1);
    hbitmap_test_check_get(data);
}

static void test_hbitmap_set_one(TestHBitmapData *data,
                                 const void *unused)
{
    hbitmap_test_init(data, 2 * L2, 0);
    hbitmap_test_set(data, 10, 1);
    hbitmap_test_set(data, L1 - 1, 1);
    hbitmap_test_set(data, L1, 1);
    hbitmap_test_set(data, L2 - 1, 1);
    hbitmap_test_set(data, L2, 1);
}

static void test_hbitmap_set_two_elem(TestHBitmapData *data,
                                      const void *unused)
{
    hbitmap_test_init(data, 2 * L2, 0);
    hbitmap_test_set(data, L1 - 1, 2);
    hbitmap_test_set(data, L1 * 2 - 1, 4);
    hbitmap_test_set(data, L1 * 4, L1 + 1);
    hbitmap_test_set(data, L1 * 8 - 1, L1 + 1);
    hbitmap_test_set(data, L2 - 1, 2);
    hbitmap_test_set(data, L2 + L1 - 1, 8);
    hbitmap_test_set(data, L2 + L1 * 4, L1 + 1);
    hbitmap_test_set(data, L2 + L1 * 8 - 1, L1 + 1);
}

static void test_hbitmap_set(TestHBitmapData *data,
                             const void *unused)
{
    hbitmap_test_init(data, L3 * 2, 0);
    hbitmap_test_set(data, L1 - 1, L1 + 2);
    hbitmap_test_set(data, L1 * 3 - 1, L1 + 2);
    hbitmap_test_set(data, L1 * 5, L1 * 2 + 1);
    hbitmap_test_set(data, L1 * 8 - 1, L1 * 2 + 1);
    hbitmap_test_set(data, L2 - 1, L1 + 2);
    hbitmap_test_set(data, L2 + L1 * 2 - 1, L1 + 2);
    hbitmap_test_set(data, L2 + L1 * 4, L1 * 2 + 1);
    hbitmap_test_set(data, L2 + L1 * 7 - 1, L1 * 2 + 1);
    hbitmap_test_set(data, L2 * 2 - 1, L3 * 2 - L2 * 2);
}

static void test_hbitmap_set_twice(TestHBitmapData *data,
                                   const void *unused)
{
    hbitmap_test_init(data, L1 * 3, 0);
    hbitmap_test_set(data, 0, L1 * 3);
    hbitmap_test_set(data, L1, 1);
}

static void test_hbitmap_set_overlap(TestHBitmapData *data,
                                     const void *unused)
{
    hbitmap_test_init(data, L3 * 2, 0);
    hbitmap_test_set(data, L1 - 1, L1 + 2);
    hbitmap_test_set(data, L1 * 2 - 1, L1 * 2 + 2);
    hbitmap_test_set(data, 0, L1 * 3);
    hbitmap_test_set(data, L1 * 8 - 1, L2);
    hbitmap_test_set(data, L2, L1);
    hbitmap_test_set(data, L2 - L1 - 1, L1 * 8 + 2);
    hbitmap_test_set(data, L2, L3 - L2 + 1);
    hbitmap_test_set(data, L3 - L1, L1 * 3);
    hbitmap_test_set(data, L3 - 1, 3);
    hbitmap_test_set(data, L3 - 1, L2);
}

static void test_hbitmap_reset_empty(TestHBitmapData *data,
                                     const void *unused)
{
    hbitmap_test_init(data, L3, 0);
    hbitmap_test_reset(data, 0, L3);
}

static void test_hbitmap_reset(TestHBitmapData *data,
                               const void *unused)
{
    hbitmap_test_init(data, L3 * 2, 0);
    hbitmap_test_set(data, L1 - 1, L1 + 2);
    hbitmap_test_reset(data, L1 * 2 - 1, L1 * 2 + 2);
    hbitmap_test_set(data, 0, L1 * 3);
    hbitmap_test_reset(data, L1 * 8 - 1, L2);
    hbitmap_test_set(data, L2, L1);
    hbitmap_test_reset(data, L2 - L1 - 1, L1 * 8 + 2);
    hbitmap_test_set(data, L2, L3 - L2 + 1);
    hbitmap_test_reset(data, L3 - L1, L1 * 3);
    hbitmap_test_set(data, L3 - 1, 3);
    hbitmap_test_reset(data, L3 - 1, L2);
    hbitmap_test_set(data, 0, L3 * 2);
    hbitmap_test_reset(data, 0, L1);
    hbitmap_test_reset(data, 0, L2);
    hbitmap_test_reset(data, L3, L3);
    hbitmap_test_set(data, L3 / 2, L3);
}

static void test_hbitmap_granularity(TestHBitmapData *data,
                                     const void *unused)
{
    /* Note that hbitmap_test_check has to be invoked manually in this test. */
    hbitmap_test_init(data, L1, 1);
    hbitmap_test_set(data, 0, 1);
    g_assert_cmpint(hbitmap_count(data->hb), ==, 2);
    hbitmap_test_check(data, 0);
    hbitmap_test_set(data, 2, 1);
    g_assert_cmpint(hbitmap_count(data->hb), ==, 4);
    hbitmap_test_check(data, 0);
    hbitmap_test_set(data, 0, 3);
    g_assert_cmpint(hbitmap_count(data->hb), ==, 4);
    hbitmap_test_reset(data, 0, 1);
    g_assert_cmpint(hbitmap_count(data->hb), ==, 2);
}

static void test_hbitmap_iter_granularity(TestHBitmapData *data,
                                          const void *unused)
{
    HBitmapIter hbi;

    /* Note that hbitmap_test_check has to be invoked manually in this test. */
    hbitmap_test_init(data, 131072 << 7, 7);
    hbitmap_iter_init(&hbi, data->hb, 0);
    g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);

    hbitmap_test_set(data, ((L2 + L1 + 1) << 7) + 8, 8);
    hbitmap_iter_init(&hbi, data->hb, 0);
    g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
    g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);

    hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7);
    g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);

    hbitmap_test_set(data, (131072 << 7) - 8, 8);
    hbitmap_iter_init(&hbi, data->hb, 0);
    g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
    g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7);
    g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);

    hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7);
    g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7);
    g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
}

static void hbitmap_test_add(const char *testpath,
                             void (*test_func)(TestHBitmapData *data, const void *user_data))
{
    g_test_add(testpath, TestHBitmapData, NULL, NULL, test_func,
               hbitmap_test_teardown);
}

int main(int argc, char **argv)
{
    g_test_init(&argc, &argv, NULL);
    hbitmap_test_add("/hbitmap/size/0", test_hbitmap_zero);
    hbitmap_test_add("/hbitmap/size/unaligned", test_hbitmap_unaligned);
    hbitmap_test_add("/hbitmap/iter/empty", test_hbitmap_iter_empty);
    hbitmap_test_add("/hbitmap/iter/partial", test_hbitmap_iter_partial);
    hbitmap_test_add("/hbitmap/iter/granularity", test_hbitmap_iter_granularity);
    hbitmap_test_add("/hbitmap/get/all", test_hbitmap_get_all);
    hbitmap_test_add("/hbitmap/get/some", test_hbitmap_get_some);
    hbitmap_test_add("/hbitmap/set/all", test_hbitmap_set_all);
    hbitmap_test_add("/hbitmap/set/one", test_hbitmap_set_one);
    hbitmap_test_add("/hbitmap/set/two-elem", test_hbitmap_set_two_elem);
    hbitmap_test_add("/hbitmap/set/general", test_hbitmap_set);
    hbitmap_test_add("/hbitmap/set/twice", test_hbitmap_set_twice);
    hbitmap_test_add("/hbitmap/set/overlap", test_hbitmap_set_overlap);
    hbitmap_test_add("/hbitmap/reset/empty", test_hbitmap_reset_empty);
    hbitmap_test_add("/hbitmap/reset/general", test_hbitmap_reset);
    hbitmap_test_add("/hbitmap/granularity", test_hbitmap_granularity);
    g_test_run();

    return 0;
}
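To make the assertions in test_hbitmap_granularity easier to follow, here is the arithmetic they encode (granularity 1, so each bitmap bit covers two items):

/*
 *   set(0, 1)   -> group 0 set          -> hbitmap_count() == 1 << 1 == 2
 *   set(2, 1)   -> groups {0, 1} set    -> hbitmap_count() == 2 << 1 == 4
 *   set(0, 3)   -> still groups {0, 1}  -> count stays 4
 *   reset(0, 1) -> clears group 0       -> count drops back to 2
 */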
12
trace-events
@ -79,10 +79,17 @@ commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "

# block/mirror.c
mirror_start(void *bs, void *s, void *co, void *opaque) "bs %p s %p co %p opaque %p"
mirror_restart_iter(void *s, int64_t cnt) "s %p dirty count %"PRId64
mirror_before_flush(void *s) "s %p"
mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64
mirror_before_sleep(void *s, int64_t cnt, int synced) "s %p dirty count %"PRId64" synced %d"
mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d"
mirror_cow(void *s, int64_t sector_num) "s %p sector_num %"PRId64
mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %p sector_num %"PRId64" nb_sectors %d ret %d"
mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d"
mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_num %"PRId64" in_flight %d"
mirror_yield_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
mirror_break_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"

# blockdev.c
qmp_block_job_cancel(void *job) "job %p"
@ -1060,3 +1067,8 @@ xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]"
xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x"
xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]"
xics_ics_eoi(int nr) "ics_eoi: irq %#x"

# hbitmap.c
hbitmap_iter_skip_words(const void *hb, void *hbi, uint64_t pos, unsigned long cur) "hb %p hbi %p pos %"PRId64" cur 0x%lx"
hbitmap_reset(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64
hbitmap_set(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64
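Each line above declares a trace point; QEMU's tracetool generates a matching trace_<name>() function with those argument types, which the instrumented code then calls.  For illustration, call sites as they would appear in block/mirror.c and util/hbitmap.c:

trace_mirror_one_iteration(s, sector_num, nb_sectors);
trace_hbitmap_set(hb, start, count,
                  start >> hb->granularity, last >> hb->granularity);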
@ -2,7 +2,7 @@ util-obj-y = osdep.o cutils.o qemu-timer-common.o
util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o
util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o
util-obj-y += envlist.o path.o host-utils.o cache-utils.o module.o
util-obj-y += bitmap.o bitops.o
util-obj-y += bitmap.o bitops.o hbitmap.o
util-obj-y += acl.o
util-obj-y += error.o qemu-error.o
util-obj-$(CONFIG_POSIX) += compatfd.o
401
util/hbitmap.c
Normal file
@ -0,0 +1,401 @@
/*
 * Hierarchical Bitmap Data Type
 *
 * Copyright Red Hat, Inc., 2012
 *
 * Author: Paolo Bonzini <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */

#include <string.h>
#include <glib.h>
#include <assert.h>
#include "qemu/osdep.h"
#include "qemu/hbitmap.h"
#include "qemu/host-utils.h"
#include "trace.h"

/* HBitmaps provide an array of bits.  The bits are stored as usual in an
 * array of unsigned longs, but HBitmap is also optimized to provide fast
 * iteration over set bits; going from one bit to the next is O(logB n)
 * worst case, with B = sizeof(long) * CHAR_BIT: the result is low enough
 * that the number of levels is in fact fixed.
 *
 * In order to do this, it stacks multiple bitmaps with progressively coarser
 * granularity; in all levels except the last, bit N is set iff the N-th
 * unsigned long is nonzero in the immediately next level.  When iteration
 * completes on the last level it can examine the 2nd-last level to quickly
 * skip entire words, and even do so recursively to skip blocks of 64 words or
 * powers thereof (32 on 32-bit machines).
 *
 * Given an index in the bitmap, it can be split in groups of bits like
 * this (for the 64-bit case):
 *
 *   bits 0-57 => word in the last bitmap     | bits 58-63 => bit in the word
 *   bits 0-51 => word in the 2nd-last bitmap | bits 52-57 => bit in the word
 *   bits 0-45 => word in the 3rd-last bitmap | bits 46-51 => bit in the word
 *
 * So it is easy to move up simply by shifting the index right by
 * log2(BITS_PER_LONG) bits.  To move down, you shift the index left
 * similarly, and add the word index within the group.  Iteration uses
 * ffs (find first set bit) to find the next word to examine; this
 * operation can be done in constant time in most current architectures.
 *
 * When setting or clearing a range of m bits on all levels, the work to
 * perform is O(m + m/W + m/W^2 + ...), which is O(m) like on a regular
 * bitmap.
 *
 * When iterating on a bitmap, each bit (on any level) is only visited
 * once.  Hence, the total cost of visiting a bitmap with m bits in it is
 * the number of bits that are set in all bitmaps.  Unless the bitmap is
 * extremely sparse, this is also O(m + m/W + m/W^2 + ...), so the amortized
 * cost of advancing from one bit to the next is usually constant (worst case
 * O(logB n) as in the non-amortized complexity).
 */

struct HBitmap {
    /* Number of total bits in the bottom level.  */
    uint64_t size;

    /* Number of set bits in the bottom level.  */
    uint64_t count;

    /* A scaling factor.  Given a granularity of G, each bit in the bitmap
     * will actually represent a group of 2^G elements.  Each operation on a
     * range of bits first rounds the bits to determine which group they land
     * in, and then affects the entire group; iteration will only visit the
     * first bit of each group.  Here is an example of operations in a
     * size-16, granularity-1 HBitmap:
     *
     *    initial state            00000000
     *    set(start=0, count=9)    11111000 (iter: 0, 2, 4, 6, 8)
     *    reset(start=1, count=3)  00111000 (iter: 4, 6, 8)
     *    set(start=9, count=2)    00111100 (iter: 4, 6, 8, 10)
     *    reset(start=5, count=5)  00000000
     *
     * From an implementation point of view, when setting or resetting bits,
     * the bitmap will scale bit numbers right by this amount of bits.  When
     * iterating, the bitmap will scale bit numbers left by this amount of
     * bits.
     */
    int granularity;

    /* A number of progressively less coarse bitmaps (i.e. level 0 is the
     * coarsest).  Each bit in level N represents a word in level N+1 that
     * has a set bit, except the last level where each bit represents the
     * actual bitmap.
     *
     * Note that all bitmaps have the same number of levels.  Even a 1-bit
     * bitmap will still allocate HBITMAP_LEVELS arrays.
     */
    unsigned long *levels[HBITMAP_LEVELS];
};

static inline int popcountl(unsigned long l)
{
    return BITS_PER_LONG == 32 ? ctpop32(l) : ctpop64(l);
}

/* Advance hbi to the next nonzero word and return it.  hbi->pos
 * is updated.  Returns zero if we reach the end of the bitmap.
 */
unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi)
{
    size_t pos = hbi->pos;
    const HBitmap *hb = hbi->hb;
    unsigned i = HBITMAP_LEVELS - 1;

    unsigned long cur;
    do {
        cur = hbi->cur[--i];
        pos >>= BITS_PER_LEVEL;
    } while (cur == 0);

    /* Check for end of iteration.  We always use fewer than BITS_PER_LONG
     * bits in the level 0 bitmap; thus we can repurpose the most significant
     * bit as a sentinel.  The sentinel is set in hbitmap_alloc and ensures
     * that the above loop ends even without an explicit check on i.
     */
    if (i == 0 && cur == (1UL << (BITS_PER_LONG - 1))) {
        return 0;
    }
    for (; i < HBITMAP_LEVELS - 1; i++) {
        /* Shift back pos to the left, matching the right shifts above.
         * The index of this word's least significant set bit provides
         * the low-order bits.
         */
        pos = (pos << BITS_PER_LEVEL) + ffsl(cur) - 1;
        hbi->cur[i] = cur & (cur - 1);

        /* Set up next level for iteration.  */
        cur = hb->levels[i + 1][pos];
    }

    hbi->pos = pos;
    trace_hbitmap_iter_skip_words(hbi->hb, hbi, pos, cur);

    assert(cur);
    return cur;
}

void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first)
{
    unsigned i, bit;
    uint64_t pos;

    hbi->hb = hb;
    pos = first >> hb->granularity;
    assert(pos < hb->size);
    hbi->pos = pos >> BITS_PER_LEVEL;
    hbi->granularity = hb->granularity;

    for (i = HBITMAP_LEVELS; i-- > 0; ) {
        bit = pos & (BITS_PER_LONG - 1);
        pos >>= BITS_PER_LEVEL;

        /* Drop bits representing items before first.  */
        hbi->cur[i] = hb->levels[i][pos] & ~((1UL << bit) - 1);

        /* We have already added level i+1, so the lowest set bit has
         * been processed.  Clear it.
         */
        if (i != HBITMAP_LEVELS - 1) {
            hbi->cur[i] &= ~(1UL << bit);
        }
    }
}

bool hbitmap_empty(const HBitmap *hb)
{
    return hb->count == 0;
}

int hbitmap_granularity(const HBitmap *hb)
{
    return hb->granularity;
}

uint64_t hbitmap_count(const HBitmap *hb)
{
    return hb->count << hb->granularity;
}

/* Count the number of set bits between start and end, not accounting for
 * the granularity.  Also an example of how to use hbitmap_iter_next_word.
 */
static uint64_t hb_count_between(HBitmap *hb, uint64_t start, uint64_t last)
{
    HBitmapIter hbi;
    uint64_t count = 0;
    uint64_t end = last + 1;
    unsigned long cur;
    size_t pos;

    hbitmap_iter_init(&hbi, hb, start << hb->granularity);
    for (;;) {
        pos = hbitmap_iter_next_word(&hbi, &cur);
        if (pos >= (end >> BITS_PER_LEVEL)) {
            break;
        }
        count += popcountl(cur);
    }

    if (pos == (end >> BITS_PER_LEVEL)) {
        /* Drop bits representing the END-th and subsequent items.  */
        int bit = end & (BITS_PER_LONG - 1);
        cur &= (1UL << bit) - 1;
        count += popcountl(cur);
    }

    return count;
}

/* Setting starts at the last layer and propagates up if an element
 * changes from zero to non-zero.
 */
static inline bool hb_set_elem(unsigned long *elem, uint64_t start, uint64_t last)
{
    unsigned long mask;
    bool changed;

    assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
    assert(start <= last);

    mask = 2UL << (last & (BITS_PER_LONG - 1));
    mask -= 1UL << (start & (BITS_PER_LONG - 1));
    changed = (*elem == 0);
    *elem |= mask;
    return changed;
}

/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)...  */
static void hb_set_between(HBitmap *hb, int level, uint64_t start, uint64_t last)
{
    size_t pos = start >> BITS_PER_LEVEL;
    size_t lastpos = last >> BITS_PER_LEVEL;
    bool changed = false;
    size_t i;

    i = pos;
    if (i < lastpos) {
        uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;
        changed |= hb_set_elem(&hb->levels[level][i], start, next - 1);
        for (;;) {
            start = next;
            next += BITS_PER_LONG;
            if (++i == lastpos) {
                break;
            }
            changed |= (hb->levels[level][i] == 0);
            hb->levels[level][i] = ~0UL;
        }
    }
    changed |= hb_set_elem(&hb->levels[level][i], start, last);

    /* If there was any change in this layer, we may have to update
     * the one above.
     */
    if (level > 0 && changed) {
        hb_set_between(hb, level - 1, pos, lastpos);
    }
}

void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count)
{
    /* Compute range in the last layer.  */
    uint64_t last = start + count - 1;

    trace_hbitmap_set(hb, start, count,
                      start >> hb->granularity, last >> hb->granularity);

    start >>= hb->granularity;
    last >>= hb->granularity;
    count = last - start + 1;

    hb->count += count - hb_count_between(hb, start, last);
    hb_set_between(hb, HBITMAP_LEVELS - 1, start, last);
}

/* Resetting works the other way round: propagate up if the new
 * value is zero.
 */
static inline bool hb_reset_elem(unsigned long *elem, uint64_t start, uint64_t last)
{
    unsigned long mask;
    bool blanked;

    assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
    assert(start <= last);

    mask = 2UL << (last & (BITS_PER_LONG - 1));
    mask -= 1UL << (start & (BITS_PER_LONG - 1));
    blanked = *elem != 0 && ((*elem & ~mask) == 0);
    *elem &= ~mask;
    return blanked;
}

/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)...  */
static void hb_reset_between(HBitmap *hb, int level, uint64_t start, uint64_t last)
{
    size_t pos = start >> BITS_PER_LEVEL;
    size_t lastpos = last >> BITS_PER_LEVEL;
    bool changed = false;
    size_t i;

    i = pos;
    if (i < lastpos) {
        uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;

        /* Here we need a more complex test than when setting bits.  Even if
         * something was changed, we must not blank bits in the upper level
         * unless the lower-level word became entirely zero.  So, remove pos
         * from the upper-level range if bits remain set.
         */
        if (hb_reset_elem(&hb->levels[level][i], start, next - 1)) {
            changed = true;
        } else {
            pos++;
        }

        for (;;) {
            start = next;
            next += BITS_PER_LONG;
            if (++i == lastpos) {
                break;
            }
            changed |= (hb->levels[level][i] != 0);
            hb->levels[level][i] = 0UL;
        }
    }

    /* Same as above, this time for lastpos.  */
    if (hb_reset_elem(&hb->levels[level][i], start, last)) {
        changed = true;
    } else {
        lastpos--;
    }

    if (level > 0 && changed) {
        hb_reset_between(hb, level - 1, pos, lastpos);
    }
}

void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count)
{
    /* Compute range in the last layer.  */
    uint64_t last = start + count - 1;

    trace_hbitmap_reset(hb, start, count,
                        start >> hb->granularity, last >> hb->granularity);

    start >>= hb->granularity;
    last >>= hb->granularity;

    hb->count -= hb_count_between(hb, start, last);
    hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last);
}

bool hbitmap_get(const HBitmap *hb, uint64_t item)
{
    /* Compute position and bit in the last layer.  */
    uint64_t pos = item >> hb->granularity;
    unsigned long bit = 1UL << (pos & (BITS_PER_LONG - 1));

    return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0;
}

void hbitmap_free(HBitmap *hb)
{
    unsigned i;
    for (i = HBITMAP_LEVELS; i-- > 0; ) {
        g_free(hb->levels[i]);
    }
    g_free(hb);
}

HBitmap *hbitmap_alloc(uint64_t size, int granularity)
{
    HBitmap *hb = g_malloc0(sizeof (struct HBitmap));
    unsigned i;

    assert(granularity >= 0 && granularity < 64);
    size = (size + (1ULL << granularity) - 1) >> granularity;
    assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE));

    hb->size = size;
    hb->granularity = granularity;
    for (i = HBITMAP_LEVELS; i-- > 0; ) {
        size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
        hb->levels[i] = g_malloc0(size * sizeof(unsigned long));
    }

    /* We necessarily have free bits in level 0 due to the definition
     * of HBITMAP_LEVELS, so use one for a sentinel.  This speeds up
     * hbitmap_iter_skip_words.
     */
    assert(size == 1);
    hb->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
    return hb;
}
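The index-splitting scheme described in the big comment at the top of hbitmap.c can be checked with a few lines of arithmetic.  A standalone sketch (illustrative only; BITS_PER_LEVEL is 6 on 64-bit hosts, and the number of levels follows from HBITMAP_LOG_MAX_SIZE):

#include <inttypes.h>
#include <stdio.h>

static void show_path(uint64_t item, int levels)
{
    uint64_t pos = item;
    int i;

    /* Finest level first: the word index is pos >> 6 and the bit index
     * pos & 63; moving one level up shifts the index right by 6 more bits.
     */
    for (i = levels - 1; i >= 0; i--) {
        printf("level %d: word %" PRIu64 ", bit %u\n",
               i, pos >> 6, (unsigned)(pos & 63));
        pos >>= 6;
    }
}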