-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2 iQEtBAABCAAXBQJZQyPmEBxmYW16QHJlZGhhdC5jb20ACgkQyjViTGqRccaWrgf/ SCAHpi4gzWbr7AN03jP16Qy/kqNik6F7LTNSqrRbvBPb3TNchDd4z44SAghK5m/r +IlYQc20sBZ60tRHIHAUSF2WNcea2pj1v3ZVgjrI7hiJ3DXPiqqt/dAR/W/BLIDO tAHAVF6Pnrjm9DC4d2zATLDHvcHMzWOsnePh7XcOm44REbwUr3GDg6bf2+j+5yfS 9ewmXfh8z4w1IvSn+f5B+IeCvGvJNA1D55dqcGo8Ivlg9PnElziXFaXO2s7UiLIM mF3eTSIbJQNNN+E+0lpRpnqQiq+Txxggu61Q4f8bOTBhEOPa3etj1ydnXMVbvX25 6SUuBfGh51tyOIZOJz3GtA== =9b+J -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/famz/tags/docker-and-block-pull-request' into staging # gpg: Signature made Fri 16 Jun 2017 01:18:46 BST # gpg: using RSA key 0xCA35624C6A9171C6 # gpg: Good signature from "Fam Zheng <famz@redhat.com>" # gpg: WARNING: This key is not certified with sufficiently trusted signatures! # gpg: It is not certain that the signature belongs to the owner. # Primary key fingerprint: 5003 7CB7 9706 0F76 F021 AD56 CA35 624C 6A91 71C6 * remotes/famz/tags/docker-and-block-pull-request: (23 commits) block: make accounting thread-safe block: split BlockAcctStats creation and setup block: introduce block_account_one_io block: protect modification of dirty bitmaps with a mutex migration/block: reset dirty bitmap before reading block: introduce dirty_bitmap_mutex block: protect tracked_requests and flush_queue with reqs_lock block: access write_gen with atomics block: use Stat64 for wr_highest_offset util: add stats64 module throttle-groups: protect throttled requests with a CoMutex throttle-groups: do not use qemu_co_enter_next throttle-groups: only start one coroutine from drained_begin block: access io_plugged with atomic ops block: access wakeup with atomic ops block: access serialising_in_flight with atomic ops block: access io_limits_disabled with atomic ops block: access quiesce_counter with atomic ops block: access copy_on_read with atomic ops docker: Add flex and bison to centos6 image ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
65a0e3e842
10
block.c
10
block.c
@ -320,6 +320,8 @@ BlockDriverState *bdrv_new(void)
|
||||
QLIST_INIT(&bs->op_blockers[i]);
|
||||
}
|
||||
notifier_with_return_list_init(&bs->before_write_notifiers);
|
||||
qemu_co_mutex_init(&bs->reqs_lock);
|
||||
qemu_mutex_init(&bs->dirty_bitmap_mutex);
|
||||
bs->refcnt = 1;
|
||||
bs->aio_context = qemu_get_aio_context();
|
||||
|
||||
@ -1300,7 +1302,9 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
|
||||
goto fail_opts;
|
||||
}
|
||||
|
||||
assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
|
||||
/* bdrv_new() and bdrv_close() make it so */
|
||||
assert(atomic_read(&bs->copy_on_read) == 0);
|
||||
|
||||
if (bs->open_flags & BDRV_O_COPY_ON_READ) {
|
||||
if (!bs->read_only) {
|
||||
bdrv_enable_copy_on_read(bs);
|
||||
@ -3063,7 +3067,7 @@ static void bdrv_close(BlockDriverState *bs)
|
||||
|
||||
g_free(bs->opaque);
|
||||
bs->opaque = NULL;
|
||||
bs->copy_on_read = 0;
|
||||
atomic_set(&bs->copy_on_read, 0);
|
||||
bs->backing_file[0] = '\0';
|
||||
bs->backing_format[0] = '\0';
|
||||
bs->total_sectors = 0;
|
||||
@ -3422,7 +3426,7 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, Error **errp)
|
||||
ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
|
||||
bdrv_dirty_bitmap_truncate(bs);
|
||||
bdrv_parent_cb_resize(bs);
|
||||
++bs->write_gen;
|
||||
atomic_inc(&bs->write_gen);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
@ -32,15 +32,19 @@
|
||||
static QEMUClockType clock_type = QEMU_CLOCK_REALTIME;
|
||||
static const int qtest_latency_ns = NANOSECONDS_PER_SECOND / 1000;
|
||||
|
||||
void block_acct_init(BlockAcctStats *stats, bool account_invalid,
|
||||
void block_acct_init(BlockAcctStats *stats)
|
||||
{
|
||||
qemu_mutex_init(&stats->lock);
|
||||
if (qtest_enabled()) {
|
||||
clock_type = QEMU_CLOCK_VIRTUAL;
|
||||
}
|
||||
}
|
||||
|
||||
void block_acct_setup(BlockAcctStats *stats, bool account_invalid,
|
||||
bool account_failed)
|
||||
{
|
||||
stats->account_invalid = account_invalid;
|
||||
stats->account_failed = account_failed;
|
||||
|
||||
if (qtest_enabled()) {
|
||||
clock_type = QEMU_CLOCK_VIRTUAL;
|
||||
}
|
||||
}
|
||||
|
||||
void block_acct_cleanup(BlockAcctStats *stats)
|
||||
@ -49,6 +53,7 @@ void block_acct_cleanup(BlockAcctStats *stats)
|
||||
QSLIST_FOREACH_SAFE(s, &stats->intervals, entries, next) {
|
||||
g_free(s);
|
||||
}
|
||||
qemu_mutex_destroy(&stats->lock);
|
||||
}
|
||||
|
||||
void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)
|
||||
@ -58,12 +63,15 @@ void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)
|
||||
|
||||
s = g_new0(BlockAcctTimedStats, 1);
|
||||
s->interval_length = interval_length;
|
||||
s->stats = stats;
|
||||
qemu_mutex_lock(&stats->lock);
|
||||
QSLIST_INSERT_HEAD(&stats->intervals, s, entries);
|
||||
|
||||
for (i = 0; i < BLOCK_MAX_IOTYPE; i++) {
|
||||
timed_average_init(&s->latency[i], clock_type,
|
||||
(uint64_t) interval_length * NANOSECONDS_PER_SECOND);
|
||||
}
|
||||
qemu_mutex_unlock(&stats->lock);
|
||||
}
|
||||
|
||||
BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats,
|
||||
@ -86,7 +94,8 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
|
||||
cookie->type = type;
|
||||
}
|
||||
|
||||
void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
|
||||
static void block_account_one_io(BlockAcctStats *stats, BlockAcctCookie *cookie,
|
||||
bool failed)
|
||||
{
|
||||
BlockAcctTimedStats *s;
|
||||
int64_t time_ns = qemu_clock_get_ns(clock_type);
|
||||
@ -98,8 +107,16 @@ void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
|
||||
|
||||
assert(cookie->type < BLOCK_MAX_IOTYPE);
|
||||
|
||||
qemu_mutex_lock(&stats->lock);
|
||||
|
||||
if (failed) {
|
||||
stats->failed_ops[cookie->type]++;
|
||||
} else {
|
||||
stats->nr_bytes[cookie->type] += cookie->bytes;
|
||||
stats->nr_ops[cookie->type]++;
|
||||
}
|
||||
|
||||
if (!failed || stats->account_failed) {
|
||||
stats->total_time_ns[cookie->type] += latency_ns;
|
||||
stats->last_access_time_ns = time_ns;
|
||||
|
||||
@ -108,51 +125,44 @@ void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
|
||||
}
|
||||
}
|
||||
|
||||
qemu_mutex_unlock(&stats->lock);
|
||||
}
|
||||
|
||||
void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
|
||||
{
|
||||
block_account_one_io(stats, cookie, false);
|
||||
}
|
||||
|
||||
void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
|
||||
{
|
||||
assert(cookie->type < BLOCK_MAX_IOTYPE);
|
||||
|
||||
stats->failed_ops[cookie->type]++;
|
||||
|
||||
if (stats->account_failed) {
|
||||
BlockAcctTimedStats *s;
|
||||
int64_t time_ns = qemu_clock_get_ns(clock_type);
|
||||
int64_t latency_ns = time_ns - cookie->start_time_ns;
|
||||
|
||||
if (qtest_enabled()) {
|
||||
latency_ns = qtest_latency_ns;
|
||||
}
|
||||
|
||||
stats->total_time_ns[cookie->type] += latency_ns;
|
||||
stats->last_access_time_ns = time_ns;
|
||||
|
||||
QSLIST_FOREACH(s, &stats->intervals, entries) {
|
||||
timed_average_account(&s->latency[cookie->type], latency_ns);
|
||||
}
|
||||
}
|
||||
block_account_one_io(stats, cookie, true);
|
||||
}
|
||||
|
||||
void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type)
|
||||
{
|
||||
assert(type < BLOCK_MAX_IOTYPE);
|
||||
|
||||
/* block_acct_done() and block_acct_failed() update
|
||||
* total_time_ns[], but this one does not. The reason is that
|
||||
* invalid requests are accounted during their submission,
|
||||
* therefore there's no actual I/O involved. */
|
||||
|
||||
/* block_account_one_io() updates total_time_ns[], but this one does
|
||||
* not. The reason is that invalid requests are accounted during their
|
||||
* submission, therefore there's no actual I/O involved.
|
||||
*/
|
||||
qemu_mutex_lock(&stats->lock);
|
||||
stats->invalid_ops[type]++;
|
||||
|
||||
if (stats->account_invalid) {
|
||||
stats->last_access_time_ns = qemu_clock_get_ns(clock_type);
|
||||
}
|
||||
qemu_mutex_unlock(&stats->lock);
|
||||
}
|
||||
|
||||
void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
|
||||
int num_requests)
|
||||
{
|
||||
assert(type < BLOCK_MAX_IOTYPE);
|
||||
|
||||
qemu_mutex_lock(&stats->lock);
|
||||
stats->merged[type] += num_requests;
|
||||
qemu_mutex_unlock(&stats->lock);
|
||||
}
|
||||
|
||||
int64_t block_acct_idle_time_ns(BlockAcctStats *stats)
|
||||
@ -167,7 +177,9 @@ double block_acct_queue_depth(BlockAcctTimedStats *stats,
|
||||
|
||||
assert(type < BLOCK_MAX_IOTYPE);
|
||||
|
||||
qemu_mutex_lock(&stats->stats->lock);
|
||||
sum = timed_average_sum(&stats->latency[type], &elapsed);
|
||||
qemu_mutex_unlock(&stats->stats->lock);
|
||||
|
||||
return (double) sum / elapsed;
|
||||
}
|
||||
|
@ -216,8 +216,10 @@ BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
|
||||
blk->shared_perm = shared_perm;
|
||||
blk_set_enable_write_cache(blk, true);
|
||||
|
||||
qemu_co_mutex_init(&blk->public.throttled_reqs_lock);
|
||||
qemu_co_queue_init(&blk->public.throttled_reqs[0]);
|
||||
qemu_co_queue_init(&blk->public.throttled_reqs[1]);
|
||||
block_acct_init(&blk->stats);
|
||||
|
||||
notifier_list_init(&blk->remove_bs_notifiers);
|
||||
notifier_list_init(&blk->insert_bs_notifiers);
|
||||
@ -1953,7 +1955,7 @@ static void blk_root_drained_begin(BdrvChild *child)
|
||||
/* Note that blk->root may not be accessible here yet if we are just
|
||||
* attaching to a BlockDriverState that is drained. Use child instead. */
|
||||
|
||||
if (blk->public.io_limits_disabled++ == 0) {
|
||||
if (atomic_fetch_inc(&blk->public.io_limits_disabled) == 0) {
|
||||
throttle_group_restart_blk(blk);
|
||||
}
|
||||
}
|
||||
@ -1964,7 +1966,7 @@ static void blk_root_drained_end(BdrvChild *child)
|
||||
assert(blk->quiesce_counter);
|
||||
|
||||
assert(blk->public.io_limits_disabled);
|
||||
--blk->public.io_limits_disabled;
|
||||
atomic_dec(&blk->public.io_limits_disabled);
|
||||
|
||||
if (--blk->quiesce_counter == 0) {
|
||||
if (blk->dev_ops && blk->dev_ops->drained_end) {
|
||||
|
@ -37,6 +37,7 @@
|
||||
* or enabled. A frozen bitmap can only abdicate() or reclaim().
|
||||
*/
|
||||
struct BdrvDirtyBitmap {
|
||||
QemuMutex *mutex;
|
||||
HBitmap *bitmap; /* Dirty sector bitmap implementation */
|
||||
HBitmap *meta; /* Meta dirty bitmap */
|
||||
BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
|
||||
@ -52,6 +53,27 @@ struct BdrvDirtyBitmapIter {
|
||||
BdrvDirtyBitmap *bitmap;
|
||||
};
|
||||
|
||||
static inline void bdrv_dirty_bitmaps_lock(BlockDriverState *bs)
|
||||
{
|
||||
qemu_mutex_lock(&bs->dirty_bitmap_mutex);
|
||||
}
|
||||
|
||||
static inline void bdrv_dirty_bitmaps_unlock(BlockDriverState *bs)
|
||||
{
|
||||
qemu_mutex_unlock(&bs->dirty_bitmap_mutex);
|
||||
}
|
||||
|
||||
void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap)
|
||||
{
|
||||
qemu_mutex_lock(bitmap->mutex);
|
||||
}
|
||||
|
||||
void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap)
|
||||
{
|
||||
qemu_mutex_unlock(bitmap->mutex);
|
||||
}
|
||||
|
||||
/* Called with BQL or dirty_bitmap lock taken. */
|
||||
BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
|
||||
{
|
||||
BdrvDirtyBitmap *bm;
|
||||
@ -65,6 +87,7 @@ BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Called with BQL taken. */
|
||||
void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
|
||||
{
|
||||
assert(!bdrv_dirty_bitmap_frozen(bitmap));
|
||||
@ -72,6 +95,7 @@ void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
|
||||
bitmap->name = NULL;
|
||||
}
|
||||
|
||||
/* Called with BQL taken. */
|
||||
BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
|
||||
uint32_t granularity,
|
||||
const char *name,
|
||||
@ -96,11 +120,14 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
|
||||
return NULL;
|
||||
}
|
||||
bitmap = g_new0(BdrvDirtyBitmap, 1);
|
||||
bitmap->mutex = &bs->dirty_bitmap_mutex;
|
||||
bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
|
||||
bitmap->size = bitmap_size;
|
||||
bitmap->name = g_strdup(name);
|
||||
bitmap->disabled = false;
|
||||
bdrv_dirty_bitmaps_lock(bs);
|
||||
QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
|
||||
bdrv_dirty_bitmaps_unlock(bs);
|
||||
return bitmap;
|
||||
}
|
||||
|
||||
@ -119,18 +146,22 @@ void bdrv_create_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap,
|
||||
int chunk_size)
|
||||
{
|
||||
assert(!bitmap->meta);
|
||||
qemu_mutex_lock(bitmap->mutex);
|
||||
bitmap->meta = hbitmap_create_meta(bitmap->bitmap,
|
||||
chunk_size * BITS_PER_BYTE);
|
||||
qemu_mutex_unlock(bitmap->mutex);
|
||||
}
|
||||
|
||||
void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap)
|
||||
{
|
||||
assert(bitmap->meta);
|
||||
qemu_mutex_lock(bitmap->mutex);
|
||||
hbitmap_free_meta(bitmap->bitmap);
|
||||
bitmap->meta = NULL;
|
||||
qemu_mutex_unlock(bitmap->mutex);
|
||||
}
|
||||
|
||||
int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
|
||||
int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs,
|
||||
BdrvDirtyBitmap *bitmap, int64_t sector,
|
||||
int nb_sectors)
|
||||
{
|
||||
@ -147,11 +178,26 @@ int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
|
||||
return false;
|
||||
}
|
||||
|
||||
int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
|
||||
BdrvDirtyBitmap *bitmap, int64_t sector,
|
||||
int nb_sectors)
|
||||
{
|
||||
bool dirty;
|
||||
|
||||
qemu_mutex_lock(bitmap->mutex);
|
||||
dirty = bdrv_dirty_bitmap_get_meta_locked(bs, bitmap, sector, nb_sectors);
|
||||
qemu_mutex_unlock(bitmap->mutex);
|
||||
|
||||
return dirty;
|
||||
}
|
||||
|
||||
void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs,
|
||||
BdrvDirtyBitmap *bitmap, int64_t sector,
|
||||
int nb_sectors)
|
||||
{
|
||||
qemu_mutex_lock(bitmap->mutex);
|
||||
hbitmap_reset(bitmap->meta, sector, nb_sectors);
|
||||
qemu_mutex_unlock(bitmap->mutex);
|
||||
}
|
||||
|
||||
int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap)
|
||||
@ -164,16 +210,19 @@ const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap)
|
||||
return bitmap->name;
|
||||
}
|
||||
|
||||
/* Called with BQL taken. */
|
||||
bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
|
||||
{
|
||||
return bitmap->successor;
|
||||
}
|
||||
|
||||
/* Called with BQL taken. */
|
||||
bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
|
||||
{
|
||||
return !(bitmap->disabled || bitmap->successor);
|
||||
}
|
||||
|
||||
/* Called with BQL taken. */
|
||||
DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
|
||||
{
|
||||
if (bdrv_dirty_bitmap_frozen(bitmap)) {
|
||||
@ -188,6 +237,7 @@ DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
|
||||
/**
|
||||
* Create a successor bitmap destined to replace this bitmap after an operation.
|
||||
* Requires that the bitmap is not frozen and has no successor.
|
||||
* Called with BQL taken.
|
||||
*/
|
||||
int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
|
||||
BdrvDirtyBitmap *bitmap, Error **errp)
|
||||
@ -220,6 +270,7 @@ int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
|
||||
/**
|
||||
* For a bitmap with a successor, yield our name to the successor,
|
||||
* delete the old bitmap, and return a handle to the new bitmap.
|
||||
* Called with BQL taken.
|
||||
*/
|
||||
BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
|
||||
BdrvDirtyBitmap *bitmap,
|
||||
@ -247,6 +298,7 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
|
||||
* In cases of failure where we can no longer safely delete the parent,
|
||||
* we may wish to re-join the parent and child/successor.
|
||||
* The merged parent will be un-frozen, but not explicitly re-enabled.
|
||||
* Called with BQL taken.
|
||||
*/
|
||||
BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
|
||||
BdrvDirtyBitmap *parent,
|
||||
@ -271,25 +323,30 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
|
||||
|
||||
/**
|
||||
* Truncates _all_ bitmaps attached to a BDS.
|
||||
* Called with BQL taken.
|
||||
*/
|
||||
void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
|
||||
{
|
||||
BdrvDirtyBitmap *bitmap;
|
||||
uint64_t size = bdrv_nb_sectors(bs);
|
||||
|
||||
bdrv_dirty_bitmaps_lock(bs);
|
||||
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
|
||||
assert(!bdrv_dirty_bitmap_frozen(bitmap));
|
||||
assert(!bitmap->active_iterators);
|
||||
hbitmap_truncate(bitmap->bitmap, size);
|
||||
bitmap->size = size;
|
||||
}
|
||||
bdrv_dirty_bitmaps_unlock(bs);
|
||||
}
|
||||
|
||||
/* Called with BQL taken. */
|
||||
static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
|
||||
BdrvDirtyBitmap *bitmap,
|
||||
bool only_named)
|
||||
{
|
||||
BdrvDirtyBitmap *bm, *next;
|
||||
bdrv_dirty_bitmaps_lock(bs);
|
||||
QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
|
||||
if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
|
||||
assert(!bm->active_iterators);
|
||||
@ -301,15 +358,19 @@ static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
|
||||
g_free(bm);
|
||||
|
||||
if (bitmap) {
|
||||
return;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (bitmap) {
|
||||
abort();
|
||||
}
|
||||
|
||||
out:
|
||||
bdrv_dirty_bitmaps_unlock(bs);
|
||||
}
|
||||
|
||||
/* Called with BQL taken. */
|
||||
void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
|
||||
{
|
||||
bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
|
||||
@ -318,18 +379,21 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
|
||||
/**
|
||||
* Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
|
||||
* There must not be any frozen bitmaps attached.
|
||||
* Called with BQL taken.
|
||||
*/
|
||||
void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
|
||||
{
|
||||
bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
|
||||
}
|
||||
|
||||
/* Called with BQL taken. */
|
||||
void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
|
||||
{
|
||||
assert(!bdrv_dirty_bitmap_frozen(bitmap));
|
||||
bitmap->disabled = true;
|
||||
}
|
||||
|
||||
/* Called with BQL taken. */
|
||||
void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
|
||||
{
|
||||
assert(!bdrv_dirty_bitmap_frozen(bitmap));
|
||||
@ -342,6 +406,7 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
|
||||
BlockDirtyInfoList *list = NULL;
|
||||
BlockDirtyInfoList **plist = &list;
|
||||
|
||||
bdrv_dirty_bitmaps_lock(bs);
|
||||
QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
|
||||
BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
|
||||
BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
|
||||
@ -354,11 +419,13 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
|
||||
*plist = entry;
|
||||
plist = &entry->next;
|
||||
}
|
||||
bdrv_dirty_bitmaps_unlock(bs);
|
||||
|
||||
return list;
|
||||
}
|
||||
|
||||
int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
|
||||
/* Called within bdrv_dirty_bitmap_lock..unlock */
|
||||
int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
|
||||
int64_t sector)
|
||||
{
|
||||
if (bitmap) {
|
||||
@ -432,23 +499,42 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
|
||||
return hbitmap_iter_next(&iter->hbi);
|
||||
}
|
||||
|
||||
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
|
||||
/* Called within bdrv_dirty_bitmap_lock..unlock */
|
||||
void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
|
||||
int64_t cur_sector, int64_t nr_sectors)
|
||||
{
|
||||
assert(bdrv_dirty_bitmap_enabled(bitmap));
|
||||
hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
|
||||
}
|
||||
|
||||
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
|
||||
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
|
||||
int64_t cur_sector, int64_t nr_sectors)
|
||||
{
|
||||
bdrv_dirty_bitmap_lock(bitmap);
|
||||
bdrv_set_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
|
||||
bdrv_dirty_bitmap_unlock(bitmap);
|
||||
}
|
||||
|
||||
/* Called within bdrv_dirty_bitmap_lock..unlock */
|
||||
void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
|
||||
int64_t cur_sector, int64_t nr_sectors)
|
||||
{
|
||||
assert(bdrv_dirty_bitmap_enabled(bitmap));
|
||||
hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
|
||||
}
|
||||
|
||||
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
|
||||
int64_t cur_sector, int64_t nr_sectors)
|
||||
{
|
||||
bdrv_dirty_bitmap_lock(bitmap);
|
||||
bdrv_reset_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
|
||||
bdrv_dirty_bitmap_unlock(bitmap);
|
||||
}
|
||||
|
||||
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
|
||||
{
|
||||
assert(bdrv_dirty_bitmap_enabled(bitmap));
|
||||
bdrv_dirty_bitmap_lock(bitmap);
|
||||
if (!out) {
|
||||
hbitmap_reset_all(bitmap->bitmap);
|
||||
} else {
|
||||
@ -457,6 +543,7 @@ void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
|
||||
hbitmap_granularity(backup));
|
||||
*out = backup;
|
||||
}
|
||||
bdrv_dirty_bitmap_unlock(bitmap);
|
||||
}
|
||||
|
||||
void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
|
||||
@ -508,12 +595,19 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
|
||||
int64_t nr_sectors)
|
||||
{
|
||||
BdrvDirtyBitmap *bitmap;
|
||||
|
||||
if (QLIST_EMPTY(&bs->dirty_bitmaps)) {
|
||||
return;
|
||||
}
|
||||
|
||||
bdrv_dirty_bitmaps_lock(bs);
|
||||
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
|
||||
if (!bdrv_dirty_bitmap_enabled(bitmap)) {
|
||||
continue;
|
||||
}
|
||||
hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
|
||||
}
|
||||
bdrv_dirty_bitmaps_unlock(bs);
|
||||
}
|
||||
|
||||
/**
|
||||
|
51
block/io.c
51
block/io.c
@ -130,13 +130,13 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
|
||||
*/
|
||||
void bdrv_enable_copy_on_read(BlockDriverState *bs)
|
||||
{
|
||||
bs->copy_on_read++;
|
||||
atomic_inc(&bs->copy_on_read);
|
||||
}
|
||||
|
||||
void bdrv_disable_copy_on_read(BlockDriverState *bs)
|
||||
{
|
||||
assert(bs->copy_on_read > 0);
|
||||
bs->copy_on_read--;
|
||||
int old = atomic_fetch_dec(&bs->copy_on_read);
|
||||
assert(old >= 1);
|
||||
}
|
||||
|
||||
/* Check if any requests are in-flight (including throttled requests) */
|
||||
@ -241,7 +241,7 @@ void bdrv_drained_begin(BlockDriverState *bs)
|
||||
return;
|
||||
}
|
||||
|
||||
if (!bs->quiesce_counter++) {
|
||||
if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
|
||||
aio_disable_external(bdrv_get_aio_context(bs));
|
||||
bdrv_parent_drained_begin(bs);
|
||||
}
|
||||
@ -252,7 +252,7 @@ void bdrv_drained_begin(BlockDriverState *bs)
|
||||
void bdrv_drained_end(BlockDriverState *bs)
|
||||
{
|
||||
assert(bs->quiesce_counter > 0);
|
||||
if (--bs->quiesce_counter > 0) {
|
||||
if (atomic_fetch_dec(&bs->quiesce_counter) > 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -375,11 +375,13 @@ void bdrv_drain_all(void)
|
||||
static void tracked_request_end(BdrvTrackedRequest *req)
|
||||
{
|
||||
if (req->serialising) {
|
||||
req->bs->serialising_in_flight--;
|
||||
atomic_dec(&req->bs->serialising_in_flight);
|
||||
}
|
||||
|
||||
qemu_co_mutex_lock(&req->bs->reqs_lock);
|
||||
QLIST_REMOVE(req, list);
|
||||
qemu_co_queue_restart_all(&req->wait_queue);
|
||||
qemu_co_mutex_unlock(&req->bs->reqs_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -404,7 +406,9 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
|
||||
|
||||
qemu_co_queue_init(&req->wait_queue);
|
||||
|
||||
qemu_co_mutex_lock(&bs->reqs_lock);
|
||||
QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
|
||||
qemu_co_mutex_unlock(&bs->reqs_lock);
|
||||
}
|
||||
|
||||
static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
|
||||
@ -414,7 +418,7 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
|
||||
- overlap_offset;
|
||||
|
||||
if (!req->serialising) {
|
||||
req->bs->serialising_in_flight++;
|
||||
atomic_inc(&req->bs->serialising_in_flight);
|
||||
req->serialising = true;
|
||||
}
|
||||
|
||||
@ -501,7 +505,8 @@ static void dummy_bh_cb(void *opaque)
|
||||
|
||||
void bdrv_wakeup(BlockDriverState *bs)
|
||||
{
|
||||
if (bs->wakeup) {
|
||||
/* The barrier (or an atomic op) is in the caller. */
|
||||
if (atomic_read(&bs->wakeup)) {
|
||||
aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
|
||||
}
|
||||
}
|
||||
@ -519,12 +524,13 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
|
||||
bool retry;
|
||||
bool waited = false;
|
||||
|
||||
if (!bs->serialising_in_flight) {
|
||||
if (!atomic_read(&bs->serialising_in_flight)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
do {
|
||||
retry = false;
|
||||
qemu_co_mutex_lock(&bs->reqs_lock);
|
||||
QLIST_FOREACH(req, &bs->tracked_requests, list) {
|
||||
if (req == self || (!req->serialising && !self->serialising)) {
|
||||
continue;
|
||||
@ -543,7 +549,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
|
||||
* (instead of producing a deadlock in the former case). */
|
||||
if (!req->waiting_for) {
|
||||
self->waiting_for = req;
|
||||
qemu_co_queue_wait(&req->wait_queue, NULL);
|
||||
qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
|
||||
self->waiting_for = NULL;
|
||||
retry = true;
|
||||
waited = true;
|
||||
@ -551,6 +557,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
|
||||
}
|
||||
}
|
||||
}
|
||||
qemu_co_mutex_unlock(&bs->reqs_lock);
|
||||
} while (retry);
|
||||
|
||||
return waited;
|
||||
@ -1144,7 +1151,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
|
||||
bdrv_inc_in_flight(bs);
|
||||
|
||||
/* Don't do copy-on-read if we read data before write operation */
|
||||
if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) {
|
||||
if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) {
|
||||
flags |= BDRV_REQ_COPY_ON_READ;
|
||||
}
|
||||
|
||||
@ -1401,12 +1408,10 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
|
||||
}
|
||||
bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
|
||||
|
||||
++bs->write_gen;
|
||||
atomic_inc(&bs->write_gen);
|
||||
bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
|
||||
|
||||
if (bs->wr_highest_offset < offset + bytes) {
|
||||
bs->wr_highest_offset = offset + bytes;
|
||||
}
|
||||
stat64_max(&bs->wr_highest_offset, offset + bytes);
|
||||
|
||||
if (ret >= 0) {
|
||||
bs->total_sectors = MAX(bs->total_sectors, end_sector);
|
||||
@ -2292,14 +2297,17 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
|
||||
goto early_exit;
|
||||
}
|
||||
|
||||
current_gen = bs->write_gen;
|
||||
qemu_co_mutex_lock(&bs->reqs_lock);
|
||||
current_gen = atomic_read(&bs->write_gen);
|
||||
|
||||
/* Wait until any previous flushes are completed */
|
||||
while (bs->active_flush_req) {
|
||||
qemu_co_queue_wait(&bs->flush_queue, NULL);
|
||||
qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
|
||||
}
|
||||
|
||||
/* Flushes reach this point in nondecreasing current_gen order. */
|
||||
bs->active_flush_req = true;
|
||||
qemu_co_mutex_unlock(&bs->reqs_lock);
|
||||
|
||||
/* Write back all layers by calling one driver function */
|
||||
if (bs->drv->bdrv_co_flush) {
|
||||
@ -2371,9 +2379,12 @@ out:
|
||||
if (ret == 0) {
|
||||
bs->flushed_gen = current_gen;
|
||||
}
|
||||
|
||||
qemu_co_mutex_lock(&bs->reqs_lock);
|
||||
bs->active_flush_req = false;
|
||||
/* Return value is ignored - it's ok if wait queue is empty */
|
||||
qemu_co_queue_next(&bs->flush_queue);
|
||||
qemu_co_mutex_unlock(&bs->reqs_lock);
|
||||
|
||||
early_exit:
|
||||
bdrv_dec_in_flight(bs);
|
||||
@ -2517,7 +2528,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
|
||||
}
|
||||
ret = 0;
|
||||
out:
|
||||
++bs->write_gen;
|
||||
atomic_inc(&bs->write_gen);
|
||||
bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS,
|
||||
req.bytes >> BDRV_SECTOR_BITS);
|
||||
tracked_request_end(&req);
|
||||
@ -2644,7 +2655,7 @@ void bdrv_io_plug(BlockDriverState *bs)
|
||||
bdrv_io_plug(child->bs);
|
||||
}
|
||||
|
||||
if (bs->io_plugged++ == 0) {
|
||||
if (atomic_fetch_inc(&bs->io_plugged) == 0) {
|
||||
BlockDriver *drv = bs->drv;
|
||||
if (drv && drv->bdrv_io_plug) {
|
||||
drv->bdrv_io_plug(bs);
|
||||
@ -2657,7 +2668,7 @@ void bdrv_io_unplug(BlockDriverState *bs)
|
||||
BdrvChild *child;
|
||||
|
||||
assert(bs->io_plugged);
|
||||
if (--bs->io_plugged == 0) {
|
||||
if (atomic_fetch_dec(&bs->io_plugged) == 1) {
|
||||
BlockDriver *drv = bs->drv;
|
||||
if (drv && drv->bdrv_io_unplug) {
|
||||
drv->bdrv_io_unplug(bs);
|
||||
|
@ -342,6 +342,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
|
||||
int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
|
||||
MAX_IO_SECTORS);
|
||||
|
||||
bdrv_dirty_bitmap_lock(s->dirty_bitmap);
|
||||
sector_num = bdrv_dirty_iter_next(s->dbi);
|
||||
if (sector_num < 0) {
|
||||
bdrv_set_dirty_iter(s->dbi, 0);
|
||||
@ -349,6 +350,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
|
||||
trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
|
||||
assert(sector_num >= 0);
|
||||
}
|
||||
bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
|
||||
|
||||
first_chunk = sector_num / sectors_per_chunk;
|
||||
while (test_bit(first_chunk, s->in_flight_bitmap)) {
|
||||
@ -360,12 +362,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
|
||||
|
||||
/* Find the number of consecutive dirty chunks following the first dirty
|
||||
* one, and wait for in flight requests in them. */
|
||||
bdrv_dirty_bitmap_lock(s->dirty_bitmap);
|
||||
while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) {
|
||||
int64_t next_dirty;
|
||||
int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk;
|
||||
int64_t next_chunk = next_sector / sectors_per_chunk;
|
||||
if (next_sector >= end ||
|
||||
!bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
|
||||
!bdrv_get_dirty_locked(source, s->dirty_bitmap, next_sector)) {
|
||||
break;
|
||||
}
|
||||
if (test_bit(next_chunk, s->in_flight_bitmap)) {
|
||||
@ -386,8 +389,10 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
|
||||
* calling bdrv_get_block_status_above could yield - if some blocks are
|
||||
* marked dirty in this window, we need to know.
|
||||
*/
|
||||
bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num,
|
||||
bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, sector_num,
|
||||
nb_chunks * sectors_per_chunk);
|
||||
bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
|
||||
|
||||
bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
|
||||
while (nb_chunks > 0 && sector_num < end) {
|
||||
int64_t ret;
|
||||
@ -506,6 +511,8 @@ static void mirror_exit(BlockJob *job, void *opaque)
|
||||
BlockDriverState *mirror_top_bs = s->mirror_top_bs;
|
||||
Error *local_err = NULL;
|
||||
|
||||
bdrv_release_dirty_bitmap(src, s->dirty_bitmap);
|
||||
|
||||
/* Make sure that the source BDS doesn't go away before we called
|
||||
* block_job_completed(). */
|
||||
bdrv_ref(src);
|
||||
@ -904,7 +911,6 @@ immediate_exit:
|
||||
g_free(s->cow_bitmap);
|
||||
g_free(s->in_flight_bitmap);
|
||||
bdrv_dirty_iter_free(s->dbi);
|
||||
bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
|
||||
|
||||
data = g_malloc(sizeof(*data));
|
||||
data->ret = ret;
|
||||
|
@ -730,7 +730,9 @@ nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
|
||||
if (task->ret < 0) {
|
||||
error_report("NFS Error: %s", nfs_get_error(nfs));
|
||||
}
|
||||
task->complete = 1;
|
||||
|
||||
/* Set task->complete before reading bs->wakeup. */
|
||||
atomic_mb_set(&task->complete, 1);
|
||||
bdrv_wakeup(task->bs);
|
||||
}
|
||||
|
||||
|
@ -441,7 +441,7 @@ static BlockStats *bdrv_query_bds_stats(const BlockDriverState *bs,
|
||||
s->node_name = g_strdup(bdrv_get_node_name(bs));
|
||||
}
|
||||
|
||||
s->stats->wr_highest_offset = bs->wr_highest_offset;
|
||||
s->stats->wr_highest_offset = stat64_get(&bs->wr_highest_offset);
|
||||
|
||||
if (bs->file) {
|
||||
s->has_parent = true;
|
||||
|
@ -698,7 +698,8 @@ out:
|
||||
|
||||
srco->co = NULL;
|
||||
srco->ret = ret;
|
||||
srco->finished = true;
|
||||
/* Set srco->finished before reading bs->wakeup. */
|
||||
atomic_mb_set(&srco->finished, true);
|
||||
if (srco->bs) {
|
||||
bdrv_wakeup(srco->bs);
|
||||
}
|
||||
|
@ -240,7 +240,7 @@ static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write)
|
||||
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
|
||||
bool must_wait;
|
||||
|
||||
if (blkp->io_limits_disabled) {
|
||||
if (atomic_read(&blkp->io_limits_disabled)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -260,6 +260,25 @@ static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write)
|
||||
return must_wait;
|
||||
}
|
||||
|
||||
/* Start the next pending I/O request for a BlockBackend. Return whether
|
||||
* any request was actually pending.
|
||||
*
|
||||
* @blk: the current BlockBackend
|
||||
* @is_write: the type of operation (read/write)
|
||||
*/
|
||||
static bool coroutine_fn throttle_group_co_restart_queue(BlockBackend *blk,
|
||||
bool is_write)
|
||||
{
|
||||
BlockBackendPublic *blkp = blk_get_public(blk);
|
||||
bool ret;
|
||||
|
||||
qemu_co_mutex_lock(&blkp->throttled_reqs_lock);
|
||||
ret = qemu_co_queue_next(&blkp->throttled_reqs[is_write]);
|
||||
qemu_co_mutex_unlock(&blkp->throttled_reqs_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Look for the next pending I/O request and schedule it.
|
||||
*
|
||||
* This assumes that tg->lock is held.
|
||||
@ -287,12 +306,12 @@ static void schedule_next_request(BlockBackend *blk, bool is_write)
|
||||
if (!must_wait) {
|
||||
/* Give preference to requests from the current blk */
|
||||
if (qemu_in_coroutine() &&
|
||||
qemu_co_queue_next(&blkp->throttled_reqs[is_write])) {
|
||||
throttle_group_co_restart_queue(blk, is_write)) {
|
||||
token = blk;
|
||||
} else {
|
||||
ThrottleTimers *tt = &blk_get_public(token)->throttle_timers;
|
||||
int64_t now = qemu_clock_get_ns(tt->clock_type);
|
||||
timer_mod(tt->timers[is_write], now + 1);
|
||||
timer_mod(tt->timers[is_write], now);
|
||||
tg->any_timer_armed[is_write] = true;
|
||||
}
|
||||
tg->tokens[is_write] = token;
|
||||
@ -326,7 +345,10 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
|
||||
if (must_wait || blkp->pending_reqs[is_write]) {
|
||||
blkp->pending_reqs[is_write]++;
|
||||
qemu_mutex_unlock(&tg->lock);
|
||||
qemu_co_queue_wait(&blkp->throttled_reqs[is_write], NULL);
|
||||
qemu_co_mutex_lock(&blkp->throttled_reqs_lock);
|
||||
qemu_co_queue_wait(&blkp->throttled_reqs[is_write],
|
||||
&blkp->throttled_reqs_lock);
|
||||
qemu_co_mutex_unlock(&blkp->throttled_reqs_lock);
|
||||
qemu_mutex_lock(&tg->lock);
|
||||
blkp->pending_reqs[is_write]--;
|
||||
}
|
||||
@ -340,15 +362,50 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
|
||||
qemu_mutex_unlock(&tg->lock);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
BlockBackend *blk;
|
||||
bool is_write;
|
||||
} RestartData;
|
||||
|
||||
static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
|
||||
{
|
||||
RestartData *data = opaque;
|
||||
BlockBackend *blk = data->blk;
|
||||
bool is_write = data->is_write;
|
||||
BlockBackendPublic *blkp = blk_get_public(blk);
|
||||
ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
|
||||
bool empty_queue;
|
||||
|
||||
empty_queue = !throttle_group_co_restart_queue(blk, is_write);
|
||||
|
||||
/* If the request queue was empty then we have to take care of
|
||||
* scheduling the next one */
|
||||
if (empty_queue) {
|
||||
qemu_mutex_lock(&tg->lock);
|
||||
schedule_next_request(blk, is_write);
|
||||
qemu_mutex_unlock(&tg->lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void throttle_group_restart_queue(BlockBackend *blk, bool is_write)
|
||||
{
|
||||
Coroutine *co;
|
||||
RestartData rd = {
|
||||
.blk = blk,
|
||||
.is_write = is_write
|
||||
};
|
||||
|
||||
co = qemu_coroutine_create(throttle_group_restart_queue_entry, &rd);
|
||||
aio_co_enter(blk_get_aio_context(blk), co);
|
||||
}
|
||||
|
||||
void throttle_group_restart_blk(BlockBackend *blk)
|
||||
{
|
||||
BlockBackendPublic *blkp = blk_get_public(blk);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
while (qemu_co_enter_next(&blkp->throttled_reqs[i])) {
|
||||
;
|
||||
}
|
||||
if (blkp->throttle_state) {
|
||||
throttle_group_restart_queue(blk, 0);
|
||||
throttle_group_restart_queue(blk, 1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -376,8 +433,7 @@ void throttle_group_config(BlockBackend *blk, ThrottleConfig *cfg)
|
||||
throttle_config(ts, tt, cfg);
|
||||
qemu_mutex_unlock(&tg->lock);
|
||||
|
||||
qemu_co_enter_next(&blkp->throttled_reqs[0]);
|
||||
qemu_co_enter_next(&blkp->throttled_reqs[1]);
|
||||
throttle_group_restart_blk(blk);
|
||||
}
|
||||
|
||||
/* Get the throttle configuration from a particular group. Similar to
|
||||
@ -408,7 +464,6 @@ static void timer_cb(BlockBackend *blk, bool is_write)
|
||||
BlockBackendPublic *blkp = blk_get_public(blk);
|
||||
ThrottleState *ts = blkp->throttle_state;
|
||||
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
|
||||
bool empty_queue;
|
||||
|
||||
/* The timer has just been fired, so we can update the flag */
|
||||
qemu_mutex_lock(&tg->lock);
|
||||
@ -416,17 +471,7 @@ static void timer_cb(BlockBackend *blk, bool is_write)
|
||||
qemu_mutex_unlock(&tg->lock);
|
||||
|
||||
/* Run the request that was waiting for this timer */
|
||||
aio_context_acquire(blk_get_aio_context(blk));
|
||||
empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]);
|
||||
aio_context_release(blk_get_aio_context(blk));
|
||||
|
||||
/* If the request queue was empty then we have to take care of
|
||||
* scheduling the next one */
|
||||
if (empty_queue) {
|
||||
qemu_mutex_lock(&tg->lock);
|
||||
schedule_next_request(blk, is_write);
|
||||
qemu_mutex_unlock(&tg->lock);
|
||||
}
|
||||
throttle_group_restart_queue(blk, is_write);
|
||||
}
|
||||
|
||||
static void read_timer_cb(void *opaque)
|
||||
|
48
blockdev.c
48
blockdev.c
@ -595,7 +595,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
|
||||
autostart = 0;
|
||||
}
|
||||
|
||||
block_acct_init(blk_get_stats(blk), account_invalid, account_failed);
|
||||
block_acct_setup(blk_get_stats(blk), account_invalid, account_failed);
|
||||
|
||||
if (!parse_stats_intervals(blk_get_stats(blk), interval_list, errp)) {
|
||||
blk_unref(blk);
|
||||
@ -1362,12 +1362,10 @@ out_aio_context:
|
||||
static BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
|
||||
const char *name,
|
||||
BlockDriverState **pbs,
|
||||
AioContext **paio,
|
||||
Error **errp)
|
||||
{
|
||||
BlockDriverState *bs;
|
||||
BdrvDirtyBitmap *bitmap;
|
||||
AioContext *aio_context;
|
||||
|
||||
if (!node) {
|
||||
error_setg(errp, "Node cannot be NULL");
|
||||
@ -1383,29 +1381,17 @@ static BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
aio_context = bdrv_get_aio_context(bs);
|
||||
aio_context_acquire(aio_context);
|
||||
|
||||
bitmap = bdrv_find_dirty_bitmap(bs, name);
|
||||
if (!bitmap) {
|
||||
error_setg(errp, "Dirty bitmap '%s' not found", name);
|
||||
goto fail;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (pbs) {
|
||||
*pbs = bs;
|
||||
}
|
||||
if (paio) {
|
||||
*paio = aio_context;
|
||||
} else {
|
||||
aio_context_release(aio_context);
|
||||
}
|
||||
|
||||
return bitmap;
|
||||
|
||||
fail:
|
||||
aio_context_release(aio_context);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* New and old BlockDriverState structs for atomic group operations */
|
||||
@ -1791,7 +1777,7 @@ static void external_snapshot_commit(BlkActionState *common)
|
||||
/* We don't need (or want) to use the transactional
|
||||
* bdrv_reopen_multiple() across all the entries at once, because we
|
||||
* don't want to abort all of them if one of them fails the reopen */
|
||||
if (!state->old_bs->copy_on_read) {
|
||||
if (!atomic_read(&state->old_bs->copy_on_read)) {
|
||||
bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR,
|
||||
NULL);
|
||||
}
|
||||
@ -2025,7 +2011,6 @@ static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
|
||||
state->bitmap = block_dirty_bitmap_lookup(action->node,
|
||||
action->name,
|
||||
&state->bs,
|
||||
&state->aio_context,
|
||||
errp);
|
||||
if (!state->bitmap) {
|
||||
return;
|
||||
@ -2733,7 +2718,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
|
||||
bool has_granularity, uint32_t granularity,
|
||||
Error **errp)
|
||||
{
|
||||
AioContext *aio_context;
|
||||
BlockDriverState *bs;
|
||||
|
||||
if (!name || name[0] == '\0') {
|
||||
@ -2746,14 +2730,11 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
|
||||
return;
|
||||
}
|
||||
|
||||
aio_context = bdrv_get_aio_context(bs);
|
||||
aio_context_acquire(aio_context);
|
||||
|
||||
if (has_granularity) {
|
||||
if (granularity < 512 || !is_power_of_2(granularity)) {
|
||||
error_setg(errp, "Granularity must be power of 2 "
|
||||
"and at least 512");
|
||||
goto out;
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
/* Default to cluster size, if available: */
|
||||
@ -2761,19 +2742,15 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
|
||||
}
|
||||
|
||||
bdrv_create_dirty_bitmap(bs, granularity, name, errp);
|
||||
|
||||
out:
|
||||
aio_context_release(aio_context);
|
||||
}
|
||||
|
||||
void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
|
||||
Error **errp)
|
||||
{
|
||||
AioContext *aio_context;
|
||||
BlockDriverState *bs;
|
||||
BdrvDirtyBitmap *bitmap;
|
||||
|
||||
bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp);
|
||||
bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
|
||||
if (!bitmap || !bs) {
|
||||
return;
|
||||
}
|
||||
@ -2782,13 +2759,10 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
|
||||
error_setg(errp,
|
||||
"Bitmap '%s' is currently frozen and cannot be removed",
|
||||
name);
|
||||
goto out;
|
||||
return;
|
||||
}
|
||||
bdrv_dirty_bitmap_make_anon(bitmap);
|
||||
bdrv_release_dirty_bitmap(bs, bitmap);
|
||||
|
||||
out:
|
||||
aio_context_release(aio_context);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2798,11 +2772,10 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
|
||||
void qmp_block_dirty_bitmap_clear(const char *node, const char *name,
|
||||
Error **errp)
|
||||
{
|
||||
AioContext *aio_context;
|
||||
BdrvDirtyBitmap *bitmap;
|
||||
BlockDriverState *bs;
|
||||
|
||||
bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp);
|
||||
bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
|
||||
if (!bitmap || !bs) {
|
||||
return;
|
||||
}
|
||||
@ -2811,18 +2784,15 @@ void qmp_block_dirty_bitmap_clear(const char *node, const char *name,
|
||||
error_setg(errp,
|
||||
"Bitmap '%s' is currently frozen and cannot be modified",
|
||||
name);
|
||||
goto out;
|
||||
return;
|
||||
} else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
|
||||
error_setg(errp,
|
||||
"Bitmap '%s' is currently disabled and cannot be cleared",
|
||||
name);
|
||||
goto out;
|
||||
return;
|
||||
}
|
||||
|
||||
bdrv_clear_dirty_bitmap(bitmap, NULL);
|
||||
|
||||
out:
|
||||
aio_context_release(aio_context);
|
||||
}
|
||||
|
||||
void hmp_drive_del(Monitor *mon, const QDict *qdict)
|
||||
|
@ -26,8 +26,10 @@
|
||||
#define BLOCK_ACCOUNTING_H
|
||||
|
||||
#include "qemu/timed-average.h"
|
||||
#include "qemu/thread.h"
|
||||
|
||||
typedef struct BlockAcctTimedStats BlockAcctTimedStats;
|
||||
typedef struct BlockAcctStats BlockAcctStats;
|
||||
|
||||
enum BlockAcctType {
|
||||
BLOCK_ACCT_READ,
|
||||
@ -37,12 +39,14 @@ enum BlockAcctType {
|
||||
};
|
||||
|
||||
struct BlockAcctTimedStats {
|
||||
BlockAcctStats *stats;
|
||||
TimedAverage latency[BLOCK_MAX_IOTYPE];
|
||||
unsigned interval_length; /* in seconds */
|
||||
QSLIST_ENTRY(BlockAcctTimedStats) entries;
|
||||
};
|
||||
|
||||
typedef struct BlockAcctStats {
|
||||
struct BlockAcctStats {
|
||||
QemuMutex lock;
|
||||
uint64_t nr_bytes[BLOCK_MAX_IOTYPE];
|
||||
uint64_t nr_ops[BLOCK_MAX_IOTYPE];
|
||||
uint64_t invalid_ops[BLOCK_MAX_IOTYPE];
|
||||
@ -53,7 +57,7 @@ typedef struct BlockAcctStats {
|
||||
QSLIST_HEAD(, BlockAcctTimedStats) intervals;
|
||||
bool account_invalid;
|
||||
bool account_failed;
|
||||
} BlockAcctStats;
|
||||
};
|
||||
|
||||
typedef struct BlockAcctCookie {
|
||||
int64_t bytes;
|
||||
@ -61,7 +65,8 @@ typedef struct BlockAcctCookie {
|
||||
enum BlockAcctType type;
|
||||
} BlockAcctCookie;
|
||||
|
||||
void block_acct_init(BlockAcctStats *stats, bool account_invalid,
|
||||
void block_acct_init(BlockAcctStats *stats);
|
||||
void block_acct_setup(BlockAcctStats *stats, bool account_invalid,
|
||||
bool account_failed);
|
||||
void block_acct_cleanup(BlockAcctStats *stats);
|
||||
void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length);
|
||||
|
@ -402,7 +402,8 @@ void bdrv_drain_all(void);
|
||||
* block_job_defer_to_main_loop for how to do it). \
|
||||
*/ \
|
||||
assert(!bs_->wakeup); \
|
||||
bs_->wakeup = true; \
|
||||
/* Set bs->wakeup before evaluating cond. */ \
|
||||
atomic_mb_set(&bs_->wakeup, true); \
|
||||
while (busy_) { \
|
||||
if ((cond)) { \
|
||||
waited_ = busy_ = true; \
|
||||
@ -414,7 +415,7 @@ void bdrv_drain_all(void);
|
||||
waited_ |= busy_; \
|
||||
} \
|
||||
} \
|
||||
bs_->wakeup = false; \
|
||||
atomic_set(&bs_->wakeup, false); \
|
||||
} \
|
||||
waited_; })
|
||||
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "qemu/option.h"
|
||||
#include "qemu/queue.h"
|
||||
#include "qemu/coroutine.h"
|
||||
#include "qemu/stats64.h"
|
||||
#include "qemu/timer.h"
|
||||
#include "qapi-types.h"
|
||||
#include "qemu/hbitmap.h"
|
||||
@ -595,11 +596,6 @@ struct BlockDriverState {
|
||||
|
||||
/* Protected by AioContext lock */
|
||||
|
||||
/* If true, copy read backing sectors into image. Can be >1 if more
|
||||
* than one client has requested copy-on-read.
|
||||
*/
|
||||
int copy_on_read;
|
||||
|
||||
/* If we are reading a disk image, give its size in sectors.
|
||||
* Generally read-only; it is written to by load_snapshot and
|
||||
* save_snapshot, but the block layer is quiescent during those.
|
||||
@ -609,34 +605,57 @@ struct BlockDriverState {
|
||||
/* Callback before write request is processed */
|
||||
NotifierWithReturnList before_write_notifiers;
|
||||
|
||||
/* number of in-flight requests; overall and serialising */
|
||||
unsigned int in_flight;
|
||||
unsigned int serialising_in_flight;
|
||||
|
||||
bool wakeup;
|
||||
|
||||
/* Offset after the highest byte written to */
|
||||
uint64_t wr_highest_offset;
|
||||
|
||||
/* threshold limit for writes, in bytes. "High water mark". */
|
||||
uint64_t write_threshold_offset;
|
||||
NotifierWithReturn write_threshold_notifier;
|
||||
|
||||
/* counter for nested bdrv_io_plug */
|
||||
unsigned io_plugged;
|
||||
|
||||
QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
|
||||
CoQueue flush_queue; /* Serializing flush queue */
|
||||
bool active_flush_req; /* Flush request in flight? */
|
||||
unsigned int write_gen; /* Current data generation */
|
||||
unsigned int flushed_gen; /* Flushed write generation */
|
||||
|
||||
/* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
|
||||
* Reading from the list can be done with either the BQL or the
|
||||
* dirty_bitmap_mutex. Modifying a bitmap only requires
|
||||
* dirty_bitmap_mutex. */
|
||||
QemuMutex dirty_bitmap_mutex;
|
||||
QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
|
||||
|
||||
/* Offset after the highest byte written to */
|
||||
Stat64 wr_highest_offset;
|
||||
|
||||
/* If true, copy read backing sectors into image. Can be >1 if more
|
||||
* than one client has requested copy-on-read. Accessed with atomic
|
||||
* ops.
|
||||
*/
|
||||
int copy_on_read;
|
||||
|
||||
/* number of in-flight requests; overall and serialising.
|
||||
* Accessed with atomic ops.
|
||||
*/
|
||||
unsigned int in_flight;
|
||||
unsigned int serialising_in_flight;
|
||||
|
||||
/* Internal to BDRV_POLL_WHILE and bdrv_wakeup. Accessed with atomic
|
||||
* ops.
|
||||
*/
|
||||
bool wakeup;
|
||||
|
||||
/* counter for nested bdrv_io_plug.
|
||||
* Accessed with atomic ops.
|
||||
*/
|
||||
unsigned io_plugged;
|
||||
|
||||
/* do we need to tell the guest if we have a volatile write cache? */
|
||||
int enable_write_cache;
|
||||
|
||||
/* Accessed with atomic ops. */
|
||||
int quiesce_counter;
|
||||
unsigned int write_gen; /* Current data generation */
|
||||
|
||||
/* Protected by reqs_lock. */
|
||||
CoMutex reqs_lock;
|
||||
QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
|
||||
CoQueue flush_queue; /* Serializing flush queue */
|
||||
bool active_flush_req; /* Flush request in flight? */
|
||||
|
||||
/* Only read/written by whoever has set active_flush_req to true. */
|
||||
unsigned int flushed_gen; /* Flushed write generation */
|
||||
};
|
||||
|
||||
struct BlockBackendRootState {
|
||||
|
@ -36,8 +36,6 @@ bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
|
||||
const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap);
|
||||
int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap);
|
||||
DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
|
||||
int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
|
||||
int64_t sector);
|
||||
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
|
||||
int64_t cur_sector, int64_t nr_sectors);
|
||||
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
|
||||
@ -45,6 +43,9 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
|
||||
int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
|
||||
BdrvDirtyBitmap *bitmap, int64_t sector,
|
||||
int nb_sectors);
|
||||
int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs,
|
||||
BdrvDirtyBitmap *bitmap, int64_t sector,
|
||||
int nb_sectors);
|
||||
void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs,
|
||||
BdrvDirtyBitmap *bitmap, int64_t sector,
|
||||
int nb_sectors);
|
||||
@ -52,11 +53,6 @@ BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap);
|
||||
BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap,
|
||||
uint64_t first_sector);
|
||||
void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter);
|
||||
int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter);
|
||||
void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num);
|
||||
int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
|
||||
int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap);
|
||||
void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
|
||||
|
||||
uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap,
|
||||
uint64_t start, uint64_t count);
|
||||
@ -72,4 +68,19 @@ void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
|
||||
bool finish);
|
||||
void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap);
|
||||
|
||||
/* Functions that require manual locking. */
|
||||
void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap);
|
||||
void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap);
|
||||
int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
|
||||
int64_t sector);
|
||||
void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
|
||||
int64_t cur_sector, int64_t nr_sectors);
|
||||
void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
|
||||
int64_t cur_sector, int64_t nr_sectors);
|
||||
int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter);
|
||||
void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num);
|
||||
int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
|
||||
int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap);
|
||||
void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
|
||||
|
||||
#endif
|
||||
|
193
include/qemu/stats64.h
Normal file
193
include/qemu/stats64.h
Normal file
@ -0,0 +1,193 @@
|
||||
/*
|
||||
* Atomic operations on 64-bit quantities.
|
||||
*
|
||||
* Copyright (C) 2017 Red Hat, Inc.
|
||||
*
|
||||
* Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#ifndef QEMU_STATS64_H
|
||||
#define QEMU_STATS64_H 1
|
||||
|
||||
#include "qemu/atomic.h"
|
||||
|
||||
/* A 64-bit statistics counter supporting atomic operations.
 *
 * With CONFIG_ATOMIC64 the counter is a single 64-bit word.  On
 * architectures that do not have 64-bit accesses it is split into two
 * 32-bit halves guarded by a reader-writer spinlock; even there, the
 * implementation tries hard not to take the lock.
 */
typedef struct Stat64 {
#ifdef CONFIG_ATOMIC64
    uint64_t value;     /* the whole counter */
#else
    uint32_t low;       /* bits 0..31 of the counter */
    uint32_t high;      /* bits 32..63 of the counter */
    uint32_t lock;      /* reader-writer spinlock word */
#endif
} Stat64;
|
||||
|
||||
#ifdef CONFIG_ATOMIC64
|
||||
/* Give @s its initial @value.  The store is a plain struct assignment. */
static inline void stat64_init(Stat64 *s, uint64_t value)
{
    Stat64 initial = { value };

    /* This is not guaranteed to be atomic! */
    *s = initial;
}
|
||||
|
||||
static inline uint64_t stat64_get(const Stat64 *s)
|
||||
{
|
||||
return atomic_read__nocheck(&s->value);
|
||||
}
|
||||
|
||||
/* Atomically add @value to the counter @s. */
static inline void stat64_add(Stat64 *s, uint64_t value)
{
    uint64_t *counter = &s->value;

    atomic_add(counter, value);
}
|
||||
|
||||
/* Atomically lower @s to @value if @value is smaller than what is stored.
 *
 * The compare-and-swap is retried with the freshly observed contents
 * until the stored value is no longer greater than @value.
 */
static inline void stat64_min(Stat64 *s, uint64_t value)
{
    uint64_t seen = atomic_read__nocheck(&s->value);

    for (;;) {
        if (seen <= value) {
            break;
        }
        seen = atomic_cmpxchg__nocheck(&s->value, seen, value);
    }
}
|
||||
|
||||
/* Atomically raise @s to @value if @value is larger than what is stored.
 *
 * The compare-and-swap is retried with the freshly observed contents
 * until the stored value is no longer smaller than @value.
 */
static inline void stat64_max(Stat64 *s, uint64_t value)
{
    uint64_t seen = atomic_read__nocheck(&s->value);

    for (;;) {
        if (seen >= value) {
            break;
        }
        seen = atomic_cmpxchg__nocheck(&s->value, seen, value);
    }
}
|
||||
#else
|
||||
uint64_t stat64_get(const Stat64 *s);
|
||||
bool stat64_min_slow(Stat64 *s, uint64_t value);
|
||||
bool stat64_max_slow(Stat64 *s, uint64_t value);
|
||||
bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high);
|
||||
|
||||
/* Give @s its initial @value: the two halves are filled in and the lock
 * word is cleared.  The store is a plain struct assignment.
 */
static inline void stat64_init(Stat64 *s, uint64_t value)
{
    Stat64 initial = {
        .low = value,
        .high = value >> 32,
        .lock = 0,
    };

    /* This is not guaranteed to be atomic! */
    *s = initial;
}
|
||||
|
||||
/* Add @value to the counter @s.
 *
 * The lock is avoided when only one half is affected: if the low 32 bits
 * of @value are zero, only s->high is touched; if the addition neither
 * carries nor has a high part, only s->low is updated with a 32-bit
 * cmpxchg.  Everything else goes through stat64_add32_carry().
 */
static inline void stat64_add(Stat64 *s, uint64_t value)
{
    uint32_t add_hi = value >> 32;
    uint32_t add_lo = (uint32_t) value;

    if (add_lo == 0) {
        /* Nothing to add to the low half, so no carry is possible. */
        if (add_hi != 0) {
            atomic_add(&s->high, add_hi);
        }
        return;
    }

    for (;;) {
        uint32_t seen = s->low;
        uint32_t sum = seen + add_lo;

        if (sum < add_lo || add_hi) {
            /* If the high part is affected, take the lock. */
            if (stat64_add32_carry(s, add_lo, add_hi)) {
                return;
            }
            /* The lock was busy: start over with a fresh s->low. */
            continue;
        }

        /* No carry, try with a 32-bit cmpxchg.  The result is independent of
         * the high 32 bits, so it can race just fine with stat64_add32_carry
         * and even stat64_get!
         */
        if (atomic_cmpxchg(&s->low, seen, sum) == seen) {
            return;
        }
    }
}
|
||||
|
||||
/* Lower @s to @value if @value is smaller than what is stored.
 *
 * The halves are first inspected without the lock; the function returns
 * early whenever they show that the stored value is already small enough.
 * Otherwise stat64_min_slow() is called, and the whole check is repeated
 * for as long as it reports failure.
 */
static inline void stat64_min(Stat64 *s, uint64_t value)
{
    const uint32_t want_hi = value >> 32;
    const uint32_t want_lo = (uint32_t) value;

    for (;;) {
        uint32_t cur_hi = atomic_read(&s->high);

        if (cur_hi < want_hi) {
            return;
        }

        if (cur_hi == want_hi) {
            uint32_t cur_lo;

            /* High 32 bits are equal.  Read low after high, otherwise we
             * can get a false positive (e.g. 0x1235,0x0000 changes to
             * 0x1234,0x8000 and we read it as 0x1234,0x0000). Pairs with
             * the write barrier in stat64_min_slow.
             */
            smp_rmb();
            cur_lo = atomic_read(&s->low);
            if (cur_lo <= want_lo) {
                return;
            }

            /* See if we were lucky and a writer raced against us.  The
             * barrier is theoretically unnecessary, but if we remove it
             * we may miss being lucky.
             */
            smp_rmb();
            cur_hi = atomic_read(&s->high);
            if (cur_hi < want_hi) {
                return;
            }
        }

        /* If the value changes in any way, we have to take the lock. */
        if (stat64_min_slow(s, value)) {
            return;
        }
    }
}
|
||||
|
||||
/* Raise @s to @value if @value is larger than what is stored.
 *
 * The halves are first inspected without the lock; the function returns
 * early whenever they show that the stored value is already large enough.
 * Otherwise stat64_max_slow() is called, and the whole check is repeated
 * for as long as it reports failure.
 */
static inline void stat64_max(Stat64 *s, uint64_t value)
{
    const uint32_t want_hi = value >> 32;
    const uint32_t want_lo = (uint32_t) value;

    for (;;) {
        uint32_t cur_hi = atomic_read(&s->high);

        if (cur_hi > want_hi) {
            return;
        }

        if (cur_hi == want_hi) {
            uint32_t cur_lo;

            /* High 32 bits are equal.  Read low after high, otherwise we
             * can get a false positive (e.g. 0x1234,0x8000 changes to
             * 0x1235,0x0000 and we read it as 0x1235,0x8000). Pairs with
             * the write barrier in stat64_max_slow.
             */
            smp_rmb();
            cur_lo = atomic_read(&s->low);
            if (cur_lo >= want_lo) {
                return;
            }

            /* See if we were lucky and a writer raced against us.  The
             * barrier is theoretically unnecessary, but if we remove it
             * we may miss being lucky.
             */
            smp_rmb();
            cur_hi = atomic_read(&s->high);
            if (cur_hi > want_hi) {
                return;
            }
        }

        /* If the value changes in any way, we have to take the lock. */
        if (stat64_max_slow(s, value)) {
            return;
        }
    }
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
@ -72,15 +72,13 @@ typedef struct BlockDevOps {
|
||||
* fields that must be public. This is in particular for QLIST_ENTRY() and
|
||||
* friends so that BlockBackends can be kept in lists outside block-backend.c */
|
||||
typedef struct BlockBackendPublic {
|
||||
/* I/O throttling has its own locking, but also some fields are
|
||||
* protected by the AioContext lock.
|
||||
*/
|
||||
|
||||
/* Protected by AioContext lock. */
|
||||
/* throttled_reqs_lock protects the CoQueues for throttled requests. */
|
||||
CoMutex throttled_reqs_lock;
|
||||
CoQueue throttled_reqs[2];
|
||||
|
||||
/* Nonzero if the I/O limits are currently being ignored; generally
|
||||
* it is zero. */
|
||||
* it is zero. Accessed with atomic operations.
|
||||
*/
|
||||
unsigned int io_limits_disabled;
|
||||
|
||||
/* The following fields are protected by the ThrottleGroup lock.
|
||||
|
@ -341,10 +341,8 @@ static int set_dirty_tracking(void)
|
||||
int ret;
|
||||
|
||||
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
|
||||
aio_context_acquire(blk_get_aio_context(bmds->blk));
|
||||
bmds->dirty_bitmap = bdrv_create_dirty_bitmap(blk_bs(bmds->blk),
|
||||
BLOCK_SIZE, NULL, NULL);
|
||||
aio_context_release(blk_get_aio_context(bmds->blk));
|
||||
if (!bmds->dirty_bitmap) {
|
||||
ret = -errno;
|
||||
goto fail;
|
||||
@ -355,9 +353,7 @@ static int set_dirty_tracking(void)
|
||||
fail:
|
||||
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
|
||||
if (bmds->dirty_bitmap) {
|
||||
aio_context_acquire(blk_get_aio_context(bmds->blk));
|
||||
bdrv_release_dirty_bitmap(blk_bs(bmds->blk), bmds->dirty_bitmap);
|
||||
aio_context_release(blk_get_aio_context(bmds->blk));
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
@ -370,9 +366,7 @@ static void unset_dirty_tracking(void)
|
||||
BlkMigDevState *bmds;
|
||||
|
||||
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
|
||||
aio_context_acquire(blk_get_aio_context(bmds->blk));
|
||||
bdrv_release_dirty_bitmap(blk_bs(bmds->blk), bmds->dirty_bitmap);
|
||||
aio_context_release(blk_get_aio_context(bmds->blk));
|
||||
}
|
||||
}
|
||||
|
||||
@ -531,13 +525,16 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
|
||||
} else {
|
||||
blk_mig_unlock();
|
||||
}
|
||||
if (bdrv_get_dirty(bs, bmds->dirty_bitmap, sector)) {
|
||||
|
||||
bdrv_dirty_bitmap_lock(bmds->dirty_bitmap);
|
||||
if (bdrv_get_dirty_locked(bs, bmds->dirty_bitmap, sector)) {
|
||||
if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
|
||||
nr_sectors = total_sectors - sector;
|
||||
} else {
|
||||
nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
|
||||
}
|
||||
bdrv_reset_dirty_bitmap_locked(bmds->dirty_bitmap, sector, nr_sectors);
|
||||
bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap);
|
||||
|
||||
blk = g_new(BlkMigBlock, 1);
|
||||
blk->buf = g_malloc(BLOCK_SIZE);
|
||||
blk->bmds = bmds;
|
||||
@ -570,12 +567,12 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
|
||||
g_free(blk);
|
||||
}
|
||||
|
||||
bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, sector, nr_sectors);
|
||||
sector += nr_sectors;
|
||||
bmds->cur_dirty = sector;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap);
|
||||
sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
|
||||
bmds->cur_dirty = sector;
|
||||
}
|
||||
|
@ -126,7 +126,7 @@ docker-run: docker-qemu-src
|
||||
" COPYING $(EXECUTABLE) to $(IMAGE)"))
|
||||
$(call quiet-command, \
|
||||
$(SRC_PATH)/tests/docker/docker.py run \
|
||||
-t \
|
||||
$(if $(NOUSER),,-u $(shell id -u)) -t \
|
||||
$(if $V,,--rm) \
|
||||
$(if $(DEBUG),-i,--net=none) \
|
||||
-e TARGET_LIST=$(TARGET_LIST) \
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM centos:6
|
||||
RUN yum install -y epel-release
|
||||
ENV PACKAGES libfdt-devel ccache \
|
||||
tar git make gcc g++ \
|
||||
tar git make gcc g++ flex bison \
|
||||
zlib-devel glib2-devel SDL-devel pixman-devel \
|
||||
epel-release
|
||||
RUN yum install -y $PACKAGES
|
||||
|
@ -1,8 +1,8 @@
|
||||
FROM fedora:latest
|
||||
ENV PACKAGES \
|
||||
ccache git tar PyYAML sparse flex bison python2 \
|
||||
ccache git tar PyYAML sparse flex bison python2 bzip2 hostname \
|
||||
glib2-devel pixman-devel zlib-devel SDL-devel libfdt-devel \
|
||||
gcc gcc-c++ clang make perl which bc findutils \
|
||||
gcc gcc-c++ clang make perl which bc findutils libaio-devel \
|
||||
mingw32-pixman mingw32-glib2 mingw32-gmp mingw32-SDL mingw32-pkg-config \
|
||||
mingw32-gtk2 mingw32-gtk3 mingw32-gnutls mingw32-nettle mingw32-libtasn1 \
|
||||
mingw32-libjpeg-turbo mingw32-libpng mingw32-curl mingw32-libssh2 \
|
||||
|
@ -42,4 +42,5 @@ util-obj-y += log.o
|
||||
util-obj-y += qdist.o
|
||||
util-obj-y += qht.o
|
||||
util-obj-y += range.o
|
||||
util-obj-y += stats64.o
|
||||
util-obj-y += systemd.o
|
||||
|
137
util/stats64.c
Normal file
137
util/stats64.c
Normal file
@ -0,0 +1,137 @@
|
||||
/*
|
||||
* Atomic operations on 64-bit quantities.
|
||||
*
|
||||
* Copyright (C) 2017 Red Hat, Inc.
|
||||
*
|
||||
* Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/atomic.h"
|
||||
#include "qemu/stats64.h"
|
||||
#include "qemu/processor.h"
|
||||
|
||||
#ifndef CONFIG_ATOMIC64
|
||||
/* Take the lock of @s for reading.
 *
 * Readers are counted in steps of 2 in s->lock; bit 0 is the writer flag
 * set by stat64_wrtrylock().
 */
static inline void stat64_rdlock(Stat64 *s)
{
    /* Keep out incoming writers to avoid them starving us. */
    atomic_add(&s->lock, 2);

    /* If there is a concurrent writer, wait for it. */
    for (;;) {
        if (!(atomic_read(&s->lock) & 1)) {
            break;
        }
        cpu_relax();
    }
}
|
||||
|
||||
/* Release a read lock on @s by undoing the +2 done in stat64_rdlock(). */
static inline void stat64_rdunlock(Stat64 *s)
{
    uint32_t *lock_word = &s->lock;

    atomic_sub(lock_word, 2);
}
|
||||
|
||||
/* Try to take the lock of @s for writing, without waiting.
 *
 * Succeeds only if the lock word is 0, i.e. there is neither a reader nor
 * another writer.  Returns true if the lock was taken.
 */
static inline bool stat64_wrtrylock(Stat64 *s)
{
    uint32_t previous = atomic_cmpxchg(&s->lock, 0, 1);

    return previous == 0;
}
|
||||
|
||||
/*
 * Release the write lock on @s.  Decrementing (rather than storing 0)
 * drops the writer bit while keeping the +2 contributions of any readers
 * that registered themselves while the writer was active.
 */
static inline void stat64_wrunlock(Stat64 *s)
{
    atomic_dec(&s->lock);
}
|
||||
|
||||
uint64_t stat64_get(const Stat64 *s)
|
||||
{
|
||||
uint32_t high, low;
|
||||
|
||||
stat64_rdlock((Stat64 *)s);
|
||||
|
||||
/* 64-bit writes always take the lock, so we can read in
|
||||
* any order.
|
||||
*/
|
||||
high = atomic_read(&s->high);
|
||||
low = atomic_read(&s->low);
|
||||
stat64_rdunlock((Stat64 *)s);
|
||||
|
||||
return ((uint64_t)high << 32) | low;
|
||||
}
|
||||
|
||||
/*
 * Add the 64-bit quantity (@high:@low) to @s, propagating the carry from
 * the low half into the high half.
 *
 * Returns false, leaving @s untouched, if the write lock could not be
 * taken; returns true once the addition has been applied.
 */
bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high)
{
    uint32_t prev_low;

    if (!stat64_wrtrylock(s)) {
        cpu_relax();
        return false;
    }

    /*
     * 64-bit readers always take the lock, so the order of the two
     * updates is invisible to them.  Updating the low half first tells us
     * whether the addition wrapped and a carry is owed to the high half.
     */
    prev_low = atomic_fetch_add(&s->low, low);
    if (prev_low + low < prev_low) {
        high++;
    }
    atomic_add(&s->high, high);

    stat64_wrunlock(s);
    return true;
}
|
||||
|
||||
/*
 * Locked path for tracking a minimum: lower @s to @value if @value is
 * smaller than the value currently stored.
 *
 * Returns false, leaving @s untouched, if the write lock could not be
 * taken (presumably the caller retries); returns true once the comparison
 * and, if needed, the update have been performed under the lock.
 */
bool stat64_min_slow(Stat64 *s, uint64_t value)
{
    uint32_t high, low;
    uint64_t orig;

    if (!stat64_wrtrylock(s)) {
        cpu_relax();
        return false;
    }

    high = atomic_read(&s->high);
    low = atomic_read(&s->low);

    orig = ((uint64_t)high << 32) | low;

    /* Only ever lower the stored value: update when @value is smaller. */
    if (value < orig) {
        /* We have to set low before high, just like stat64_min reads
         * high before low.  The value may become higher temporarily, but
         * stat64_get does not notice (it takes the lock) and the only ill
         * effect on stat64_min is that the slow path may be triggered
         * unnecessarily.
         */
        atomic_set(&s->low, (uint32_t)value);
        smp_wmb();
        atomic_set(&s->high, value >> 32);
    }
    stat64_wrunlock(s);
    return true;
}
|
||||
|
||||
/*
 * Locked path for tracking a maximum: raise @s to @value if @value is
 * larger than the value currently stored.
 *
 * Returns false, leaving @s untouched, if the write lock could not be
 * taken (presumably the caller retries); returns true once the comparison
 * and, if needed, the update have been performed under the lock.
 */
bool stat64_max_slow(Stat64 *s, uint64_t value)
{
    uint32_t high, low;
    uint64_t orig;

    if (!stat64_wrtrylock(s)) {
        cpu_relax();
        return false;
    }

    high = atomic_read(&s->high);
    low = atomic_read(&s->low);

    orig = ((uint64_t)high << 32) | low;

    /* Only ever raise the stored value: update when @value is larger. */
    if (value > orig) {
        /* We have to set low before high, just like stat64_max reads
         * high before low.  The value may become lower temporarily, but
         * stat64_get does not notice (it takes the lock) and the only ill
         * effect on stat64_max is that the slow path may be triggered
         * unnecessarily.
         */
        atomic_set(&s->low, (uint32_t)value);
        smp_wmb();
        atomic_set(&s->high, value >> 32);
    }
    stat64_wrunlock(s);
    return true;
}
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user