From 8ba02c24ea25a7a1fa4931e266af7916c115da2c Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Fri, 28 Jul 2023 10:19:58 +0800 Subject: [PATCH 01/14] throttle: introduce enum ThrottleDirection Use enum ThrottleDirection instead of number index. Reviewed-by: Alberto Garcia Reviewed-by: Hanna Czenczek Signed-off-by: zhenwei pi Message-Id: <20230728022006.1098509-2-pizhenwei@bytedance.com> Signed-off-by: Hanna Czenczek --- include/qemu/throttle.h | 11 ++++++++--- util/throttle.c | 16 +++++++++------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h index 05f6346137..9ca5ab8197 100644 --- a/include/qemu/throttle.h +++ b/include/qemu/throttle.h @@ -99,13 +99,18 @@ typedef struct ThrottleState { int64_t previous_leak; /* timestamp of the last leak done */ } ThrottleState; +typedef enum { + THROTTLE_READ = 0, + THROTTLE_WRITE, + THROTTLE_MAX +} ThrottleDirection; + typedef struct ThrottleTimers { - QEMUTimer *timers[2]; /* timers used to do the throttling */ + QEMUTimer *timers[THROTTLE_MAX]; /* timers used to do the throttling */ QEMUClockType clock_type; /* the clock used */ /* Callbacks */ - QEMUTimerCB *read_timer_cb; - QEMUTimerCB *write_timer_cb; + QEMUTimerCB *timer_cb[THROTTLE_MAX]; void *timer_opaque; } ThrottleTimers; diff --git a/util/throttle.c b/util/throttle.c index 81f247a8d1..5642e61763 100644 --- a/util/throttle.c +++ b/util/throttle.c @@ -199,10 +199,12 @@ static bool throttle_compute_timer(ThrottleState *ts, void throttle_timers_attach_aio_context(ThrottleTimers *tt, AioContext *new_context) { - tt->timers[0] = aio_timer_new(new_context, tt->clock_type, SCALE_NS, - tt->read_timer_cb, tt->timer_opaque); - tt->timers[1] = aio_timer_new(new_context, tt->clock_type, SCALE_NS, - tt->write_timer_cb, tt->timer_opaque); + tt->timers[THROTTLE_READ] = + aio_timer_new(new_context, tt->clock_type, SCALE_NS, + tt->timer_cb[THROTTLE_READ], tt->timer_opaque); + tt->timers[THROTTLE_WRITE] = + 
aio_timer_new(new_context, tt->clock_type, SCALE_NS, + tt->timer_cb[THROTTLE_WRITE], tt->timer_opaque); } /* @@ -236,8 +238,8 @@ void throttle_timers_init(ThrottleTimers *tt, memset(tt, 0, sizeof(ThrottleTimers)); tt->clock_type = clock_type; - tt->read_timer_cb = read_timer_cb; - tt->write_timer_cb = write_timer_cb; + tt->timer_cb[THROTTLE_READ] = read_timer_cb; + tt->timer_cb[THROTTLE_WRITE] = write_timer_cb; tt->timer_opaque = timer_opaque; throttle_timers_attach_aio_context(tt, aio_context); } @@ -256,7 +258,7 @@ void throttle_timers_detach_aio_context(ThrottleTimers *tt) { int i; - for (i = 0; i < 2; i++) { + for (i = 0; i < THROTTLE_MAX; i++) { throttle_timer_destroy(&tt->timers[i]); } } From 1322f63df52e23ecf47cc9a5842ec3099c517183 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Fri, 28 Jul 2023 10:19:59 +0800 Subject: [PATCH 02/14] test-throttle: use enum ThrottleDirection Use enum ThrottleDirection instead in the throttle test codes. Reviewed-by: Alberto Garcia Reviewed-by: Hanna Czenczek Signed-off-by: zhenwei pi Message-Id: <20230728022006.1098509-3-pizhenwei@bytedance.com> Signed-off-by: Hanna Czenczek --- tests/unit/test-throttle.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/test-throttle.c b/tests/unit/test-throttle.c index 7adb5e6652..a60b5fe22e 100644 --- a/tests/unit/test-throttle.c +++ b/tests/unit/test-throttle.c @@ -169,8 +169,8 @@ static void test_init(void) /* check initialized fields */ g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL); - g_assert(tt->timers[0]); - g_assert(tt->timers[1]); + g_assert(tt->timers[THROTTLE_READ]); + g_assert(tt->timers[THROTTLE_WRITE]); /* check other fields where cleared */ g_assert(!ts.previous_leak); @@ -191,7 +191,7 @@ static void test_destroy(void) throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts); throttle_timers_destroy(tt); - for (i = 0; i < 2; i++) { + for (i = 0; i < THROTTLE_MAX; i++) { g_assert(!tt->timers[i]); } } From 
d85b08c6e27d796dbb653b8ae9584080361c1498 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Fri, 28 Jul 2023 10:20:00 +0800 Subject: [PATCH 03/14] throttle: support read-only and write-only Only one direction is necessary in several scenarios: - a read-only disk - operations on a device are considered as *write* only. For example, encrypt/decrypt/sign/verify operations on a cryptodev use a single *write* timer (the read timer callback is defined, but never invoked). Allow a single direction in throttle; this reduces memory usage, and the upper layer no longer needs a dummy callback. Reviewed-by: Alberto Garcia Reviewed-by: Hanna Czenczek Signed-off-by: zhenwei pi Message-Id: <20230728022006.1098509-4-pizhenwei@bytedance.com> Signed-off-by: Hanna Czenczek --- util/throttle.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/util/throttle.c b/util/throttle.c index 5642e61763..0439028d21 100644 --- a/util/throttle.c +++ b/util/throttle.c @@ -199,12 +199,15 @@ static bool throttle_compute_timer(ThrottleState *ts, void throttle_timers_attach_aio_context(ThrottleTimers *tt, AioContext *new_context) { - tt->timers[THROTTLE_READ] = - aio_timer_new(new_context, tt->clock_type, SCALE_NS, - tt->timer_cb[THROTTLE_READ], tt->timer_opaque); - tt->timers[THROTTLE_WRITE] = - aio_timer_new(new_context, tt->clock_type, SCALE_NS, - tt->timer_cb[THROTTLE_WRITE], tt->timer_opaque); + ThrottleDirection dir; + + for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) { + if (tt->timer_cb[dir]) { + tt->timers[dir] = + aio_timer_new(new_context, tt->clock_type, SCALE_NS, + tt->timer_cb[dir], tt->timer_opaque); + } + } } /* @@ -235,6 +238,7 @@ void throttle_timers_init(ThrottleTimers *tt, QEMUTimerCB *write_timer_cb, void *timer_opaque) { + assert(read_timer_cb || write_timer_cb); memset(tt, 0, sizeof(ThrottleTimers)); tt->clock_type = clock_type; @@ -247,7 +251,9 @@ void throttle_timers_init(ThrottleTimers *tt, /* destroy a timer */ static void 
throttle_timer_destroy(QEMUTimer **timer) { - assert(*timer != NULL); + if (*timer == NULL) { + return; + } timer_free(*timer); *timer = NULL; @@ -256,10 +262,10 @@ static void throttle_timer_destroy(QEMUTimer **timer) /* Remove timers from event loop */ void throttle_timers_detach_aio_context(ThrottleTimers *tt) { - int i; + ThrottleDirection dir; - for (i = 0; i < THROTTLE_MAX; i++) { - throttle_timer_destroy(&tt->timers[i]); + for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) { + throttle_timer_destroy(&tt->timers[dir]); } } @@ -272,8 +278,12 @@ void throttle_timers_destroy(ThrottleTimers *tt) /* is any throttling timer configured */ bool throttle_timers_are_initialized(ThrottleTimers *tt) { - if (tt->timers[0]) { - return true; + ThrottleDirection dir; + + for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) { + if (tt->timers[dir]) { + return true; + } } return false; @@ -424,8 +434,12 @@ bool throttle_schedule_timer(ThrottleState *ts, { int64_t now = qemu_clock_get_ns(tt->clock_type); int64_t next_timestamp; + QEMUTimer *timer; bool must_wait; + timer = is_write ? 
tt->timers[THROTTLE_WRITE] : tt->timers[THROTTLE_READ]; + assert(timer); + must_wait = throttle_compute_timer(ts, is_write, now, @@ -437,12 +451,12 @@ bool throttle_schedule_timer(ThrottleState *ts, } /* request throttled and timer pending -> do nothing */ - if (timer_pending(tt->timers[is_write])) { + if (timer_pending(timer)) { return true; } /* request throttled and timer not pending -> arm timer */ - timer_mod(tt->timers[is_write], next_timestamp); + timer_mod(timer, next_timestamp); return true; } From 02add531e1aa71bafb8185142bfc12e8c714b323 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Fri, 28 Jul 2023 10:20:01 +0800 Subject: [PATCH 04/14] test-throttle: test read only and write only Reviewed-by: Alberto Garcia Reviewed-by: Hanna Czenczek Signed-off-by: zhenwei pi Message-Id: <20230728022006.1098509-5-pizhenwei@bytedance.com> Signed-off-by: Hanna Czenczek --- tests/unit/test-throttle.c | 66 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/tests/unit/test-throttle.c b/tests/unit/test-throttle.c index a60b5fe22e..5547837a58 100644 --- a/tests/unit/test-throttle.c +++ b/tests/unit/test-throttle.c @@ -184,6 +184,70 @@ static void test_init(void) throttle_timers_destroy(tt); } +static void test_init_readonly(void) +{ + int i; + + tt = &tgm.throttle_timers; + + /* fill the structures with crap */ + memset(&ts, 1, sizeof(ts)); + memset(tt, 1, sizeof(*tt)); + + /* init structures */ + throttle_init(&ts); + throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, NULL, &ts); + + /* check initialized fields */ + g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL); + g_assert(tt->timers[THROTTLE_READ]); + g_assert(!tt->timers[THROTTLE_WRITE]); + + /* check other fields where cleared */ + g_assert(!ts.previous_leak); + g_assert(!ts.cfg.op_size); + for (i = 0; i < BUCKETS_COUNT; i++) { + g_assert(!ts.cfg.buckets[i].avg); + g_assert(!ts.cfg.buckets[i].max); + g_assert(!ts.cfg.buckets[i].level); + } + + 
throttle_timers_destroy(tt); +} + +static void test_init_writeonly(void) +{ + int i; + + tt = &tgm.throttle_timers; + + /* fill the structures with crap */ + memset(&ts, 1, sizeof(ts)); + memset(tt, 1, sizeof(*tt)); + + /* init structures */ + throttle_init(&ts); + throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL, + NULL, write_timer_cb, &ts); + + /* check initialized fields */ + g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL); + g_assert(!tt->timers[THROTTLE_READ]); + g_assert(tt->timers[THROTTLE_WRITE]); + + /* check other fields where cleared */ + g_assert(!ts.previous_leak); + g_assert(!ts.cfg.op_size); + for (i = 0; i < BUCKETS_COUNT; i++) { + g_assert(!ts.cfg.buckets[i].avg); + g_assert(!ts.cfg.buckets[i].max); + g_assert(!ts.cfg.buckets[i].level); + } + + throttle_timers_destroy(tt); +} + static void test_destroy(void) { int i; @@ -752,6 +816,8 @@ int main(int argc, char **argv) g_test_add_func("/throttle/leak_bucket", test_leak_bucket); g_test_add_func("/throttle/compute_wait", test_compute_wait); g_test_add_func("/throttle/init", test_init); + g_test_add_func("/throttle/init_readonly", test_init_readonly); + g_test_add_func("/throttle/init_writeonly", test_init_writeonly); g_test_add_func("/throttle/destroy", test_destroy); g_test_add_func("/throttle/have_timer", test_have_timer); g_test_add_func("/throttle/detach_attach", test_detach_attach); From 27cf12298a2e3cb168880b6c7a36ea8cfbd8afb0 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Fri, 28 Jul 2023 10:20:02 +0800 Subject: [PATCH 05/14] cryptodev: use NULL throttle timer cb for read direction Operations on a cryptodev are considered as *write* only, the callback of read direction is never invoked. Use NULL instead of an unreachable path(cryptodev_backend_throttle_timer_cb on read direction). The dummy read timer(never invoked) is already removed here, it means that the 'FIXME' tag is no longer needed. 
Reviewed-by: Alberto Garcia Reviewed-by: Hanna Czenczek Signed-off-by: zhenwei pi Message-Id: <20230728022006.1098509-6-pizhenwei@bytedance.com> Signed-off-by: Hanna Czenczek --- backends/cryptodev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backends/cryptodev.c b/backends/cryptodev.c index 4d183f7237..c2356550c8 100644 --- a/backends/cryptodev.c +++ b/backends/cryptodev.c @@ -341,8 +341,7 @@ static void cryptodev_backend_set_throttle(CryptoDevBackend *backend, int field, if (!enabled) { throttle_init(&backend->ts); throttle_timers_init(&backend->tt, qemu_get_aio_context(), - QEMU_CLOCK_REALTIME, - cryptodev_backend_throttle_timer_cb, /* FIXME */ + QEMU_CLOCK_REALTIME, NULL, cryptodev_backend_throttle_timer_cb, backend); } From e76f201f69e76653f3e7301f2183421d9267e2f5 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Fri, 28 Jul 2023 10:20:03 +0800 Subject: [PATCH 06/14] throttle: use enum ThrottleDirection instead of bool is_write enum ThrottleDirection is already there, use ThrottleDirection instead of 'bool is_write' for throttle API, also modify related codes from block, fsdev, cryptodev and tests. 
Reviewed-by: Hanna Czenczek Signed-off-by: zhenwei pi Message-Id: <20230728022006.1098509-7-pizhenwei@bytedance.com> Signed-off-by: Hanna Czenczek --- backends/cryptodev.c | 9 +++++---- block/throttle-groups.c | 6 ++++-- fsdev/qemu-fsdev-throttle.c | 8 +++++--- include/qemu/throttle.h | 5 +++-- tests/unit/test-throttle.c | 4 ++-- util/throttle.c | 31 +++++++++++++++++-------------- 6 files changed, 36 insertions(+), 27 deletions(-) diff --git a/backends/cryptodev.c b/backends/cryptodev.c index c2356550c8..e5006bd215 100644 --- a/backends/cryptodev.c +++ b/backends/cryptodev.c @@ -252,10 +252,11 @@ static void cryptodev_backend_throttle_timer_cb(void *opaque) continue; } - throttle_account(&backend->ts, true, ret); + throttle_account(&backend->ts, THROTTLE_WRITE, ret); cryptodev_backend_operation(backend, op_info); if (throttle_enabled(&backend->tc) && - throttle_schedule_timer(&backend->ts, &backend->tt, true)) { + throttle_schedule_timer(&backend->ts, &backend->tt, + THROTTLE_WRITE)) { break; } } @@ -271,7 +272,7 @@ int cryptodev_backend_crypto_operation( goto do_account; } - if (throttle_schedule_timer(&backend->ts, &backend->tt, true) || + if (throttle_schedule_timer(&backend->ts, &backend->tt, THROTTLE_WRITE) || !QTAILQ_EMPTY(&backend->opinfos)) { QTAILQ_INSERT_TAIL(&backend->opinfos, op_info, next); return 0; @@ -283,7 +284,7 @@ do_account: return ret; } - throttle_account(&backend->ts, true, ret); + throttle_account(&backend->ts, THROTTLE_WRITE, ret); return cryptodev_backend_operation(backend, op_info); } diff --git a/block/throttle-groups.c b/block/throttle-groups.c index fb203c3ced..3847d27801 100644 --- a/block/throttle-groups.c +++ b/block/throttle-groups.c @@ -270,6 +270,7 @@ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm, ThrottleState *ts = tgm->throttle_state; ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); ThrottleTimers *tt = &tgm->throttle_timers; + ThrottleDirection direction = is_write ? 
THROTTLE_WRITE : THROTTLE_READ; bool must_wait; if (qatomic_read(&tgm->io_limits_disabled)) { @@ -281,7 +282,7 @@ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm, return true; } - must_wait = throttle_schedule_timer(ts, tt, is_write); + must_wait = throttle_schedule_timer(ts, tt, direction); /* If a timer just got armed, set tgm as the current token */ if (must_wait) { @@ -364,6 +365,7 @@ void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm bool must_wait; ThrottleGroupMember *token; ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts); + ThrottleDirection direction = is_write ? THROTTLE_WRITE : THROTTLE_READ; assert(bytes >= 0); @@ -386,7 +388,7 @@ void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm } /* The I/O will be executed, so do the accounting */ - throttle_account(tgm->throttle_state, is_write, bytes); + throttle_account(tgm->throttle_state, direction, bytes); /* Schedule the next request */ schedule_next_request(tgm, is_write); diff --git a/fsdev/qemu-fsdev-throttle.c b/fsdev/qemu-fsdev-throttle.c index 5c83a1cc09..1c137d6f0f 100644 --- a/fsdev/qemu-fsdev-throttle.c +++ b/fsdev/qemu-fsdev-throttle.c @@ -97,16 +97,18 @@ void fsdev_throttle_init(FsThrottle *fst) void coroutine_fn fsdev_co_throttle_request(FsThrottle *fst, bool is_write, struct iovec *iov, int iovcnt) { + ThrottleDirection direction = is_write ? 
THROTTLE_WRITE : THROTTLE_READ; + if (throttle_enabled(&fst->cfg)) { - if (throttle_schedule_timer(&fst->ts, &fst->tt, is_write) || + if (throttle_schedule_timer(&fst->ts, &fst->tt, direction) || !qemu_co_queue_empty(&fst->throttled_reqs[is_write])) { qemu_co_queue_wait(&fst->throttled_reqs[is_write], NULL); } - throttle_account(&fst->ts, is_write, iov_size(iov, iovcnt)); + throttle_account(&fst->ts, direction, iov_size(iov, iovcnt)); if (!qemu_co_queue_empty(&fst->throttled_reqs[is_write]) && - !throttle_schedule_timer(&fst->ts, &fst->tt, is_write)) { + !throttle_schedule_timer(&fst->ts, &fst->tt, direction)) { qemu_co_queue_next(&fst->throttled_reqs[is_write]); } } diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h index 9ca5ab8197..181245d29b 100644 --- a/include/qemu/throttle.h +++ b/include/qemu/throttle.h @@ -154,9 +154,10 @@ void throttle_config_init(ThrottleConfig *cfg); /* usage */ bool throttle_schedule_timer(ThrottleState *ts, ThrottleTimers *tt, - bool is_write); + ThrottleDirection direction); -void throttle_account(ThrottleState *ts, bool is_write, uint64_t size); +void throttle_account(ThrottleState *ts, ThrottleDirection direction, + uint64_t size); void throttle_limits_to_config(ThrottleLimits *arg, ThrottleConfig *cfg, Error **errp); void throttle_config_to_limits(ThrottleConfig *cfg, ThrottleLimits *var); diff --git a/tests/unit/test-throttle.c b/tests/unit/test-throttle.c index 5547837a58..2c4754fb8a 100644 --- a/tests/unit/test-throttle.c +++ b/tests/unit/test-throttle.c @@ -637,9 +637,9 @@ static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */ throttle_config(&ts, QEMU_CLOCK_VIRTUAL, &cfg); /* account a read */ - throttle_account(&ts, false, size); + throttle_account(&ts, THROTTLE_READ, size); /* account a write */ - throttle_account(&ts, true, size); + throttle_account(&ts, THROTTLE_WRITE, size); /* check total result */ index = to_test[is_ops][0]; diff --git a/util/throttle.c b/util/throttle.c index 
0439028d21..7d3eb6032f 100644 --- a/util/throttle.c +++ b/util/throttle.c @@ -136,11 +136,11 @@ int64_t throttle_compute_wait(LeakyBucket *bkt) /* This function compute the time that must be waited while this IO * - * @is_write: true if the current IO is a write, false if it's a read + * @direction: throttle direction * @ret: time to wait */ static int64_t throttle_compute_wait_for(ThrottleState *ts, - bool is_write) + ThrottleDirection direction) { BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL, THROTTLE_OPS_TOTAL, @@ -154,7 +154,7 @@ static int64_t throttle_compute_wait_for(ThrottleState *ts, int i; for (i = 0; i < 4; i++) { - BucketType index = to_check[is_write][i]; + BucketType index = to_check[direction][i]; wait = throttle_compute_wait(&ts->cfg.buckets[index]); if (wait > max_wait) { max_wait = wait; @@ -166,13 +166,13 @@ static int64_t throttle_compute_wait_for(ThrottleState *ts, /* compute the timer for this type of operation * - * @is_write: the type of operation + * @direction: throttle direction * @now: the current clock timestamp * @next_timestamp: the resulting timer * @ret: true if a timer must be set */ static bool throttle_compute_timer(ThrottleState *ts, - bool is_write, + ThrottleDirection direction, int64_t now, int64_t *next_timestamp) { @@ -182,7 +182,7 @@ static bool throttle_compute_timer(ThrottleState *ts, throttle_do_leak(ts, now); /* compute the wait time if any */ - wait = throttle_compute_wait_for(ts, is_write); + wait = throttle_compute_wait_for(ts, direction); /* if the code must wait compute when the next timer should fire */ if (wait) { @@ -425,23 +425,24 @@ void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg) * NOTE: this function is not unit tested due to it's usage of timer_mod * * @tt: the timers structure - * @is_write: the type of operation (read/write) + * @direction: throttle direction * @ret: true if the timer has been scheduled else false */ bool throttle_schedule_timer(ThrottleState *ts, ThrottleTimers *tt, 
- bool is_write) + ThrottleDirection direction) { int64_t now = qemu_clock_get_ns(tt->clock_type); int64_t next_timestamp; QEMUTimer *timer; bool must_wait; - timer = is_write ? tt->timers[THROTTLE_WRITE] : tt->timers[THROTTLE_READ]; + assert(direction < THROTTLE_MAX); + timer = tt->timers[direction]; assert(timer); must_wait = throttle_compute_timer(ts, - is_write, + direction, now, &next_timestamp); @@ -462,10 +463,11 @@ bool throttle_schedule_timer(ThrottleState *ts, /* do the accounting for this operation * - * @is_write: the type of operation (read/write) + * @direction: throttle direction * @size: the size of the operation */ -void throttle_account(ThrottleState *ts, bool is_write, uint64_t size) +void throttle_account(ThrottleState *ts, ThrottleDirection direction, + uint64_t size) { const BucketType bucket_types_size[2][2] = { { THROTTLE_BPS_TOTAL, THROTTLE_BPS_READ }, @@ -478,6 +480,7 @@ void throttle_account(ThrottleState *ts, bool is_write, uint64_t size) double units = 1.0; unsigned i; + assert(direction < THROTTLE_MAX); /* if cfg.op_size is defined and smaller than size we compute unit count */ if (ts->cfg.op_size && size > ts->cfg.op_size) { units = (double) size / ts->cfg.op_size; @@ -486,13 +489,13 @@ void throttle_account(ThrottleState *ts, bool is_write, uint64_t size) for (i = 0; i < 2; i++) { LeakyBucket *bkt; - bkt = &ts->cfg.buckets[bucket_types_size[is_write][i]]; + bkt = &ts->cfg.buckets[bucket_types_size[direction][i]]; bkt->level += size; if (bkt->burst_length > 1) { bkt->burst_level += size; } - bkt = &ts->cfg.buckets[bucket_types_units[is_write][i]]; + bkt = &ts->cfg.buckets[bucket_types_units[direction][i]]; bkt->level += units; if (bkt->burst_length > 1) { bkt->burst_level += units; From 7017313882e39b6285e1a47dc474a395ba6f0e7d Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Fri, 28 Jul 2023 10:20:04 +0800 Subject: [PATCH 07/14] throttle: use THROTTLE_MAX/ARRAY_SIZE for hard code The first dimension of both to_check and 
bucket_types_size/bucket_types_units is used as throttle direction, use THROTTLE_MAX instead of hard coded number. Also use ARRAY_SIZE() to avoid hard coded number for the second dimension. Hanna noticed that the two array should be static. Yes, turn them into static variables. Reviewed-by: Hanna Czenczek Signed-off-by: zhenwei pi Message-Id: <20230728022006.1098509-8-pizhenwei@bytedance.com> Signed-off-by: Hanna Czenczek --- util/throttle.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/util/throttle.c b/util/throttle.c index 7d3eb6032f..9582899da3 100644 --- a/util/throttle.c +++ b/util/throttle.c @@ -142,7 +142,8 @@ int64_t throttle_compute_wait(LeakyBucket *bkt) static int64_t throttle_compute_wait_for(ThrottleState *ts, ThrottleDirection direction) { - BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL, + static const BucketType to_check[THROTTLE_MAX][4] = { + {THROTTLE_BPS_TOTAL, THROTTLE_OPS_TOTAL, THROTTLE_BPS_READ, THROTTLE_OPS_READ}, @@ -153,7 +154,7 @@ static int64_t throttle_compute_wait_for(ThrottleState *ts, int64_t wait, max_wait = 0; int i; - for (i = 0; i < 4; i++) { + for (i = 0; i < ARRAY_SIZE(to_check[THROTTLE_READ]); i++) { BucketType index = to_check[direction][i]; wait = throttle_compute_wait(&ts->cfg.buckets[index]); if (wait > max_wait) { @@ -469,11 +470,11 @@ bool throttle_schedule_timer(ThrottleState *ts, void throttle_account(ThrottleState *ts, ThrottleDirection direction, uint64_t size) { - const BucketType bucket_types_size[2][2] = { + static const BucketType bucket_types_size[THROTTLE_MAX][2] = { { THROTTLE_BPS_TOTAL, THROTTLE_BPS_READ }, { THROTTLE_BPS_TOTAL, THROTTLE_BPS_WRITE } }; - const BucketType bucket_types_units[2][2] = { + static const BucketType bucket_types_units[THROTTLE_MAX][2] = { { THROTTLE_OPS_TOTAL, THROTTLE_OPS_READ }, { THROTTLE_OPS_TOTAL, THROTTLE_OPS_WRITE } }; @@ -486,7 +487,7 @@ void throttle_account(ThrottleState *ts, ThrottleDirection direction, units = (double) size / 
ts->cfg.op_size; } - for (i = 0; i < 2; i++) { + for (i = 0; i < ARRAY_SIZE(bucket_types_size[THROTTLE_READ]); i++) { LeakyBucket *bkt; bkt = &ts->cfg.buckets[bucket_types_size[direction][i]]; From 00ea69f50345258d1ff6262f24516abea5548d3a Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Fri, 28 Jul 2023 10:20:05 +0800 Subject: [PATCH 08/14] fsdev: Use ThrottleDirection instead of bool is_write 'bool is_write' style is obsolete from throttle framework, adapt fsdev to the new style. Cc: Greg Kurz Reviewed-by: Hanna Czenczek Signed-off-by: zhenwei pi Message-Id: <20230728022006.1098509-9-pizhenwei@bytedance.com> Reviewed-by: Greg Kurz Signed-off-by: Hanna Czenczek --- fsdev/qemu-fsdev-throttle.c | 14 +++++++------- fsdev/qemu-fsdev-throttle.h | 4 ++-- hw/9pfs/cofile.c | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fsdev/qemu-fsdev-throttle.c b/fsdev/qemu-fsdev-throttle.c index 1c137d6f0f..d912da906d 100644 --- a/fsdev/qemu-fsdev-throttle.c +++ b/fsdev/qemu-fsdev-throttle.c @@ -94,22 +94,22 @@ void fsdev_throttle_init(FsThrottle *fst) } } -void coroutine_fn fsdev_co_throttle_request(FsThrottle *fst, bool is_write, +void coroutine_fn fsdev_co_throttle_request(FsThrottle *fst, + ThrottleDirection direction, struct iovec *iov, int iovcnt) { - ThrottleDirection direction = is_write ? 
THROTTLE_WRITE : THROTTLE_READ; - + assert(direction < THROTTLE_MAX); if (throttle_enabled(&fst->cfg)) { if (throttle_schedule_timer(&fst->ts, &fst->tt, direction) || - !qemu_co_queue_empty(&fst->throttled_reqs[is_write])) { - qemu_co_queue_wait(&fst->throttled_reqs[is_write], NULL); + !qemu_co_queue_empty(&fst->throttled_reqs[direction])) { + qemu_co_queue_wait(&fst->throttled_reqs[direction], NULL); } throttle_account(&fst->ts, direction, iov_size(iov, iovcnt)); - if (!qemu_co_queue_empty(&fst->throttled_reqs[is_write]) && + if (!qemu_co_queue_empty(&fst->throttled_reqs[direction]) && !throttle_schedule_timer(&fst->ts, &fst->tt, direction)) { - qemu_co_queue_next(&fst->throttled_reqs[is_write]); + qemu_co_queue_next(&fst->throttled_reqs[direction]); } } } diff --git a/fsdev/qemu-fsdev-throttle.h b/fsdev/qemu-fsdev-throttle.h index a21aecddc7..daa8ca2494 100644 --- a/fsdev/qemu-fsdev-throttle.h +++ b/fsdev/qemu-fsdev-throttle.h @@ -23,14 +23,14 @@ typedef struct FsThrottle { ThrottleState ts; ThrottleTimers tt; ThrottleConfig cfg; - CoQueue throttled_reqs[2]; + CoQueue throttled_reqs[THROTTLE_MAX]; } FsThrottle; int fsdev_throttle_parse_opts(QemuOpts *, FsThrottle *, Error **); void fsdev_throttle_init(FsThrottle *); -void coroutine_fn fsdev_co_throttle_request(FsThrottle *, bool , +void coroutine_fn fsdev_co_throttle_request(FsThrottle *, ThrottleDirection , struct iovec *, int); void fsdev_throttle_cleanup(FsThrottle *); diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c index 9c5344039e..71174c3e4a 100644 --- a/hw/9pfs/cofile.c +++ b/hw/9pfs/cofile.c @@ -252,7 +252,7 @@ int coroutine_fn v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp, if (v9fs_request_cancelled(pdu)) { return -EINTR; } - fsdev_co_throttle_request(s->ctx.fst, true, iov, iovcnt); + fsdev_co_throttle_request(s->ctx.fst, THROTTLE_WRITE, iov, iovcnt); v9fs_co_run_in_worker( { err = s->ops->pwritev(&s->ctx, &fidp->fs, iov, iovcnt, offset); @@ -272,7 +272,7 @@ int coroutine_fn v9fs_co_preadv(V9fsPDU 
*pdu, V9fsFidState *fidp, if (v9fs_request_cancelled(pdu)) { return -EINTR; } - fsdev_co_throttle_request(s->ctx.fst, false, iov, iovcnt); + fsdev_co_throttle_request(s->ctx.fst, THROTTLE_READ, iov, iovcnt); v9fs_co_run_in_worker( { err = s->ops->preadv(&s->ctx, &fidp->fs, iov, iovcnt, offset); From 3b2337eff03e23ffcc7d6b0a0f72bd3ab2135ae9 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Fri, 28 Jul 2023 10:20:06 +0800 Subject: [PATCH 09/14] block/throttle-groups: Use ThrottleDirection instead of bool is_write 'bool is_write' style is obsolete from throttle framework, adapt block throttle groups to the new style: - use ThrottleDirection instead of 'bool is_write'. Ex, schedule_next_request(ThrottleGroupMember *tgm, bool is_write) -> schedule_next_request(ThrottleGroupMember *tgm, ThrottleDirection direction) - use THROTTLE_MAX instead of hard code. Ex, ThrottleGroupMember *tokens[2] -> ThrottleGroupMember *tokens[THROTTLE_MAX] - use ThrottleDirection instead of hard code on iteration. Ex, (i = 0; i < 2; i++) -> for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) Use a simple python script to test the new style: #!/usr/bin/python3 import subprocess import random import time commands = ['virsh blkdeviotune jammy vda --write-bytes-sec ', \ 'virsh blkdeviotune jammy vda --write-iops-sec ', \ 'virsh blkdeviotune jammy vda --read-bytes-sec ', \ 'virsh blkdeviotune jammy vda --read-iops-sec '] for loop in range(1, 1000): time.sleep(random.randrange(3, 5)) command = commands[random.randrange(0, 3)] + str(random.randrange(0, 1000000)) subprocess.run(command, shell=True, check=True) This works fine. 
Signed-off-by: zhenwei pi Message-Id: <20230728022006.1098509-10-pizhenwei@bytedance.com> Reviewed-by: Hanna Czenczek Signed-off-by: Hanna Czenczek --- block/block-backend.c | 4 +- block/throttle-groups.c | 161 ++++++++++++++++---------------- block/throttle.c | 8 +- include/block/throttle-groups.h | 6 +- 4 files changed, 90 insertions(+), 89 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 4009ed5fed..47d360c97a 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1341,7 +1341,7 @@ blk_co_do_preadv_part(BlockBackend *blk, int64_t offset, int64_t bytes, /* throttling disk I/O */ if (blk->public.throttle_group_member.throttle_state) { throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member, - bytes, false); + bytes, THROTTLE_READ); } ret = bdrv_co_preadv_part(blk->root, offset, bytes, qiov, qiov_offset, @@ -1415,7 +1415,7 @@ blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, /* throttling disk I/O */ if (blk->public.throttle_group_member.throttle_state) { throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member, - bytes, true); + bytes, THROTTLE_WRITE); } if (!blk->enable_write_cache) { diff --git a/block/throttle-groups.c b/block/throttle-groups.c index 3847d27801..3eda4c4e3d 100644 --- a/block/throttle-groups.c +++ b/block/throttle-groups.c @@ -37,7 +37,7 @@ static void throttle_group_obj_init(Object *obj); static void throttle_group_obj_complete(UserCreatable *obj, Error **errp); -static void timer_cb(ThrottleGroupMember *tgm, bool is_write); +static void timer_cb(ThrottleGroupMember *tgm, ThrottleDirection direction); /* The ThrottleGroup structure (with its ThrottleState) is shared * among different ThrottleGroupMembers and it's independent from @@ -73,8 +73,8 @@ struct ThrottleGroup { QemuMutex lock; /* This lock protects the following four fields */ ThrottleState ts; QLIST_HEAD(, ThrottleGroupMember) head; - ThrottleGroupMember *tokens[2]; - bool 
any_timer_armed[2]; + ThrottleGroupMember *tokens[THROTTLE_MAX]; + bool any_timer_armed[THROTTLE_MAX]; QEMUClockType clock_type; /* This field is protected by the global QEMU mutex */ @@ -197,13 +197,13 @@ static ThrottleGroupMember *throttle_group_next_tgm(ThrottleGroupMember *tgm) * This assumes that tg->lock is held. * * @tgm: the ThrottleGroupMember - * @is_write: the type of operation (read/write) + * @direction: the ThrottleDirection * @ret: whether the ThrottleGroupMember has pending requests. */ static inline bool tgm_has_pending_reqs(ThrottleGroupMember *tgm, - bool is_write) + ThrottleDirection direction) { - return tgm->pending_reqs[is_write]; + return tgm->pending_reqs[direction]; } /* Return the next ThrottleGroupMember in the round-robin sequence with pending @@ -212,12 +212,12 @@ static inline bool tgm_has_pending_reqs(ThrottleGroupMember *tgm, * This assumes that tg->lock is held. * * @tgm: the current ThrottleGroupMember - * @is_write: the type of operation (read/write) + * @direction: the ThrottleDirection * @ret: the next ThrottleGroupMember with pending requests, or tgm if * there is none. */ static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm, - bool is_write) + ThrottleDirection direction) { ThrottleState *ts = tgm->throttle_state; ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); @@ -227,16 +227,16 @@ static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm, * it's being drained. Skip the round-robin search and return tgm * immediately if it has pending requests. Otherwise we could be * forcing it to wait for other member's throttled requests. 
*/ - if (tgm_has_pending_reqs(tgm, is_write) && + if (tgm_has_pending_reqs(tgm, direction) && qatomic_read(&tgm->io_limits_disabled)) { return tgm; } - start = token = tg->tokens[is_write]; + start = token = tg->tokens[direction]; /* get next bs round in round robin style */ token = throttle_group_next_tgm(token); - while (token != start && !tgm_has_pending_reqs(token, is_write)) { + while (token != start && !tgm_has_pending_reqs(token, direction)) { token = throttle_group_next_tgm(token); } @@ -244,12 +244,12 @@ static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm, * then decide the token is the current tgm because chances are * the current tgm got the current request queued. */ - if (token == start && !tgm_has_pending_reqs(token, is_write)) { + if (token == start && !tgm_has_pending_reqs(token, direction)) { token = tgm; } /* Either we return the original TGM, or one with pending requests */ - assert(token == tgm || tgm_has_pending_reqs(token, is_write)); + assert(token == tgm || tgm_has_pending_reqs(token, direction)); return token; } @@ -261,16 +261,15 @@ static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm, * This assumes that tg->lock is held. * * @tgm: the current ThrottleGroupMember - * @is_write: the type of operation (read/write) + * @direction: the ThrottleDirection * @ret: whether the I/O request needs to be throttled or not */ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm, - bool is_write) + ThrottleDirection direction) { ThrottleState *ts = tgm->throttle_state; ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); ThrottleTimers *tt = &tgm->throttle_timers; - ThrottleDirection direction = is_write ? 
THROTTLE_WRITE : THROTTLE_READ; bool must_wait; if (qatomic_read(&tgm->io_limits_disabled)) { @@ -278,7 +277,7 @@ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm, } /* Check if any of the timers in this group is already armed */ - if (tg->any_timer_armed[is_write]) { + if (tg->any_timer_armed[direction]) { return true; } @@ -286,8 +285,8 @@ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm, /* If a timer just got armed, set tgm as the current token */ if (must_wait) { - tg->tokens[is_write] = tgm; - tg->any_timer_armed[is_write] = true; + tg->tokens[direction] = tgm; + tg->any_timer_armed[direction] = true; } return must_wait; @@ -297,15 +296,15 @@ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm, * any request was actually pending. * * @tgm: the current ThrottleGroupMember - * @is_write: the type of operation (read/write) + * @direction: the ThrottleDirection */ static bool coroutine_fn throttle_group_co_restart_queue(ThrottleGroupMember *tgm, - bool is_write) + ThrottleDirection direction) { bool ret; qemu_co_mutex_lock(&tgm->throttled_reqs_lock); - ret = qemu_co_queue_next(&tgm->throttled_reqs[is_write]); + ret = qemu_co_queue_next(&tgm->throttled_reqs[direction]); qemu_co_mutex_unlock(&tgm->throttled_reqs_lock); return ret; @@ -316,9 +315,10 @@ static bool coroutine_fn throttle_group_co_restart_queue(ThrottleGroupMember *tg * This assumes that tg->lock is held. 
* * @tgm: the current ThrottleGroupMember - * @is_write: the type of operation (read/write) + * @direction: the ThrottleDirection */ -static void schedule_next_request(ThrottleGroupMember *tgm, bool is_write) +static void schedule_next_request(ThrottleGroupMember *tgm, + ThrottleDirection direction) { ThrottleState *ts = tgm->throttle_state; ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); @@ -326,27 +326,27 @@ static void schedule_next_request(ThrottleGroupMember *tgm, bool is_write) ThrottleGroupMember *token; /* Check if there's any pending request to schedule next */ - token = next_throttle_token(tgm, is_write); - if (!tgm_has_pending_reqs(token, is_write)) { + token = next_throttle_token(tgm, direction); + if (!tgm_has_pending_reqs(token, direction)) { return; } /* Set a timer for the request if it needs to be throttled */ - must_wait = throttle_group_schedule_timer(token, is_write); + must_wait = throttle_group_schedule_timer(token, direction); /* If it doesn't have to wait, queue it for immediate execution */ if (!must_wait) { /* Give preference to requests from the current tgm */ if (qemu_in_coroutine() && - throttle_group_co_restart_queue(tgm, is_write)) { + throttle_group_co_restart_queue(tgm, direction)) { token = tgm; } else { ThrottleTimers *tt = &token->throttle_timers; int64_t now = qemu_clock_get_ns(tg->clock_type); - timer_mod(tt->timers[is_write], now); - tg->any_timer_armed[is_write] = true; + timer_mod(tt->timers[direction], now); + tg->any_timer_armed[direction] = true; } - tg->tokens[is_write] = token; + tg->tokens[direction] = token; } } @@ -356,49 +356,49 @@ static void schedule_next_request(ThrottleGroupMember *tgm, bool is_write) * * @tgm: the current ThrottleGroupMember * @bytes: the number of bytes for this I/O - * @is_write: the type of operation (read/write) + * @direction: the ThrottleDirection */ void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm, int64_t bytes, - bool is_write) + 
ThrottleDirection direction) { bool must_wait; ThrottleGroupMember *token; ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts); - ThrottleDirection direction = is_write ? THROTTLE_WRITE : THROTTLE_READ; assert(bytes >= 0); + assert(direction < THROTTLE_MAX); qemu_mutex_lock(&tg->lock); /* First we check if this I/O has to be throttled. */ - token = next_throttle_token(tgm, is_write); - must_wait = throttle_group_schedule_timer(token, is_write); + token = next_throttle_token(tgm, direction); + must_wait = throttle_group_schedule_timer(token, direction); /* Wait if there's a timer set or queued requests of this type */ - if (must_wait || tgm->pending_reqs[is_write]) { - tgm->pending_reqs[is_write]++; + if (must_wait || tgm->pending_reqs[direction]) { + tgm->pending_reqs[direction]++; qemu_mutex_unlock(&tg->lock); qemu_co_mutex_lock(&tgm->throttled_reqs_lock); - qemu_co_queue_wait(&tgm->throttled_reqs[is_write], + qemu_co_queue_wait(&tgm->throttled_reqs[direction], &tgm->throttled_reqs_lock); qemu_co_mutex_unlock(&tgm->throttled_reqs_lock); qemu_mutex_lock(&tg->lock); - tgm->pending_reqs[is_write]--; + tgm->pending_reqs[direction]--; } /* The I/O will be executed, so do the accounting */ throttle_account(tgm->throttle_state, direction, bytes); /* Schedule the next request */ - schedule_next_request(tgm, is_write); + schedule_next_request(tgm, direction); qemu_mutex_unlock(&tg->lock); } typedef struct { ThrottleGroupMember *tgm; - bool is_write; + ThrottleDirection direction; } RestartData; static void coroutine_fn throttle_group_restart_queue_entry(void *opaque) @@ -407,16 +407,16 @@ static void coroutine_fn throttle_group_restart_queue_entry(void *opaque) ThrottleGroupMember *tgm = data->tgm; ThrottleState *ts = tgm->throttle_state; ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); - bool is_write = data->is_write; + ThrottleDirection direction = data->direction; bool empty_queue; - empty_queue = !throttle_group_co_restart_queue(tgm, 
is_write); + empty_queue = !throttle_group_co_restart_queue(tgm, direction); /* If the request queue was empty then we have to take care of * scheduling the next one */ if (empty_queue) { qemu_mutex_lock(&tg->lock); - schedule_next_request(tgm, is_write); + schedule_next_request(tgm, direction); qemu_mutex_unlock(&tg->lock); } @@ -426,18 +426,19 @@ static void coroutine_fn throttle_group_restart_queue_entry(void *opaque) aio_wait_kick(); } -static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write) +static void throttle_group_restart_queue(ThrottleGroupMember *tgm, + ThrottleDirection direction) { Coroutine *co; RestartData *rd = g_new0(RestartData, 1); rd->tgm = tgm; - rd->is_write = is_write; + rd->direction = direction; /* This function is called when a timer is fired or when * throttle_group_restart_tgm() is called. Either way, there can * be no timer pending on this tgm at this point */ - assert(!timer_pending(tgm->throttle_timers.timers[is_write])); + assert(!timer_pending(tgm->throttle_timers.timers[direction])); qatomic_inc(&tgm->restart_pending); @@ -447,18 +448,18 @@ static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write void throttle_group_restart_tgm(ThrottleGroupMember *tgm) { - int i; + ThrottleDirection dir; if (tgm->throttle_state) { - for (i = 0; i < 2; i++) { - QEMUTimer *t = tgm->throttle_timers.timers[i]; + for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) { + QEMUTimer *t = tgm->throttle_timers.timers[dir]; if (timer_pending(t)) { /* If there's a pending timer on this tgm, fire it now */ timer_del(t); - timer_cb(tgm, i); + timer_cb(tgm, dir); } else { /* Else run the next request from the queue manually */ - throttle_group_restart_queue(tgm, i); + throttle_group_restart_queue(tgm, dir); } } } @@ -502,30 +503,30 @@ void throttle_group_get_config(ThrottleGroupMember *tgm, ThrottleConfig *cfg) * because it had been throttled. 
* * @tgm: the ThrottleGroupMember whose request had been throttled - * @is_write: the type of operation (read/write) + * @direction: the ThrottleDirection */ -static void timer_cb(ThrottleGroupMember *tgm, bool is_write) +static void timer_cb(ThrottleGroupMember *tgm, ThrottleDirection direction) { ThrottleState *ts = tgm->throttle_state; ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); /* The timer has just been fired, so we can update the flag */ qemu_mutex_lock(&tg->lock); - tg->any_timer_armed[is_write] = false; + tg->any_timer_armed[direction] = false; qemu_mutex_unlock(&tg->lock); /* Run the request that was waiting for this timer */ - throttle_group_restart_queue(tgm, is_write); + throttle_group_restart_queue(tgm, direction); } static void read_timer_cb(void *opaque) { - timer_cb(opaque, false); + timer_cb(opaque, THROTTLE_READ); } static void write_timer_cb(void *opaque) { - timer_cb(opaque, true); + timer_cb(opaque, THROTTLE_WRITE); } /* Register a ThrottleGroupMember from the throttling group, also initializing @@ -543,7 +544,7 @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm, const char *groupname, AioContext *ctx) { - int i; + ThrottleDirection dir; ThrottleState *ts = throttle_group_incref(groupname); ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); @@ -553,10 +554,11 @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm, QEMU_LOCK_GUARD(&tg->lock); /* If the ThrottleGroup is new set this ThrottleGroupMember as the token */ - for (i = 0; i < 2; i++) { - if (!tg->tokens[i]) { - tg->tokens[i] = tgm; + for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) { + if (!tg->tokens[dir]) { + tg->tokens[dir] = tgm; } + qemu_co_queue_init(&tgm->throttled_reqs[dir]); } QLIST_INSERT_HEAD(&tg->head, tgm, round_robin); @@ -568,8 +570,6 @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm, write_timer_cb, tgm); qemu_co_mutex_init(&tgm->throttled_reqs_lock); - qemu_co_queue_init(&tgm->throttled_reqs[0]); - 
qemu_co_queue_init(&tgm->throttled_reqs[1]); } /* Unregister a ThrottleGroupMember from its group, removing it from the list, @@ -587,7 +587,7 @@ void throttle_group_unregister_tgm(ThrottleGroupMember *tgm) ThrottleState *ts = tgm->throttle_state; ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); ThrottleGroupMember *token; - int i; + ThrottleDirection dir; if (!ts) { /* Discard already unregistered tgm */ @@ -598,17 +598,17 @@ void throttle_group_unregister_tgm(ThrottleGroupMember *tgm) AIO_WAIT_WHILE(tgm->aio_context, qatomic_read(&tgm->restart_pending) > 0); WITH_QEMU_LOCK_GUARD(&tg->lock) { - for (i = 0; i < 2; i++) { - assert(tgm->pending_reqs[i] == 0); - assert(qemu_co_queue_empty(&tgm->throttled_reqs[i])); - assert(!timer_pending(tgm->throttle_timers.timers[i])); - if (tg->tokens[i] == tgm) { + for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) { + assert(tgm->pending_reqs[dir] == 0); + assert(qemu_co_queue_empty(&tgm->throttled_reqs[dir])); + assert(!timer_pending(tgm->throttle_timers.timers[dir])); + if (tg->tokens[dir] == tgm) { token = throttle_group_next_tgm(tgm); /* Take care of the case where this is the last tgm in the group */ if (token == tgm) { token = NULL; } - tg->tokens[i] = token; + tg->tokens[dir] = token; } } @@ -633,19 +633,20 @@ void throttle_group_detach_aio_context(ThrottleGroupMember *tgm) { ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts); ThrottleTimers *tt = &tgm->throttle_timers; - int i; + ThrottleDirection dir; /* Requests must have been drained */ - assert(tgm->pending_reqs[0] == 0 && tgm->pending_reqs[1] == 0); - assert(qemu_co_queue_empty(&tgm->throttled_reqs[0])); - assert(qemu_co_queue_empty(&tgm->throttled_reqs[1])); + for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) { + assert(tgm->pending_reqs[dir] == 0); + assert(qemu_co_queue_empty(&tgm->throttled_reqs[dir])); + } /* Kick off next ThrottleGroupMember, if necessary */ WITH_QEMU_LOCK_GUARD(&tg->lock) { - for (i = 0; i < 2; i++) { - if 
(timer_pending(tt->timers[i])) { - tg->any_timer_armed[i] = false; - schedule_next_request(tgm, i); + for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) { + if (timer_pending(tt->timers[dir])) { + tg->any_timer_armed[dir] = false; + schedule_next_request(tgm, dir); } } } diff --git a/block/throttle.c b/block/throttle.c index 3aaef18d4e..1098a4ae9a 100644 --- a/block/throttle.c +++ b/block/throttle.c @@ -118,7 +118,7 @@ throttle_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, { ThrottleGroupMember *tgm = bs->opaque; - throttle_group_co_io_limits_intercept(tgm, bytes, false); + throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_READ); return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); } @@ -128,7 +128,7 @@ throttle_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { ThrottleGroupMember *tgm = bs->opaque; - throttle_group_co_io_limits_intercept(tgm, bytes, true); + throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_WRITE); return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); } @@ -138,7 +138,7 @@ throttle_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes, BdrvRequestFlags flags) { ThrottleGroupMember *tgm = bs->opaque; - throttle_group_co_io_limits_intercept(tgm, bytes, true); + throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_WRITE); return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); } @@ -147,7 +147,7 @@ static int coroutine_fn GRAPH_RDLOCK throttle_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) { ThrottleGroupMember *tgm = bs->opaque; - throttle_group_co_io_limits_intercept(tgm, bytes, true); + throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_WRITE); return bdrv_co_pdiscard(bs->file, offset, bytes); } diff --git a/include/block/throttle-groups.h b/include/block/throttle-groups.h index ff282fc0f8..2355e8d9de 100644 --- a/include/block/throttle-groups.h +++ b/include/block/throttle-groups.h 
@@ -37,7 +37,7 @@ typedef struct ThrottleGroupMember { AioContext *aio_context; /* throttled_reqs_lock protects the CoQueues for throttled requests. */ CoMutex throttled_reqs_lock; - CoQueue throttled_reqs[2]; + CoQueue throttled_reqs[THROTTLE_MAX]; /* Nonzero if the I/O limits are currently being ignored; generally * it is zero. Accessed with atomic operations. @@ -54,7 +54,7 @@ typedef struct ThrottleGroupMember { * throttle_state tells us if I/O limits are configured. */ ThrottleState *throttle_state; ThrottleTimers throttle_timers; - unsigned pending_reqs[2]; + unsigned pending_reqs[THROTTLE_MAX]; QLIST_ENTRY(ThrottleGroupMember) round_robin; } ThrottleGroupMember; @@ -78,7 +78,7 @@ void throttle_group_restart_tgm(ThrottleGroupMember *tgm); void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm, int64_t bytes, - bool is_write); + ThrottleDirection direction); void throttle_group_attach_aio_context(ThrottleGroupMember *tgm, AioContext *new_context); void throttle_group_detach_aio_context(ThrottleGroupMember *tgm); From 56d1a022a77ea2125564913665eeadf3e303a671 Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Thu, 24 Aug 2023 17:53:40 +0200 Subject: [PATCH 10/14] file-posix: Clear bs->bl.zoned on error bs->bl.zoned is what indicates whether the zone information is present and valid; it is the only thing that raw_refresh_zoned_limits() sets if CONFIG_BLKZONED is not defined, and it is also the only thing that it sets if CONFIG_BLKZONED is defined, but there are no zones. Make sure that it is always set to BLK_Z_NONE if there is an error anywhere in raw_refresh_zoned_limits() so that we do not accidentally announce zones while our information is incomplete or invalid. This also fixes a memory leak in the last error path in raw_refresh_zoned_limits(). 
Signed-off-by: Hanna Czenczek Message-Id: <20230824155345.109765-2-hreitz@redhat.com> Reviewed-by: Sam Li --- block/file-posix.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index b16e9c21a1..2b88b9eefa 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -1412,11 +1412,9 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st, BlockZoneModel zoned; int ret; - bs->bl.zoned = BLK_Z_NONE; - ret = get_sysfs_zoned_model(st, &zoned); if (ret < 0 || zoned == BLK_Z_NONE) { - return; + goto no_zoned; } bs->bl.zoned = zoned; @@ -1437,10 +1435,10 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st, if (ret < 0) { error_setg_errno(errp, -ret, "Unable to read chunk_sectors " "sysfs attribute"); - return; + goto no_zoned; } else if (!ret) { error_setg(errp, "Read 0 from chunk_sectors sysfs attribute"); - return; + goto no_zoned; } bs->bl.zone_size = ret << BDRV_SECTOR_BITS; @@ -1448,10 +1446,10 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st, if (ret < 0) { error_setg_errno(errp, -ret, "Unable to read nr_zones " "sysfs attribute"); - return; + goto no_zoned; } else if (!ret) { error_setg(errp, "Read 0 from nr_zones sysfs attribute"); - return; + goto no_zoned; } bs->bl.nr_zones = ret; @@ -1472,10 +1470,15 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st, ret = get_zones_wp(bs, s->fd, 0, bs->bl.nr_zones, 0); if (ret < 0) { error_setg_errno(errp, -ret, "report wps failed"); - bs->wps = NULL; - return; + goto no_zoned; } qemu_co_mutex_init(&bs->wps->colock); + return; + +no_zoned: + bs->bl.zoned = BLK_Z_NONE; + g_free(bs->wps); + bs->wps = NULL; } #else /* !defined(CONFIG_BLKZONED) */ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st, From 4b5d80f3d02096a9bb1f651f6b3401ba40877159 Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Thu, 24 Aug 2023 17:53:41 +0200 
Subject: [PATCH 11/14] file-posix: Check bs->bl.zoned for zone info Instead of checking bs->wps or bs->bl.zone_size for whether zone information is present, check bs->bl.zoned. That is the flag that raw_refresh_zoned_limits() reliably sets to indicate zone support. If it is set to something other than BLK_Z_NONE, other values and objects like bs->wps and bs->bl.zone_size must be non-null/zero and valid; if it is not, we cannot rely on their validity. Signed-off-by: Hanna Czenczek Message-Id: <20230824155345.109765-3-hreitz@redhat.com> Reviewed-by: Sam Li --- block/file-posix.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index 2b88b9eefa..46e22403fe 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2455,9 +2455,10 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, if (fd_open(bs) < 0) return -EIO; #if defined(CONFIG_BLKZONED) - if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && bs->wps) { + if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && + bs->bl.zoned != BLK_Z_NONE) { qemu_co_mutex_lock(&bs->wps->colock); - if (type & QEMU_AIO_ZONE_APPEND && bs->bl.zone_size) { + if (type & QEMU_AIO_ZONE_APPEND) { int index = offset / bs->bl.zone_size; offset = bs->wps->wp[index]; } @@ -2508,8 +2509,8 @@ out: { BlockZoneWps *wps = bs->wps; if (ret == 0) { - if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) - && wps && bs->bl.zone_size) { + if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && + bs->bl.zoned != BLK_Z_NONE) { uint64_t *wp = &wps->wp[offset / bs->bl.zone_size]; if (!BDRV_ZT_IS_CONV(*wp)) { if (type & QEMU_AIO_ZONE_APPEND) { @@ -2529,7 +2530,8 @@ out: } } - if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && wps) { + if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && + bs->blk.zoned != BLK_Z_NONE) { qemu_co_mutex_unlock(&wps->colock); } } From deab5c9a4ed74f76a713008a42527762b30a7e84 Mon Sep 17 00:00:00 2001 From: Hanna Czenczek 
Date: Thu, 24 Aug 2023 17:53:42 +0200 Subject: [PATCH 12/14] file-posix: Fix zone update in I/O error path We must check that zone information is present before running update_zones_wp(). Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2234374 Fixes: Coverity CID 1512459 Signed-off-by: Hanna Czenczek Message-Id: <20230824155345.109765-4-hreitz@redhat.com> Reviewed-by: Sam Li --- block/file-posix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/file-posix.c b/block/file-posix.c index 46e22403fe..a050682e97 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2525,7 +2525,8 @@ out: } } } else { - if (type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) { + if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && + bs->bl.zoned != BLK_Z_NONE) { update_zones_wp(bs, s->fd, 0, 1); } } From d31b50a15dd25a560749b25fc40b6484fd1a57b7 Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Thu, 24 Aug 2023 17:53:43 +0200 Subject: [PATCH 13/14] file-posix: Simplify raw_co_prw's 'out' zone code We duplicate the same condition three times here, pull it out to the top level. 
Signed-off-by: Hanna Czenczek Message-Id: <20230824155345.109765-5-hreitz@redhat.com> Reviewed-by: Sam Li --- block/file-posix.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index a050682e97..aa89789737 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2506,11 +2506,10 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, out: #if defined(CONFIG_BLKZONED) -{ - BlockZoneWps *wps = bs->wps; - if (ret == 0) { - if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && - bs->bl.zoned != BLK_Z_NONE) { + if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && + bs->bl.zoned != BLK_Z_NONE) { + BlockZoneWps *wps = bs->wps; + if (ret == 0) { uint64_t *wp = &wps->wp[offset / bs->bl.zone_size]; if (!BDRV_ZT_IS_CONV(*wp)) { if (type & QEMU_AIO_ZONE_APPEND) { @@ -2523,19 +2522,12 @@ out: *wp = offset + bytes; } } - } - } else { - if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && - bs->bl.zoned != BLK_Z_NONE) { + } else { update_zones_wp(bs, s->fd, 0, 1); } - } - if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && - bs->blk.zoned != BLK_Z_NONE) { qemu_co_mutex_unlock(&wps->colock); } -} #endif return ret; } From 380448464dd89291cf7fd7434be6c225482a334d Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Thu, 24 Aug 2023 17:53:44 +0200 Subject: [PATCH 14/14] tests/file-io-error: New test This is a regression test for https://bugzilla.redhat.com/show_bug.cgi?id=2234374. All this test needs to do is trigger an I/O error inside of file-posix (specifically raw_co_prw()). One reliable way to do this without requiring special privileges is to use a FUSE export, which allows us to inject any error that we want, e.g. via blkdebug. 
Signed-off-by: Hanna Czenczek Message-Id: <20230824155345.109765-6-hreitz@redhat.com> [hreitz: Fixed test to be skipped when there is no FUSE support, to suppress fusermount's allow_other warning, and to be skipped with $IMGOPTSSYNTAX enabled] Signed-off-by: Hanna Czenczek --- tests/qemu-iotests/tests/file-io-error | 119 +++++++++++++++++++++ tests/qemu-iotests/tests/file-io-error.out | 33 ++++++ 2 files changed, 152 insertions(+) create mode 100755 tests/qemu-iotests/tests/file-io-error create mode 100644 tests/qemu-iotests/tests/file-io-error.out diff --git a/tests/qemu-iotests/tests/file-io-error b/tests/qemu-iotests/tests/file-io-error new file mode 100755 index 0000000000..88ee5f670c --- /dev/null +++ b/tests/qemu-iotests/tests/file-io-error @@ -0,0 +1,119 @@ +#!/usr/bin/env bash +# group: rw +# +# Produce an I/O error in file-posix, and hope that it is not catastrophic. +# Regression test for: https://bugzilla.redhat.com/show_bug.cgi?id=2234374 +# +# Copyright (C) 2023 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +seq=$(basename "$0") +echo "QA output created by $seq" + +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_qemu + rm -f "$TEST_DIR/fuse-export" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ../common.rc +. ../common.filter +. 
../common.qemu + +# Format-agnostic (we do not use any), but we do test the file protocol +_supported_proto file +_require_drivers blkdebug null-co + +if [ "$IMGOPTSSYNTAX" = "true" ]; then + # We need `$QEMU_IO -f file` to work; IMGOPTSSYNTAX uses --image-opts, + # breaking -f. + _unsupported_fmt $IMGFMT +fi + +# This is a regression test of a bug in which file-posix would access zone +# information in case of an I/O error even when there is no zone information, +# resulting in a division by zero. +# To reproduce the problem, we need to trigger an I/O error inside of +# file-posix, which can be done (rootless) by providing a FUSE export that +# presents only errors when accessed. + +_launch_qemu +_send_qemu_cmd $QEMU_HANDLE \ + "{'execute': 'qmp_capabilities'}" \ + 'return' + +_send_qemu_cmd $QEMU_HANDLE \ + "{'execute': 'blockdev-add', + 'arguments': { + 'driver': 'blkdebug', + 'node-name': 'node0', + 'inject-error': [{'event': 'none'}], + 'image': { + 'driver': 'null-co' + } + }}" \ + 'return' + +# FUSE mountpoint must exist and be a regular file +touch "$TEST_DIR/fuse-export" + +# The grep -v to filter fusermount's (benign) error when /etc/fuse.conf does +# not contain user_allow_other and the subsequent check for missing FUSE support +# have both been taken from iotest 308. +output=$(_send_qemu_cmd $QEMU_HANDLE \ + "{'execute': 'block-export-add', + 'arguments': { + 'id': 'exp0', + 'type': 'fuse', + 'node-name': 'node0', + 'mountpoint': '$TEST_DIR/fuse-export', + 'writable': true + }}" \ + 'return' \ + | grep -v 'option allow_other only allowed if') + +if echo "$output" | grep -q "Parameter 'type' does not accept value 'fuse'"; then + _notrun 'No FUSE support' +fi +echo "$output" + +echo +# This should fail, but gracefully, i.e. just print an I/O error, not crash. 
+$QEMU_IO -f file -c 'write 0 64M' "$TEST_DIR/fuse-export" | _filter_qemu_io +echo + +_send_qemu_cmd $QEMU_HANDLE \ + "{'execute': 'block-export-del', + 'arguments': {'id': 'exp0'}}" \ + 'return' + +_send_qemu_cmd $QEMU_HANDLE \ + '' \ + 'BLOCK_EXPORT_DELETED' + +_send_qemu_cmd $QEMU_HANDLE \ + "{'execute': 'blockdev-del', + 'arguments': {'node-name': 'node0'}}" \ + 'return' + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/tests/file-io-error.out b/tests/qemu-iotests/tests/file-io-error.out new file mode 100644 index 0000000000..0f46455a94 --- /dev/null +++ b/tests/qemu-iotests/tests/file-io-error.out @@ -0,0 +1,33 @@ +QA output created by file-io-error +{'execute': 'qmp_capabilities'} +{"return": {}} +{'execute': 'blockdev-add', + 'arguments': { + 'driver': 'blkdebug', + 'node-name': 'node0', + 'inject-error': [{'event': 'none'}], + 'image': { + 'driver': 'null-co' + } + }} +{"return": {}} +{'execute': 'block-export-add', + 'arguments': { + 'id': 'exp0', + 'type': 'fuse', + 'node-name': 'node0', + 'mountpoint': 'TEST_DIR/fuse-export', + 'writable': true + }} +{"return": {}} + +write failed: Input/output error + +{'execute': 'block-export-del', + 'arguments': {'id': 'exp0'}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "exp0"}} +{'execute': 'blockdev-del', + 'arguments': {'node-name': 'node0'}} +{"return": {}} +*** done