From cce10a1f0c827653c00e81b6d7865cf974275994 Mon Sep 17 00:00:00 2001 From: Hyman Huang Date: Wed, 1 Nov 2023 22:04:04 +0800 Subject: [PATCH 1/7] system/dirtylimit: Fix a race situation Fix a race situation for global variable dirtylimit_state. Also, replace usleep by g_usleep to increase platform accessibility to the sleep function. Signed-off-by: Hyman Huang Reviewed-by: Fabiano Rosas Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <27c86239e21eda03d11ce5a3d07da3c229f562e3.1698847223.git.yong.huang@smartx.com> --- system/dirtylimit.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/system/dirtylimit.c b/system/dirtylimit.c index fa959d7743..3666c4cb7c 100644 --- a/system/dirtylimit.c +++ b/system/dirtylimit.c @@ -411,12 +411,20 @@ void dirtylimit_set_all(uint64_t quota, void dirtylimit_vcpu_execute(CPUState *cpu) { - if (dirtylimit_in_service() && - dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled && - cpu->throttle_us_per_full) { - trace_dirtylimit_vcpu_execute(cpu->cpu_index, - cpu->throttle_us_per_full); - usleep(cpu->throttle_us_per_full); + if (cpu->throttle_us_per_full) { + dirtylimit_state_lock(); + + if (dirtylimit_in_service() && + dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) { + dirtylimit_state_unlock(); + trace_dirtylimit_vcpu_execute(cpu->cpu_index, + cpu->throttle_us_per_full); + + g_usleep(cpu->throttle_us_per_full); + return; + } + + dirtylimit_state_unlock(); } } From 78a7ef15804c0c0a886fcf9f12e5464f8eddd20c Mon Sep 17 00:00:00 2001 From: Hyman Huang Date: Wed, 1 Nov 2023 22:04:05 +0800 Subject: [PATCH 2/7] system/dirtylimit: Drop the reduplicative check Checking if dirty limit is in service is done by the dirtylimit_query_all function, drop the reduplicative check in the qmp_query_vcpu_dirty_limit function. Signed-off-by: Hyman Huang Reviewed-by: Fabiano Rosas Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: --- system/dirtylimit.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/system/dirtylimit.c b/system/dirtylimit.c index 3666c4cb7c..495c7a7082 100644 --- a/system/dirtylimit.c +++ b/system/dirtylimit.c @@ -652,10 +652,6 @@ static struct DirtyLimitInfoList *dirtylimit_query_all(void) struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp) { - if (!dirtylimit_in_service()) { - return NULL; - } - return dirtylimit_query_all(); } From 17257b90be4f51dd3040ca2e472201407b2327c4 Mon Sep 17 00:00:00 2001 From: Hyman Huang Date: Wed, 1 Nov 2023 22:04:06 +0800 Subject: [PATCH 3/7] tests: Add migration dirty-limit capability test Add migration dirty-limit capability test if kernel support dirty ring. Migration dirty-limit capability introduce dirty limit capability, two parameters: x-vcpu-dirty-limit-period and vcpu-dirty-limit are introduced to implement the live migration with dirty limit. The test case does the following things: 1. start src, dst vm and enable dirty-limit capability 2. start migrate and set cancel it to check if dirty limit stop working. 3. restart dst vm 4. start migrate and enable dirty-limit capability 5. check if migration satisfy the convergence condition during pre-switchover phase. Note that this test case involves many passes, so it runs in slow mode only. Signed-off-by: Hyman Huang Acked-by: Peter Xu Reviewed-by: Fabiano Rosas Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: --- tests/qtest/migration-test.c | 164 +++++++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index e803b46039..5752412b64 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -3091,6 +3091,166 @@ static void test_vcpu_dirty_limit(void) dirtylimit_stop_vm(vm); } +static void migrate_dirty_limit_wait_showup(QTestState *from, + const int64_t period, + const int64_t value) +{ + /* Enable dirty limit capability */ + migrate_set_capability(from, "dirty-limit", true); + + /* Set dirty limit parameters */ + migrate_set_parameter_int(from, "x-vcpu-dirty-limit-period", period); + migrate_set_parameter_int(from, "vcpu-dirty-limit", value); + + /* Make sure migrate can't converge */ + migrate_ensure_non_converge(from); + + /* To check limit rate after precopy */ + migrate_set_capability(from, "pause-before-switchover", true); + + /* Wait for the serial output from the source */ + wait_for_serial("src_serial"); +} + +/* + * This test does: + * source destination + * start vm + * start incoming vm + * migrate + * wait dirty limit to begin + * cancel migrate + * cancellation check + * restart incoming vm + * migrate + * wait dirty limit to begin + * wait pre-switchover event + * convergence condition check + * + * And see if dirty limit migration works correctly. + * This test case involves many passes, so it runs in slow mode only. + */ +static void test_migrate_dirty_limit(void) +{ + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + QTestState *from, *to; + int64_t remaining; + uint64_t throttle_us_per_full; + /* + * We want the test to be stable and as fast as possible. + * E.g., with 1Gb/s bandwith migration may pass without dirty limit, + * so we need to decrease a bandwidth. + */ + const int64_t dirtylimit_period = 1000, dirtylimit_value = 50; + const int64_t max_bandwidth = 400000000; /* ~400Mb/s */ + const int64_t downtime_limit = 250; /* 250ms */ + /* + * We migrate through unix-socket (> 500Mb/s). + * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s). + * So, we can predict expected_threshold + */ + const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000; + int max_try_count = 10; + MigrateCommon args = { + .start = { + .hide_stderr = true, + .use_dirty_ring = true, + }, + .listen_uri = uri, + .connect_uri = uri, + }; + + /* Start src, dst vm */ + if (test_migrate_start(&from, &to, args.listen_uri, &args.start)) { + return; + } + + /* Prepare for dirty limit migration and wait src vm show up */ + migrate_dirty_limit_wait_showup(from, dirtylimit_period, dirtylimit_value); + + /* Start migrate */ + migrate_qmp(from, uri, "{}"); + + /* Wait for dirty limit throttle begin */ + throttle_us_per_full = 0; + while (throttle_us_per_full == 0) { + throttle_us_per_full = + read_migrate_property_int(from, "dirty-limit-throttle-time-per-round"); + usleep(100); + g_assert_false(got_src_stop); + } + + /* Now cancel migrate and wait for dirty limit throttle switch off */ + migrate_cancel(from); + wait_for_migration_status(from, "cancelled", NULL); + + /* Check if dirty limit throttle switched off, set timeout 1ms */ + do { + throttle_us_per_full = + read_migrate_property_int(from, "dirty-limit-throttle-time-per-round"); + usleep(100); + g_assert_false(got_src_stop); + } while (throttle_us_per_full != 0 && --max_try_count); + + /* Assert dirty limit is not in service */ + g_assert_cmpint(throttle_us_per_full, ==, 0); + + args = (MigrateCommon) { + .start = { + .only_target = true, + .use_dirty_ring = true, + }, + .listen_uri = uri, + .connect_uri = uri, + }; + + /* Restart dst vm, src vm already show up so we needn't wait anymore */ + if (test_migrate_start(&from, &to, args.listen_uri, &args.start)) { + return; + } + + /* Start migrate */ + migrate_qmp(from, uri, "{}"); + + /* Wait for dirty limit throttle begin */ + throttle_us_per_full = 0; + while (throttle_us_per_full == 0) { + throttle_us_per_full = + read_migrate_property_int(from, "dirty-limit-throttle-time-per-round"); + usleep(100); + g_assert_false(got_src_stop); + } + + /* + * The dirty limit rate should equals the return value of + * query-vcpu-dirty-limit if dirty limit cap set + */ + g_assert_cmpint(dirtylimit_value, ==, get_limit_rate(from)); + + /* Now, we have tested if dirty limit works, let it converge */ + migrate_set_parameter_int(from, "downtime-limit", downtime_limit); + migrate_set_parameter_int(from, "max-bandwidth", max_bandwidth); + + /* + * Wait for pre-switchover status to check if migration + * satisfy the convergence condition + */ + wait_for_migration_status(from, "pre-switchover", NULL); + + remaining = read_ram_property_int(from, "remaining"); + g_assert_cmpint(remaining, <, + (expected_threshold + expected_threshold / 100)); + + migrate_continue(from, "pre-switchover"); + + qtest_qmp_eventwait(to, "RESUME"); + + wait_for_serial("dest_serial"); + wait_for_migration_complete(from); + + test_migrate_end(from, to, true); +} + static bool kvm_dirty_ring_supported(void) { #if defined(__linux__) && defined(HOST_X86_64) @@ -3301,6 +3461,10 @@ int main(int argc, char **argv) */ if (g_test_slow()) { qtest_add_func("/migration/auto_converge", test_migrate_auto_converge); + if (g_str_equal(arch, "x86_64") && + has_kvm && kvm_dirty_ring_supported()) { + qtest_add_func("/migration/dirty_limit", test_migrate_dirty_limit); + } } qtest_add_func("/migration/multifd/tcp/plain/none", test_multifd_tcp_none); From 4cc563d460b6e8a29d18b27381dab8ca0621f2da Mon Sep 17 00:00:00 2001 From: Hyman Huang Date: Wed, 1 Nov 2023 22:04:07 +0800 Subject: [PATCH 4/7] tests/migration: Introduce dirty-ring-size option into guestperf Dirty ring size configuration is not supported by guestperf tool. Introduce dirty-ring-size (ranges in [1024, 65536]) option so developers can play with dirty-ring and dirty-limit feature easier. To set dirty ring size with 4096 during migration test: $ ./tests/migration/guestperf.py --dirty-ring-size 4096 xxx Signed-off-by: Hyman Huang Reviewed-by: Fabiano Rosas Signed-off-by: Juan Quintela Message-ID: <8a388cec5c1f73a34d42515bbc43837e97ee3839.1698847223.git.yong.huang@smartx.com> --- tests/migration/guestperf/engine.py | 6 +++++- tests/migration/guestperf/hardware.py | 8 ++++++-- tests/migration/guestperf/shell.py | 6 +++++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/migration/guestperf/engine.py b/tests/migration/guestperf/engine.py index da96ca034a..aabf6de4d9 100644 --- a/tests/migration/guestperf/engine.py +++ b/tests/migration/guestperf/engine.py @@ -325,7 +325,6 @@ class Engine(object): cmdline = "'" + cmdline + "'" argv = [ - "-accel", "kvm", "-cpu", "host", "-kernel", self._kernel, "-initrd", self._initrd, @@ -333,6 +332,11 @@ class Engine(object): "-m", str((hardware._mem * 1024) + 512), "-smp", str(hardware._cpus), ] + if hardware._dirty_ring_size: + argv.extend(["-accel", "kvm,dirty-ring-size=%s" % + hardware._dirty_ring_size]) + else: + argv.extend(["-accel", "kvm"]) argv.extend(self._get_qemu_serial_args()) diff --git a/tests/migration/guestperf/hardware.py b/tests/migration/guestperf/hardware.py index 3145785ffd..f779cc050b 100644 --- a/tests/migration/guestperf/hardware.py +++ b/tests/migration/guestperf/hardware.py @@ -23,7 +23,8 @@ class Hardware(object): src_cpu_bind=None, src_mem_bind=None, dst_cpu_bind=None, dst_mem_bind=None, prealloc_pages = False, - huge_pages=False, locked_pages=False): + huge_pages=False, locked_pages=False, + dirty_ring_size=0): self._cpus = cpus self._mem = mem # GiB self._src_mem_bind = src_mem_bind # List of NUMA nodes @@ -33,6 +34,7 @@ class Hardware(object): self._prealloc_pages = prealloc_pages self._huge_pages = huge_pages self._locked_pages = locked_pages + self._dirty_ring_size = dirty_ring_size def serialize(self): @@ -46,6 +48,7 @@ class Hardware(object): "prealloc_pages": self._prealloc_pages, "huge_pages": self._huge_pages, "locked_pages": self._locked_pages, + "dirty_ring_size": self._dirty_ring_size, } @classmethod @@ -59,4 +62,5 @@ class Hardware(object): data["dst_mem_bind"], data["prealloc_pages"], data["huge_pages"], - data["locked_pages"]) + data["locked_pages"], + data["dirty_ring_size"]) diff --git a/tests/migration/guestperf/shell.py b/tests/migration/guestperf/shell.py index 8a809e3dda..7d6b8cd7cf 100644 --- a/tests/migration/guestperf/shell.py +++ b/tests/migration/guestperf/shell.py @@ -60,6 +60,8 @@ class BaseShell(object): parser.add_argument("--prealloc-pages", dest="prealloc_pages", default=False) parser.add_argument("--huge-pages", dest="huge_pages", default=False) parser.add_argument("--locked-pages", dest="locked_pages", default=False) + parser.add_argument("--dirty-ring-size", dest="dirty_ring_size", + default=0, type=int) self._parser = parser @@ -89,7 +91,9 @@ class BaseShell(object): locked_pages=args.locked_pages, huge_pages=args.huge_pages, - prealloc_pages=args.prealloc_pages) + prealloc_pages=args.prealloc_pages, + + dirty_ring_size=args.dirty_ring_size) class Shell(BaseShell): From 22b7cb2c79d2df0946ec1cf88dfc1c6973e6008d Mon Sep 17 00:00:00 2001 From: Hyman Huang Date: Wed, 1 Nov 2023 22:04:08 +0800 Subject: [PATCH 5/7] tests/migration: Introduce dirty-limit into guestperf Currently, guestperf does not cover the dirty-limit migration, support this feature. Note that dirty-limit requires 'dirty-ring-size' set. To enable dirty-limit, setting x-vcpu-dirty-limit-period as 500ms and x-vcpu-dirty-limit as 10MB/s: $ ./tests/migration/guestperf.py \ --dirty-ring-size 4096 \ --dirty-limit --x-vcpu-dirty-limit-period 500 \ --vcpu-dirty-limit 10 --output output.json \ To run the entire standardized set of dirty-limit-enabled comparisons, with unix migration: $ ./tests/migration/guestperf-batch.py \ --dirty-ring-size 4096 \ --dst-host localhost --transport unix \ --filter compr-dirty-limit* --output outputdir Signed-off-by: Hyman Huang Reviewed-by: Fabiano Rosas Message-Id: <516e7a55dfc6e33d33510be37eb24223de5dc072.1697815117.git.yong.huang@smartx.com> Message-ID: Signed-off-by: Juan Quintela --- tests/migration/guestperf/comparison.py | 23 +++++++++++++++++++++++ tests/migration/guestperf/engine.py | 17 +++++++++++++++++ tests/migration/guestperf/progress.py | 16 ++++++++++++++-- tests/migration/guestperf/scenario.py | 11 ++++++++++- tests/migration/guestperf/shell.py | 18 +++++++++++++++++- 5 files changed, 81 insertions(+), 4 deletions(-) diff --git a/tests/migration/guestperf/comparison.py b/tests/migration/guestperf/comparison.py index c03b3f6d7e..42cc0372d1 100644 --- a/tests/migration/guestperf/comparison.py +++ b/tests/migration/guestperf/comparison.py @@ -135,4 +135,27 @@ COMPARISONS = [ Scenario("compr-multifd-channels-64", multifd=True, multifd_channels=64), ]), + + # Looking at effect of dirty-limit with + # varying x_vcpu_dirty_limit_period + Comparison("compr-dirty-limit-period", scenarios = [ + Scenario("compr-dirty-limit-period-500", + dirty_limit=True, x_vcpu_dirty_limit_period=500), + Scenario("compr-dirty-limit-period-800", + dirty_limit=True, x_vcpu_dirty_limit_period=800), + Scenario("compr-dirty-limit-period-1000", + dirty_limit=True, x_vcpu_dirty_limit_period=1000), + ]), + + + # Looking at effect of dirty-limit with + # varying vcpu_dirty_limit + Comparison("compr-dirty-limit", scenarios = [ + Scenario("compr-dirty-limit-10MB", + dirty_limit=True, vcpu_dirty_limit=10), + Scenario("compr-dirty-limit-20MB", + dirty_limit=True, vcpu_dirty_limit=20), + Scenario("compr-dirty-limit-50MB", + dirty_limit=True, vcpu_dirty_limit=50), + ]), ] diff --git a/tests/migration/guestperf/engine.py b/tests/migration/guestperf/engine.py index aabf6de4d9..608d7270f6 100644 --- a/tests/migration/guestperf/engine.py +++ b/tests/migration/guestperf/engine.py @@ -102,6 +102,8 @@ class Engine(object): info.get("expected-downtime", 0), info.get("setup-time", 0), info.get("cpu-throttle-percentage", 0), + info.get("dirty-limit-throttle-time-per-round", 0), + info.get("dirty-limit-ring-full-time", 0), ) def _migrate(self, hardware, scenario, src, dst, connect_uri): @@ -203,6 +205,21 @@ class Engine(object): resp = dst.cmd("migrate-set-parameters", multifd_channels=scenario._multifd_channels) + if scenario._dirty_limit: + if not hardware._dirty_ring_size: + raise Exception("dirty ring size must be configured when " + "testing dirty limit migration") + + resp = src.cmd("migrate-set-capabilities", + capabilities = [ + { "capability": "dirty-limit", + "state": True } + ]) + resp = src.cmd("migrate-set-parameters", + x_vcpu_dirty_limit_period=scenario._x_vcpu_dirty_limit_period) + resp = src.cmd("migrate-set-parameters", + vcpu_dirty_limit=scenario._vcpu_dirty_limit) + resp = src.cmd("migrate", uri=connect_uri) post_copy = False diff --git a/tests/migration/guestperf/progress.py b/tests/migration/guestperf/progress.py index ab1ee57273..d490584217 100644 --- a/tests/migration/guestperf/progress.py +++ b/tests/migration/guestperf/progress.py @@ -81,7 +81,9 @@ class Progress(object): downtime, downtime_expected, setup_time, - throttle_pcent): + throttle_pcent, + dirty_limit_throttle_time_per_round, + dirty_limit_ring_full_time): self._status = status self._ram = ram @@ -91,6 +93,10 @@ class Progress(object): self._downtime_expected = downtime_expected self._setup_time = setup_time self._throttle_pcent = throttle_pcent + self._dirty_limit_throttle_time_per_round = \ + dirty_limit_throttle_time_per_round + self._dirty_limit_ring_full_time = \ + dirty_limit_ring_full_time def serialize(self): return { @@ -102,6 +108,10 @@ class Progress(object): "downtime_expected": self._downtime_expected, "setup_time": self._setup_time, "throttle_pcent": self._throttle_pcent, + "dirty_limit_throttle_time_per_round": + self._dirty_limit_throttle_time_per_round, + "dirty_limit_ring_full_time": + self._dirty_limit_ring_full_time, } @classmethod @@ -114,4 +124,6 @@ class Progress(object): data["downtime"], data["downtime_expected"], data["setup_time"], - data["throttle_pcent"]) + data["throttle_pcent"], + data["dirty_limit_throttle_time_per_round"], + data["dirty_limit_ring_full_time"]) diff --git a/tests/migration/guestperf/scenario.py b/tests/migration/guestperf/scenario.py index de70d9b2f5..154c4f5d5f 100644 --- a/tests/migration/guestperf/scenario.py +++ b/tests/migration/guestperf/scenario.py @@ -30,7 +30,9 @@ class Scenario(object): auto_converge=False, auto_converge_step=10, compression_mt=False, compression_mt_threads=1, compression_xbzrle=False, compression_xbzrle_cache=10, - multifd=False, multifd_channels=2): + multifd=False, multifd_channels=2, + dirty_limit=False, x_vcpu_dirty_limit_period=500, + vcpu_dirty_limit=1): self._name = name @@ -60,6 +62,10 @@ class Scenario(object): self._multifd = multifd self._multifd_channels = multifd_channels + self._dirty_limit = dirty_limit + self._x_vcpu_dirty_limit_period = x_vcpu_dirty_limit_period + self._vcpu_dirty_limit = vcpu_dirty_limit + def serialize(self): return { "name": self._name, @@ -79,6 +85,9 @@ class Scenario(object): "compression_xbzrle_cache": self._compression_xbzrle_cache, "multifd": self._multifd, "multifd_channels": self._multifd_channels, + "dirty_limit": self._dirty_limit, + "x_vcpu_dirty_limit_period": self._x_vcpu_dirty_limit_period, + "vcpu_dirty_limit": self._vcpu_dirty_limit, } @classmethod diff --git a/tests/migration/guestperf/shell.py b/tests/migration/guestperf/shell.py index 7d6b8cd7cf..c85d89efec 100644 --- a/tests/migration/guestperf/shell.py +++ b/tests/migration/guestperf/shell.py @@ -131,6 +131,17 @@ class Shell(BaseShell): parser.add_argument("--multifd-channels", dest="multifd_channels", default=2, type=int) + parser.add_argument("--dirty-limit", dest="dirty_limit", default=False, + action="store_true") + + parser.add_argument("--x-vcpu-dirty-limit-period", + dest="x_vcpu_dirty_limit_period", + default=500, type=int) + + parser.add_argument("--vcpu-dirty-limit", + dest="vcpu_dirty_limit", + default=1, type=int) + def get_scenario(self, args): return Scenario(name="perfreport", downtime=args.downtime, @@ -154,7 +165,12 @@ class Shell(BaseShell): compression_xbzrle_cache=args.compression_xbzrle_cache, multifd=args.multifd, - multifd_channels=args.multifd_channels) + multifd_channels=args.multifd_channels, + + dirty_limit=args.dirty_limit, + x_vcpu_dirty_limit_period=\ + args.x_vcpu_dirty_limit_period, + vcpu_dirty_limit=args.vcpu_dirty_limit) def run(self, argv): args = self._parser.parse_args(argv) From ceddc48278a006e3f13b8a1881676cfb3a1ca99a Mon Sep 17 00:00:00 2001 From: Hyman Huang Date: Wed, 1 Nov 2023 22:04:09 +0800 Subject: [PATCH 6/7] docs/migration: Add the dirty limit section The dirty limit feature has been introduced since the 8.1 QEMU release but has not reflected in the document, add a section for that. Signed-off-by: Hyman Huang Reviewed-by: Fabiano Rosas Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <0f2b2c63fec22ea23e4926cdeb567b7a0ebd8152.1698847223.git.yong.huang@smartx.com> --- docs/devel/migration.rst | 71 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst index 240eb16d90..5adf4f12f7 100644 --- a/docs/devel/migration.rst +++ b/docs/devel/migration.rst @@ -594,6 +594,77 @@ path. Return path - opened by main thread, written by main thread AND postcopy thread (protected by rp_mutex) +Dirty limit +===================== +The dirty limit, short for dirty page rate upper limit, is a new capability +introduced in the 8.1 QEMU release that uses a new algorithm based on the KVM +dirty ring to throttle down the guest during live migration. + +The algorithm framework is as follows: + +:: + + ------------------------------------------------------------------------------ + main --------------> throttle thread ------------> PREPARE(1) <-------- + thread \ | | + \ | | + \ V | + -\ CALCULATE(2) | + \ | | + \ | | + \ V | + \ SET PENALTY(3) ----- + -\ | + \ | + \ V + -> virtual CPU thread -------> ACCEPT PENALTY(4) + ------------------------------------------------------------------------------ + +When the qmp command qmp_set_vcpu_dirty_limit is called for the first time, +the QEMU main thread starts the throttle thread. The throttle thread, once +launched, executes the loop, which consists of three steps: + + - PREPARE (1) + + The entire work of PREPARE (1) is preparation for the second stage, + CALCULATE(2), as the name implies. It involves preparing the dirty + page rate value and the corresponding upper limit of the VM: + The dirty page rate is calculated via the KVM dirty ring mechanism, + which tells QEMU how many dirty pages a virtual CPU has had since the + last KVM_EXIT_DIRTY_RING_FULL exception; The dirty page rate upper + limit is specified by caller, therefore fetch it directly. + + - CALCULATE (2) + + Calculate a suitable sleep period for each virtual CPU, which will be + used to determine the penalty for the target virtual CPU. The + computation must be done carefully in order to reduce the dirty page + rate progressively down to the upper limit without oscillation. To + achieve this, two strategies are provided: the first is to add or + subtract sleep time based on the ratio of the current dirty page rate + to the limit, which is used when the current dirty page rate is far + from the limit; the second is to add or subtract a fixed time when + the current dirty page rate is close to the limit. + + - SET PENALTY (3) + + Set the sleep time for each virtual CPU that should be penalized based + on the results of the calculation supplied by step CALCULATE (2). + +After completing the three above stages, the throttle thread loops back +to step PREPARE (1) until the dirty limit is reached. + +On the other hand, each virtual CPU thread reads the sleep duration and +sleeps in the path of the KVM_EXIT_DIRTY_RING_FULL exception handler, that +is ACCEPT PENALTY (4). Virtual CPUs tied with writing processes will +obviously exit to the path and get penalized, whereas virtual CPUs involved +with read processes will not. + +In summary, thanks to the KVM dirty ring technology, the dirty limit +algorithm will restrict virtual CPUs as needed to keep their dirty page +rate inside the limit. This leads to more steady reading performance during +live migration and can aid in improving large guest responsiveness. + Postcopy ======== From 0983125b405c479a6e7eb49b81cfdae969e28cee Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 3 Nov 2023 08:42:45 +0100 Subject: [PATCH 7/7] migration: Unlock mutex in error case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were not unlocking bitmap mutex on the error case. To fix it forever change to enclose the code with WITH_QEMU_LOCK_GUARD(). Coverity CID 1523750. Fixes: a2326705e5 ("migration: Stop migration immediately in RDMA error paths") Reviewed-by: Alex Bennée Signed-off-by: Juan Quintela Message-ID: <20231103074245.55166-1-quintela@redhat.com> --- migration/ram.c | 110 ++++++++++++++++++++++++------------------------ 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index a0f3b86663..8c7886ab79 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3030,71 +3030,71 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) * MAX_WAIT (if curious, further see commit 4508bd9ed8053ce) below, which * guarantees that we'll at least released it in a regular basis. */ - qemu_mutex_lock(&rs->bitmap_mutex); - WITH_RCU_READ_LOCK_GUARD() { - if (ram_list.version != rs->last_version) { - ram_state_reset(rs); - } - - /* Read version before ram_list.blocks */ - smp_rmb(); - - ret = rdma_registration_start(f, RAM_CONTROL_ROUND); - if (ret < 0) { - qemu_file_set_error(f, ret); - goto out; - } - - t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - i = 0; - while ((ret = migration_rate_exceeded(f)) == 0 || - postcopy_has_request(rs)) { - int pages; - - if (qemu_file_get_error(f)) { - break; + WITH_QEMU_LOCK_GUARD(&rs->bitmap_mutex) { + WITH_RCU_READ_LOCK_GUARD() { + if (ram_list.version != rs->last_version) { + ram_state_reset(rs); } - pages = ram_find_and_save_block(rs); - /* no more pages to sent */ - if (pages == 0) { - done = 1; - break; + /* Read version before ram_list.blocks */ + smp_rmb(); + + ret = rdma_registration_start(f, RAM_CONTROL_ROUND); + if (ret < 0) { + qemu_file_set_error(f, ret); + goto out; } - if (pages < 0) { - qemu_file_set_error(f, pages); - break; - } + t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + i = 0; + while ((ret = migration_rate_exceeded(f)) == 0 || + postcopy_has_request(rs)) { + int pages; - rs->target_page_count += pages; - - /* - * During postcopy, it is necessary to make sure one whole host - * page is sent in one chunk. - */ - if (migrate_postcopy_ram()) { - compress_flush_data(); - } - - /* - * we want to check in the 1st loop, just in case it was the 1st - * time and we had to sync the dirty bitmap. - * qemu_clock_get_ns() is a bit expensive, so we only check each - * some iterations - */ - if ((i & 63) == 0) { - uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / - 1000000; - if (t1 > MAX_WAIT) { - trace_ram_save_iterate_big_wait(t1, i); + if (qemu_file_get_error(f)) { break; } + + pages = ram_find_and_save_block(rs); + /* no more pages to sent */ + if (pages == 0) { + done = 1; + break; + } + + if (pages < 0) { + qemu_file_set_error(f, pages); + break; + } + + rs->target_page_count += pages; + + /* + * During postcopy, it is necessary to make sure one whole host + * page is sent in one chunk. + */ + if (migrate_postcopy_ram()) { + compress_flush_data(); + } + + /* + * we want to check in the 1st loop, just in case it was the 1st + * time and we had to sync the dirty bitmap. + * qemu_clock_get_ns() is a bit expensive, so we only check each + * some iterations + */ + if ((i & 63) == 0) { + uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / + 1000000; + if (t1 > MAX_WAIT) { + trace_ram_save_iterate_big_wait(t1, i); + break; + } + } + i++; } - i++; } } - qemu_mutex_unlock(&rs->bitmap_mutex); /* * Must occur before EOS (or any QEMUFile operation)