From 242b74eb69d0e53b25e294331a192b7a458b8e46 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 17 May 2023 15:37:48 +0300 Subject: [PATCH 1/6] runstate: add runstate_get() It's necessary to restore the state after failed/cancelled migration in further commit. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Juan Quintela Message-Id: <20230517123752.21615-2-vsementsov@yandex-team.ru> Signed-off-by: Juan Quintela --- include/sysemu/runstate.h | 1 + softmmu/runstate.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h index f3ed52548e..85f5d9a419 100644 --- a/include/sysemu/runstate.h +++ b/include/sysemu/runstate.h @@ -6,6 +6,7 @@ bool runstate_check(RunState state); void runstate_set(RunState new_state); +RunState runstate_get(void); bool runstate_is_running(void); bool runstate_needs_reset(void); bool runstate_store(char *str, size_t size); diff --git a/softmmu/runstate.c b/softmmu/runstate.c index 2f2396c819..1e6f0bcecc 100644 --- a/softmmu/runstate.c +++ b/softmmu/runstate.c @@ -221,6 +221,11 @@ void runstate_set(RunState new_state) current_run_state = new_state; } +RunState runstate_get(void) +{ + return current_run_state; +} + bool runstate_is_running(void) { return runstate_check(RUN_STATE_RUNNING); From c33f1829f891058442e9670325decff0c8a2e28c Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 17 May 2023 15:37:49 +0300 Subject: [PATCH 2/6] migration: never fail in global_state_store() Actually global_state_store() can never fail. Let's get rid of extra error paths. To make things clear, use new runstate_get() and use same approach for global_state_store() and global_state_store_running(). 
Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Juan Quintela Message-Id: <20230517123752.21615-3-vsementsov@yandex-team.ru> Signed-off-by: Juan Quintela --- include/migration/global_state.h | 2 +- migration/global_state.c | 23 +++++++++--------- migration/migration.c | 41 +++++++++++++++----------------- migration/savevm.c | 6 +---- 4 files changed, 32 insertions(+), 40 deletions(-) diff --git a/include/migration/global_state.h b/include/migration/global_state.h index 945eb35d5b..d7c2cd3216 100644 --- a/include/migration/global_state.h +++ b/include/migration/global_state.h @@ -16,7 +16,7 @@ #include "qapi/qapi-types-run-state.h" void register_global_state(void); -int global_state_store(void); +void global_state_store(void); void global_state_store_running(void); bool global_state_received(void); RunState global_state_get_runstate(void); diff --git a/migration/global_state.c b/migration/global_state.c index a33947ca32..4e2a9d8ec0 100644 --- a/migration/global_state.c +++ b/migration/global_state.c @@ -29,23 +29,22 @@ typedef struct { static GlobalState global_state; -int global_state_store(void) +static void global_state_do_store(RunState state) { - if (!runstate_store((char *)global_state.runstate, - sizeof(global_state.runstate))) { - error_report("runstate name too big: %s", global_state.runstate); - trace_migrate_state_too_big(); - return -EINVAL; - } - return 0; + const char *state_str = RunState_str(state); + assert(strlen(state_str) < sizeof(global_state.runstate)); + strpadcpy((char *)global_state.runstate, sizeof(global_state.runstate), + state_str, '\0'); +} + +void global_state_store(void) +{ + global_state_do_store(runstate_get()); } void global_state_store_running(void) { - const char *state = RunState_str(RUN_STATE_RUNNING); - assert(strlen(state) < sizeof(global_state.runstate)); - strpadcpy((char *)global_state.runstate, sizeof(global_state.runstate), - state, '\0'); + global_state_do_store(RUN_STATE_RUNNING); } bool 
global_state_received(void) diff --git a/migration/migration.c b/migration/migration.c index 5de7f734b9..c75d5aa479 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2288,27 +2288,26 @@ static void migration_completion(MigrationState *s) s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); s->vm_was_running = runstate_is_running(); - ret = global_state_store(); + global_state_store(); - if (!ret) { - ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); - trace_migration_completion_vm_stop(ret); - if (ret >= 0) { - ret = migration_maybe_pause(s, &current_active_state, - MIGRATION_STATUS_DEVICE); - } - if (ret >= 0) { - /* - * Inactivate disks except in COLO, and track that we - * have done so in order to remember to reactivate - * them if migration fails or is cancelled. - */ - s->block_inactive = !migrate_colo(); - migration_rate_set(RATE_LIMIT_DISABLED); - ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, - s->block_inactive); - } + ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); + trace_migration_completion_vm_stop(ret); + if (ret >= 0) { + ret = migration_maybe_pause(s, &current_active_state, + MIGRATION_STATUS_DEVICE); } + if (ret >= 0) { + /* + * Inactivate disks except in COLO, and track that we + * have done so in order to remember to reactivate + * them if migration fails or is cancelled. 
+ */ + s->block_inactive = !migrate_colo(); + migration_rate_set(RATE_LIMIT_DISABLED); + ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, + s->block_inactive); + } + qemu_mutex_unlock_iothread(); if (ret < 0) { @@ -3088,9 +3087,7 @@ static void *bg_migration_thread(void *opaque) qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); s->vm_was_running = runstate_is_running(); - if (global_state_store()) { - goto fail; - } + global_state_store(); /* Forcibly stop VM before saving state of vCPUs and devices */ if (vm_stop_force_state(RUN_STATE_PAUSED)) { goto fail; diff --git a/migration/savevm.c b/migration/savevm.c index 03795ce8dc..bc284087f9 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2919,11 +2919,7 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, saved_vm_running = runstate_is_running(); - ret = global_state_store(); - if (ret) { - error_setg(errp, "Error saving global state"); - return false; - } + global_state_store(); vm_stop(RUN_STATE_SAVE_VM); bdrv_drain_all_begin(); From e76005a081d08d1e42d98811fba983c59b3f736b Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 17 May 2023 15:37:50 +0300 Subject: [PATCH 3/6] runstate: drop unused runstate_store() The function is unused since previous commit. Drop it. 
Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Juan Quintela Message-Id: <20230517123752.21615-4-vsementsov@yandex-team.ru> Signed-off-by: Juan Quintela --- include/sysemu/runstate.h | 1 - softmmu/runstate.c | 12 ------------ 2 files changed, 13 deletions(-) diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h index 85f5d9a419..7beb29c2e2 100644 --- a/include/sysemu/runstate.h +++ b/include/sysemu/runstate.h @@ -9,7 +9,6 @@ void runstate_set(RunState new_state); RunState runstate_get(void); bool runstate_is_running(void); bool runstate_needs_reset(void); -bool runstate_store(char *str, size_t size); typedef void VMChangeStateHandler(void *opaque, bool running, RunState state); diff --git a/softmmu/runstate.c b/softmmu/runstate.c index 1e6f0bcecc..0370230a5e 100644 --- a/softmmu/runstate.c +++ b/softmmu/runstate.c @@ -175,18 +175,6 @@ bool runstate_check(RunState state) return current_run_state == state; } -bool runstate_store(char *str, size_t size) -{ - const char *state = RunState_str(current_run_state); - size_t len = strlen(state) + 1; - - if (len > size) { - return false; - } - memcpy(str, state, len); - return true; -} - static void runstate_init(void) { const RunStateTransition *p; From f4584076fc318bb4ac762e3c09ff3544938fed5b Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 17 May 2023 15:37:51 +0300 Subject: [PATCH 4/6] migration: switch from .vm_was_running to .vm_old_state No logic change here, only refactoring. That's a preparation for next commit where we finally restore the stopped vm state on migration failure or cancellation. 
Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Juan Quintela Message-Id: <20230517123752.21615-5-vsementsov@yandex-team.ru> Signed-off-by: Juan Quintela --- migration/migration.c | 11 ++++++----- migration/migration.h | 9 ++++++--- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index c75d5aa479..033162cda0 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1402,7 +1402,7 @@ void migrate_init(MigrationState *s) s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); s->total_time = 0; - s->vm_was_running = false; + s->vm_old_state = -1; s->iteration_initial_bytes = 0; s->threshold_size = 0; } @@ -2287,7 +2287,8 @@ static void migration_completion(MigrationState *s) qemu_mutex_lock_iothread(); s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); - s->vm_was_running = runstate_is_running(); + + s->vm_old_state = runstate_get(); global_state_store(); ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); @@ -2760,12 +2761,12 @@ static void migration_iteration_finish(MigrationState *s) case MIGRATION_STATUS_COLO: assert(migrate_colo()); migrate_start_colo_process(s); - s->vm_was_running = true; + s->vm_old_state = RUN_STATE_RUNNING; /* Fallthrough */ case MIGRATION_STATUS_FAILED: case MIGRATION_STATUS_CANCELLED: case MIGRATION_STATUS_CANCELLING: - if (s->vm_was_running) { + if (s->vm_old_state == RUN_STATE_RUNNING) { if (!runstate_check(RUN_STATE_SHUTDOWN)) { vm_start(); } @@ -3085,7 +3086,7 @@ static void *bg_migration_thread(void *opaque) * transition in vm_stop_force_state() we need to wakeup it up. 
*/ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); - s->vm_was_running = runstate_is_running(); + s->vm_old_state = runstate_get(); global_state_store(); /* Forcibly stop VM before saving state of vCPUs and devices */ diff --git a/migration/migration.h b/migration/migration.h index 48a46123a0..30c3e97635 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -25,6 +25,7 @@ #include "net/announce.h" #include "qom/object.h" #include "postcopy-ram.h" +#include "sysemu/runstate.h" struct PostcopyBlocktimeContext; @@ -317,12 +318,14 @@ struct MigrationState { int64_t expected_downtime; bool capabilities[MIGRATION_CAPABILITY__MAX]; int64_t setup_time; + /* - * Whether guest was running when we enter the completion stage. + * State before stopping the vm by vm_stop_force_state(). * If migration is interrupted by any reason, we need to continue - * running the guest on source. + * running the guest on source if it was running or restore its stopped + * state. */ - bool vm_was_running; + RunState vm_old_state; /* Flag set once the migration has been asked to enter postcopy */ bool start_postcopy; From a4c6275aa11f153a6a7e614dd867c62b904f8838 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 17 May 2023 15:37:52 +0300 Subject: [PATCH 5/6] migration: restore vmstate on migration failure 1. Otherwise failed migration just drops guest-panicked state, which is not good for management software. 2. We do keep different paused states like guest-panicked during migration with help of global_state state. 3. We do restore running state on source when migration is cancelled or failed. 4. "postmigrate" state is documented as "guest is paused following a successful 'migrate'", so originally it's only for successful path and we never documented current behavior. Let's restore paused states like guest-panicked in case of cancel or fail too. Allow same transitions like for inmigrate state. 
This commit changes the behavior that was introduced by commit 42da5550d6 "migration: set state to post-migrate on failure" and provides a bit different fix on related https://bugzilla.redhat.com/show_bug.cgi?id=1355683 Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Juan Quintela Message-Id: <20230517123752.21615-6-vsementsov@yandex-team.ru> Signed-off-by: Juan Quintela --- migration/migration.c | 2 +- softmmu/runstate.c | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 033162cda0..7c3425c6fe 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2772,7 +2772,7 @@ static void migration_iteration_finish(MigrationState *s) } } else { if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { - runstate_set(RUN_STATE_POSTMIGRATE); + runstate_set(s->vm_old_state); } } break; diff --git a/softmmu/runstate.c b/softmmu/runstate.c index 0370230a5e..1957caf73f 100644 --- a/softmmu/runstate.c +++ b/softmmu/runstate.c @@ -121,7 +121,13 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PAUSED }, { RUN_STATE_FINISH_MIGRATE, RUN_STATE_POSTMIGRATE }, { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PRELAUNCH }, - { RUN_STATE_FINISH_MIGRATE, RUN_STATE_COLO}, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_COLO }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_INTERNAL_ERROR }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_IO_ERROR }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_SHUTDOWN }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_SUSPENDED }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_WATCHDOG }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_GUEST_PANICKED }, { RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING }, { RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH }, From 3a8b81f2e6393828589699bb0b8ef557b9ae5937 Mon Sep 17 00:00:00 2001 From: Fiona Ebner Date: Fri, 26 May 2023 13:59:08 +0200 Subject: [PATCH 6/6] migration: stop tracking ram writes when cancelling background migration Currently, it is only 
done when the iteration finishes successfully. Not cleaning up the userfaultfd write protection can lead to symptoms/issues such as the process hanging in memmove or GDB not being able to attach. Signed-off-by: Fiona Ebner Message-Id: <20230526115908.196171-1-f.ebner@proxmox.com> Signed-off-by: Juan Quintela --- migration/migration.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 7c3425c6fe..dc05c6f6ea 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2400,13 +2400,6 @@ static void bg_migration_completion(MigrationState *s) { int current_active_state = s->state; - /* - * Stop tracking RAM writes - un-protect memory, un-register UFFD - * memory ranges, flush kernel wait queues and wake up threads - * waiting for write fault to be resolved. - */ - ram_write_tracking_stop(); - if (s->state == MIGRATION_STATUS_ACTIVE) { /* * By this moment we have RAM content saved into the migration stream. @@ -2788,6 +2781,13 @@ static void migration_iteration_finish(MigrationState *s) static void bg_migration_iteration_finish(MigrationState *s) { + /* + * Stop tracking RAM writes - un-protect memory, un-register UFFD + * memory ranges, flush kernel wait queues and wake up threads + * waiting for write fault to be resolved. + */ + ram_write_tracking_stop(); + qemu_mutex_lock_iothread(); switch (s->state) { case MIGRATION_STATUS_COMPLETED: