migration/savevm: don't worry if bitmap migration postcopy failed
First, if only bitmaps postcopy is enabled (and not RAM postcopy), postcopy_pause_incoming() crashes on the assertion assert(mis->to_src_file).

Moreover, bitmaps postcopy is not designed to be recovered. The original idea is instead that if bitmaps postcopy fails, we just lose some bitmaps, which is not critical. So, on failure we only need to remove the unfinished bitmaps, and the guest should continue execution on the destination.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20200727194236.19551-18-vsementsov@virtuozzo.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
This commit is contained in:
parent
1499ab0969
commit
ee64722514
@ -1813,6 +1813,9 @@ static void *postcopy_ram_listen_thread(void *opaque)
|
|||||||
MigrationIncomingState *mis = migration_incoming_get_current();
|
MigrationIncomingState *mis = migration_incoming_get_current();
|
||||||
QEMUFile *f = mis->from_src_file;
|
QEMUFile *f = mis->from_src_file;
|
||||||
int load_res;
|
int load_res;
|
||||||
|
MigrationState *migr = migrate_get_current();
|
||||||
|
|
||||||
|
object_ref(OBJECT(migr));
|
||||||
|
|
||||||
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
|
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
|
||||||
MIGRATION_STATUS_POSTCOPY_ACTIVE);
|
MIGRATION_STATUS_POSTCOPY_ACTIVE);
|
||||||
@ -1839,11 +1842,24 @@ static void *postcopy_ram_listen_thread(void *opaque)
|
|||||||
|
|
||||||
trace_postcopy_ram_listen_thread_exit();
|
trace_postcopy_ram_listen_thread_exit();
|
||||||
if (load_res < 0) {
|
if (load_res < 0) {
|
||||||
error_report("%s: loadvm failed: %d", __func__, load_res);
|
|
||||||
qemu_file_set_error(f, load_res);
|
qemu_file_set_error(f, load_res);
|
||||||
|
dirty_bitmap_mig_cancel_incoming();
|
||||||
|
if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
|
||||||
|
!migrate_postcopy_ram() && migrate_dirty_bitmaps())
|
||||||
|
{
|
||||||
|
error_report("%s: loadvm failed during postcopy: %d. All states "
|
||||||
|
"are migrated except dirty bitmaps. Some dirty "
|
||||||
|
"bitmaps may be lost, and present migrated dirty "
|
||||||
|
"bitmaps are correctly migrated and valid.",
|
||||||
|
__func__, load_res);
|
||||||
|
load_res = 0; /* prevent further exit() */
|
||||||
|
} else {
|
||||||
|
error_report("%s: loadvm failed: %d", __func__, load_res);
|
||||||
migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
|
migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
|
||||||
MIGRATION_STATUS_FAILED);
|
MIGRATION_STATUS_FAILED);
|
||||||
} else {
|
}
|
||||||
|
}
|
||||||
|
if (load_res >= 0) {
|
||||||
/*
|
/*
|
||||||
* This looks good, but it's possible that the device loading in the
|
* This looks good, but it's possible that the device loading in the
|
||||||
* main thread hasn't finished yet, and so we might not be in 'RUN'
|
* main thread hasn't finished yet, and so we might not be in 'RUN'
|
||||||
@ -1879,6 +1895,8 @@ static void *postcopy_ram_listen_thread(void *opaque)
|
|||||||
mis->have_listen_thread = false;
|
mis->have_listen_thread = false;
|
||||||
postcopy_state_set(POSTCOPY_INCOMING_END);
|
postcopy_state_set(POSTCOPY_INCOMING_END);
|
||||||
|
|
||||||
|
object_unref(OBJECT(migr));
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2437,6 +2455,8 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
|
|||||||
{
|
{
|
||||||
trace_postcopy_pause_incoming();
|
trace_postcopy_pause_incoming();
|
||||||
|
|
||||||
|
assert(migrate_postcopy_ram());
|
||||||
|
|
||||||
/* Clear the triggered bit to allow one recovery */
|
/* Clear the triggered bit to allow one recovery */
|
||||||
mis->postcopy_recover_triggered = false;
|
mis->postcopy_recover_triggered = false;
|
||||||
|
|
||||||
@ -2521,15 +2541,22 @@ out:
|
|||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
qemu_file_set_error(f, ret);
|
qemu_file_set_error(f, ret);
|
||||||
|
|
||||||
|
/* Cancel bitmaps incoming regardless of recovery */
|
||||||
|
dirty_bitmap_mig_cancel_incoming();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we are during an active postcopy, then we pause instead
|
* If we are during an active postcopy, then we pause instead
|
||||||
* of bail out to at least keep the VM's dirty data. Note
|
* of bail out to at least keep the VM's dirty data. Note
|
||||||
* that POSTCOPY_INCOMING_LISTENING stage is still not enough,
|
* that POSTCOPY_INCOMING_LISTENING stage is still not enough,
|
||||||
* during which we're still receiving device states and we
|
* during which we're still receiving device states and we
|
||||||
* still haven't yet started the VM on destination.
|
* still haven't yet started the VM on destination.
|
||||||
|
*
|
||||||
|
* Only RAM postcopy supports recovery. Still, if RAM postcopy is
|
||||||
|
* enabled, canceled bitmaps postcopy will not affect RAM postcopy
|
||||||
|
* recovering.
|
||||||
*/
|
*/
|
||||||
if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
|
if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
|
||||||
postcopy_pause_incoming(mis)) {
|
migrate_postcopy_ram() && postcopy_pause_incoming(mis)) {
|
||||||
/* Reset f to point to the newly created channel */
|
/* Reset f to point to the newly created channel */
|
||||||
f = mis->from_src_file;
|
f = mis->from_src_file;
|
||||||
goto retry;
|
goto retry;
|
||||||
|
Loading…
Reference in New Issue
Block a user