diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index e61096adfe..571fc1334c 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -73,6 +73,17 @@ static QTestMigrationState dst_state; #define QEMU_ENV_SRC "QTEST_QEMU_BINARY_SRC" #define QEMU_ENV_DST "QTEST_QEMU_BINARY_DST" +typedef enum PostcopyRecoveryFailStage { + /* + * "no failure" must be 0 as it's the default. OTOH, real failure + * cases must be >0 to make sure they trigger by a "if" test. + */ + POSTCOPY_FAIL_NONE = 0, + POSTCOPY_FAIL_CHANNEL_ESTABLISH, + POSTCOPY_FAIL_RECOVERY, + POSTCOPY_FAIL_MAX +} PostcopyRecoveryFailStage; + #if defined(__linux__) #include #include @@ -693,7 +704,7 @@ typedef struct { /* Postcopy specific fields */ void *postcopy_data; bool postcopy_preempt; - bool postcopy_recovery_test_fail; + PostcopyRecoveryFailStage postcopy_recovery_fail_stage; } MigrateCommon; static int test_migrate_start(QTestState **from, QTestState **to, @@ -1371,12 +1382,16 @@ static void wait_for_postcopy_status(QTestState *one, const char *status) "completed", NULL }); } -static void postcopy_recover_fail(QTestState *from, QTestState *to) +static void postcopy_recover_fail(QTestState *from, QTestState *to, + PostcopyRecoveryFailStage stage) { #ifndef _WIN32 + bool fail_early = (stage == POSTCOPY_FAIL_CHANNEL_ESTABLISH); int ret, pair1[2], pair2[2]; char c; + g_assert(stage > POSTCOPY_FAIL_NONE && stage < POSTCOPY_FAIL_MAX); + /* Create two unrelated socketpairs */ ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair1); g_assert_cmpint(ret, ==, 0); @@ -1410,6 +1425,14 @@ static void postcopy_recover_fail(QTestState *from, QTestState *to) ret = send(pair2[1], &c, 1, 0); g_assert_cmpint(ret, ==, 1); + if (stage == POSTCOPY_FAIL_CHANNEL_ESTABLISH) { + /* + * This will make src QEMU to fail at an early stage when trying to + * resume later, where it shouldn't reach RECOVER stage at all. + */ + close(pair1[1]); + } + migrate_recover(to, "fd:fd-mig"); migrate_qmp(from, to, "fd:fd-mig", NULL, "{'resume': true}"); @@ -1419,28 +1442,53 @@ static void postcopy_recover_fail(QTestState *from, QTestState *to) */ migration_event_wait(from, "postcopy-recover-setup"); + if (fail_early) { + /* + * When fails at reconnection, src QEMU will automatically goes + * back to PAUSED state. Making sure there is an event in this + * case: Libvirt relies on this to detect early reconnection + * errors. + */ + migration_event_wait(from, "postcopy-paused"); + } else { + /* + * We want to test "fail later" at RECOVER stage here. Make sure + * both QEMU instances will go into RECOVER stage first, then test + * kicking them out using migrate-pause. + * + * Explicitly check the RECOVER event on src, that's what Libvirt + * relies on, rather than polling. + */ + migration_event_wait(from, "postcopy-recover"); + wait_for_postcopy_status(from, "postcopy-recover"); + + /* Need an explicit kick on src QEMU in this case */ + migrate_pause(from); + } + /* - * Make sure both QEMU instances will go into RECOVER stage, then test - * kicking them out using migrate-pause. + * For all failure cases, we'll reach such states on both sides now. + * Check them. */ - wait_for_postcopy_status(from, "postcopy-recover"); + wait_for_postcopy_status(from, "postcopy-paused"); wait_for_postcopy_status(to, "postcopy-recover"); /* - * This would be issued by the admin upon noticing the hang, we should - * make sure we're able to kick this out. + * Kick dest QEMU out too. This is normally not needed in reality + * because when the channel is shutdown it should also happen on src. + * However here we used separate socket pairs so we need to do that + * explicitly. */ - migrate_pause(from); - wait_for_postcopy_status(from, "postcopy-paused"); - - /* Do the same test on dest */ migrate_pause(to); wait_for_postcopy_status(to, "postcopy-paused"); close(pair1[0]); - close(pair1[1]); close(pair2[0]); close(pair2[1]); + + if (stage != POSTCOPY_FAIL_CHANNEL_ESTABLISH) { + close(pair1[1]); + } #endif } @@ -1482,12 +1530,12 @@ static void test_postcopy_recovery_common(MigrateCommon *args) wait_for_postcopy_status(to, "postcopy-paused"); wait_for_postcopy_status(from, "postcopy-paused"); - if (args->postcopy_recovery_test_fail) { + if (args->postcopy_recovery_fail_stage) { /* * Test when a wrong socket specified for recover, and then the * ability to kick it out, and continue with a correct socket. */ - postcopy_recover_fail(from, to); + postcopy_recover_fail(from, to, args->postcopy_recovery_fail_stage); /* continue with a good recovery */ } @@ -1518,10 +1566,19 @@ static void test_postcopy_recovery(void) test_postcopy_recovery_common(&args); } -static void test_postcopy_recovery_double_fail(void) +static void test_postcopy_recovery_fail_handshake(void) { MigrateCommon args = { - .postcopy_recovery_test_fail = true, + .postcopy_recovery_fail_stage = POSTCOPY_FAIL_RECOVERY, + }; + + test_postcopy_recovery_common(&args); +} + +static void test_postcopy_recovery_fail_reconnect(void) +{ + MigrateCommon args = { + .postcopy_recovery_fail_stage = POSTCOPY_FAIL_CHANNEL_ESTABLISH, }; test_postcopy_recovery_common(&args); @@ -3791,8 +3848,10 @@ int main(int argc, char **argv) test_postcopy_preempt); migration_test_add("/migration/postcopy/preempt/recovery/plain", test_postcopy_preempt_recovery); - migration_test_add("/migration/postcopy/recovery/double-failures", - test_postcopy_recovery_double_fail); + migration_test_add("/migration/postcopy/recovery/double-failures/handshake", + test_postcopy_recovery_fail_handshake); + migration_test_add("/migration/postcopy/recovery/double-failures/reconnect", + test_postcopy_recovery_fail_reconnect); if (is_x86) { migration_test_add("/migration/postcopy/suspend", test_postcopy_suspend);