migration: fix migrate_cancel leads live_migration thread endless loop
When we 'migrate_cancel' a multifd migration, the live_migration thread may go into an endless loop in the multifd_send_pages() function.

Reproduce steps:

  (qemu) migrate_set_capability multifd on
  (qemu) migrate -d url
  (qemu) [wait a while]
  (qemu) migrate_cancel

The live_migration thread may then spin at 100% CPU usage with the following stack:

  pthread_mutex_lock
  qemu_mutex_lock_impl
  multifd_send_pages
  multifd_queue_page
  ram_save_multifd_page
  ram_save_target_page
  ram_save_host_page
  ram_find_and_save_block
  ram_find_and_save_block
  ram_save_iterate
  qemu_savevm_state_iterate
  migration_iteration_run
  migration_thread
  qemu_thread_start
  start_thread
  clone

Signed-off-by: Ivan Ren <ivanren@tencent.com>
Message-Id: <1561468699-9819-2-git-send-email-ivanren@tencent.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
commit 713f762a31
parent bf8b024372
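Why the thread spins: multifd_send_pages() cycles over the multifd channels looking for one with no pending job, but after migrate_cancel every channel has set p->quit and will never finish its pending job, so the scan never finds a free channel and never returns. The snippet below is a minimal, self-contained sketch of that pre-fix selection loop (simplified, no locking; the Channel type and the send_pages()/check_quit names are invented for illustration and are not the upstream QEMU code). It shows how checking the quit flag, as the patch does, turns the endless spin into an error return:

    /*
     * Toy model of the channel-selection loop.  Without the quit check,
     * once every channel has quit with work still pending, the loop below
     * never finds an idle channel and never terminates.
     */
    #include <stdbool.h>
    #include <stdio.h>

    #define CHANNELS 2

    typedef struct {
        bool quit;          /* set on cancellation in this toy model */
        int  pending_job;   /* > 0 while the channel still owns a packet */
    } Channel;

    static Channel channels[CHANNELS];

    /* Returns the chosen channel index, or -1 once a channel has quit. */
    static int send_pages(bool check_quit)
    {
        static int next_channel;

        for (int i = next_channel;; i = (i + 1) % CHANNELS) {
            Channel *p = &channels[i];

            if (check_quit && p->quit) {
                return -1;                    /* the fix: bail out */
            }
            if (p->pending_job == 0) {        /* idle channel found */
                p->pending_job++;
                next_channel = (i + 1) % CHANNELS;
                return i;
            }
        }
    }

    int main(void)
    {
        /* Simulate a cancel: all channels quit with work still pending. */
        for (int i = 0; i < CHANNELS; i++) {
            channels[i] = (Channel){ .quit = true, .pending_job = 1 };
        }

        printf("with quit check: %d\n", send_pages(true));   /* prints -1 */
        /* send_pages(false) would spin forever here, eating 100% CPU. */
        return 0;
    }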
migration/ram.c:

@@ -920,7 +920,7 @@ struct {
  * false.
  */
 
-static void multifd_send_pages(void)
+static int multifd_send_pages(void)
 {
     int i;
     static int next_channel;
@@ -933,6 +933,11 @@ static void multifd_send_pages(void)
         p = &multifd_send_state->params[i];
 
         qemu_mutex_lock(&p->mutex);
+        if (p->quit) {
+            error_report("%s: channel %d has already quit!", __func__, i);
+            qemu_mutex_unlock(&p->mutex);
+            return -1;
+        }
         if (!p->pending_job) {
             p->pending_job++;
             next_channel = (i + 1) % migrate_multifd_channels();
@@ -951,9 +956,11 @@ static void multifd_send_pages(void)
     ram_counters.transferred += transferred;;
     qemu_mutex_unlock(&p->mutex);
     qemu_sem_post(&p->sem);
+
+    return 1;
 }
 
-static void multifd_queue_page(RAMBlock *block, ram_addr_t offset)
+static int multifd_queue_page(RAMBlock *block, ram_addr_t offset)
 {
     MultiFDPages_t *pages = multifd_send_state->pages;
 
@@ -968,15 +975,19 @@ static void multifd_queue_page(RAMBlock *block, ram_addr_t offset)
         pages->used++;
 
         if (pages->used < pages->allocated) {
-            return;
+            return 1;
         }
     }
 
-    multifd_send_pages();
+    if (multifd_send_pages() < 0) {
+        return -1;
+    }
 
     if (pages->block != block) {
-        multifd_queue_page(block, offset);
+        return multifd_queue_page(block, offset);
     }
+
+    return 1;
 }
 
 static void multifd_send_terminate_threads(Error *err)
@@ -1049,7 +1060,10 @@ static void multifd_send_sync_main(void)
         return;
     }
     if (multifd_send_state->pages->used) {
-        multifd_send_pages();
+        if (multifd_send_pages() < 0) {
+            error_report("%s: multifd_send_pages fail", __func__);
+            return;
+        }
     }
     for (i = 0; i < migrate_multifd_channels(); i++) {
         MultiFDSendParams *p = &multifd_send_state->params[i];
@@ -1058,6 +1072,12 @@ static void multifd_send_sync_main(void)
 
         qemu_mutex_lock(&p->mutex);
+
+        if (p->quit) {
+            error_report("%s: channel %d has already quit", __func__, i);
+            qemu_mutex_unlock(&p->mutex);
+            return;
+        }
 
         p->packet_num = multifd_send_state->packet_num++;
         p->flags |= MULTIFD_FLAG_SYNC;
         p->pending_job++;
@@ -2033,7 +2053,9 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
 static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
                                  ram_addr_t offset)
 {
-    multifd_queue_page(block, offset);
+    if (multifd_queue_page(block, offset) < 0) {
+        return -1;
+    }
     ram_counters.normal++;
 
     return 1;
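With these changes multifd_send_pages(), multifd_queue_page() and ram_save_multifd_page() return 1 on success and -1 once a channel has quit, so a cancelled multifd migration makes the sender bail out of the channel-selection loop instead of retrying it forever, and multifd_send_sync_main() likewise returns early when a channel has already quit rather than queueing more work on it.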