Migration pull request

This series fixes problems with migration-cancel while using multifd.
 In some cases it can hang waiting in a semaphore.
 
 Please apply.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEGJn/jt6/WMzuA0uC9IfvGFhy1yMFAl05ivkACgkQ9IfvGFhy
 1yMg7RAAvcTI2BX4npX2++vBHI8UBx++5SAKpKC8cwvo40jygBbQCdOr4SLkGaR1
 wJItPPagF6dGyQ3WXmGbGzfhlVrPyJXGntXrYZ0BNJVOJbMV7M+PxsIWQvUGfYA8
 XSnK0izVPFzdCIlhkbGYrK8mJnQ1sVXlNNQ2w3IW/MzS8QLAsfb8HcWXXowyqwDH
 yU/95ZdRbPeloinuxQD5ivojzpkpyuJMDEVtqgELSPIc/vTFc2chLUNn3zq+6o7e
 RTXylkHHta8BuLhMQGzDU0dzs5oaOVSsuApy5ELnbarpNxKNGAWLd6Uf87No4DoM
 BzKBEvdSQx5VxMkm0dYU02ybXkqVtLRPh/QZUEmUj3GezMKU45Ub38hLoyMzeXEK
 +m6BRdHQ8M5XGlRDCgMWuUjwmqqpyEk9HuW5Xw/WkSfBA3D7glHp72PRMGER0QeH
 JDUy9Ld9KXL+0kNiaT9uq7Ci+wXNIm4H483Xl6+HxTTKI6J6E8OlrO37o8w1lI5E
 JOlF8QsTVn2F+ItHFy/W2OLCTwz4VtiS/+NnISebRO/1AmX3oX+CkNNxXfmuzQHy
 FQX2eFIGIa/BIzVqjjeqr7NPusQSZ/TMnP7cdVTJU1G0ZBf3TGGeEcAGuVydddql
 Rwvh9fmkJZniaqZ0Z+ELtJ7A3vMyEyb0QFvupO7n8SWbAYDV5BE=
 =O166
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/juanquintela/tags/migration-pull-request' into staging

Migration pull request

This series fixes problems with migration-cancel while using multifd.
In some cases the migration could hang while waiting on a semaphore.

Please apply.

# gpg: Signature made Thu 25 Jul 2019 11:56:57 BST
# gpg:                using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full]
# gpg:                 aka "Juan Quintela <quintela@trasno.org>" [full]
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03  4B82 F487 EF18 5872 D723

* remotes/juanquintela/tags/migration-pull-request:
  migration: fix migrate_cancel multifd migration leads destination hung forever
  migration: Make explicit that we are quitting multifd
  migration: fix migrate_cancel leads live_migration thread hung forever
  migration: fix migrate_cancel leads live_migration thread endless loop

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2019-07-25 12:09:12 +01:00
commit b43bea01b8

View File

@ -677,6 +677,8 @@ typedef struct {
QemuMutex mutex; QemuMutex mutex;
/* is this channel thread running */ /* is this channel thread running */
bool running; bool running;
/* should this thread finish */
bool quit;
/* array of pages to receive */ /* array of pages to receive */
MultiFDPages_t *pages; MultiFDPages_t *pages;
/* packet allocated len */ /* packet allocated len */
@ -920,7 +922,7 @@ struct {
* false. * false.
*/ */
static void multifd_send_pages(void) static int multifd_send_pages(void)
{ {
int i; int i;
static int next_channel; static int next_channel;
@ -933,6 +935,11 @@ static void multifd_send_pages(void)
p = &multifd_send_state->params[i]; p = &multifd_send_state->params[i];
qemu_mutex_lock(&p->mutex); qemu_mutex_lock(&p->mutex);
if (p->quit) {
error_report("%s: channel %d has already quit!", __func__, i);
qemu_mutex_unlock(&p->mutex);
return -1;
}
if (!p->pending_job) { if (!p->pending_job) {
p->pending_job++; p->pending_job++;
next_channel = (i + 1) % migrate_multifd_channels(); next_channel = (i + 1) % migrate_multifd_channels();
@ -951,9 +958,11 @@ static void multifd_send_pages(void)
ram_counters.transferred += transferred;; ram_counters.transferred += transferred;;
qemu_mutex_unlock(&p->mutex); qemu_mutex_unlock(&p->mutex);
qemu_sem_post(&p->sem); qemu_sem_post(&p->sem);
return 1;
} }
static void multifd_queue_page(RAMBlock *block, ram_addr_t offset) static int multifd_queue_page(RAMBlock *block, ram_addr_t offset)
{ {
MultiFDPages_t *pages = multifd_send_state->pages; MultiFDPages_t *pages = multifd_send_state->pages;
@ -968,15 +977,19 @@ static void multifd_queue_page(RAMBlock *block, ram_addr_t offset)
pages->used++; pages->used++;
if (pages->used < pages->allocated) { if (pages->used < pages->allocated) {
return; return 1;
} }
} }
multifd_send_pages(); if (multifd_send_pages() < 0) {
return -1;
}
if (pages->block != block) { if (pages->block != block) {
multifd_queue_page(block, offset); return multifd_queue_page(block, offset);
} }
return 1;
} }
static void multifd_send_terminate_threads(Error *err) static void multifd_send_terminate_threads(Error *err)
@ -1049,7 +1062,10 @@ static void multifd_send_sync_main(void)
return; return;
} }
if (multifd_send_state->pages->used) { if (multifd_send_state->pages->used) {
multifd_send_pages(); if (multifd_send_pages() < 0) {
error_report("%s: multifd_send_pages fail", __func__);
return;
}
} }
for (i = 0; i < migrate_multifd_channels(); i++) { for (i = 0; i < migrate_multifd_channels(); i++) {
MultiFDSendParams *p = &multifd_send_state->params[i]; MultiFDSendParams *p = &multifd_send_state->params[i];
@ -1058,6 +1074,12 @@ static void multifd_send_sync_main(void)
qemu_mutex_lock(&p->mutex); qemu_mutex_lock(&p->mutex);
if (p->quit) {
error_report("%s: channel %d has already quit", __func__, i);
qemu_mutex_unlock(&p->mutex);
return;
}
p->packet_num = multifd_send_state->packet_num++; p->packet_num = multifd_send_state->packet_num++;
p->flags |= MULTIFD_FLAG_SYNC; p->flags |= MULTIFD_FLAG_SYNC;
p->pending_job++; p->pending_job++;
@ -1077,7 +1099,8 @@ static void *multifd_send_thread(void *opaque)
{ {
MultiFDSendParams *p = opaque; MultiFDSendParams *p = opaque;
Error *local_err = NULL; Error *local_err = NULL;
int ret; int ret = 0;
uint32_t flags = 0;
trace_multifd_send_thread_start(p->id); trace_multifd_send_thread_start(p->id);
rcu_register_thread(); rcu_register_thread();
@ -1095,7 +1118,7 @@ static void *multifd_send_thread(void *opaque)
if (p->pending_job) { if (p->pending_job) {
uint32_t used = p->pages->used; uint32_t used = p->pages->used;
uint64_t packet_num = p->packet_num; uint64_t packet_num = p->packet_num;
uint32_t flags = p->flags; flags = p->flags;
p->next_packet_size = used * qemu_target_page_size(); p->next_packet_size = used * qemu_target_page_size();
multifd_send_fill_packet(p); multifd_send_fill_packet(p);
@ -1144,6 +1167,17 @@ out:
multifd_send_terminate_threads(local_err); multifd_send_terminate_threads(local_err);
} }
/*
* An error occurred and this thread is about to exit. Before leaving,
* wake up anyone who may be waiting on this channel.
*/
if (ret != 0) {
if (flags & MULTIFD_FLAG_SYNC) {
qemu_sem_post(&multifd_send_state->sem_sync);
}
qemu_sem_post(&multifd_send_state->channels_ready);
}
qemu_mutex_lock(&p->mutex); qemu_mutex_lock(&p->mutex);
p->running = false; p->running = false;
qemu_mutex_unlock(&p->mutex); qemu_mutex_unlock(&p->mutex);
@ -1234,6 +1268,7 @@ static void multifd_recv_terminate_threads(Error *err)
MultiFDRecvParams *p = &multifd_recv_state->params[i]; MultiFDRecvParams *p = &multifd_recv_state->params[i];
qemu_mutex_lock(&p->mutex); qemu_mutex_lock(&p->mutex);
p->quit = true;
/* We could arrive here for two reasons: /* We could arrive here for two reasons:
- normal quit, i.e. everything went fine, just finished - normal quit, i.e. everything went fine, just finished
- error quit: We close the channels so the channel threads - error quit: We close the channels so the channel threads
@ -1256,6 +1291,12 @@ int multifd_load_cleanup(Error **errp)
MultiFDRecvParams *p = &multifd_recv_state->params[i]; MultiFDRecvParams *p = &multifd_recv_state->params[i];
if (p->running) { if (p->running) {
p->quit = true;
/*
* multifd_recv_thread may hang in the MULTIFD_FLAG_SYNC handling code;
* waking it up here is harmless during the cleanup phase.
*/
qemu_sem_post(&p->sem_sync);
qemu_thread_join(&p->thread); qemu_thread_join(&p->thread);
} }
object_unref(OBJECT(p->c)); object_unref(OBJECT(p->c));
@ -1319,6 +1360,10 @@ static void *multifd_recv_thread(void *opaque)
uint32_t used; uint32_t used;
uint32_t flags; uint32_t flags;
if (p->quit) {
break;
}
ret = qio_channel_read_all_eof(p->c, (void *)p->packet, ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
p->packet_len, &local_err); p->packet_len, &local_err);
if (ret == 0) { /* EOF */ if (ret == 0) { /* EOF */
@ -1390,6 +1435,7 @@ int multifd_load_setup(void)
qemu_mutex_init(&p->mutex); qemu_mutex_init(&p->mutex);
qemu_sem_init(&p->sem_sync, 0); qemu_sem_init(&p->sem_sync, 0);
p->quit = false;
p->id = i; p->id = i;
p->pages = multifd_pages_init(page_count); p->pages = multifd_pages_init(page_count);
p->packet_len = sizeof(MultiFDPacket_t) p->packet_len = sizeof(MultiFDPacket_t)
@ -2033,7 +2079,9 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
static int ram_save_multifd_page(RAMState *rs, RAMBlock *block, static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
ram_addr_t offset) ram_addr_t offset)
{ {
multifd_queue_page(block, offset); if (multifd_queue_page(block, offset) < 0) {
return -1;
}
ram_counters.normal++; ram_counters.normal++;
return 1; return 1;