Migration pull request
This series fixes problems with migration-cancel while using multifd. In some cases it can hang waiting in a semaphore. Please apply. -----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEGJn/jt6/WMzuA0uC9IfvGFhy1yMFAl05ivkACgkQ9IfvGFhy 1yMg7RAAvcTI2BX4npX2++vBHI8UBx++5SAKpKC8cwvo40jygBbQCdOr4SLkGaR1 wJItPPagF6dGyQ3WXmGbGzfhlVrPyJXGntXrYZ0BNJVOJbMV7M+PxsIWQvUGfYA8 XSnK0izVPFzdCIlhkbGYrK8mJnQ1sVXlNNQ2w3IW/MzS8QLAsfb8HcWXXowyqwDH yU/95ZdRbPeloinuxQD5ivojzpkpyuJMDEVtqgELSPIc/vTFc2chLUNn3zq+6o7e RTXylkHHta8BuLhMQGzDU0dzs5oaOVSsuApy5ELnbarpNxKNGAWLd6Uf87No4DoM BzKBEvdSQx5VxMkm0dYU02ybXkqVtLRPh/QZUEmUj3GezMKU45Ub38hLoyMzeXEK +m6BRdHQ8M5XGlRDCgMWuUjwmqqpyEk9HuW5Xw/WkSfBA3D7glHp72PRMGER0QeH JDUy9Ld9KXL+0kNiaT9uq7Ci+wXNIm4H483Xl6+HxTTKI6J6E8OlrO37o8w1lI5E JOlF8QsTVn2F+ItHFy/W2OLCTwz4VtiS/+NnISebRO/1AmX3oX+CkNNxXfmuzQHy FQX2eFIGIa/BIzVqjjeqr7NPusQSZ/TMnP7cdVTJU1G0ZBf3TGGeEcAGuVydddql Rwvh9fmkJZniaqZ0Z+ELtJ7A3vMyEyb0QFvupO7n8SWbAYDV5BE= =O166 -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/juanquintela/tags/migration-pull-request' into staging Migration pull request This series fixes problems with migration-cancel while using multifd. In some cases it can hang waiting in a semaphore. Please apply. # gpg: Signature made Thu 25 Jul 2019 11:56:57 BST # gpg: using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723 # gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full] # gpg: aka "Juan Quintela <quintela@trasno.org>" [full] # Primary key fingerprint: 1899 FF8E DEBF 58CC EE03 4B82 F487 EF18 5872 D723 * remotes/juanquintela/tags/migration-pull-request: migration: fix migrate_cancel multifd migration leads destination hung forever migration: Make explicit that we are quitting multifd migration: fix migrate_cancel leads live_migration thread hung forever migration: fix migrate_cancel leads live_migration thread endless loop Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
b43bea01b8
@ -677,6 +677,8 @@ typedef struct {
|
||||
QemuMutex mutex;
|
||||
/* is this channel thread running */
|
||||
bool running;
|
||||
/* should this thread finish */
|
||||
bool quit;
|
||||
/* array of pages to receive */
|
||||
MultiFDPages_t *pages;
|
||||
/* packet allocated len */
|
||||
@ -920,7 +922,7 @@ struct {
|
||||
* false.
|
||||
*/
|
||||
|
||||
static void multifd_send_pages(void)
|
||||
static int multifd_send_pages(void)
|
||||
{
|
||||
int i;
|
||||
static int next_channel;
|
||||
@ -933,6 +935,11 @@ static void multifd_send_pages(void)
|
||||
p = &multifd_send_state->params[i];
|
||||
|
||||
qemu_mutex_lock(&p->mutex);
|
||||
if (p->quit) {
|
||||
error_report("%s: channel %d has already quit!", __func__, i);
|
||||
qemu_mutex_unlock(&p->mutex);
|
||||
return -1;
|
||||
}
|
||||
if (!p->pending_job) {
|
||||
p->pending_job++;
|
||||
next_channel = (i + 1) % migrate_multifd_channels();
|
||||
@ -951,9 +958,11 @@ static void multifd_send_pages(void)
|
||||
ram_counters.transferred += transferred;;
|
||||
qemu_mutex_unlock(&p->mutex);
|
||||
qemu_sem_post(&p->sem);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void multifd_queue_page(RAMBlock *block, ram_addr_t offset)
|
||||
static int multifd_queue_page(RAMBlock *block, ram_addr_t offset)
|
||||
{
|
||||
MultiFDPages_t *pages = multifd_send_state->pages;
|
||||
|
||||
@ -968,15 +977,19 @@ static void multifd_queue_page(RAMBlock *block, ram_addr_t offset)
|
||||
pages->used++;
|
||||
|
||||
if (pages->used < pages->allocated) {
|
||||
return;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
multifd_send_pages();
|
||||
if (multifd_send_pages() < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (pages->block != block) {
|
||||
multifd_queue_page(block, offset);
|
||||
return multifd_queue_page(block, offset);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void multifd_send_terminate_threads(Error *err)
|
||||
@ -1049,7 +1062,10 @@ static void multifd_send_sync_main(void)
|
||||
return;
|
||||
}
|
||||
if (multifd_send_state->pages->used) {
|
||||
multifd_send_pages();
|
||||
if (multifd_send_pages() < 0) {
|
||||
error_report("%s: multifd_send_pages fail", __func__);
|
||||
return;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < migrate_multifd_channels(); i++) {
|
||||
MultiFDSendParams *p = &multifd_send_state->params[i];
|
||||
@ -1058,6 +1074,12 @@ static void multifd_send_sync_main(void)
|
||||
|
||||
qemu_mutex_lock(&p->mutex);
|
||||
|
||||
if (p->quit) {
|
||||
error_report("%s: channel %d has already quit", __func__, i);
|
||||
qemu_mutex_unlock(&p->mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
p->packet_num = multifd_send_state->packet_num++;
|
||||
p->flags |= MULTIFD_FLAG_SYNC;
|
||||
p->pending_job++;
|
||||
@ -1077,7 +1099,8 @@ static void *multifd_send_thread(void *opaque)
|
||||
{
|
||||
MultiFDSendParams *p = opaque;
|
||||
Error *local_err = NULL;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
uint32_t flags = 0;
|
||||
|
||||
trace_multifd_send_thread_start(p->id);
|
||||
rcu_register_thread();
|
||||
@ -1095,7 +1118,7 @@ static void *multifd_send_thread(void *opaque)
|
||||
if (p->pending_job) {
|
||||
uint32_t used = p->pages->used;
|
||||
uint64_t packet_num = p->packet_num;
|
||||
uint32_t flags = p->flags;
|
||||
flags = p->flags;
|
||||
|
||||
p->next_packet_size = used * qemu_target_page_size();
|
||||
multifd_send_fill_packet(p);
|
||||
@ -1144,6 +1167,17 @@ out:
|
||||
multifd_send_terminate_threads(local_err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Error happen, I will exit, but I can't just leave, tell
|
||||
* who pay attention to me.
|
||||
*/
|
||||
if (ret != 0) {
|
||||
if (flags & MULTIFD_FLAG_SYNC) {
|
||||
qemu_sem_post(&multifd_send_state->sem_sync);
|
||||
}
|
||||
qemu_sem_post(&multifd_send_state->channels_ready);
|
||||
}
|
||||
|
||||
qemu_mutex_lock(&p->mutex);
|
||||
p->running = false;
|
||||
qemu_mutex_unlock(&p->mutex);
|
||||
@ -1234,6 +1268,7 @@ static void multifd_recv_terminate_threads(Error *err)
|
||||
MultiFDRecvParams *p = &multifd_recv_state->params[i];
|
||||
|
||||
qemu_mutex_lock(&p->mutex);
|
||||
p->quit = true;
|
||||
/* We could arrive here for two reasons:
|
||||
- normal quit, i.e. everything went fine, just finished
|
||||
- error quit: We close the channels so the channel threads
|
||||
@ -1256,6 +1291,12 @@ int multifd_load_cleanup(Error **errp)
|
||||
MultiFDRecvParams *p = &multifd_recv_state->params[i];
|
||||
|
||||
if (p->running) {
|
||||
p->quit = true;
|
||||
/*
|
||||
* multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code,
|
||||
* however try to wakeup it without harm in cleanup phase.
|
||||
*/
|
||||
qemu_sem_post(&p->sem_sync);
|
||||
qemu_thread_join(&p->thread);
|
||||
}
|
||||
object_unref(OBJECT(p->c));
|
||||
@ -1319,6 +1360,10 @@ static void *multifd_recv_thread(void *opaque)
|
||||
uint32_t used;
|
||||
uint32_t flags;
|
||||
|
||||
if (p->quit) {
|
||||
break;
|
||||
}
|
||||
|
||||
ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
|
||||
p->packet_len, &local_err);
|
||||
if (ret == 0) { /* EOF */
|
||||
@ -1390,6 +1435,7 @@ int multifd_load_setup(void)
|
||||
|
||||
qemu_mutex_init(&p->mutex);
|
||||
qemu_sem_init(&p->sem_sync, 0);
|
||||
p->quit = false;
|
||||
p->id = i;
|
||||
p->pages = multifd_pages_init(page_count);
|
||||
p->packet_len = sizeof(MultiFDPacket_t)
|
||||
@ -2033,7 +2079,9 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
|
||||
static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
|
||||
ram_addr_t offset)
|
||||
{
|
||||
multifd_queue_page(block, offset);
|
||||
if (multifd_queue_page(block, offset) < 0) {
|
||||
return -1;
|
||||
}
|
||||
ram_counters.normal++;
|
||||
|
||||
return 1;
|
||||
|
Loading…
Reference in New Issue
Block a user