Migration pull 2019-09-25
me: test fixes from (should stop hangs in postcopy tests).
me: An RDMA cleanup hang fix
Wei: Tidy ups around postcopy
Marc-Andre: mem leak fix

Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20190925a' into staging

# gpg: Signature made Wed 25 Sep 2019 15:59:41 BST
# gpg:                using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full]
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-migration-20190925a:
  migration/postcopy: Recognise the recovery states as 'in_postcopy'
  tests/migration/postcopy: trim migration bandwidth
  tests/migration: Fail on unexpected migration states
  migration/rdma.c: Swap synchronize_rcu for call_rcu
  migration/rdma: Don't moan about disconnects at the end
  migration: remove sent parameter in get_queued_page_not_dirty
  migration/postcopy: unsentmap is not necessary for postcopy
  migration/postcopy: not necessary to do discard when canonicalizing bitmap
  migration: fix vmdesc leak on vmstate_save() error

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit eb13d1cf4a
@@ -44,12 +44,6 @@ struct RAMBlock {
     size_t page_size;
     /* dirty bitmap used during migration */
     unsigned long *bmap;
-    /* bitmap of pages that haven't been sent even once
-     * only maintained and used in postcopy at the moment
-     * where it's used to send the dirtymap at the start
-     * of the postcopy phase
-     */
-    unsigned long *unsentmap;
    /* bitmap of already received pages in postcopy */
    unsigned long *receivedmap;
 
@@ -1659,7 +1659,14 @@ bool migration_in_postcopy(void)
 {
     MigrationState *s = migrate_get_current();
 
-    return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
+    switch (s->state) {
+    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
+    case MIGRATION_STATUS_POSTCOPY_PAUSED:
+    case MIGRATION_STATUS_POSTCOPY_RECOVER:
+        return true;
+    default:
+        return false;
+    }
 }
 
 bool migration_in_postcopy_after_devices(MigrationState *s)
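The switch above makes the two recovery-path states count as "in postcopy". As a sketch (illustration only, not part of the patch), the new predicate is equivalent to:

    /* Equivalent boolean form of the new migration_in_postcopy() body:
     * paused and recovering postcopy migrations now also report true. */
    return s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE ||
           s->state == MIGRATION_STATUS_POSTCOPY_PAUSED ||
           s->state == MIGRATION_STATUS_POSTCOPY_RECOVER;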
@@ -24,4 +24,6 @@ void json_start_object(QJSON *json, const char *name);
 const char *qjson_get_str(QJSON *json);
 void qjson_finish(QJSON *json);
 
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(QJSON, qjson_destroy)
+
 #endif /* QEMU_QJSON_H */
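The new G_DEFINE_AUTOPTR_CLEANUP_FUNC line is GLib's standard hook: it lets a caller declare a variable as g_autoptr(QJSON) and have qjson_destroy() run automatically when the variable leaves scope, on every exit path. A minimal self-contained sketch of the mechanism, using a hypothetical Foo type in place of QJSON:

    #include <glib.h>

    typedef struct Foo { int x; } Foo;

    static void foo_free(Foo *f)
    {
        g_free(f);
    }

    /* Ties g_autoptr(Foo) to foo_free(), as qjson.h now does for QJSON. */
    G_DEFINE_AUTOPTR_CLEANUP_FUNC(Foo, foo_free)

    static int work(gboolean fail_early)
    {
        g_autoptr(Foo) f = g_new0(Foo, 1);

        if (fail_early) {
            return -1;   /* foo_free(f) runs here automatically */
        }
        return f->x;     /* ... and here, after the value is read */
    }

The savevm hunk further down converts its vmdesc local to exactly this pattern.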
@@ -2348,7 +2348,7 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
         dirty = test_bit(page, block->bmap);
         if (!dirty) {
             trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
-                                            page, test_bit(page, block->unsentmap));
+                                            page);
         } else {
             trace_get_queued_page(block->idstr, (uint64_t)offset, page);
         }
@@ -2619,10 +2619,6 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
         }
 
         pages += tmppages;
-        if (pss->block->unsentmap) {
-            clear_bit(pss->page, pss->block->unsentmap);
-        }
-
         pss->page++;
     } while ((pss->page & (pagesize_bits - 1)) &&
              offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
@@ -2776,8 +2772,6 @@ static void ram_save_cleanup(void *opaque)
         block->clear_bmap = NULL;
         g_free(block->bmap);
         block->bmap = NULL;
-        g_free(block->unsentmap);
-        block->unsentmap = NULL;
     }
 
     xbzrle_cleanup();
@@ -2857,8 +2851,6 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms)
  * Returns zero on success
  *
  * Callback from postcopy_each_ram_send_discard for each RAMBlock
- * Note: At this point the 'unsentmap' is the processed bitmap combined
- *       with the dirtymap; so a '1' means it's either dirty or unsent.
  *
  * @ms: current migration state
  * @block: RAMBlock to discard
@@ -2867,17 +2859,17 @@ static int postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block)
 {
     unsigned long end = block->used_length >> TARGET_PAGE_BITS;
     unsigned long current;
-    unsigned long *unsentmap = block->unsentmap;
+    unsigned long *bitmap = block->bmap;
 
     for (current = 0; current < end; ) {
-        unsigned long one = find_next_bit(unsentmap, end, current);
+        unsigned long one = find_next_bit(bitmap, end, current);
         unsigned long zero, discard_length;
 
         if (one >= end) {
             break;
         }
 
-        zero = find_next_zero_bit(unsentmap, end, one + 1);
+        zero = find_next_zero_bit(bitmap, end, one + 1);
 
         if (zero >= end) {
             discard_length = end - one;
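The loop above walks the bitmap as alternating runs: find_next_bit() locates the start of a run of dirty pages, find_next_zero_bit() its end, and each run becomes one discard range. A standalone sketch of that idiom (next_one()/next_zero() are naive stand-ins for QEMU's word-at-a-time bitmap helpers, written out only to show the control flow):

    #include <stddef.h>
    #include <stdbool.h>

    /* Naive stand-ins for find_next_bit()/find_next_zero_bit(). */
    static size_t next_one(const bool *bits, size_t size, size_t from)
    {
        while (from < size && !bits[from]) {
            from++;
        }
        return from;   /* == size when no set bit remains */
    }

    static size_t next_zero(const bool *bits, size_t size, size_t from)
    {
        while (from < size && bits[from]) {
            from++;
        }
        return from;
    }

    /* Visit every run of consecutive set bits, as the discard loop does. */
    static void for_each_run(const bool *bits, size_t size,
                             void (*cb)(size_t start, size_t len))
    {
        for (size_t cur = 0; cur < size; ) {
            size_t one = next_one(bits, size, cur);
            if (one >= size) {
                break;
            }
            size_t zero = next_zero(bits, size, one + 1);
            cb(one, (zero >= size ? size : zero) - one);
            cur = zero + 1;
        }
    }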
@@ -2928,7 +2920,7 @@ static int postcopy_each_ram_send_discard(MigrationState *ms)
 }
 
 /**
- * postcopy_chunk_hostpages_pass: canocalize bitmap in hostpages
+ * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
  *
  * Helper for postcopy_chunk_hostpages; it's called twice to
 * canonicalize the two bitmaps, that are similar, but one is
@@ -2938,16 +2930,12 @@ static int postcopy_each_ram_send_discard(MigrationState *ms)
  * clean, not a mix. This function canonicalizes the bitmaps.
  *
  * @ms: current migration state
- * @unsent_pass: if true we need to canonicalize partially unsent host pages
- *               otherwise we need to canonicalize partially dirty host pages
  * @block: block that contains the page we want to canonicalize
  */
-static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
-                                          RAMBlock *block)
+static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block)
 {
     RAMState *rs = ram_state;
     unsigned long *bitmap = block->bmap;
-    unsigned long *unsentmap = block->unsentmap;
     unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
     unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
     unsigned long run_start;
@@ -2957,13 +2945,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
         return;
     }
 
-    if (unsent_pass) {
-        /* Find a sent page */
-        run_start = find_next_zero_bit(unsentmap, pages, 0);
-    } else {
-        /* Find a dirty page */
-        run_start = find_next_bit(bitmap, pages, 0);
-    }
+    /* Find a dirty page */
+    run_start = find_next_bit(bitmap, pages, 0);
 
     while (run_start < pages) {
@@ -2973,11 +2956,7 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
          */
         if (QEMU_IS_ALIGNED(run_start, host_ratio)) {
             /* Find the end of this run */
-            if (unsent_pass) {
-                run_start = find_next_bit(unsentmap, pages, run_start + 1);
-            } else {
-                run_start = find_next_zero_bit(bitmap, pages, run_start + 1);
-            }
+            run_start = find_next_zero_bit(bitmap, pages, run_start + 1);
             /*
              * If the end isn't at the start of a host page, then the
              * run doesn't finish at the end of a host page
@@ -2991,24 +2970,9 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
                                                              host_ratio);
             run_start = QEMU_ALIGN_UP(run_start, host_ratio);
 
-            /* Tell the destination to discard this page */
-            if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
-                /* For the unsent_pass we:
-                 *     discard partially sent pages
-                 * For the !unsent_pass (dirty) we:
-                 *     discard partially dirty pages that were sent
-                 *     (any partially sent pages were already discarded
-                 *     by the previous unsent_pass)
-                 */
-                postcopy_discard_send_range(ms, fixup_start_addr, host_ratio);
-            }
-
             /* Clean up the bitmap */
             for (page = fixup_start_addr;
                  page < fixup_start_addr + host_ratio; page++) {
-                /* All pages in this host page are now not sent */
-                set_bit(page, unsentmap);
-
                 /*
                  * Remark them as dirty, updating the count for any pages
                  * that weren't previously dirty.
@@ -3017,13 +2981,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
             }
         }
 
-        if (unsent_pass) {
-            /* Find the next sent page for the next iteration */
-            run_start = find_next_zero_bit(unsentmap, pages, run_start);
-        } else {
-            /* Find the next dirty page for the next iteration */
-            run_start = find_next_bit(bitmap, pages, run_start);
-        }
+        /* Find the next dirty page for the next iteration */
+        run_start = find_next_bit(bitmap, pages, run_start);
     }
 }
 
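The fixup arithmetic in these hunks rounds run boundaries out to host-page granularity, so a huge page is always either fully dirty or fully clean. A worked example, assuming 2 MiB host pages over 4 KiB target pages (host_ratio = 512): if a run boundary falls at target page 1000, the enclosing host page covers pages [512, 1024), so fixup_start_addr = QEMU_ALIGN_DOWN(1000, 512) = 512, the scan resumes at QEMU_ALIGN_UP(1000, 512) = 1024, and every target page in between is remarked dirty. A sketch of the two helpers' arithmetic (QEMU's real macros behave like this for any multiple, not just powers of two):

    /* Round down/up to a multiple of n (sketch of QEMU_ALIGN_DOWN/_UP). */
    static unsigned long align_down(unsigned long v, unsigned long n)
    {
        return (v / n) * n;
    }

    static unsigned long align_up(unsigned long v, unsigned long n)
    {
        return align_down(v + n - 1, n);
    }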
@@ -3045,13 +3004,10 @@ static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
 {
     postcopy_discard_send_init(ms, block->idstr);
 
-    /* First pass: Discard all partially sent host pages */
-    postcopy_chunk_hostpages_pass(ms, true, block);
     /*
-     * Second pass: Ensure that all partially dirty host pages are made
-     * fully dirty.
+     * Ensure that all partially dirty host pages are made fully dirty.
      */
-    postcopy_chunk_hostpages_pass(ms, false, block);
+    postcopy_chunk_hostpages_pass(ms, block);
 
     postcopy_discard_send_finish(ms);
     return 0;
@@ -3089,19 +3045,6 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
     rs->last_page = 0;
 
     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
-        unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
-        unsigned long *bitmap = block->bmap;
-        unsigned long *unsentmap = block->unsentmap;
-
-        if (!unsentmap) {
-            /* We don't have a safe way to resize the sentmap, so
-             * if the bitmap was resized it will be NULL at this
-             * point.
-             */
-            error_report("migration ram resized during precopy phase");
-            rcu_read_unlock();
-            return -EINVAL;
-        }
         /* Deal with TPS != HPS and huge pages */
         ret = postcopy_chunk_hostpages(ms, block);
         if (ret) {
@@ -3109,12 +3052,9 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
             return ret;
         }
 
-        /*
-         * Update the unsentmap to be unsentmap = unsentmap | dirty
-         */
-        bitmap_or(unsentmap, unsentmap, bitmap, pages);
 #ifdef DEBUG_POSTCOPY
-        ram_debug_dump_bitmap(unsentmap, true, pages);
+        ram_debug_dump_bitmap(block->bmap, true,
+                              block->used_length >> TARGET_PAGE_BITS);
 #endif
     }
     trace_ram_postcopy_send_discard_bitmap();
@@ -3282,10 +3222,6 @@ static void ram_list_init_bitmaps(void)
             bitmap_set(block->bmap, 0, pages);
             block->clear_bmap_shift = shift;
             block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
-            if (migrate_postcopy_ram()) {
-                block->unsentmap = bitmap_new(pages);
-                bitmap_set(block->unsentmap, 0, pages);
-            }
         }
     }
 }
@@ -3017,11 +3017,35 @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc,
     }
 }
 
+struct rdma_close_rcu {
+    struct rcu_head rcu;
+    RDMAContext *rdmain;
+    RDMAContext *rdmaout;
+};
+
+/* callback from qio_channel_rdma_close via call_rcu */
+static void qio_channel_rdma_close_rcu(struct rdma_close_rcu *rcu)
+{
+    if (rcu->rdmain) {
+        qemu_rdma_cleanup(rcu->rdmain);
+    }
+
+    if (rcu->rdmaout) {
+        qemu_rdma_cleanup(rcu->rdmaout);
+    }
+
+    g_free(rcu->rdmain);
+    g_free(rcu->rdmaout);
+    g_free(rcu);
+}
+
 static int qio_channel_rdma_close(QIOChannel *ioc,
                                   Error **errp)
 {
     QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
     RDMAContext *rdmain, *rdmaout;
+    struct rdma_close_rcu *rcu = g_new(struct rdma_close_rcu, 1);
+
     trace_qemu_rdma_close();
 
     rdmain = rioc->rdmain;
@@ -3034,18 +3058,9 @@ static int qio_channel_rdma_close(QIOChannel *ioc,
         atomic_rcu_set(&rioc->rdmaout, NULL);
     }
 
-    synchronize_rcu();
-
-    if (rdmain) {
-        qemu_rdma_cleanup(rdmain);
-    }
-
-    if (rdmaout) {
-        qemu_rdma_cleanup(rdmaout);
-    }
-
-    g_free(rdmain);
-    g_free(rdmaout);
+    rcu->rdmain = rdmain;
+    rcu->rdmaout = rdmaout;
+    call_rcu(rcu, qio_channel_rdma_close_rcu, rcu);
 
     return 0;
 }
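The hang being fixed: synchronize_rcu() blocks the closing thread until every in-flight RCU read-side critical section has finished, and the close path could be reached from a context where that wait never completes. The patch flips the scheme to call_rcu(): unpublish the pointers first, then hand the actual teardown to a callback that the RCU machinery invokes once no reader can still see them, so close() returns immediately. A generic sketch of the same deferral pattern (Resource and release_resource() are hypothetical; call_rcu's three arguments are the struct, the callback, and the name of the embedded rcu_head field, exactly as in the hunk above):

    /* Deferred free via RCU (sketch; uses QEMU's rcu.h helpers). */
    typedef struct Resource Resource;           /* hypothetical */
    void release_resource(Resource *res);       /* hypothetical */

    struct deferred_free {
        struct rcu_head rcu;    /* linkage consumed by call_rcu() */
        Resource *res;
    };

    static void deferred_free_cb(struct deferred_free *df)
    {
        release_resource(df->res);  /* safe: no reader can reach it now */
        g_free(df);
    }

    static void unpublish_and_free(Resource **slot)
    {
        struct deferred_free *df = g_new(struct deferred_free, 1);

        df->res = atomic_rcu_read(slot);
        atomic_rcu_set(slot, NULL);            /* unpublish first ... */
        call_rcu(df, deferred_free_cb, rcu);   /* ... then defer the free */
    }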
@@ -3253,10 +3268,14 @@ static void rdma_cm_poll_handler(void *opaque)
     if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED ||
         cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
-        error_report("receive cm event, cm event is %d", cm_event->event);
-        rdma->error_state = -EPIPE;
-        if (rdma->return_path) {
-            rdma->return_path->error_state = -EPIPE;
-        }
+        if (!rdma->error_state &&
+            migration_incoming_get_current()->state !=
+              MIGRATION_STATUS_COMPLETED) {
+            error_report("receive cm event, cm event is %d", cm_event->event);
+            rdma->error_state = -EPIPE;
+            if (rdma->return_path) {
+                rdma->return_path->error_state = -EPIPE;
+            }
+        }
     }
 
     if (mis->migration_incoming_co) {
@@ -1314,7 +1314,7 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
                                                     bool in_postcopy,
                                                     bool inactivate_disks)
 {
-    QJSON *vmdesc;
+    g_autoptr(QJSON) vmdesc = NULL;
     int vmdesc_len;
     SaveStateEntry *se;
     int ret;
@@ -1375,7 +1375,6 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
         qemu_put_be32(f, vmdesc_len);
         qemu_put_buffer(f, (uint8_t *)qjson_get_str(vmdesc), vmdesc_len);
     }
-    qjson_destroy(vmdesc);
 
     return 0;
 }
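The leak: qjson_destroy(vmdesc) used to run only at this single point, so error paths that return early from qemu_savevm_state_complete_precopy_non_iterable() never freed vmdesc. With the g_autoptr(QJSON) declaration from the previous hunk (and the cleanup registration added in qjson.h), the destructor now runs on every return. A compressed sketch of the before/after shape (qjson_new() as the constructor name is an assumption for this illustration):

    /* Before (shape of the bug): every early return leaked vmdesc. */
    static int save_sketch_before(bool fail)
    {
        QJSON *vmdesc = qjson_new();     /* constructor name assumed */

        if (fail) {
            return -1;                   /* vmdesc leaked here */
        }
        qjson_destroy(vmdesc);           /* only the success path freed it */
        return 0;
    }

    /* After: scope-based cleanup covers all paths. */
    static int save_sketch_after(bool fail)
    {
        g_autoptr(QJSON) vmdesc = qjson_new();

        if (fail) {
            return -1;                   /* qjson_destroy() runs automatically */
        }
        return 0;
    }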
@@ -76,7 +76,7 @@ qemu_file_fclose(void) ""
 
 # ram.c
 get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
-get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs, int sent) "%s/0x%" PRIx64 " page_abs=0x%lx (sent=%d)"
+get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
 migration_bitmap_sync_start(void) ""
 migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64
 migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx"
@@ -255,15 +255,19 @@ static void read_blocktime(QTestState *who)
 }
 
 static void wait_for_migration_status(QTestState *who,
-                                      const char *goal)
+                                      const char *goal,
+                                      const char **ungoals)
 {
     while (true) {
         bool completed;
         char *status;
+        const char **ungoal;
 
         status = migrate_query_status(who);
         completed = strcmp(status, goal) == 0;
-        g_assert_cmpstr(status, !=, "failed");
+        for (ungoal = ungoals; *ungoal; ungoal++) {
+            g_assert_cmpstr(status, !=, *ungoal);
+        }
         g_free(status);
         if (completed) {
             return;
@@ -274,7 +278,8 @@ static void wait_for_migration_status(QTestState *who,
 
 static void wait_for_migration_complete(QTestState *who)
 {
-    wait_for_migration_status(who, "completed");
+    wait_for_migration_status(who, "completed",
+                              (const char * []) { "failed", NULL });
 }
 
 static void wait_for_migration_pass(QTestState *who)
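The new ungoals parameter is a NULL-terminated list of states that must never be observed while waiting; hitting one fails the test immediately instead of spinning until the harness times out (the hang these test fixes target). Callers pass it inline as a C99 compound literal, e.g.:

    wait_for_migration_status(who, "postcopy-paused",
                              (const char * []) { "failed", "active",
                                                  "completed", NULL });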
@@ -748,7 +753,7 @@ static int migrate_postcopy_prepare(QTestState **from_ptr,
      * quickly, but that it doesn't complete precopy even on a slow
      * machine, so also set the downtime.
      */
-    migrate_set_parameter_int(from, "max-bandwidth", 100000000);
+    migrate_set_parameter_int(from, "max-bandwidth", 30000000);
     migrate_set_parameter_int(from, "downtime-limit", 1);
 
     /* Wait for the first serial output from the source */
@@ -809,7 +814,9 @@ static void test_postcopy_recovery(void)
      * Wait until postcopy is really started; we can only run the
      * migrate-pause command during a postcopy
      */
-    wait_for_migration_status(from, "postcopy-active");
+    wait_for_migration_status(from, "postcopy-active",
+                              (const char * []) { "failed",
+                                                  "completed", NULL });
 
     /*
     * Manually stop the postcopy migration. This emulates a network
@@ -822,7 +829,9 @@ static void test_postcopy_recovery(void)
      * migrate-recover command can only succeed if destination machine
      * is in the paused state
      */
-    wait_for_migration_status(to, "postcopy-paused");
+    wait_for_migration_status(to, "postcopy-paused",
+                              (const char * []) { "failed", "active",
+                                                  "completed", NULL });
 
     /*
     * Create a new socket to emulate a new channel that is different
@@ -836,7 +845,9 @@ static void test_postcopy_recovery(void)
      * Try to rebuild the migration channel using the resume flag and
      * the newly created channel
      */
-    wait_for_migration_status(from, "postcopy-paused");
+    wait_for_migration_status(from, "postcopy-paused",
+                              (const char * []) { "failed", "active",
+                                                  "completed", NULL });
     migrate(from, uri, "{'resume': true}");
     g_free(uri);