From 99c3ac6dbe18cdab3686a830d129f8fa586846bb Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 1 Apr 2021 15:58:45 +0100 Subject: [PATCH 1/6] virtiofsd: Fix security.capability comparison My security fix for the security.capability remap has a silly early segfault in a simple case where there is an xattrmapping but it doesn't remap the security.capability. Fixes: e586edcb41054 ("virtiofs: drop remapped security.capability xattr as needed") Signed-off-by: Dr. David Alan Gilbert Message-Id: <20210401145845.78445-1-dgilbert@redhat.com> Reviewed-by: Connor Kuehl Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/passthrough_ll.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index b144320e48..1553d2ef45 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -2636,7 +2636,8 @@ static void parse_xattrmap(struct lo_data *lo) strerror(ret)); exit(1); } - if (!strcmp(lo->xattr_security_capability, "security.capability")) { + if (!lo->xattr_security_capability || + !strcmp(lo->xattr_security_capability, "security.capability")) { /* 1-1 mapping, don't need to do anything */ free(lo->xattr_security_capability); lo->xattr_security_capability = NULL; From ecb23efea0899be6723f4ea9636de5cf7de90cfe Mon Sep 17 00:00:00 2001 From: Andrey Gruzdev Date: Thu, 1 Apr 2021 12:22:23 +0300 Subject: [PATCH 2/6] migration: Fix missing qemu_fflush() on buffer file in bg_migration_thread Added missing qemu_fflush() on buffer file holding precopy device state. Increased initial QIOChannelBuffer allocation to 512KB to avoid reallocs. Typical configurations often require >200KB for device state and VMDESC. Fixes: 8518278a6af589ccc401f06e35f171b1e6fae800 (migration: implementation of background snapshot thread) Signed-off-by: Andrey Gruzdev Message-Id: <20210401092226.102804-2-andrey.gruzdev@virtuozzo.com> Reviewed-by: Dr. 
David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index ca8b97baa5..00e13f9d58 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -3812,7 +3812,7 @@ static void *bg_migration_thread(void *opaque) * with vCPUs running and, finally, write stashed non-RAM part of * the vmstate from the buffer to the migration stream. */ - s->bioc = qio_channel_buffer_new(128 * 1024); + s->bioc = qio_channel_buffer_new(512 * 1024); qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer"); fb = qemu_fopen_channel_output(QIO_CHANNEL(s->bioc)); object_unref(OBJECT(s->bioc)); @@ -3866,6 +3866,12 @@ static void *bg_migration_thread(void *opaque) if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) { goto fail; } + /* + * Since we are going to get non-iterable state data directly + * from s->bioc->data, explicit flush is needed here. + */ + qemu_fflush(fb); + /* Now initialize UFFD context and start tracking RAM writes */ if (ram_write_tracking_start()) { goto fail; From 1a8e44a89f1976e06300393337f78d561f95b339 Mon Sep 17 00:00:00 2001 From: Andrey Gruzdev Date: Thu, 1 Apr 2021 12:22:24 +0300 Subject: [PATCH 3/6] migration: Inhibit virtio-balloon for the duration of background snapshot The same thing as for incoming postcopy - we cannot deal with concurrent RAM discards when using background snapshot feature in outgoing migration. Fixes: 8518278a6af589ccc401f06e35f171b1e6fae800 (migration: implementation of background snapshot thread) Signed-off-by: Andrey Gruzdev Reported-by: David Hildenbrand Reviewed-by: David Hildenbrand Message-Id: <20210401092226.102804-3-andrey.gruzdev@virtuozzo.com> Signed-off-by: Dr. 
David Alan Gilbert --- hw/virtio/virtio-balloon.c | 8 ++++++-- include/migration/misc.h | 2 ++ migration/migration.c | 8 ++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index e770955176..d120bf8f43 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -66,8 +66,12 @@ static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage *pbp, static bool virtio_balloon_inhibited(void) { - /* Postcopy cannot deal with concurrent discards, so it's special. */ - return ram_block_discard_is_disabled() || migration_in_incoming_postcopy(); + /* + * Postcopy cannot deal with concurrent discards, + * so it's special, as well as background snapshots. + */ + return ram_block_discard_is_disabled() || migration_in_incoming_postcopy() || + migration_in_bg_snapshot(); } static void balloon_inflate_page(VirtIOBalloon *balloon, diff --git a/include/migration/misc.h b/include/migration/misc.h index bccc1b6b44..738675ef52 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -70,6 +70,8 @@ bool migration_in_postcopy_after_devices(MigrationState *); void migration_global_dump(Monitor *mon); /* True if incomming migration entered POSTCOPY_INCOMING_DISCARD */ bool migration_in_incoming_postcopy(void); +/* True if background snapshot is active */ +bool migration_in_bg_snapshot(void); /* migration/block-dirty-bitmap.c */ void dirty_bitmap_mig_init(void); diff --git a/migration/migration.c b/migration/migration.c index 00e13f9d58..be4729e7c8 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1976,6 +1976,14 @@ bool migration_in_incoming_postcopy(void) return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END; } +bool migration_in_bg_snapshot(void) +{ + MigrationState *s = migrate_get_current(); + + return migrate_background_snapshot() && + migration_is_setup_or_active(s->state); +} + bool migration_is_idle(void) { MigrationState *s = current_migration; 
From eeccb99c9d28484303f721e94c5084e9c29a3d03 Mon Sep 17 00:00:00 2001 From: Andrey Gruzdev Date: Thu, 1 Apr 2021 12:22:25 +0300 Subject: [PATCH 4/6] migration: Pre-fault memory before starting background snapshot This commit solves the issue with userfault_fd WP feature that background snapshot is based on. For any never populated or discarded memory page, the UFFDIO_WRITEPROTECT ioctl() would skip updating PTE for that page, thereby losing WP setting for it. So we need to pre-fault pages for each RAM block to be protected before making a userfault_fd wr-protect ioctl(). Fixes: 278e2f551a095b234de74dca9c214d5502a1f72c (migration: support UFFD write fault processing in ram_save_iterate()) Signed-off-by: Andrey Gruzdev Reported-by: David Hildenbrand Reviewed-by: David Hildenbrand Message-Id: <20210401092226.102804-4-andrey.gruzdev@virtuozzo.com> Signed-off-by: Dr. David Alan Gilbert dgilbert: Bodged ifdef __linux__ on ram_write_tracking_prepare, should really go in a stub --- migration/migration.c | 8 +++++++ migration/ram.c | 49 +++++++++++++++++++++++++++++++++++++++++++ migration/ram.h | 1 + 3 files changed, 58 insertions(+) diff --git a/migration/migration.c b/migration/migration.c index be4729e7c8..8ca034136b 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -3827,6 +3827,14 @@ static void *bg_migration_thread(void *opaque) update_iteration_initial_status(s); + /* + * Prepare for tracking memory writes with UFFD-WP - populate + * RAM pages before protecting. + */ +#ifdef __linux__ + ram_write_tracking_prepare(); +#endif + qemu_savevm_state_header(s->to_dst_file); qemu_savevm_state_setup(s->to_dst_file); diff --git a/migration/ram.c b/migration/ram.c index 40e78952ad..7e2bc0fdd3 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1560,6 +1560,55 @@ out: return ret; } +/* + * ram_block_populate_pages: populate memory in the RAM block by reading + * an integer from the beginning of each page. 
+ * + * Since it's solely used for userfault_fd WP feature, here we just + * hardcode page size to qemu_real_host_page_size. + * + * @bs: RAM block to populate + */ +static void ram_block_populate_pages(RAMBlock *bs) +{ + char *ptr = (char *) bs->host; + + for (ram_addr_t offset = 0; offset < bs->used_length; + offset += qemu_real_host_page_size) { + char tmp = *(ptr + offset); + + /* Don't optimize the read out */ + asm volatile("" : "+r" (tmp)); + } +} + +/* + * ram_write_tracking_prepare: prepare for UFFD-WP memory tracking + */ +void ram_write_tracking_prepare(void) +{ + RAMBlock *bs; + + RCU_READ_LOCK_GUARD(); + + RAMBLOCK_FOREACH_NOT_IGNORED(bs) { + /* Nothing to do with read-only and MMIO-writable regions */ + if (bs->mr->readonly || bs->mr->rom_device) { + continue; + } + + /* + * Populate pages of the RAM block before enabling userfault_fd + * write protection. + * + * This stage is required since ioctl(UFFDIO_WRITEPROTECT) with + * UFFDIO_WRITEPROTECT_MODE_WP mode setting would silently skip + * pages with pte_none() entries in page table. + */ + ram_block_populate_pages(bs); + } +} + /* * ram_write_tracking_start: start UFFD-WP memory tracking * diff --git a/migration/ram.h b/migration/ram.h index 6378bb3ebc..4833e9fd5b 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -82,6 +82,7 @@ void colo_incoming_start_dirty_log(void); /* Background snapshot */ bool ram_write_tracking_available(void); bool ram_write_tracking_compatible(void); +void ram_write_tracking_prepare(void); int ram_write_tracking_start(void); void ram_write_tracking_stop(void); From 82ea3e3b9911ae05fcd9de5b8958795b9316cc83 Mon Sep 17 00:00:00 2001 From: Andrey Gruzdev Date: Thu, 1 Apr 2021 12:22:26 +0300 Subject: [PATCH 5/6] migration: Rename 'bs' to 'block' in background snapshot code Rename 'bs' to commonly used 'block' in migration/ram.c background snapshot code. 
Signed-off-by: Andrey Gruzdev Reported-by: David Hildenbrand Message-Id: <20210401092226.102804-5-andrey.gruzdev@virtuozzo.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 86 +++++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 7e2bc0fdd3..4682f3625c 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1455,7 +1455,7 @@ static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset) { struct uffd_msg uffd_msg; void *page_address; - RAMBlock *bs; + RAMBlock *block; int res; if (!migrate_background_snapshot()) { @@ -1468,9 +1468,9 @@ static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset) } page_address = (void *)(uintptr_t) uffd_msg.arg.pagefault.address; - bs = qemu_ram_block_from_host(page_address, false, offset); - assert(bs && (bs->flags & RAM_UF_WRITEPROTECT) != 0); - return bs; + block = qemu_ram_block_from_host(page_address, false, offset); + assert(block && (block->flags & RAM_UF_WRITEPROTECT) != 0); + return block; } /** @@ -1526,7 +1526,7 @@ bool ram_write_tracking_compatible(void) { const uint64_t uffd_ioctls_mask = BIT(_UFFDIO_WRITEPROTECT); int uffd_fd; - RAMBlock *bs; + RAMBlock *block; bool ret = false; /* Open UFFD file descriptor */ @@ -1537,15 +1537,15 @@ bool ram_write_tracking_compatible(void) RCU_READ_LOCK_GUARD(); - RAMBLOCK_FOREACH_NOT_IGNORED(bs) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { uint64_t uffd_ioctls; /* Nothing to do with read-only and MMIO-writable regions */ - if (bs->mr->readonly || bs->mr->rom_device) { + if (block->mr->readonly || block->mr->rom_device) { continue; } /* Try to register block memory via UFFD-IO to track writes */ - if (uffd_register_memory(uffd_fd, bs->host, bs->max_length, + if (uffd_register_memory(uffd_fd, block->host, block->max_length, UFFDIO_REGISTER_MODE_WP, &uffd_ioctls)) { goto out; } @@ -1567,13 +1567,13 @@ out: * Since it's solely used 
for userfault_fd WP feature, here we just * hardcode page size to qemu_real_host_page_size. * - * @bs: RAM block to populate + * @block: RAM block to populate */ -static void ram_block_populate_pages(RAMBlock *bs) +static void ram_block_populate_pages(RAMBlock *block) { - char *ptr = (char *) bs->host; + char *ptr = (char *) block->host; - for (ram_addr_t offset = 0; offset < bs->used_length; + for (ram_addr_t offset = 0; offset < block->used_length; offset += qemu_real_host_page_size) { char tmp = *(ptr + offset); @@ -1587,13 +1587,13 @@ static void ram_block_populate_pages(RAMBlock *bs) */ void ram_write_tracking_prepare(void) { - RAMBlock *bs; + RAMBlock *block; RCU_READ_LOCK_GUARD(); - RAMBLOCK_FOREACH_NOT_IGNORED(bs) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { /* Nothing to do with read-only and MMIO-writable regions */ - if (bs->mr->readonly || bs->mr->rom_device) { + if (block->mr->readonly || block->mr->rom_device) { continue; } @@ -1605,7 +1605,7 @@ void ram_write_tracking_prepare(void) * UFFDIO_WRITEPROTECT_MODE_WP mode setting would silently skip * pages with pte_none() entries in page table. 
*/ - ram_block_populate_pages(bs); + ram_block_populate_pages(block); } } @@ -1618,7 +1618,7 @@ int ram_write_tracking_start(void) { int uffd_fd; RAMState *rs = ram_state; - RAMBlock *bs; + RAMBlock *block; /* Open UFFD file descriptor */ uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, true); @@ -1629,27 +1629,27 @@ int ram_write_tracking_start(void) RCU_READ_LOCK_GUARD(); - RAMBLOCK_FOREACH_NOT_IGNORED(bs) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { /* Nothing to do with read-only and MMIO-writable regions */ - if (bs->mr->readonly || bs->mr->rom_device) { + if (block->mr->readonly || block->mr->rom_device) { continue; } /* Register block memory with UFFD to track writes */ - if (uffd_register_memory(rs->uffdio_fd, bs->host, - bs->max_length, UFFDIO_REGISTER_MODE_WP, NULL)) { + if (uffd_register_memory(rs->uffdio_fd, block->host, + block->max_length, UFFDIO_REGISTER_MODE_WP, NULL)) { goto fail; } /* Apply UFFD write protection to the block memory range */ - if (uffd_change_protection(rs->uffdio_fd, bs->host, - bs->max_length, true, false)) { + if (uffd_change_protection(rs->uffdio_fd, block->host, + block->max_length, true, false)) { goto fail; } - bs->flags |= RAM_UF_WRITEPROTECT; - memory_region_ref(bs->mr); + block->flags |= RAM_UF_WRITEPROTECT; + memory_region_ref(block->mr); - trace_ram_write_tracking_ramblock_start(bs->idstr, bs->page_size, - bs->host, bs->max_length); + trace_ram_write_tracking_ramblock_start(block->idstr, block->page_size, + block->host, block->max_length); } return 0; @@ -1657,19 +1657,20 @@ int ram_write_tracking_start(void) fail: error_report("ram_write_tracking_start() failed: restoring initial memory state"); - RAMBLOCK_FOREACH_NOT_IGNORED(bs) { - if ((bs->flags & RAM_UF_WRITEPROTECT) == 0) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { + if ((block->flags & RAM_UF_WRITEPROTECT) == 0) { continue; } /* * In case some memory block failed to be write-protected * remove protection and unregister all succeeded RAM blocks */ - 
uffd_change_protection(rs->uffdio_fd, bs->host, bs->max_length, false, false); - uffd_unregister_memory(rs->uffdio_fd, bs->host, bs->max_length); + uffd_change_protection(rs->uffdio_fd, block->host, block->max_length, + false, false); + uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length); /* Cleanup flags and remove reference */ - bs->flags &= ~RAM_UF_WRITEPROTECT; - memory_region_unref(bs->mr); + block->flags &= ~RAM_UF_WRITEPROTECT; + memory_region_unref(block->mr); } uffd_close_fd(uffd_fd); @@ -1683,24 +1684,25 @@ fail: void ram_write_tracking_stop(void) { RAMState *rs = ram_state; - RAMBlock *bs; + RAMBlock *block; RCU_READ_LOCK_GUARD(); - RAMBLOCK_FOREACH_NOT_IGNORED(bs) { - if ((bs->flags & RAM_UF_WRITEPROTECT) == 0) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { + if ((block->flags & RAM_UF_WRITEPROTECT) == 0) { continue; } /* Remove protection and unregister all affected RAM blocks */ - uffd_change_protection(rs->uffdio_fd, bs->host, bs->max_length, false, false); - uffd_unregister_memory(rs->uffdio_fd, bs->host, bs->max_length); + uffd_change_protection(rs->uffdio_fd, block->host, block->max_length, + false, false); + uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length); - trace_ram_write_tracking_ramblock_stop(bs->idstr, bs->page_size, - bs->host, bs->max_length); + trace_ram_write_tracking_ramblock_stop(block->idstr, block->page_size, + block->host, block->max_length); /* Cleanup flags and remove reference */ - bs->flags &= ~RAM_UF_WRITEPROTECT; - memory_region_unref(bs->mr); + block->flags &= ~RAM_UF_WRITEPROTECT; + memory_region_unref(block->mr); } /* Finally close UFFD file descriptor */ From e999fa47b220274082cb238d5ccb2c9bacd42bf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?= Date: Tue, 23 Mar 2021 23:43:58 +0800 Subject: [PATCH 6/6] tests/migration: fix parameter of auto-converge migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit when 
executing the following test command: $ ./guestperf-batch.py --auto-converge \ --auto-converge-step {percent} ... the test aborts and the following error message is thrown: "Parameter 'x-cpu-throttle-increment' is unexpected" The reason is that 'x-cpu-throttle-increment' has been deprecated and 'cpu-throttle-increment' was introduced in v2.7. Use the new parameter instead. Signed-off-by: Hyman Huang(黄勇) Message-Id: <0195d34a317ce3cc417b3efd275e30cad35a7618.1616513998.git.huangy81@chinatelecom.cn> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- tests/migration/guestperf/engine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/migration/guestperf/engine.py b/tests/migration/guestperf/engine.py index e399447940..6b49aed579 100644 --- a/tests/migration/guestperf/engine.py +++ b/tests/migration/guestperf/engine.py @@ -102,7 +102,7 @@ class Engine(object): info.get("downtime", 0), info.get("expected-downtime", 0), info.get("setup-time", 0), - info.get("x-cpu-throttle-percentage", 0), + info.get("cpu-throttle-percentage", 0), ) def _migrate(self, hardware, scenario, src, dst, connect_uri): @@ -135,7 +135,7 @@ class Engine(object): "state": True } ]) resp = src.command("migrate-set-parameters", - x_cpu_throttle_increment=scenario._auto_converge_step) + cpu_throttle_increment=scenario._auto_converge_step) if scenario._post_copy: resp = src.command("migrate-set-capabilities",