Migration Pull

[dropped fabiano's patch on modifying cpu model for arm migration tests for
  now]
 
 - Fabiano's patchset to fix migration state references in BHs
 - Fabiano's new 'n-1' migration test for CI
 - Het's fix on making "uri" optional in QMP migrate cmd
 - Markus's HMP leak fix reported by Coverity
 - Paolo's cleanup on uffd to replace u64 usage
 - Peter's small migration cleanup series all over the places
 -----BEGIN PGP SIGNATURE-----
 
 iIgEABYKADAWIQS5GE3CDMRX2s990ak7X8zN86vXBgUCZbcVeBIccGV0ZXJ4QHJl
 ZGhhdC5jb20ACgkQO1/MzfOr1wYHjgD9F2Fnrf4EuPNC/gF3yUvHVz1mgHqevb/g
 pw/ThcJF31wBALuWmwuUaNWm+VNtRc10YH6bY7HZW8oa1RefRN6QZn0L
 =JGTX
 -----END PGP SIGNATURE-----

Merge tag 'migration-20240126-pull-request' of https://gitlab.com/peterx/qemu into staging

Migration Pull

[dropped fabiano's patch on modifying cpu model for arm migration tests for
 now]

- Fabiano's patchset to fix migration state references in BHs
- Fabiano's new 'n-1' migration test for CI
- Het's fix on making "uri" optional in QMP migrate cmd
- Markus's HMP leak fix reported by Coverity
- Paolo's cleanup on uffd to replace u64 usage
- Peter's small migration cleanup series all over the places

# -----BEGIN PGP SIGNATURE-----
#
# iIgEABYKADAWIQS5GE3CDMRX2s990ak7X8zN86vXBgUCZbcVeBIccGV0ZXJ4QHJl
# ZGhhdC5jb20ACgkQO1/MzfOr1wYHjgD9F2Fnrf4EuPNC/gF3yUvHVz1mgHqevb/g
# pw/ThcJF31wBALuWmwuUaNWm+VNtRc10YH6bY7HZW8oa1RefRN6QZn0L
# =JGTX
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 29 Jan 2024 03:03:20 GMT
# gpg:                using EDDSA key B9184DC20CC457DACF7DD1A93B5FCCCDF3ABD706
# gpg:                issuer "peterx@redhat.com"
# gpg: Good signature from "Peter Xu <xzpeter@gmail.com>" [marginal]
# gpg:                 aka "Peter Xu <peterx@redhat.com>" [marginal]
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: B918 4DC2 0CC4 57DA CF7D  D1A9 3B5F CCCD F3AB D706

* tag 'migration-20240126-pull-request' of https://gitlab.com/peterx/qemu:
  Make 'uri' optional for migrate QAPI
  migration: Centralize BH creation and dispatch
  migration: Add a wrapper to qemu_bh_schedule
  migration: Reference migration state around loadvm_postcopy_handle_run_bh
  migration: Take reference to migration state around bg_migration_vm_start_bh
  migration: Fix use-after-free of migration state object
  migration/yank: Use channel features
  ci: Disable migration compatibility tests for aarch64
  ci: Add a migration compatibility test job
  analyze-migration.py: Remove trick on parsing ramblocks
  migration: Drop unnecessary check in ram's pending_exact()
  migration: Make threshold_size an uint64_t
  migration: Plug memory leak on HMP migrate error path
  userfaultfd: use 1ULL to build ioctl masks

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2024-01-29 10:53:42 +00:00
commit e7390150e7
12 changed files with 134 additions and 78 deletions

View File

@ -167,6 +167,70 @@ build-system-centos:
x86_64-softmmu rx-softmmu sh4-softmmu nios2-softmmu
MAKE_CHECK_ARGS: check-build
# Previous QEMU release. Used for cross-version migration tests.
build-previous-qemu:
extends: .native_build_job_template
artifacts:
when: on_success
expire_in: 2 days
paths:
- build-previous
exclude:
- build-previous/**/*.p
- build-previous/**/*.a.p
- build-previous/**/*.fa.p
- build-previous/**/*.c.o
- build-previous/**/*.c.o.d
- build-previous/**/*.fa
needs:
job: amd64-opensuse-leap-container
variables:
IMAGE: opensuse-leap
TARGETS: x86_64-softmmu aarch64-softmmu
before_script:
- export QEMU_PREV_VERSION="$(sed 's/\([0-9.]*\)\.[0-9]*/v\1.0/' VERSION)"
- git checkout $QEMU_PREV_VERSION
after_script:
- mv build build-previous
.migration-compat-common:
extends: .common_test_job_template
needs:
- job: build-previous-qemu
- job: build-system-opensuse
# The old QEMU could have bugs unrelated to migration that are
# already fixed in the current development branch, so this test
# might fail.
allow_failure: true
variables:
IMAGE: opensuse-leap
MAKE_CHECK_ARGS: check-build
script:
# Use the migration-tests from the older QEMU tree. This avoids
# testing an old QEMU against new features/tests that it is not
# compatible with.
- cd build-previous
# old to new
- QTEST_QEMU_BINARY_SRC=./qemu-system-${TARGET}
QTEST_QEMU_BINARY=../build/qemu-system-${TARGET} ./tests/qtest/migration-test
# new to old
- QTEST_QEMU_BINARY_DST=./qemu-system-${TARGET}
QTEST_QEMU_BINARY=../build/qemu-system-${TARGET} ./tests/qtest/migration-test
# This job is disabled until we release 9.0. The existing
# migration-test in 8.2 is broken on aarch64. The fix was already
# commited, but it will only take effect once 9.0 is out.
migration-compat-aarch64:
extends: .migration-compat-common
variables:
TARGET: aarch64
QEMU_JOB_SKIPPED: 1
migration-compat-x86_64:
extends: .migration-compat-common
variables:
TARGET: x86_64
check-system-centos:
extends: .native_test_job_template
needs:

View File

@ -764,7 +764,7 @@ void hmp_migrate(Monitor *mon, const QDict *qdict)
bool resume = qdict_get_try_bool(qdict, "resume", false);
const char *uri = qdict_get_str(qdict, "uri");
Error *err = NULL;
MigrationChannelList *caps = NULL;
g_autoptr(MigrationChannelList) caps = NULL;
g_autoptr(MigrationChannel) channel = NULL;
if (inc) {
@ -789,8 +789,6 @@ void hmp_migrate(Monitor *mon, const QDict *qdict)
return;
}
qapi_free_MigrationChannelList(caps);
if (!detach) {
HMPMigrationStatus *status;

View File

@ -199,6 +199,47 @@ void migration_object_init(void)
dirty_bitmap_mig_init();
}
typedef struct {
QEMUBH *bh;
QEMUBHFunc *cb;
void *opaque;
} MigrationBH;
static void migration_bh_dispatch_bh(void *opaque)
{
MigrationState *s = migrate_get_current();
MigrationBH *migbh = opaque;
/* cleanup this BH */
qemu_bh_delete(migbh->bh);
migbh->bh = NULL;
/* dispatch the other one */
migbh->cb(migbh->opaque);
object_unref(OBJECT(s));
g_free(migbh);
}
void migration_bh_schedule(QEMUBHFunc *cb, void *opaque)
{
MigrationState *s = migrate_get_current();
MigrationBH *migbh = g_new0(MigrationBH, 1);
QEMUBH *bh = qemu_bh_new(migration_bh_dispatch_bh, migbh);
/* Store these to dispatch when the BH runs */
migbh->bh = bh;
migbh->cb = cb;
migbh->opaque = opaque;
/*
* Ref the state for bh, because it may be called when
* there're already no other refs
*/
object_ref(OBJECT(s));
qemu_bh_schedule(bh);
}
void migration_cancel(const Error *error)
{
if (error) {
@ -646,7 +687,6 @@ static void process_incoming_migration_bh(void *opaque)
*/
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_COMPLETED);
qemu_bh_delete(mis->bh);
migration_incoming_state_destroy();
}
@ -712,8 +752,7 @@ process_incoming_migration_co(void *opaque)
goto fail;
}
mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
qemu_bh_schedule(mis->bh);
migration_bh_schedule(process_incoming_migration_bh, mis);
return;
fail:
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
@ -1274,9 +1313,6 @@ void migrate_set_state(int *state, int old_state, int new_state)
static void migrate_fd_cleanup(MigrationState *s)
{
qemu_bh_delete(s->cleanup_bh);
s->cleanup_bh = NULL;
g_free(s->hostname);
s->hostname = NULL;
json_writer_free(s->vmdesc);
@ -1330,21 +1366,9 @@ static void migrate_fd_cleanup(MigrationState *s)
yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}
static void migrate_fd_cleanup_schedule(MigrationState *s)
{
/*
* Ref the state for bh, because it may be called when
* there're already no other refs
*/
object_ref(OBJECT(s));
qemu_bh_schedule(s->cleanup_bh);
}
static void migrate_fd_cleanup_bh(void *opaque)
{
MigrationState *s = opaque;
migrate_fd_cleanup(s);
object_unref(OBJECT(s));
migrate_fd_cleanup(opaque);
}
void migrate_set_error(MigrationState *s, const Error *error)
@ -1567,8 +1591,6 @@ int migrate_init(MigrationState *s, Error **errp)
* parameters/capabilities that the user set, and
* locks.
*/
s->cleanup_bh = 0;
s->vm_start_bh = 0;
s->to_dst_file = NULL;
s->state = MIGRATION_STATUS_NONE;
s->rp_state.from_dst_file = NULL;
@ -3138,7 +3160,8 @@ static void migration_iteration_finish(MigrationState *s)
error_report("%s: Unknown ending state %d", __func__, s->state);
break;
}
migrate_fd_cleanup_schedule(s);
migration_bh_schedule(migrate_fd_cleanup_bh, s);
bql_unlock();
}
@ -3169,7 +3192,7 @@ static void bg_migration_iteration_finish(MigrationState *s)
break;
}
migrate_fd_cleanup_schedule(s);
migration_bh_schedule(migrate_fd_cleanup_bh, s);
bql_unlock();
}
@ -3375,9 +3398,6 @@ static void bg_migration_vm_start_bh(void *opaque)
{
MigrationState *s = opaque;
qemu_bh_delete(s->vm_start_bh);
s->vm_start_bh = NULL;
vm_resume(s->vm_old_state);
migration_downtime_end(s);
}
@ -3483,9 +3503,7 @@ static void *bg_migration_thread(void *opaque)
* calling VM state change notifiers from vm_start() would initiate
* writes to virtio VQs memory which is in write-protected region.
*/
s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s);
qemu_bh_schedule(s->vm_start_bh);
migration_bh_schedule(bg_migration_vm_start_bh, s);
bql_unlock();
while (migration_is_active(s)) {
@ -3541,12 +3559,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
migrate_error_free(s);
s->expected_downtime = migrate_downtime_limit();
if (resume) {
assert(s->cleanup_bh);
} else {
assert(!s->cleanup_bh);
s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s);
}
if (error_in) {
migrate_fd_error(s, error_in);
if (resume) {

View File

@ -159,8 +159,6 @@ struct MigrationIncomingState {
/* PostCopyFD's for external userfaultfds & handlers of shared memory */
GArray *postcopy_remote_fds;
QEMUBH *bh;
int state;
/*
@ -255,8 +253,6 @@ struct MigrationState {
/*< public >*/
QemuThread thread;
QEMUBH *vm_start_bh;
QEMUBH *cleanup_bh;
/* Protected by qemu_file_lock */
QEMUFile *to_dst_file;
/* Postcopy specific transfer channel */
@ -296,7 +292,7 @@ struct MigrationState {
* this threshold; it's calculated from the requested downtime and
* measured bandwidth, or avail-switchover-bandwidth if specified.
*/
int64_t threshold_size;
uint64_t threshold_size;
/* params from 'migrate-set-parameters' */
MigrationParameters parameters;
@ -528,6 +524,7 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque);
void migration_make_urgent_request(void);
void migration_consume_urgent_request(void);
bool migration_rate_limit(void);
void migration_bh_schedule(QEMUBHFunc *cb, void *opaque);
void migration_cancel(const Error *error);
void migration_populate_vfio_info(MigrationInfo *info);

View File

@ -102,11 +102,9 @@ void postcopy_thread_create(MigrationIncomingState *mis,
* are target OS specific.
*/
#if defined(__linux__)
#include <poll.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <asm/types.h> /* for __u64 */
#endif
#if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD)
@ -272,8 +270,8 @@ static bool request_ufd_features(int ufd, uint64_t features)
return false;
}
ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
(__u64)1 << _UFFDIO_UNREGISTER;
ioctl_mask = 1ULL << _UFFDIO_REGISTER |
1ULL << _UFFDIO_UNREGISTER;
if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
error_report("Missing userfault features: %" PRIx64,
(uint64_t)(~api_struct.ioctls & ioctl_mask));
@ -462,9 +460,9 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp)
goto out;
}
feature_mask = (__u64)1 << _UFFDIO_WAKE |
(__u64)1 << _UFFDIO_COPY |
(__u64)1 << _UFFDIO_ZEROPAGE;
feature_mask = 1ULL << _UFFDIO_WAKE |
1ULL << _UFFDIO_COPY |
1ULL << _UFFDIO_ZEROPAGE;
if ((reg_struct.ioctls & feature_mask) != feature_mask) {
error_setg(errp, "Missing userfault map features: %" PRIx64,
(uint64_t)(~reg_struct.ioctls & feature_mask));
@ -733,11 +731,11 @@ static int ram_block_enable_notify(RAMBlock *rb, void *opaque)
error_report("%s userfault register: %s", __func__, strerror(errno));
return -1;
}
if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) {
if (!(reg_struct.ioctls & (1ULL << _UFFDIO_COPY))) {
error_report("%s userfault: Region doesn't support COPY", __func__);
return -1;
}
if (reg_struct.ioctls & ((__u64)1 << _UFFDIO_ZEROPAGE)) {
if (reg_struct.ioctls & (1ULL << _UFFDIO_ZEROPAGE)) {
qemu_ram_set_uf_zeroable(rb);
}

View File

@ -3213,21 +3213,20 @@ static void ram_state_pending_estimate(void *opaque, uint64_t *must_precopy,
static void ram_state_pending_exact(void *opaque, uint64_t *must_precopy,
uint64_t *can_postcopy)
{
MigrationState *s = migrate_get_current();
RAMState **temp = opaque;
RAMState *rs = *temp;
uint64_t remaining_size;
uint64_t remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
if (!migration_in_postcopy() && remaining_size < s->threshold_size) {
if (!migration_in_postcopy()) {
bql_lock();
WITH_RCU_READ_LOCK_GUARD() {
migration_bitmap_sync_precopy(rs, false);
}
bql_unlock();
remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
}
remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
if (migrate_postcopy_ram()) {
/* We can do postcopy, and all the data is postcopiable */
*can_postcopy += remaining_size;

View File

@ -2171,8 +2171,6 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
runstate_set(RUN_STATE_PAUSED);
}
qemu_bh_delete(mis->bh);
trace_vmstate_downtime_checkpoint("dst-postcopy-bh-vm-started");
}
@ -2188,8 +2186,7 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
}
postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, mis);
qemu_bh_schedule(mis->bh);
migration_bh_schedule(loadvm_postcopy_handle_run_bh, mis);
/* We need to finish reading the stream from the package
* and also stop reading anything more from the stream that loaded the

View File

@ -8,12 +8,9 @@
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "io/channel.h"
#include "yank_functions.h"
#include "qemu/yank.h"
#include "io/channel-socket.h"
#include "io/channel-tls.h"
#include "qemu-file.h"
void migration_yank_iochannel(void *opaque)
@ -26,8 +23,7 @@ void migration_yank_iochannel(void *opaque)
/* Return whether yank is supported on this ioc */
static bool migration_ioc_yank_supported(QIOChannel *ioc)
{
return object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) ||
object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS);
return qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN);
}
void migration_ioc_register_yank(QIOChannel *ioc)

View File

@ -1757,7 +1757,7 @@
#
##
{ 'command': 'migrate',
'data': {'uri': 'str',
'data': {'*uri': 'str',
'*channels': [ 'MigrationChannel' ],
'*blk': { 'type': 'bool', 'features': [ 'deprecated' ] },
'*inc': { 'type': 'bool', 'features': [ 'deprecated' ] },

View File

@ -151,17 +151,12 @@ class RamSection(object):
addr &= ~(self.TARGET_PAGE_SIZE - 1)
if flags & self.RAM_SAVE_FLAG_MEM_SIZE:
while True:
total_length = addr
while total_length > 0:
namelen = self.file.read8()
# We assume that no RAM chunk is big enough to ever
# hit the first byte of the address, so when we see
# a zero here we know it has to be an address, not the
# length of the next block.
if namelen == 0:
self.file.file.seek(-1, 1)
break
self.name = self.file.readstr(len = namelen)
len = self.file.read64()
total_length -= len
self.sizeinfo[self.name] = '0x%016x' % len
if self.write_memory:
print(self.name)

View File

@ -684,7 +684,7 @@ generate_faults(VuDev *dev) {
dev->postcopy_ufd, strerror(errno));
return false;
}
if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) {
if (!(reg_struct.ioctls & (1ULL << _UFFDIO_COPY))) {
vu_panic(dev, "%s Region (%d) doesn't support COPY",
__func__, i);
return false;

View File

@ -104,8 +104,8 @@ static bool ufd_version_check(void)
}
uffd_feature_thread_id = api_struct.features & UFFD_FEATURE_THREAD_ID;
ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
(__u64)1 << _UFFDIO_UNREGISTER;
ioctl_mask = 1ULL << _UFFDIO_REGISTER |
1ULL << _UFFDIO_UNREGISTER;
if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
g_test_message("Skipping test: Missing userfault feature");
return false;