2008-11-12 00:33:36 +03:00
|
|
|
/*
|
|
|
|
* QEMU System Emulator
|
|
|
|
*
|
|
|
|
* Copyright (c) 2003-2008 Fabrice Bellard
|
2015-05-08 14:20:21 +03:00
|
|
|
* Copyright (c) 2009-2015 Red Hat Inc
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Juan Quintela <quintela@redhat.com>
|
2008-11-12 00:33:36 +03:00
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
|
|
* in the Software without restriction, including without limitation the rights
|
|
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
|
|
* furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice shall be included in
|
|
|
|
* all copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
* THE SOFTWARE.
|
|
|
|
*/
|
|
|
|
|
2016-01-26 21:16:54 +03:00
|
|
|
#include "qemu/osdep.h"
|
2014-06-20 17:26:08 +04:00
|
|
|
#include "hw/boards.h"
|
2012-10-24 10:43:34 +04:00
|
|
|
#include "net/net.h"
|
2017-04-24 21:07:27 +03:00
|
|
|
#include "migration.h"
|
2017-04-20 15:25:55 +03:00
|
|
|
#include "migration/snapshot.h"
|
2023-04-26 20:04:06 +03:00
|
|
|
#include "migration-stats.h"
|
2019-08-12 08:23:45 +03:00
|
|
|
#include "migration/vmstate.h"
|
2017-04-21 18:39:30 +03:00
|
|
|
#include "migration/misc.h"
|
2017-04-24 14:42:55 +03:00
|
|
|
#include "migration/register.h"
|
2017-04-24 19:53:30 +03:00
|
|
|
#include "migration/global_state.h"
|
2022-06-20 14:01:54 +03:00
|
|
|
#include "migration/channel-block.h"
|
2017-04-17 21:26:27 +03:00
|
|
|
#include "ram.h"
|
2017-04-20 19:52:18 +03:00
|
|
|
#include "qemu-file.h"
|
2017-04-20 15:48:46 +03:00
|
|
|
#include "savevm.h"
|
2017-04-20 14:12:24 +03:00
|
|
|
#include "postcopy-ram.h"
|
2018-02-01 14:18:31 +03:00
|
|
|
#include "qapi/error.h"
|
2018-02-27 02:13:27 +03:00
|
|
|
#include "qapi/qapi-commands-migration.h"
|
2021-02-04 15:48:30 +03:00
|
|
|
#include "qapi/clone-visitor.h"
|
|
|
|
#include "qapi/qapi-builtin-visit.h"
|
2015-03-17 20:29:20 +03:00
|
|
|
#include "qemu/error-report.h"
|
2012-12-17 21:20:04 +04:00
|
|
|
#include "sysemu/cpus.h"
|
2012-12-17 21:19:49 +04:00
|
|
|
#include "exec/memory.h"
|
2017-04-24 21:50:19 +03:00
|
|
|
#include "exec/target_page.h"
|
2012-05-22 01:46:44 +04:00
|
|
|
#include "trace.h"
|
2013-03-22 18:47:58 +04:00
|
|
|
#include "qemu/iov.h"
|
2022-12-21 16:35:49 +03:00
|
|
|
#include "qemu/job.h"
|
Include qemu/main-loop.h less
In my "build everything" tree, changing qemu/main-loop.h triggers a
recompile of some 5600 out of 6600 objects (not counting tests and
objects that don't depend on qemu/osdep.h). It includes block/aio.h,
which in turn includes qemu/event_notifier.h, qemu/notify.h,
qemu/processor.h, qemu/qsp.h, qemu/queue.h, qemu/thread-posix.h,
qemu/thread.h, qemu/timer.h, and a few more.
Include qemu/main-loop.h only where it's needed. Touching it now
recompiles only some 1700 objects. For block/aio.h and
qemu/event_notifier.h, these numbers drop from 5600 to 2800. For the
others, they shrink only slightly.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20190812052359.30071-21-armbru@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2019-08-12 08:23:50 +03:00
|
|
|
#include "qemu/main-loop.h"
|
2013-05-25 07:09:43 +04:00
|
|
|
#include "block/snapshot.h"
|
2016-03-20 20:16:19 +03:00
|
|
|
#include "qemu/cutils.h"
|
2016-04-27 13:05:01 +03:00
|
|
|
#include "io/channel-buffer.h"
|
2016-04-27 13:05:08 +03:00
|
|
|
#include "io/channel-file.h"
|
2018-02-27 12:52:14 +03:00
|
|
|
#include "sysemu/replay.h"
|
2019-08-12 08:23:59 +03:00
|
|
|
#include "sysemu/runstate.h"
|
2019-08-12 08:23:57 +03:00
|
|
|
#include "sysemu/sysemu.h"
|
2020-05-08 13:02:22 +03:00
|
|
|
#include "sysemu/xen.h"
|
2018-09-03 07:38:47 +03:00
|
|
|
#include "migration/colo.h"
|
2019-02-15 20:45:48 +03:00
|
|
|
#include "qemu/bitmap.h"
|
2019-02-27 16:24:08 +03:00
|
|
|
#include "net/announce.h"
|
2020-12-28 18:08:52 +03:00
|
|
|
#include "qemu/yank.h"
|
2021-07-22 20:58:41 +03:00
|
|
|
#include "yank_functions.h"
|
2022-01-13 22:44:52 +03:00
|
|
|
#include "sysemu/qtest.h"
|
2023-03-01 23:18:45 +03:00
|
|
|
#include "options.h"
|
2009-10-16 03:53:55 +04:00
|
|
|
|
2020-10-20 06:10:46 +03:00
|
|
|
const unsigned int postcopy_ram_discard_version;
|
2015-11-05 21:10:52 +03:00
|
|
|
|
2017-04-20 15:48:46 +03:00
|
|
|
/* Subcommands for QEMU_VM_COMMAND */
|
|
|
|
/*
 * Sub-command identifiers carried by QEMU_VM_COMMAND.
 * The numeric values follow declaration order, so new commands must only
 * be appended directly before MIG_CMD_MAX.
 */
enum qemu_vm_cmd {
    /* Must be 0 */
    MIG_CMD_INVALID = 0,
    /* Tell the dest to open the Return path */
    MIG_CMD_OPEN_RETURN_PATH,
    /* Request a PONG on the RP */
    MIG_CMD_PING,

    /* Prior to any page transfers, just warn we might want to do PC */
    MIG_CMD_POSTCOPY_ADVISE,
    /* Start listening for incoming pages as it's running. */
    MIG_CMD_POSTCOPY_LISTEN,
    /* Start execution */
    MIG_CMD_POSTCOPY_RUN,

    /*
     * A list of pages to discard that were previously sent during
     * precopy but are dirty.
     */
    MIG_CMD_POSTCOPY_RAM_DISCARD,
    /* Send a wrapped stream within this stream */
    MIG_CMD_PACKAGED,
    /* Enable COLO */
    MIG_CMD_ENABLE_COLO,
    /* resume postcopy on dest */
    MIG_CMD_POSTCOPY_RESUME,
    /* Request for recved bitmap on dst */
    MIG_CMD_RECV_BITMAP,
    MIG_CMD_MAX
};
|
|
|
|
|
migration/savevm.c: set MAX_VM_CMD_PACKAGED_SIZE to 1ul << 32
MAX_VM_CMD_PACKAGED_SIZE is a constant used in qemu_savevm_send_packaged
and loadvm_handle_cmd_packaged to determine whether a package is too
big to be sent or received. qemu_savevm_send_packaged is called inside
postcopy_start (migration/migration.c) to send the MigrationState
in a single blob to the destination, using the MIG_CMD_PACKAGED subcommand,
which will read it up using loadvm_handle_cmd_packaged. If the blob is
larger than MAX_VM_CMD_PACKAGED_SIZE, an error is thrown and the postcopy
migration is aborted. Both MAX_VM_CMD_PACKAGED_SIZE and MIG_CMD_PACKAGED
were introduced by commit 11cf1d984b ("MIG_CMD_PACKAGED: Send a packaged
chunk ..."). The constant has its original value of 1ul << 24 (16MB).
The current MAX_VM_CMD_PACKAGED_SIZE value is not enough to support postcopy
migration of bigger pseries guests. The blob size for a postcopy migration of
a pseries guest with the following setup:
qemu-system-ppc64 --nographic -vga none -machine pseries,accel=kvm -m 64G \
-smp 1,maxcpus=32 -device virtio-blk-pci,drive=rootdisk \
-drive file=f27.qcow2,if=none,cache=none,format=qcow2,id=rootdisk \
-netdev user,id=u1 -net nic,netdev=u1
Goes around 12MB. Bumping the RAM to 128G makes the blob size go to 20MB.
With 256G the blob goes to 37MB - more than twice the current maximum size.
At this moment the pseries machine can handle guests with up to 1TB of RAM,
making this postcopy blob grow to approximately 128MB in size.
Following the discussions made in [1], there is a need to understand what
devices are aggressively consuming the blob in that manner and see if that
can be mitigated. Until then, we can set MAX_VM_CMD_PACKAGED_SIZE to the
maximum value allowed. Since the size is a 32 bit int variable, we can set
it as 1ul << 32, giving a maximum blob size of 4G that is enough to support
postcopy migration of 32TB RAM guests given the above constraints.
[1] https://lists.nongnu.org/archive/html/qemu-devel/2018-01/msg06313.html
Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
Reported-by: Balamuruhan S <bala24@linux.vnet.ibm.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2018-01-26 18:59:40 +03:00
|
|
|
/* Largest value representable in the 32-bit length of a packaged blob. */
#define MAX_VM_CMD_PACKAGED_SIZE UINT32_MAX
|
2015-11-05 21:10:45 +03:00
|
|
|
static struct mig_cmd_args {
|
|
|
|
ssize_t len; /* -1 = variable */
|
|
|
|
const char *name;
|
|
|
|
} mig_cmd_args[] = {
|
|
|
|
[MIG_CMD_INVALID] = { .len = -1, .name = "INVALID" },
|
2015-11-05 21:10:46 +03:00
|
|
|
[MIG_CMD_OPEN_RETURN_PATH] = { .len = 0, .name = "OPEN_RETURN_PATH" },
|
|
|
|
[MIG_CMD_PING] = { .len = sizeof(uint32_t), .name = "PING" },
|
2017-07-10 19:30:16 +03:00
|
|
|
[MIG_CMD_POSTCOPY_ADVISE] = { .len = -1, .name = "POSTCOPY_ADVISE" },
|
2015-11-05 21:10:52 +03:00
|
|
|
[MIG_CMD_POSTCOPY_LISTEN] = { .len = 0, .name = "POSTCOPY_LISTEN" },
|
|
|
|
[MIG_CMD_POSTCOPY_RUN] = { .len = 0, .name = "POSTCOPY_RUN" },
|
|
|
|
[MIG_CMD_POSTCOPY_RAM_DISCARD] = {
|
|
|
|
.len = -1, .name = "POSTCOPY_RAM_DISCARD" },
|
2018-05-02 13:47:29 +03:00
|
|
|
[MIG_CMD_POSTCOPY_RESUME] = { .len = 0, .name = "POSTCOPY_RESUME" },
|
2015-11-05 21:10:53 +03:00
|
|
|
[MIG_CMD_PACKAGED] = { .len = 4, .name = "PACKAGED" },
|
2018-05-02 13:47:27 +03:00
|
|
|
[MIG_CMD_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" },
|
2015-11-05 21:10:45 +03:00
|
|
|
[MIG_CMD_MAX] = { .len = -1, .name = "MAX" },
|
|
|
|
};
|
|
|
|
|
2017-07-10 19:30:16 +03:00
|
|
|
/* Note for MIG_CMD_POSTCOPY_ADVISE:
|
|
|
|
* The format of arguments is depending on postcopy mode:
|
|
|
|
* - postcopy RAM only
|
|
|
|
* uint64_t host page size
|
2023-07-14 14:32:41 +03:00
|
|
|
* uint64_t target page size
|
2017-07-10 19:30:16 +03:00
|
|
|
*
|
|
|
|
* - postcopy RAM and postcopy dirty bitmaps
|
|
|
|
* format is the same as for postcopy RAM only
|
|
|
|
*
|
|
|
|
* - postcopy dirty bitmaps only
|
|
|
|
* Nothing. Command length field is 0.
|
|
|
|
*
|
|
|
|
* Be careful: adding a new postcopy entity with some other parameters should
|
|
|
|
* not break format self-description ability. Good way is to introduce some
|
|
|
|
* generic extendable format with an exception for two old entities.
|
|
|
|
*/
|
|
|
|
|
2008-11-12 00:33:36 +03:00
|
|
|
/***********************************************************/
|
|
|
|
/* savevm/loadvm support */
|
|
|
|
|
2009-07-11 01:11:57 +04:00
|
|
|
/*
 * Wrap @bs in a block-backed I/O channel and return a QEMUFile on top of
 * it: an output file when @is_writable is non-zero, an input file
 * otherwise.
 */
static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
{
    if (!is_writable) {
        return qemu_file_new_input(QIO_CHANNEL(qio_channel_block_new(bs)));
    }

    return qemu_file_new_output(QIO_CHANNEL(qio_channel_block_new(bs)));
}
|
|
|
|
|
2011-09-12 18:21:44 +04:00
|
|
|
|
2013-11-29 18:26:02 +04:00
|
|
|
/* QEMUFile timer support.
|
|
|
|
* Not in qemu-file.c to not add qemu-timer.c as dependency to qemu-file.c
|
|
|
|
*/
|
2011-09-12 18:21:44 +04:00
|
|
|
|
2013-08-21 19:03:02 +04:00
|
|
|
/* Write the expiry time of @ts (in ns) to @f as a big-endian 64-bit value. */
void timer_put(QEMUFile *f, QEMUTimer *ts)
{
    uint64_t deadline_ns = timer_expire_time_ns(ts);

    qemu_put_be64(f, deadline_ns);
}
|
|
|
|
|
2013-08-21 19:03:02 +04:00
|
|
|
/*
 * Read a big-endian 64-bit expiry time from @f and apply it to @ts.
 * The all-ones value (what "-1" converts to as uint64_t) deletes the timer
 * instead of arming it.
 */
void timer_get(QEMUFile *f, QEMUTimer *ts)
{
    uint64_t deadline_ns = qemu_get_be64(f);

    if (deadline_ns == UINT64_MAX) {
        timer_del(ts);
        return;
    }

    timer_mod_ns(ts, deadline_ns);
}
|
|
|
|
|
|
|
|
|
2013-11-29 18:26:02 +04:00
|
|
|
/* VMState timer support.
|
|
|
|
* Not in vmstate.c to not add qemu-timer.c as dependency to vmstate.c
|
|
|
|
*/
|
2009-08-20 21:42:26 +04:00
|
|
|
|
2018-11-14 16:29:30 +03:00
|
|
|
static int get_timer(QEMUFile *f, void *pv, size_t size,
|
|
|
|
const VMStateField *field)
|
2009-08-20 21:42:26 +04:00
|
|
|
{
|
|
|
|
QEMUTimer *v = pv;
|
2013-08-21 19:03:02 +04:00
|
|
|
timer_get(f, v);
|
2009-08-20 21:42:26 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-11-14 16:29:30 +03:00
|
|
|
static int put_timer(QEMUFile *f, void *pv, size_t size,
|
2020-12-11 20:11:48 +03:00
|
|
|
const VMStateField *field, JSONWriter *vmdesc)
|
2009-08-20 21:42:26 +04:00
|
|
|
{
|
2009-09-30 00:48:20 +04:00
|
|
|
QEMUTimer *v = pv;
|
2013-08-21 19:03:02 +04:00
|
|
|
timer_put(f, v);
|
2017-01-19 22:00:50 +03:00
|
|
|
|
|
|
|
return 0;
|
2009-08-20 21:42:26 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
const VMStateInfo vmstate_info_timer = {
|
|
|
|
.name = "timer",
|
|
|
|
.get = get_timer,
|
|
|
|
.put = put_timer,
|
|
|
|
};
|
|
|
|
|
2012-10-30 11:45:12 +04:00
|
|
|
|
2010-06-25 21:09:14 +04:00
|
|
|
/*
 * Identifier string / instance id pair referenced from
 * SaveStateEntry.compat.
 */
typedef struct CompatEntry {
    char idstr[256];    /* identifier string, at most 255 chars + NUL */
    int instance_id;
} CompatEntry;
|
|
|
|
|
2008-11-12 00:33:36 +03:00
|
|
|
typedef struct SaveStateEntry {
|
2009-09-12 11:36:22 +04:00
|
|
|
QTAILQ_ENTRY(SaveStateEntry) entry;
|
2008-11-12 00:33:36 +03:00
|
|
|
char idstr[256];
|
2019-10-16 05:29:31 +03:00
|
|
|
uint32_t instance_id;
|
2010-05-15 15:32:40 +04:00
|
|
|
int alias_id;
|
2008-11-12 00:33:36 +03:00
|
|
|
int version_id;
|
2017-05-24 10:09:58 +03:00
|
|
|
/* version id read from the stream */
|
|
|
|
int load_version_id;
|
2008-11-12 00:33:36 +03:00
|
|
|
int section_id;
|
2017-05-24 10:09:58 +03:00
|
|
|
/* section id read from the stream */
|
|
|
|
int load_section_id;
|
2018-11-14 16:31:39 +03:00
|
|
|
const SaveVMHandlers *ops;
|
2009-08-20 21:42:25 +04:00
|
|
|
const VMStateDescription *vmsd;
|
2008-11-12 00:33:36 +03:00
|
|
|
void *opaque;
|
2010-06-25 21:09:14 +04:00
|
|
|
CompatEntry *compat;
|
2012-01-25 16:24:51 +04:00
|
|
|
int is_ram;
|
2008-11-12 00:33:36 +03:00
|
|
|
} SaveStateEntry;
|
|
|
|
|
2015-05-13 14:37:04 +03:00
|
|
|
typedef struct SaveState {
|
|
|
|
QTAILQ_HEAD(, SaveStateEntry) handlers;
|
2019-10-17 23:59:53 +03:00
|
|
|
SaveStateEntry *handler_pri_head[MIG_PRI_MAX + 1];
|
2015-05-13 14:37:04 +03:00
|
|
|
int global_section_id;
|
2015-05-13 19:17:43 +03:00
|
|
|
uint32_t len;
|
|
|
|
const char *name;
|
2016-10-24 18:26:50 +03:00
|
|
|
uint32_t target_page_bits;
|
2019-02-15 20:45:48 +03:00
|
|
|
uint32_t caps_count;
|
|
|
|
MigrationCapability *capabilities;
|
2019-09-03 19:22:44 +03:00
|
|
|
QemuUUID uuid;
|
2015-05-13 14:37:04 +03:00
|
|
|
} SaveState;
|
|
|
|
|
|
|
|
static SaveState savevm_state = {
|
|
|
|
.handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
|
2019-10-17 23:59:53 +03:00
|
|
|
.handler_pri_head = { [MIG_PRI_DEFAULT ... MIG_PRI_MAX] = NULL },
|
2015-05-13 14:37:04 +03:00
|
|
|
.global_section_id = 0,
|
2015-05-13 19:17:43 +03:00
|
|
|
};
|
|
|
|
|
2023-10-20 12:07:27 +03:00
|
|
|
static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id);
|
|
|
|
|
2019-02-15 20:45:48 +03:00
|
|
|
static bool should_validate_capability(int capability)
|
|
|
|
{
|
|
|
|
assert(capability >= 0 && capability < MIGRATION_CAPABILITY__MAX);
|
|
|
|
/* Validate only new capabilities to keep compatibility. */
|
|
|
|
switch (capability) {
|
|
|
|
case MIGRATION_CAPABILITY_X_IGNORE_SHARED:
|
2024-02-29 18:30:01 +03:00
|
|
|
case MIGRATION_CAPABILITY_MAPPED_RAM:
|
2019-02-15 20:45:48 +03:00
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint32_t get_validatable_capabilities_count(void)
|
|
|
|
{
|
|
|
|
MigrationState *s = migrate_get_current();
|
|
|
|
uint32_t result = 0;
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
|
2023-03-01 20:26:59 +03:00
|
|
|
if (should_validate_capability(i) && s->capabilities[i]) {
|
2019-02-15 20:45:48 +03:00
|
|
|
result++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2017-09-25 14:29:12 +03:00
|
|
|
static int configuration_pre_save(void *opaque)
|
2015-05-13 19:17:43 +03:00
|
|
|
{
|
|
|
|
SaveState *state = opaque;
|
|
|
|
const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
|
2019-02-15 20:45:48 +03:00
|
|
|
MigrationState *s = migrate_get_current();
|
|
|
|
int i, j;
|
2015-05-13 19:17:43 +03:00
|
|
|
|
|
|
|
state->len = strlen(current_name);
|
|
|
|
state->name = current_name;
|
2017-04-24 22:03:48 +03:00
|
|
|
state->target_page_bits = qemu_target_page_bits();
|
2017-09-25 14:29:12 +03:00
|
|
|
|
2019-02-15 20:45:48 +03:00
|
|
|
state->caps_count = get_validatable_capabilities_count();
|
|
|
|
state->capabilities = g_renew(MigrationCapability, state->capabilities,
|
|
|
|
state->caps_count);
|
|
|
|
for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
|
2023-03-01 20:26:59 +03:00
|
|
|
if (should_validate_capability(i) && s->capabilities[i]) {
|
2019-02-15 20:45:48 +03:00
|
|
|
state->capabilities[j++] = i;
|
|
|
|
}
|
|
|
|
}
|
2019-09-03 19:22:44 +03:00
|
|
|
state->uuid = qemu_uuid;
|
2019-02-15 20:45:48 +03:00
|
|
|
|
2017-09-25 14:29:12 +03:00
|
|
|
return 0;
|
2016-10-24 18:26:50 +03:00
|
|
|
}
|
|
|
|
|
2020-12-31 09:10:19 +03:00
|
|
|
static int configuration_post_save(void *opaque)
|
|
|
|
{
|
|
|
|
SaveState *state = opaque;
|
|
|
|
|
|
|
|
g_free(state->capabilities);
|
|
|
|
state->capabilities = NULL;
|
|
|
|
state->caps_count = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-10-24 18:26:50 +03:00
|
|
|
static int configuration_pre_load(void *opaque)
|
|
|
|
{
|
|
|
|
SaveState *state = opaque;
|
|
|
|
|
|
|
|
/* If there is no target-page-bits subsection it means the source
|
|
|
|
* predates the variable-target-page-bits support and is using the
|
|
|
|
* minimum possible value for this CPU.
|
|
|
|
*/
|
2017-04-24 22:03:48 +03:00
|
|
|
state->target_page_bits = qemu_target_page_bits_min();
|
2016-10-24 18:26:50 +03:00
|
|
|
return 0;
|
2015-05-13 19:17:43 +03:00
|
|
|
}
|
|
|
|
|
2019-02-15 20:45:48 +03:00
|
|
|
/*
 * Compare the capabilities listed in @state (as received) against the
 * local MigrationState for every capability that is subject to validation.
 *
 * Every mismatch is reported; returns true only when all of them agree.
 */
static bool configuration_validate_capabilities(SaveState *state)
{
    bool all_match = true;
    MigrationState *ms = migrate_get_current();
    unsigned long *received_bm;
    int n;

    /* Turn the received list into a bitmap for O(1) lookups below. */
    received_bm = bitmap_new(MIGRATION_CAPABILITY__MAX);
    for (n = 0; n < state->caps_count; n++) {
        set_bit(state->capabilities[n], received_bm);
    }

    for (n = 0; n < MIGRATION_CAPABILITY__MAX; n++) {
        bool on_source, on_target;

        if (should_validate_capability(n)) {
            on_source = test_bit(n, received_bm);
            on_target = ms->capabilities[n];
            if (on_source != on_target) {
                error_report("Capability %s is %s, but received capability is %s",
                             MigrationCapability_str(n),
                             on_target ? "on" : "off",
                             on_source ? "on" : "off");
                /* Keep going so that all failed capabilities are reported */
                all_match = false;
            }
        }
    }

    g_free(received_bm);
    return all_match;
}
|
|
|
|
|
2015-05-13 19:17:43 +03:00
|
|
|
static int configuration_post_load(void *opaque, int version_id)
|
|
|
|
{
|
|
|
|
SaveState *state = opaque;
|
|
|
|
const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
|
2020-12-31 09:10:19 +03:00
|
|
|
int ret = 0;
|
2015-05-13 19:17:43 +03:00
|
|
|
|
|
|
|
if (strncmp(state->name, current_name, state->len) != 0) {
|
2016-02-05 11:33:26 +03:00
|
|
|
error_report("Machine type received is '%.*s' and local is '%s'",
|
|
|
|
(int) state->len, state->name, current_name);
|
2020-12-31 09:10:19 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
2015-05-13 19:17:43 +03:00
|
|
|
}
|
2016-10-24 18:26:50 +03:00
|
|
|
|
2017-04-24 22:03:48 +03:00
|
|
|
if (state->target_page_bits != qemu_target_page_bits()) {
|
2016-10-24 18:26:50 +03:00
|
|
|
error_report("Received TARGET_PAGE_BITS is %d but local is %d",
|
2017-04-24 22:03:48 +03:00
|
|
|
state->target_page_bits, qemu_target_page_bits());
|
2020-12-31 09:10:19 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
2016-10-24 18:26:50 +03:00
|
|
|
}
|
|
|
|
|
2019-02-15 20:45:48 +03:00
|
|
|
if (!configuration_validate_capabilities(state)) {
|
2020-12-31 09:10:19 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
2019-02-15 20:45:48 +03:00
|
|
|
}
|
|
|
|
|
2020-12-31 09:10:19 +03:00
|
|
|
out:
|
|
|
|
g_free((void *)state->name);
|
|
|
|
state->name = NULL;
|
|
|
|
state->len = 0;
|
|
|
|
g_free(state->capabilities);
|
|
|
|
state->capabilities = NULL;
|
|
|
|
state->caps_count = 0;
|
|
|
|
|
|
|
|
return ret;
|
2015-05-13 19:17:43 +03:00
|
|
|
}
|
|
|
|
|
2019-02-15 20:45:48 +03:00
|
|
|
static int get_capability(QEMUFile *f, void *pv, size_t size,
|
|
|
|
const VMStateField *field)
|
|
|
|
{
|
|
|
|
MigrationCapability *capability = pv;
|
|
|
|
char capability_str[UINT8_MAX + 1];
|
|
|
|
uint8_t len;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
len = qemu_get_byte(f);
|
|
|
|
qemu_get_buffer(f, (uint8_t *)capability_str, len);
|
|
|
|
capability_str[len] = '\0';
|
|
|
|
for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
|
|
|
|
if (!strcmp(MigrationCapability_str(i), capability_str)) {
|
|
|
|
*capability = i;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
error_report("Received unknown capability %s", capability_str);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int put_capability(QEMUFile *f, void *pv, size_t size,
|
2020-12-11 20:11:48 +03:00
|
|
|
const VMStateField *field, JSONWriter *vmdesc)
|
2019-02-15 20:45:48 +03:00
|
|
|
{
|
|
|
|
MigrationCapability *capability = pv;
|
|
|
|
const char *capability_str = MigrationCapability_str(*capability);
|
|
|
|
size_t len = strlen(capability_str);
|
|
|
|
assert(len <= UINT8_MAX);
|
|
|
|
|
|
|
|
qemu_put_byte(f, len);
|
|
|
|
qemu_put_buffer(f, (uint8_t *)capability_str, len);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const VMStateInfo vmstate_info_capability = {
|
|
|
|
.name = "capability",
|
|
|
|
.get = get_capability,
|
|
|
|
.put = put_capability,
|
|
|
|
};
|
|
|
|
|
2016-10-24 18:26:50 +03:00
|
|
|
/* The target-page-bits subsection is present only if the
|
|
|
|
* target page size is not the same as the default (ie the
|
|
|
|
* minimum page size for a variable-page-size guest CPU).
|
|
|
|
* If it is present then it contains the actual target page
|
|
|
|
* bits for the machine, and migration will fail if the
|
|
|
|
* two ends don't agree about it.
|
|
|
|
*/
|
|
|
|
/*
 * .needed hook: the subsection is sent only when the current target page
 * bits exceed the minimum for this CPU. @opaque is unused.
 */
static bool vmstate_target_page_bits_needed(void *opaque)
{
    if (qemu_target_page_bits() > qemu_target_page_bits_min()) {
        return true;
    }

    return false;
}
|
|
|
|
|
|
|
|
static const VMStateDescription vmstate_target_page_bits = {
|
|
|
|
.name = "configuration/target-page-bits",
|
|
|
|
.version_id = 1,
|
|
|
|
.minimum_version_id = 1,
|
|
|
|
.needed = vmstate_target_page_bits_needed,
|
2023-12-21 06:16:47 +03:00
|
|
|
.fields = (const VMStateField[]) {
|
2016-10-24 18:26:50 +03:00
|
|
|
VMSTATE_UINT32(target_page_bits, SaveState),
|
|
|
|
VMSTATE_END_OF_LIST()
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2019-02-15 20:45:48 +03:00
|
|
|
/*
 * .needed hook: send the capabilities subsection only when at least one
 * validatable capability is enabled. @opaque is unused.
 */
static bool vmstate_capabilites_needed(void *opaque)
{
    return get_validatable_capabilities_count() != 0;
}
|
|
|
|
|
|
|
|
static const VMStateDescription vmstate_capabilites = {
|
|
|
|
.name = "configuration/capabilities",
|
|
|
|
.version_id = 1,
|
|
|
|
.minimum_version_id = 1,
|
|
|
|
.needed = vmstate_capabilites_needed,
|
2023-12-21 06:16:47 +03:00
|
|
|
.fields = (const VMStateField[]) {
|
2019-02-15 20:45:48 +03:00
|
|
|
VMSTATE_UINT32_V(caps_count, SaveState, 1),
|
|
|
|
VMSTATE_VARRAY_UINT32_ALLOC(capabilities, SaveState, caps_count, 1,
|
|
|
|
vmstate_info_capability,
|
|
|
|
MigrationCapability),
|
|
|
|
VMSTATE_END_OF_LIST()
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2019-09-03 19:22:44 +03:00
|
|
|
static bool vmstate_uuid_needed(void *opaque)
|
|
|
|
{
|
|
|
|
return qemu_uuid_set && migrate_validate_uuid();
|
|
|
|
}
|
|
|
|
|
|
|
|
static int vmstate_uuid_post_load(void *opaque, int version_id)
|
|
|
|
{
|
|
|
|
SaveState *state = opaque;
|
2023-10-26 10:06:34 +03:00
|
|
|
char uuid_src[UUID_STR_LEN];
|
|
|
|
char uuid_dst[UUID_STR_LEN];
|
2019-09-03 19:22:44 +03:00
|
|
|
|
|
|
|
if (!qemu_uuid_set) {
|
|
|
|
/*
|
|
|
|
* It's warning because user might not know UUID in some cases,
|
|
|
|
* e.g. load an old snapshot
|
|
|
|
*/
|
|
|
|
qemu_uuid_unparse(&state->uuid, uuid_src);
|
|
|
|
warn_report("UUID is received %s, but local uuid isn't set",
|
|
|
|
uuid_src);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!qemu_uuid_is_equal(&state->uuid, &qemu_uuid)) {
|
|
|
|
qemu_uuid_unparse(&state->uuid, uuid_src);
|
|
|
|
qemu_uuid_unparse(&qemu_uuid, uuid_dst);
|
|
|
|
error_report("UUID received is %s and local is %s", uuid_src, uuid_dst);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const VMStateDescription vmstate_uuid = {
|
|
|
|
.name = "configuration/uuid",
|
|
|
|
.version_id = 1,
|
|
|
|
.minimum_version_id = 1,
|
|
|
|
.needed = vmstate_uuid_needed,
|
|
|
|
.post_load = vmstate_uuid_post_load,
|
2023-12-21 06:16:47 +03:00
|
|
|
.fields = (const VMStateField[]) {
|
2019-09-03 19:22:44 +03:00
|
|
|
VMSTATE_UINT8_ARRAY_V(uuid.data, SaveState, sizeof(QemuUUID), 1),
|
|
|
|
VMSTATE_END_OF_LIST()
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2015-05-13 19:17:43 +03:00
|
|
|
static const VMStateDescription vmstate_configuration = {
|
|
|
|
.name = "configuration",
|
|
|
|
.version_id = 1,
|
2016-10-24 18:26:50 +03:00
|
|
|
.pre_load = configuration_pre_load,
|
2015-05-13 19:17:43 +03:00
|
|
|
.post_load = configuration_post_load,
|
|
|
|
.pre_save = configuration_pre_save,
|
2020-12-31 09:10:19 +03:00
|
|
|
.post_save = configuration_post_save,
|
2023-12-21 06:16:47 +03:00
|
|
|
.fields = (const VMStateField[]) {
|
2015-05-13 19:17:43 +03:00
|
|
|
VMSTATE_UINT32(len, SaveState),
|
2017-02-03 20:52:17 +03:00
|
|
|
VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len),
|
2015-05-13 19:17:43 +03:00
|
|
|
VMSTATE_END_OF_LIST()
|
|
|
|
},
|
2023-12-21 06:16:47 +03:00
|
|
|
.subsections = (const VMStateDescription * const []) {
|
2016-10-24 18:26:50 +03:00
|
|
|
&vmstate_target_page_bits,
|
2019-02-15 20:45:48 +03:00
|
|
|
&vmstate_capabilites,
|
2019-09-03 19:22:44 +03:00
|
|
|
&vmstate_uuid,
|
2016-10-24 18:26:50 +03:00
|
|
|
NULL
|
|
|
|
}
|
2015-05-13 14:37:04 +03:00
|
|
|
};
|
2008-11-12 00:33:36 +03:00
|
|
|
|
2014-06-20 17:26:08 +04:00
|
|
|
static void dump_vmstate_vmsd(FILE *out_file,
|
|
|
|
const VMStateDescription *vmsd, int indent,
|
|
|
|
bool is_subsection);
|
|
|
|
|
|
|
|
static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field,
|
|
|
|
int indent)
|
|
|
|
{
|
|
|
|
fprintf(out_file, "%*s{\n", indent, "");
|
|
|
|
indent += 2;
|
|
|
|
fprintf(out_file, "%*s\"field\": \"%s\",\n", indent, "", field->name);
|
|
|
|
fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
|
|
|
|
field->version_id);
|
|
|
|
fprintf(out_file, "%*s\"field_exists\": %s,\n", indent, "",
|
|
|
|
field->field_exists ? "true" : "false");
|
migration/vmstate-dump: Dump array size too as "num"
For VMS_ARRAY typed vmsd fields, also dump the number of entries in the
array in -vmstate-dump.
Without such information, vmstate static checker can report false negatives
of incompatible vmsd on VMS_ARRAY typed fields, when the src/dst do not
have the same type of array defined. It's because in the checker we only
check against size of fields within a VMSD field.
One example: e1000e used to have a field defined as a boolean array with 5
entries, then removed it and replaced it with UNUSED (in 31e3f318c8b535):
- VMSTATE_BOOL_ARRAY(core.eitr_intr_pending, E1000EState,
- E1000E_MSIX_VEC_NUM),
+ VMSTATE_UNUSED(E1000E_MSIX_VEC_NUM),
It's a legal replacement but vmstate static checker is not happy with it,
because it checks only against the "size" field between the two
fields (here one is BOOL_ARRAY, the other is UNUSED):
For BOOL_ARRAY:
{
"field": "core.eitr_intr_pending",
"version_id": 0,
"field_exists": false,
"size": 1
},
For UNUSED:
{
"field": "unused",
"version_id": 0,
"field_exists": false,
"size": 5
},
It's not the script to blame because there's just not enough information
dumped to show the total size of the entry for an array. Add it.
Note that this will not break old vmstate checker because the field will
just be ignored.
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
2023-04-25 21:05:43 +03:00
|
|
|
if (field->flags & VMS_ARRAY) {
|
|
|
|
fprintf(out_file, "%*s\"num\": %d,\n", indent, "", field->num);
|
|
|
|
}
|
2014-06-20 17:26:08 +04:00
|
|
|
fprintf(out_file, "%*s\"size\": %zu", indent, "", field->size);
|
|
|
|
if (field->vmsd != NULL) {
|
|
|
|
fprintf(out_file, ",\n");
|
|
|
|
dump_vmstate_vmsd(out_file, field->vmsd, indent, false);
|
|
|
|
}
|
|
|
|
fprintf(out_file, "\n%*s}", indent - 2, "");
|
|
|
|
}
|
|
|
|
|
|
|
|
static void dump_vmstate_vmss(FILE *out_file,
|
2023-12-21 06:15:42 +03:00
|
|
|
const VMStateDescription *subsection,
|
2014-06-20 17:26:08 +04:00
|
|
|
int indent)
|
|
|
|
{
|
2023-12-21 06:15:42 +03:00
|
|
|
if (subsection != NULL) {
|
|
|
|
dump_vmstate_vmsd(out_file, subsection, indent, true);
|
2014-06-20 17:26:08 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void dump_vmstate_vmsd(FILE *out_file,
|
|
|
|
const VMStateDescription *vmsd, int indent,
|
|
|
|
bool is_subsection)
|
|
|
|
{
|
|
|
|
if (is_subsection) {
|
|
|
|
fprintf(out_file, "%*s{\n", indent, "");
|
|
|
|
} else {
|
|
|
|
fprintf(out_file, "%*s\"%s\": {\n", indent, "", "Description");
|
|
|
|
}
|
|
|
|
indent += 2;
|
|
|
|
fprintf(out_file, "%*s\"name\": \"%s\",\n", indent, "", vmsd->name);
|
|
|
|
fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
|
|
|
|
vmsd->version_id);
|
|
|
|
fprintf(out_file, "%*s\"minimum_version_id\": %d", indent, "",
|
|
|
|
vmsd->minimum_version_id);
|
|
|
|
if (vmsd->fields != NULL) {
|
|
|
|
const VMStateField *field = vmsd->fields;
|
|
|
|
bool first;
|
|
|
|
|
|
|
|
fprintf(out_file, ",\n%*s\"Fields\": [\n", indent, "");
|
|
|
|
first = true;
|
|
|
|
while (field->name != NULL) {
|
|
|
|
if (field->flags & VMS_MUST_EXIST) {
|
|
|
|
/* Ignore VMSTATE_VALIDATE bits; these don't get migrated */
|
|
|
|
field++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!first) {
|
|
|
|
fprintf(out_file, ",\n");
|
|
|
|
}
|
|
|
|
dump_vmstate_vmsf(out_file, field, indent + 2);
|
|
|
|
field++;
|
|
|
|
first = false;
|
|
|
|
}
|
2022-01-13 22:44:51 +03:00
|
|
|
assert(field->flags == VMS_END);
|
2014-06-20 17:26:08 +04:00
|
|
|
fprintf(out_file, "\n%*s]", indent, "");
|
|
|
|
}
|
|
|
|
if (vmsd->subsections != NULL) {
|
2023-12-21 06:15:42 +03:00
|
|
|
const VMStateDescription * const *subsection = vmsd->subsections;
|
2014-06-20 17:26:08 +04:00
|
|
|
bool first;
|
|
|
|
|
|
|
|
fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
|
|
|
|
first = true;
|
2014-09-23 16:09:54 +04:00
|
|
|
while (*subsection != NULL) {
|
2014-06-20 17:26:08 +04:00
|
|
|
if (!first) {
|
|
|
|
fprintf(out_file, ",\n");
|
|
|
|
}
|
2023-12-21 06:15:42 +03:00
|
|
|
dump_vmstate_vmss(out_file, *subsection, indent + 2);
|
2014-06-20 17:26:08 +04:00
|
|
|
subsection++;
|
|
|
|
first = false;
|
|
|
|
}
|
|
|
|
fprintf(out_file, "\n%*s]", indent, "");
|
|
|
|
}
|
|
|
|
fprintf(out_file, "\n%*s}", indent - 2, "");
|
|
|
|
}
|
|
|
|
|
|
|
|
static void dump_machine_type(FILE *out_file)
|
|
|
|
{
|
|
|
|
MachineClass *mc;
|
|
|
|
|
|
|
|
mc = MACHINE_GET_CLASS(current_machine);
|
|
|
|
|
|
|
|
fprintf(out_file, " \"vmschkmachine\": {\n");
|
|
|
|
fprintf(out_file, " \"Name\": \"%s\"\n", mc->name);
|
|
|
|
fprintf(out_file, " },\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
void dump_vmstate_json_to_file(FILE *out_file)
|
|
|
|
{
|
|
|
|
GSList *list, *elt;
|
|
|
|
bool first;
|
|
|
|
|
|
|
|
fprintf(out_file, "{\n");
|
|
|
|
dump_machine_type(out_file);
|
|
|
|
|
|
|
|
first = true;
|
|
|
|
list = object_class_get_list(TYPE_DEVICE, true);
|
|
|
|
for (elt = list; elt; elt = elt->next) {
|
|
|
|
DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data,
|
|
|
|
TYPE_DEVICE);
|
|
|
|
const char *name;
|
|
|
|
int indent = 2;
|
|
|
|
|
|
|
|
if (!dc->vmsd) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!first) {
|
|
|
|
fprintf(out_file, ",\n");
|
|
|
|
}
|
|
|
|
name = object_class_get_name(OBJECT_CLASS(dc));
|
|
|
|
fprintf(out_file, "%*s\"%s\": {\n", indent, "", name);
|
|
|
|
indent += 2;
|
|
|
|
fprintf(out_file, "%*s\"Name\": \"%s\",\n", indent, "", name);
|
|
|
|
fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
|
|
|
|
dc->vmsd->version_id);
|
|
|
|
fprintf(out_file, "%*s\"minimum_version_id\": %d,\n", indent, "",
|
|
|
|
dc->vmsd->minimum_version_id);
|
|
|
|
|
|
|
|
dump_vmstate_vmsd(out_file, dc->vmsd, indent, false);
|
|
|
|
|
|
|
|
fprintf(out_file, "\n%*s}", indent - 2, "");
|
|
|
|
first = false;
|
|
|
|
}
|
|
|
|
fprintf(out_file, "\n}\n");
|
|
|
|
fclose(out_file);
|
2020-02-19 12:47:05 +03:00
|
|
|
g_slist_free(list);
|
2014-06-20 17:26:08 +04:00
|
|
|
}
|
|
|
|
|
2019-10-16 05:29:31 +03:00
|
|
|
static uint32_t calculate_new_instance_id(const char *idstr)
|
2009-09-01 04:12:31 +04:00
|
|
|
{
|
|
|
|
SaveStateEntry *se;
|
2019-10-16 05:29:31 +03:00
|
|
|
uint32_t instance_id = 0;
|
2009-09-01 04:12:31 +04:00
|
|
|
|
2015-05-13 14:37:04 +03:00
|
|
|
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
|
2009-09-01 04:12:31 +04:00
|
|
|
if (strcmp(idstr, se->idstr) == 0
|
|
|
|
&& instance_id <= se->instance_id) {
|
|
|
|
instance_id = se->instance_id + 1;
|
|
|
|
}
|
|
|
|
}
|
2019-10-16 05:29:31 +03:00
|
|
|
/* Make sure we never loop over without being noticed */
|
|
|
|
assert(instance_id != VMSTATE_INSTANCE_ID_ANY);
|
2009-09-01 04:12:31 +04:00
|
|
|
return instance_id;
|
|
|
|
}
|
|
|
|
|
2010-06-25 21:09:14 +04:00
|
|
|
static int calculate_compat_instance_id(const char *idstr)
|
|
|
|
{
|
|
|
|
SaveStateEntry *se;
|
|
|
|
int instance_id = 0;
|
|
|
|
|
2015-05-13 14:37:04 +03:00
|
|
|
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
|
2013-11-28 18:01:13 +04:00
|
|
|
if (!se->compat) {
|
2010-06-25 21:09:14 +04:00
|
|
|
continue;
|
2013-11-28 18:01:13 +04:00
|
|
|
}
|
2010-06-25 21:09:14 +04:00
|
|
|
|
|
|
|
if (strcmp(idstr, se->compat->idstr) == 0
|
|
|
|
&& instance_id <= se->compat->instance_id) {
|
|
|
|
instance_id = se->compat->instance_id + 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return instance_id;
|
|
|
|
}
|
|
|
|
|
2017-01-06 07:06:12 +03:00
|
|
|
/*
 * Priority of @se: taken from its vmsd when it has one, MIG_PRI_DEFAULT
 * otherwise.
 */
static inline MigrationPriority save_state_priority(SaveStateEntry *se)
{
    return se->vmsd ? se->vmsd->priority : MIG_PRI_DEFAULT;
}
|
|
|
|
|
|
|
|
/*
 * Link @nse into savevm_state.handlers.
 *
 * The entry is placed immediately before the head entry of the nearest
 * lower priority that has one, or at the tail when no lower priority has
 * a head.  If @nse's own priority has no head entry yet, @nse becomes it.
 * Exits the process if an entry with the same idstr and instance_id is
 * already registered.
 */
static void savevm_state_handler_insert(SaveStateEntry *nse)
{
    MigrationPriority priority = save_state_priority(nse);
    SaveStateEntry *lower_head = NULL;
    int pri;

    assert(priority <= MIG_PRI_MAX);

    /*
     * A duplicate would let one object's properties be applied to another
     * and migration would probably fail silently later on, so bail out
     * right away.
     */
    if (find_se(nse->idstr, nse->instance_id)) {
        error_report("%s: Detected duplicate SaveStateEntry: "
                     "id=%s, instance_id=0x%"PRIx32, __func__,
                     nse->idstr, nse->instance_id);
        exit(EXIT_FAILURE);
    }

    /* Walk down from the next lower priority to the first one with a head. */
    for (pri = priority - 1; pri >= 0; pri--) {
        lower_head = savevm_state.handler_pri_head[pri];
        if (lower_head != NULL) {
            assert(save_state_priority(lower_head) < priority);
            break;
        }
    }

    if (lower_head != NULL) {
        QTAILQ_INSERT_BEFORE(lower_head, nse, entry);
    } else {
        QTAILQ_INSERT_TAIL(&savevm_state.handlers, nse, entry);
    }

    if (savevm_state.handler_pri_head[priority] == NULL) {
        savevm_state.handler_pri_head[priority] = nse;
    }
}
|
|
|
|
|
2019-10-17 23:59:52 +03:00
|
|
|
/*
 * Unlink @se from savevm_state.handlers.  When @se is the head entry of
 * its priority, the head moves to the following entry if that one has the
 * same priority, and is cleared otherwise.  @se itself is not freed.
 */
static void savevm_state_handler_remove(SaveStateEntry *se)
{
    MigrationPriority priority = save_state_priority(se);

    if (savevm_state.handler_pri_head[priority] == se) {
        SaveStateEntry *successor = QTAILQ_NEXT(se, entry);
        bool same_priority = successor != NULL &&
                             save_state_priority(successor) == priority;

        savevm_state.handler_pri_head[priority] =
            same_priority ? successor : NULL;
    }

    QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
}
|
|
|
|
|
2008-11-12 00:33:36 +03:00
|
|
|
/* TODO: Individual devices generally have very little idea about the rest
|
|
|
|
of the system, so instance_id should be removed/replaced.
|
|
|
|
Meanwhile pass -1 as instance_id if you do not already have a clearly
|
|
|
|
distinguishing id for all instances of your device class. */
|
2019-08-22 14:54:33 +03:00
|
|
|
int register_savevm_live(const char *idstr,
|
2019-10-16 05:29:31 +03:00
|
|
|
uint32_t instance_id,
|
2008-11-12 00:33:36 +03:00
|
|
|
int version_id,
|
2018-11-14 16:31:39 +03:00
|
|
|
const SaveVMHandlers *ops,
|
2008-11-12 00:33:36 +03:00
|
|
|
void *opaque)
|
|
|
|
{
|
2009-09-01 04:12:31 +04:00
|
|
|
SaveStateEntry *se;
|
2008-11-12 00:33:36 +03:00
|
|
|
|
2015-09-14 14:51:31 +03:00
|
|
|
se = g_new0(SaveStateEntry, 1);
|
2008-11-12 00:33:36 +03:00
|
|
|
se->version_id = version_id;
|
2015-05-13 14:37:04 +03:00
|
|
|
se->section_id = savevm_state.global_section_id++;
|
2012-06-26 20:46:10 +04:00
|
|
|
se->ops = ops;
|
2008-11-12 00:33:36 +03:00
|
|
|
se->opaque = opaque;
|
2009-08-20 21:42:25 +04:00
|
|
|
se->vmsd = NULL;
|
2012-01-25 16:24:51 +04:00
|
|
|
/* if this is a live_savem then set is_ram */
|
2017-06-28 12:52:24 +03:00
|
|
|
if (ops->save_setup != NULL) {
|
2012-01-25 16:24:51 +04:00
|
|
|
se->is_ram = 1;
|
|
|
|
}
|
2008-11-12 00:33:36 +03:00
|
|
|
|
2010-06-25 21:09:14 +04:00
|
|
|
pstrcat(se->idstr, sizeof(se->idstr), idstr);
|
|
|
|
|
2019-10-16 05:29:30 +03:00
|
|
|
if (instance_id == VMSTATE_INSTANCE_ID_ANY) {
|
2010-06-25 21:09:14 +04:00
|
|
|
se->instance_id = calculate_new_instance_id(se->idstr);
|
2009-09-01 04:12:31 +04:00
|
|
|
} else {
|
|
|
|
se->instance_id = instance_id;
|
2008-11-12 00:33:36 +03:00
|
|
|
}
|
2010-06-25 21:09:14 +04:00
|
|
|
assert(!se->compat || se->instance_id == 0);
|
2017-01-06 07:06:12 +03:00
|
|
|
savevm_state_handler_insert(se);
|
2008-11-12 00:33:36 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-08-28 15:02:32 +03:00
|
|
|
/*
 * Remove and free every handler whose idstr matches @idstr (prefixed with
 * "<id>/" when @obj yields an id) and whose opaque pointer equals @opaque.
 */
void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque)
{
    char full_id[256] = "";
    char *obj_id = obj ? vmstate_if_get_id(obj) : NULL;
    SaveStateEntry *cur, *next;

    if (obj_id) {
        pstrcpy(full_id, sizeof(full_id), obj_id);
        pstrcat(full_id, sizeof(full_id), "/");
        g_free(obj_id);
    }
    pstrcat(full_id, sizeof(full_id), idstr);

    QTAILQ_FOREACH_SAFE(cur, &savevm_state.handlers, entry, next) {
        if (strcmp(cur->idstr, full_id) != 0 || cur->opaque != opaque) {
            continue;
        }
        savevm_state_handler_remove(cur);
        g_free(cur->compat);
        g_free(cur);
    }
}
|
|
|
|
|
2022-01-13 22:44:52 +03:00
|
|
|
/*
|
|
|
|
* Perform some basic checks on vmsd's at registration
|
|
|
|
* time.
|
|
|
|
*/
|
|
|
|
static void vmstate_check(const VMStateDescription *vmsd)
|
|
|
|
{
|
|
|
|
const VMStateField *field = vmsd->fields;
|
2023-12-21 06:15:42 +03:00
|
|
|
const VMStateDescription * const *subsection = vmsd->subsections;
|
2022-01-13 22:44:52 +03:00
|
|
|
|
|
|
|
if (field) {
|
|
|
|
while (field->name) {
|
|
|
|
if (field->flags & (VMS_STRUCT | VMS_VSTRUCT)) {
|
|
|
|
/* Recurse to sub structures */
|
|
|
|
vmstate_check(field->vmsd);
|
|
|
|
}
|
|
|
|
/* Carry on */
|
|
|
|
field++;
|
|
|
|
}
|
|
|
|
/* Check for the end of field list canary */
|
|
|
|
if (field->flags != VMS_END) {
|
|
|
|
error_report("VMSTATE not ending with VMS_END: %s", vmsd->name);
|
|
|
|
g_assert_not_reached();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
while (subsection && *subsection) {
|
|
|
|
/*
|
|
|
|
* The name of a subsection should start with the name of the
|
|
|
|
* current object.
|
|
|
|
*/
|
|
|
|
assert(!strncmp(vmsd->name, (*subsection)->name, strlen(vmsd->name)));
|
|
|
|
vmstate_check(*subsection);
|
|
|
|
subsection++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-10-20 12:07:25 +03:00
|
|
|
/*
|
|
|
|
* See comment in hw/intc/xics.c:icp_realize()
|
|
|
|
*
|
|
|
|
* This function can be removed when
|
|
|
|
* pre_2_10_vmstate_register_dummy_icp() is removed.
|
|
|
|
*/
|
|
|
|
int vmstate_replace_hack_for_ppc(VMStateIf *obj, int instance_id,
|
|
|
|
const VMStateDescription *vmsd,
|
|
|
|
void *opaque)
|
|
|
|
{
|
|
|
|
SaveStateEntry *se = find_se(vmsd->name, instance_id);
|
|
|
|
|
|
|
|
if (se) {
|
|
|
|
savevm_state_handler_remove(se);
|
|
|
|
}
|
|
|
|
return vmstate_register(obj, instance_id, vmsd, opaque);
|
|
|
|
}
|
|
|
|
|
2019-10-16 05:29:31 +03:00
|
|
|
int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id,
|
2010-05-15 15:32:40 +04:00
|
|
|
const VMStateDescription *vmsd,
|
|
|
|
void *opaque, int alias_id,
|
2017-02-02 15:59:54 +03:00
|
|
|
int required_for_version,
|
|
|
|
Error **errp)
|
2009-08-20 21:42:25 +04:00
|
|
|
{
|
2009-09-01 04:12:31 +04:00
|
|
|
SaveStateEntry *se;
|
2009-08-20 21:42:25 +04:00
|
|
|
|
2010-05-15 15:32:40 +04:00
|
|
|
/* If this triggers, alias support can be dropped for the vmsd. */
|
|
|
|
assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);
|
|
|
|
|
2015-09-14 14:51:31 +03:00
|
|
|
se = g_new0(SaveStateEntry, 1);
|
2009-08-20 21:42:25 +04:00
|
|
|
se->version_id = vmsd->version_id;
|
2015-05-13 14:37:04 +03:00
|
|
|
se->section_id = savevm_state.global_section_id++;
|
2009-08-20 21:42:25 +04:00
|
|
|
se->opaque = opaque;
|
|
|
|
se->vmsd = vmsd;
|
2010-05-15 15:32:40 +04:00
|
|
|
se->alias_id = alias_id;
|
2009-08-20 21:42:25 +04:00
|
|
|
|
2019-08-28 15:02:32 +03:00
|
|
|
if (obj) {
|
|
|
|
char *id = vmstate_if_get_id(obj);
|
2010-06-25 21:09:14 +04:00
|
|
|
if (id) {
|
2017-02-02 15:59:55 +03:00
|
|
|
if (snprintf(se->idstr, sizeof(se->idstr), "%s/", id) >=
|
|
|
|
sizeof(se->idstr)) {
|
|
|
|
error_setg(errp, "Path too long for VMState (%s)", id);
|
|
|
|
g_free(id);
|
|
|
|
g_free(se);
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|
2017-02-21 17:14:51 +03:00
|
|
|
g_free(id);
|
2010-06-25 21:09:14 +04:00
|
|
|
|
2015-09-14 14:51:31 +03:00
|
|
|
se->compat = g_new0(CompatEntry, 1);
|
2010-06-25 21:09:14 +04:00
|
|
|
pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
|
2019-10-16 05:29:30 +03:00
|
|
|
se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ?
|
2010-06-25 21:09:14 +04:00
|
|
|
calculate_compat_instance_id(vmsd->name) : instance_id;
|
2019-10-16 05:29:30 +03:00
|
|
|
instance_id = VMSTATE_INSTANCE_ID_ANY;
|
2010-06-25 21:09:14 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);
|
|
|
|
|
2019-10-16 05:29:30 +03:00
|
|
|
if (instance_id == VMSTATE_INSTANCE_ID_ANY) {
|
2010-06-25 21:09:14 +04:00
|
|
|
se->instance_id = calculate_new_instance_id(se->idstr);
|
2009-09-01 04:12:31 +04:00
|
|
|
} else {
|
|
|
|
se->instance_id = instance_id;
|
2009-08-20 21:42:25 +04:00
|
|
|
}
|
2022-01-13 22:44:52 +03:00
|
|
|
|
|
|
|
/* Perform a recursive sanity check during the test runs */
|
|
|
|
if (qtest_enabled()) {
|
|
|
|
vmstate_check(vmsd);
|
|
|
|
}
|
2010-06-25 21:09:14 +04:00
|
|
|
assert(!se->compat || se->instance_id == 0);
|
2017-01-06 07:06:12 +03:00
|
|
|
savevm_state_handler_insert(se);
|
2009-08-20 21:42:25 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-08-28 15:02:32 +03:00
|
|
|
/*
 * Remove and free every handler registered with exactly this @vmsd and
 * @opaque pair.  @obj is not consulted.
 */
void vmstate_unregister(VMStateIf *obj, const VMStateDescription *vmsd,
                        void *opaque)
{
    SaveStateEntry *cur, *next;

    QTAILQ_FOREACH_SAFE(cur, &savevm_state.handlers, entry, next) {
        if (cur->vmsd != vmsd || cur->opaque != opaque) {
            continue;
        }
        savevm_state_handler_remove(cur);
        g_free(cur->compat);
        g_free(cur);
    }
}
|
|
|
|
|
2017-05-24 10:28:47 +03:00
|
|
|
/*
 * Load the state of @se from @f: through vmstate_load_state() when the
 * entry has a vmsd, through its ops->load_state callback otherwise.
 * Returns whatever the chosen loader returns.
 */
static int vmstate_load(QEMUFile *f, SaveStateEntry *se)
{
    const VMStateDescription *vmsd = se->vmsd;

    trace_vmstate_load(se->idstr, vmsd ? vmsd->name : "(old)");

    if (vmsd) {
        return vmstate_load_state(f, vmsd, se->opaque, se->load_version_id);
    }

    /* Old style: no description, only callbacks. */
    return se->ops->load_state(f, se->opaque, se->load_version_id);
}
|
|
|
|
|
2020-12-11 20:11:48 +03:00
|
|
|
static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se,
|
|
|
|
JSONWriter *vmdesc)
|
2015-01-22 17:01:39 +03:00
|
|
|
{
|
2023-10-25 12:11:11 +03:00
|
|
|
uint64_t old_offset = qemu_file_transferred(f);
|
2015-01-22 17:01:39 +03:00
|
|
|
se->ops->save_state(f, se->opaque);
|
2023-10-25 12:11:11 +03:00
|
|
|
uint64_t size = qemu_file_transferred(f) - old_offset;
|
2015-01-22 17:01:39 +03:00
|
|
|
|
|
|
|
if (vmdesc) {
|
2020-12-11 20:11:48 +03:00
|
|
|
json_writer_int64(vmdesc, "size", size);
|
|
|
|
json_writer_start_array(vmdesc, "fields");
|
|
|
|
json_writer_start_object(vmdesc, NULL);
|
|
|
|
json_writer_str(vmdesc, "name", "data");
|
|
|
|
json_writer_int64(vmdesc, "size", size);
|
|
|
|
json_writer_str(vmdesc, "type", "buffer");
|
|
|
|
json_writer_end_object(vmdesc);
|
|
|
|
json_writer_end_array(vmdesc);
|
2015-01-22 17:01:39 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-05-19 14:29:50 +03:00
|
|
|
/*
|
|
|
|
* Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL)
|
|
|
|
*/
|
|
|
|
static void save_section_header(QEMUFile *f, SaveStateEntry *se,
|
|
|
|
uint8_t section_type)
|
|
|
|
{
|
|
|
|
qemu_put_byte(f, section_type);
|
|
|
|
qemu_put_be32(f, se->section_id);
|
|
|
|
|
|
|
|
if (section_type == QEMU_VM_SECTION_FULL ||
|
|
|
|
section_type == QEMU_VM_SECTION_START) {
|
|
|
|
/* ID string */
|
|
|
|
size_t len = strlen(se->idstr);
|
|
|
|
qemu_put_byte(f, len);
|
|
|
|
qemu_put_buffer(f, (uint8_t *)se->idstr, len);
|
|
|
|
|
|
|
|
qemu_put_be32(f, se->instance_id);
|
|
|
|
qemu_put_be32(f, se->version_id);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-05-19 14:29:52 +03:00
|
|
|
/*
|
|
|
|
* Write a footer onto device sections that catches cases misformatted device
|
|
|
|
* sections.
|
|
|
|
*/
|
|
|
|
static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
|
|
|
|
{
|
2017-06-27 07:10:17 +03:00
|
|
|
if (migrate_get_current()->send_section_footer) {
|
2015-05-19 14:29:52 +03:00
|
|
|
qemu_put_byte(f, QEMU_VM_SECTION_FOOTER);
|
|
|
|
qemu_put_be32(f, se->section_id);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-03-20 09:49:01 +03:00
|
|
|
static int vmstate_save(QEMUFile *f, SaveStateEntry *se, JSONWriter *vmdesc,
|
|
|
|
Error **errp)
|
2023-01-17 14:22:42 +03:00
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
|
|
|
|
return 0;
|
|
|
|
}
|
2023-10-24 11:40:38 +03:00
|
|
|
if (se->vmsd && !vmstate_section_needed(se->vmsd, se->opaque)) {
|
2023-01-17 14:22:42 +03:00
|
|
|
trace_savevm_section_skip(se->idstr, se->section_id);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
trace_savevm_section_start(se->idstr, se->section_id);
|
|
|
|
save_section_header(f, se, QEMU_VM_SECTION_FULL);
|
|
|
|
if (vmdesc) {
|
|
|
|
json_writer_start_object(vmdesc, NULL);
|
|
|
|
json_writer_str(vmdesc, "name", se->idstr);
|
|
|
|
json_writer_int64(vmdesc, "instance_id", se->instance_id);
|
|
|
|
}
|
|
|
|
|
|
|
|
trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
|
|
|
|
if (!se->vmsd) {
|
|
|
|
vmstate_save_old_style(f, se, vmdesc);
|
|
|
|
} else {
|
2024-03-20 09:49:01 +03:00
|
|
|
ret = vmstate_save_state_with_err(f, se->vmsd, se->opaque, vmdesc,
|
|
|
|
errp);
|
2023-01-17 14:22:42 +03:00
|
|
|
if (ret) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
trace_savevm_section_end(se->idstr, se->section_id, 0);
|
|
|
|
save_section_footer(f, se);
|
|
|
|
if (vmdesc) {
|
|
|
|
json_writer_end_object(vmdesc);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2015-11-05 21:10:45 +03:00
|
|
|
/**
|
|
|
|
* qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the
|
|
|
|
* command and associated data.
|
|
|
|
*
|
|
|
|
* @f: File to send command on
|
|
|
|
* @command: Command type to send
|
|
|
|
* @len: Length of associated data
|
|
|
|
* @data: Data associated with command.
|
|
|
|
*/
|
2017-04-20 15:48:46 +03:00
|
|
|
static void qemu_savevm_command_send(QEMUFile *f,
|
|
|
|
enum qemu_vm_cmd command,
|
|
|
|
uint16_t len,
|
|
|
|
uint8_t *data)
|
2015-11-05 21:10:45 +03:00
|
|
|
{
|
|
|
|
trace_savevm_command_send(command, len);
|
|
|
|
qemu_put_byte(f, QEMU_VM_COMMAND);
|
|
|
|
qemu_put_be16(f, (uint16_t)command);
|
|
|
|
qemu_put_be16(f, len);
|
|
|
|
qemu_put_buffer(f, data, len);
|
|
|
|
qemu_fflush(f);
|
|
|
|
}
|
|
|
|
|
2018-09-03 07:38:47 +03:00
|
|
|
/* Send a MIG_CMD_ENABLE_COLO command, which has no payload, on @f. */
void qemu_savevm_send_colo_enable(QEMUFile *f)
{
    trace_savevm_send_colo_enable();

    qemu_savevm_command_send(f, MIG_CMD_ENABLE_COLO, 0, NULL);
}
|
|
|
|
|
2015-11-05 21:10:46 +03:00
|
|
|
/* Send a MIG_CMD_PING command on @f carrying @value as a big-endian word. */
void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
{
    uint32_t be_value = cpu_to_be32(value);

    trace_savevm_send_ping(value);
    qemu_savevm_command_send(f, MIG_CMD_PING, sizeof(be_value),
                             (uint8_t *)&be_value);
}
|
|
|
|
|
|
|
|
/* Send a MIG_CMD_OPEN_RETURN_PATH command, which has no payload, on @f. */
void qemu_savevm_send_open_return_path(QEMUFile *f)
{
    trace_savevm_send_open_return_path();

    qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
}
|
|
|
|
|
2015-11-05 21:10:53 +03:00
|
|
|
/* We have a buffer of data to send; we don't want that all to be loaded
|
|
|
|
* by the command itself, so the command contains just the length of the
|
|
|
|
* extra buffer that we then send straight after it.
|
|
|
|
* TODO: Must be a better way to organise that
|
|
|
|
*
|
|
|
|
* Returns:
|
|
|
|
* 0 on success
|
|
|
|
* -ve on error
|
|
|
|
*/
|
2016-04-27 13:05:01 +03:00
|
|
|
int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
|
2015-11-05 21:10:53 +03:00
|
|
|
{
|
|
|
|
uint32_t tmp;
|
2023-10-03 09:55:38 +03:00
|
|
|
MigrationState *ms = migrate_get_current();
|
|
|
|
Error *local_err = NULL;
|
2015-11-05 21:10:53 +03:00
|
|
|
|
|
|
|
if (len > MAX_VM_CMD_PACKAGED_SIZE) {
|
2023-10-03 09:55:38 +03:00
|
|
|
error_setg(&local_err, "%s: Unreasonably large packaged state: %zu",
|
2015-11-05 21:10:53 +03:00
|
|
|
__func__, len);
|
2023-10-03 09:55:38 +03:00
|
|
|
migrate_set_error(ms, local_err);
|
|
|
|
error_report_err(local_err);
|
2015-11-05 21:10:53 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
tmp = cpu_to_be32(len);
|
|
|
|
|
|
|
|
trace_qemu_savevm_send_packaged();
|
|
|
|
qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *)&tmp);
|
|
|
|
|
2016-04-27 13:05:01 +03:00
|
|
|
qemu_put_buffer(f, buf, len);
|
2015-11-05 21:10:53 +03:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-11-05 21:10:52 +03:00
|
|
|
/* Send prior to any postcopy transfer */
|
|
|
|
void qemu_savevm_send_postcopy_advise(QEMUFile *f)
|
|
|
|
{
|
2017-07-10 19:30:16 +03:00
|
|
|
if (migrate_postcopy_ram()) {
|
|
|
|
uint64_t tmp[2];
|
|
|
|
tmp[0] = cpu_to_be64(ram_pagesize_summary());
|
|
|
|
tmp[1] = cpu_to_be64(qemu_target_page_size());
|
|
|
|
|
|
|
|
trace_qemu_savevm_send_postcopy_advise();
|
|
|
|
qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE,
|
|
|
|
16, (uint8_t *)tmp);
|
|
|
|
} else {
|
|
|
|
qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 0, NULL);
|
|
|
|
}
|
2015-11-05 21:10:52 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Sent prior to starting the destination running in postcopy, discard pages
|
|
|
|
* that have already been sent but redirtied on the source.
|
|
|
|
* CMD_POSTCOPY_RAM_DISCARD consist of:
|
|
|
|
* byte version (0)
|
|
|
|
* byte Length of name field (not including 0)
|
|
|
|
* n x byte RAM block name
|
|
|
|
* byte 0 terminator (just for safety)
|
|
|
|
* n x Byte ranges within the named RAMBlock
|
|
|
|
* be64 Start of the range
|
|
|
|
* be64 Length
|
|
|
|
*
|
|
|
|
* name: RAMBlock name that these entries are part of
|
|
|
|
* len: Number of page entries
|
|
|
|
* start_list: 'len' addresses
|
|
|
|
* length_list: 'len' addresses
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
|
|
|
|
uint16_t len,
|
|
|
|
uint64_t *start_list,
|
|
|
|
uint64_t *length_list)
|
|
|
|
{
|
|
|
|
uint8_t *buf;
|
|
|
|
uint16_t tmplen;
|
|
|
|
uint16_t t;
|
|
|
|
size_t name_len = strlen(name);
|
|
|
|
|
|
|
|
trace_qemu_savevm_send_postcopy_ram_discard(name, len);
|
|
|
|
assert(name_len < 256);
|
|
|
|
buf = g_malloc0(1 + 1 + name_len + 1 + (8 + 8) * len);
|
|
|
|
buf[0] = postcopy_ram_discard_version;
|
|
|
|
buf[1] = name_len;
|
|
|
|
memcpy(buf + 2, name, name_len);
|
|
|
|
tmplen = 2 + name_len;
|
|
|
|
buf[tmplen++] = '\0';
|
|
|
|
|
|
|
|
for (t = 0; t < len; t++) {
|
2016-06-10 19:09:22 +03:00
|
|
|
stq_be_p(buf + tmplen, start_list[t]);
|
2015-11-05 21:10:52 +03:00
|
|
|
tmplen += 8;
|
2016-06-10 19:09:22 +03:00
|
|
|
stq_be_p(buf + tmplen, length_list[t]);
|
2015-11-05 21:10:52 +03:00
|
|
|
tmplen += 8;
|
|
|
|
}
|
|
|
|
qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf);
|
|
|
|
g_free(buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Get the destination into a state where it can receive postcopy data. */
|
|
|
|
void qemu_savevm_send_postcopy_listen(QEMUFile *f)
|
|
|
|
{
|
|
|
|
trace_savevm_send_postcopy_listen();
|
|
|
|
qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Kick the destination into running */
|
|
|
|
void qemu_savevm_send_postcopy_run(QEMUFile *f)
|
|
|
|
{
|
|
|
|
trace_savevm_send_postcopy_run();
|
|
|
|
qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
|
|
|
|
}
|
|
|
|
|
2018-05-02 13:47:29 +03:00
|
|
|
/* Send a MIG_CMD_POSTCOPY_RESUME command, which has no payload, on @f. */
void qemu_savevm_send_postcopy_resume(QEMUFile *f)
{
    trace_savevm_send_postcopy_resume();

    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RESUME, 0, NULL);
}
|
|
|
|
|
2018-05-02 13:47:27 +03:00
|
|
|
/*
 * Send a MIG_CMD_RECV_BITMAP command on @f for @block_name.
 *
 * The payload is one byte holding the length of the name followed by the
 * name itself, without a terminating NUL.  The name must therefore be
 * shorter than 256 bytes; this is asserted, matching the check in
 * qemu_savevm_send_postcopy_ram_discard(), because a longer name would
 * overrun the on-stack payload buffer and could not be encoded in the
 * single length byte.
 */
void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name)
{
    size_t len;
    char buf[256];

    trace_savevm_send_recv_bitmap(block_name);

    len = strlen(block_name);
    /* Length byte plus name must fit into buf. */
    assert(len < sizeof(buf));

    buf[0] = len;
    memcpy(buf + 1, block_name, len);

    qemu_savevm_command_send(f, MIG_CMD_RECV_BITMAP, len + 1, (uint8_t *)buf);
}
|
|
|
|
|
2011-12-05 20:48:01 +04:00
|
|
|
/*
 * Report whether any registered handler has a vmsd marked unmigratable.
 * Returns true and sets @errp for the first such handler found, false
 * when there is none.
 */
bool qemu_savevm_state_blocked(Error **errp)
{
    SaveStateEntry *cur;

    QTAILQ_FOREACH(cur, &savevm_state.handlers, entry) {
        if (!cur->vmsd || !cur->vmsd->unmigratable) {
            continue;
        }
        error_setg(errp, "State blocked by non-migratable device '%s'",
                   cur->idstr);
        return true;
    }

    return false;
}
|
|
|
|
|
2021-02-02 16:55:21 +03:00
|
|
|
/*
 * Prepend to @reasons one newly allocated string for every registered
 * handler whose vmsd is marked unmigratable.
 */
void qemu_savevm_non_migratable_list(strList **reasons)
{
    SaveStateEntry *cur;

    QTAILQ_FOREACH(cur, &savevm_state.handlers, entry) {
        if (!cur->vmsd || !cur->vmsd->unmigratable) {
            continue;
        }
        QAPI_LIST_PREPEND(*reasons,
                          g_strdup_printf("non-migratable device: %s",
                                          cur->idstr));
    }
}
|
|
|
|
|
2015-05-21 15:24:12 +03:00
|
|
|
/*
 * Start a migration stream on @f: create a fresh vmdesc JSON writer on the
 * current migration state, then write the file magic and version.  When
 * send_configuration is set, the configuration section follows and is
 * mirrored into vmdesc.
 */
void qemu_savevm_state_header(QEMUFile *f)
{
    MigrationState *ms = migrate_get_current();

    ms->vmdesc = json_writer_new(false);

    trace_savevm_state_header();
    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
    qemu_put_be32(f, QEMU_VM_FILE_VERSION);

    if (!ms->send_configuration) {
        return;
    }

    qemu_put_byte(f, QEMU_VM_CONFIGURATION);

    /*
     * This starts the main json object and is paired with the
     * json_writer_end_object in
     * qemu_savevm_state_complete_precopy_non_iterable
     */
    json_writer_start_object(ms->vmdesc, NULL);

    json_writer_start_object(ms->vmdesc, "configuration");
    vmstate_save_state(f, &vmstate_configuration, &savevm_state, ms->vmdesc);
    json_writer_end_object(ms->vmdesc);
}
|
|
|
|
|
2020-02-04 08:08:41 +03:00
|
|
|
bool qemu_savevm_state_guest_unplug_pending(void)
|
2019-10-29 14:49:02 +03:00
|
|
|
{
|
|
|
|
SaveStateEntry *se;
|
|
|
|
|
|
|
|
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
|
2019-11-20 18:49:48 +03:00
|
|
|
if (se->vmsd && se->vmsd->dev_unplug_pending &&
|
|
|
|
se->vmsd->dev_unplug_pending(se->opaque)) {
|
2020-02-04 08:08:41 +03:00
|
|
|
return true;
|
2019-10-29 14:49:02 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-04 08:08:41 +03:00
|
|
|
return false;
|
2019-10-29 14:49:02 +03:00
|
|
|
}
|
|
|
|
|
2023-09-06 18:08:51 +03:00
|
|
|
/*
 * Call the save_prepare hook of every registered handler that has one,
 * skipping handlers whose is_active hook reports them inactive.
 *
 * Returns 0 when every hook succeeded; otherwise stops at the first hook
 * that returns a negative value and passes that value back, with @errp
 * handed to the hook.
 */
int qemu_savevm_state_prepare(Error **errp)
{
    SaveStateEntry *cur;

    QTAILQ_FOREACH(cur, &savevm_state.handlers, entry) {
        const SaveVMHandlers *ops = cur->ops;
        int ret;

        if (!ops || !ops->save_prepare) {
            continue;
        }
        if (ops->is_active && !ops->is_active(cur->opaque)) {
            continue;
        }

        ret = ops->save_prepare(cur->opaque, errp);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}
|
|
|
|
|
2024-03-20 09:49:02 +03:00
|
|
|
/*
 * Setup phase of saving the VM state to @f.
 *
 * Records "page_size" and opens the "devices" array in the migration's
 * vmdesc JSON writer, then walks all registered handlers:
 *  - entries whose VMSD has early_setup set are saved right away through
 *    vmstate_save();
 *  - other entries get their save_setup hook called, wrapped in a
 *    QEMU_VM_SECTION_START header and a section footer, unless they have no
 *    such hook or their is_active callback answers false.
 *
 * On the first failure the error is recorded on @f (and, for early-setup
 * entries, on the migration state) and the error value is returned.
 * Otherwise the result of the PRECOPY_NOTIFY_SETUP notification is returned.
 */
int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
{
    ERRP_GUARD();
    MigrationState *ms = migrate_get_current();
    SaveStateEntry *cur;
    int rc = 0;

    json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
    json_writer_start_array(ms->vmdesc, "devices");

    trace_savevm_state_setup();
    QTAILQ_FOREACH(cur, &savevm_state.handlers, entry) {
        if (cur->vmsd && cur->vmsd->early_setup) {
            rc = vmstate_save(f, cur, ms->vmdesc, errp);
            if (rc) {
                migrate_set_error(ms, *errp);
                qemu_file_set_error(f, rc);
                return rc;
            }
            continue;
        }

        if (!cur->ops || !cur->ops->save_setup) {
            continue;
        }
        if (cur->ops->is_active && !cur->ops->is_active(cur->opaque)) {
            continue;
        }

        save_section_header(f, cur, QEMU_VM_SECTION_START);
        rc = cur->ops->save_setup(f, cur->opaque, errp);
        /* The footer is written even when the hook failed. */
        save_section_footer(f, cur);
        if (rc < 0) {
            qemu_file_set_error(f, rc);
            return rc;
        }
    }

    /* A non-zero value left behind by the last handler is returned as is. */
    if (rc != 0) {
        return rc;
    }

    /* TODO: Should we check that errp is set in case of failure ? */
    return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);
}
|
|
|
|
|
2018-05-02 13:47:31 +03:00
|
|
|
/*
 * Invoke the resume_prepare hook of every eligible handler for @s.
 *
 * Entries without ops, without a resume_prepare callback, or whose is_active
 * callback answers false are skipped.
 *
 * Returns 0 if no hook failed, otherwise the first negative hook result.
 */
int qemu_savevm_state_resume_prepare(MigrationState *s)
{
    SaveStateEntry *cur;

    trace_savevm_state_resume_prepare();

    QTAILQ_FOREACH(cur, &savevm_state.handlers, entry) {
        int rc;

        if (!cur->ops || !cur->ops->resume_prepare) {
            continue;
        }
        if (cur->ops->is_active && !cur->ops->is_active(cur->opaque)) {
            continue;
        }

        rc = cur->ops->resume_prepare(s, cur->opaque);
        if (rc < 0) {
            return rc;
        }
    }

    return 0;
}
|
|
|
|
|
2011-09-22 13:02:14 +04:00
|
|
|
/*
|
2011-11-22 14:06:26 +04:00
|
|
|
* this function has three return values:
|
2011-09-22 13:02:14 +04:00
|
|
|
* negative: there was one error, and we have -errno.
|
|
|
|
* 0 : We haven't finished, caller have to go again
|
|
|
|
* 1 : We have finished, we can go to complete phase
|
|
|
|
*/
|
2015-11-05 21:11:14 +03:00
|
|
|
int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
|
2008-11-12 00:33:36 +03:00
|
|
|
{
|
|
|
|
SaveStateEntry *se;
|
migration: Don't serialize devices in qemu_savevm_state_iterate()
Commit 90697be8896c ("live migration: Serialize vmstate saving in stage
2") introduced device serialization in qemu_savevm_state_iterate(). The
rationale behind it was to first complete migration of slower changing
block devices and only then migrate the RAM, to avoid sending fast
changing RAM pages over and over.
This commit was added a long time ago, and while it was useful back
then, it is not the case anymore:
1. Block migration is deprecated, see commit 66db46ca83b8 ("migration:
Deprecate block migration").
2. Today there are other iterative devices besides RAM and block, such
as VFIO, which are registered for migration after RAM. With current
serialization behavior, a fast changing device can block other
devices from sending their data, which may prevent migration from
converging in some cases.
The issue described in item 2 was observed in several VFIO migration
scenarios with switchover-ack capability enabled, where some workload on
the VM prevented RAM from ever reaching a hard zero, thus blocking VFIO
initial pre-copy data from being sent. Hence, destination could not ack
switchover and migration could not converge.
Fix that by not serializing iterative devices in
qemu_savevm_state_iterate().
Note that this still doesn't fully prevent device starvation. As
correctly pointed out by Peter [1], a fast changing device might
constantly consume all allocated bandwidth and block the following
devices. However, this scenario is more likely to happen only if
max-bandwidth is low.
[1] https://lore.kernel.org/qemu-devel/Zd6iw9dBhW6wKNxx@x1n/
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
Reviewed-by: Fabiano Rosas <farosas@suse.de>
Link: https://lore.kernel.org/r/20240304105339.20713-2-avihaih@nvidia.com
Signed-off-by: Peter Xu <peterx@redhat.com>
2024-03-04 13:53:37 +03:00
|
|
|
bool all_finished = true;
|
|
|
|
int ret;
|
2008-11-12 00:33:36 +03:00
|
|
|
|
2014-03-11 03:42:29 +04:00
|
|
|
trace_savevm_state_iterate();
|
2015-05-13 14:37:04 +03:00
|
|
|
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
|
2012-06-28 17:31:37 +04:00
|
|
|
if (!se->ops || !se->ops->save_live_iterate) {
|
2008-11-12 00:33:36 +03:00
|
|
|
continue;
|
2012-06-26 19:19:10 +04:00
|
|
|
}
|
2019-03-27 04:31:30 +03:00
|
|
|
if (se->ops->is_active &&
|
|
|
|
!se->ops->is_active(se->opaque)) {
|
|
|
|
continue;
|
2012-06-27 12:59:15 +04:00
|
|
|
}
|
2019-03-27 04:31:30 +03:00
|
|
|
if (se->ops->is_active_iterate &&
|
|
|
|
!se->ops->is_active_iterate(se->opaque)) {
|
|
|
|
continue;
|
2018-03-13 22:34:01 +03:00
|
|
|
}
|
2015-11-05 21:11:14 +03:00
|
|
|
/*
|
|
|
|
* In the postcopy phase, any device that doesn't know how to
|
|
|
|
* do postcopy should have saved it's state in the _complete
|
|
|
|
* call that's already run, it might get confused if we call
|
|
|
|
* iterate afterwards.
|
|
|
|
*/
|
2017-07-10 19:30:14 +03:00
|
|
|
if (postcopy &&
|
|
|
|
!(se->ops->has_postcopy && se->ops->has_postcopy(se->opaque))) {
|
2015-11-05 21:11:14 +03:00
|
|
|
continue;
|
|
|
|
}
|
2023-05-15 22:56:58 +03:00
|
|
|
if (migration_rate_exceeded(f)) {
|
2012-05-22 02:38:26 +04:00
|
|
|
return 0;
|
|
|
|
}
|
2014-03-07 00:03:37 +04:00
|
|
|
trace_savevm_section_start(se->idstr, se->section_id);
|
2015-05-19 14:29:50 +03:00
|
|
|
|
|
|
|
save_section_header(f, se, QEMU_VM_SECTION_PART);
|
2008-11-12 00:33:36 +03:00
|
|
|
|
2012-06-28 17:31:37 +04:00
|
|
|
ret = se->ops->save_live_iterate(f, se->opaque);
|
2015-01-21 13:14:48 +03:00
|
|
|
trace_savevm_section_end(se->idstr, se->section_id, ret);
|
2015-05-19 14:29:52 +03:00
|
|
|
save_section_footer(f, se);
|
2012-05-22 01:46:44 +04:00
|
|
|
|
2013-02-22 20:36:13 +04:00
|
|
|
if (ret < 0) {
|
2021-12-15 17:14:37 +03:00
|
|
|
error_report("failed to save SaveStateEntry with id(name): "
|
|
|
|
"%d(%s): %d",
|
|
|
|
se->section_id, se->idstr, ret);
|
2013-02-22 20:36:13 +04:00
|
|
|
qemu_file_set_error(f, ret);
|
migration: Don't serialize devices in qemu_savevm_state_iterate()
Commit 90697be8896c ("live migration: Serialize vmstate saving in stage
2") introduced device serialization in qemu_savevm_state_iterate(). The
rationale behind it was to first complete migration of slower changing
block devices and only then migrate the RAM, to avoid sending fast
changing RAM pages over and over.
This commit was added a long time ago, and while it was useful back
then, it is not the case anymore:
1. Block migration is deprecated, see commit 66db46ca83b8 ("migration:
Deprecate block migration").
2. Today there are other iterative devices besides RAM and block, such
as VFIO, which are registered for migration after RAM. With current
serialization behavior, a fast changing device can block other
devices from sending their data, which may prevent migration from
converging in some cases.
The issue described in item 2 was observed in several VFIO migration
scenarios with switchover-ack capability enabled, where some workload on
the VM prevented RAM from ever reaching a hard zero, thus blocking VFIO
initial pre-copy data from being sent. Hence, destination could not ack
switchover and migration could not converge.
Fix that by not serializing iterative devices in
qemu_savevm_state_iterate().
Note that this still doesn't fully prevent device starvation. As
correctly pointed out by Peter [1], a fast changing device might
constantly consume all allocated bandwidth and block the following
devices. However, this scenario is more likely to happen only if
max-bandwidth is low.
[1] https://lore.kernel.org/qemu-devel/Zd6iw9dBhW6wKNxx@x1n/
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
Reviewed-by: Fabiano Rosas <farosas@suse.de>
Link: https://lore.kernel.org/r/20240304105339.20713-2-avihaih@nvidia.com
Signed-off-by: Peter Xu <peterx@redhat.com>
2024-03-04 13:53:37 +03:00
|
|
|
return ret;
|
|
|
|
} else if (!ret) {
|
|
|
|
all_finished = false;
|
2009-12-01 17:19:55 +03:00
|
|
|
}
|
2008-11-12 00:33:36 +03:00
|
|
|
}
|
migration: Don't serialize devices in qemu_savevm_state_iterate()
Commit 90697be8896c ("live migration: Serialize vmstate saving in stage
2") introduced device serialization in qemu_savevm_state_iterate(). The
rationale behind it was to first complete migration of slower changing
block devices and only then migrate the RAM, to avoid sending fast
changing RAM pages over and over.
This commit was added a long time ago, and while it was useful back
then, it is not the case anymore:
1. Block migration is deprecated, see commit 66db46ca83b8 ("migration:
Deprecate block migration").
2. Today there are other iterative devices besides RAM and block, such
as VFIO, which are registered for migration after RAM. With current
serialization behavior, a fast changing device can block other
devices from sending their data, which may prevent migration from
converging in some cases.
The issue described in item 2 was observed in several VFIO migration
scenarios with switchover-ack capability enabled, where some workload on
the VM prevented RAM from ever reaching a hard zero, thus blocking VFIO
initial pre-copy data from being sent. Hence, destination could not ack
switchover and migration could not converge.
Fix that by not serializing iterative devices in
qemu_savevm_state_iterate().
Note that this still doesn't fully prevent device starvation. As
correctly pointed out by Peter [1], a fast changing device might
constantly consume all allocated bandwidth and block the following
devices. However, this scenario is more likely to happen only if
max-bandwidth is low.
[1] https://lore.kernel.org/qemu-devel/Zd6iw9dBhW6wKNxx@x1n/
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
Reviewed-by: Fabiano Rosas <farosas@suse.de>
Link: https://lore.kernel.org/r/20240304105339.20713-2-avihaih@nvidia.com
Signed-off-by: Peter Xu <peterx@redhat.com>
2024-03-04 13:53:37 +03:00
|
|
|
return all_finished;
|
2008-11-12 00:33:36 +03:00
|
|
|
}
|
|
|
|
|
2015-02-23 15:56:42 +03:00
|
|
|
static bool should_send_vmdesc(void)
|
|
|
|
{
|
|
|
|
MachineState *machine = MACHINE(qdev_get_machine());
|
2017-03-21 00:25:28 +03:00
|
|
|
bool in_postcopy = migration_in_postcopy();
|
2015-11-05 21:10:59 +03:00
|
|
|
return !machine->suppress_vmdesc && !in_postcopy;
|
2015-02-23 15:56:42 +03:00
|
|
|
}
|
|
|
|
|
2015-11-05 21:11:00 +03:00
|
|
|
/*
|
|
|
|
* Calls the save_live_complete_postcopy methods
|
|
|
|
* causing the last few pages to be sent immediately and doing any associated
|
|
|
|
* cleanup.
|
|
|
|
* Note postcopy also calls qemu_savevm_state_complete_precopy to complete
|
|
|
|
* all the other devices, but that happens at the point we switch to postcopy.
|
|
|
|
*/
|
|
|
|
void qemu_savevm_state_complete_postcopy(QEMUFile *f)
|
|
|
|
{
|
|
|
|
SaveStateEntry *se;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
|
|
|
|
if (!se->ops || !se->ops->save_live_complete_postcopy) {
|
|
|
|
continue;
|
|
|
|
}
|
2019-08-19 06:28:04 +03:00
|
|
|
if (se->ops->is_active) {
|
2015-11-05 21:11:00 +03:00
|
|
|
if (!se->ops->is_active(se->opaque)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
trace_savevm_section_start(se->idstr, se->section_id);
|
|
|
|
/* Section type */
|
|
|
|
qemu_put_byte(f, QEMU_VM_SECTION_END);
|
|
|
|
qemu_put_be32(f, se->section_id);
|
|
|
|
|
|
|
|
ret = se->ops->save_live_complete_postcopy(f, se->opaque);
|
|
|
|
trace_savevm_section_end(se->idstr, se->section_id, ret);
|
|
|
|
save_section_footer(f, se);
|
|
|
|
if (ret < 0) {
|
|
|
|
qemu_file_set_error(f, ret);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
qemu_put_byte(f, QEMU_VM_EOF);
|
|
|
|
qemu_fflush(f);
|
|
|
|
}
|
|
|
|
|
2019-07-09 17:09:23 +03:00
|
|
|
static
|
2019-07-09 17:09:24 +03:00
|
|
|
int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
|
2008-11-12 00:33:36 +03:00
|
|
|
{
|
migration: Add per vmstate downtime tracepoints
We have a bunch of savevm_section* tracepoints, they're good to analyze
migration stream, but not always suitable if someone would like to analyze
the migration downtime. Two major problems:
- savevm_section* tracepoints are dumping all sections, we only care
about the sections that contribute to the downtime
- They don't have an identifier to show the type of sections, so no way
to filter downtime information either easily.
We can add type into the tracepoints, but instead of doing so, this patch
kept them untouched, instead of adding a bunch of downtime specific
tracepoints, so one can enable "vmstate_downtime*" tracepoints and get a
full picture of how the downtime is distributed across iterative and
non-iterative vmstate save/load.
Note that here both save() and load() need to be traced, because both of
them may contribute to the downtime. The contribution is not a simple "add
them together", though: consider when the src is doing a save() of device1
while the dest can be load()ing for device2, so they can happen
concurrently.
Tracking both sides make sense because device load() and save() can be
imbalanced, one device can save() super fast, but load() super slow, vice
versa. We can't figure that out without tracing both.
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Message-ID: <20231030163346.765724-4-peterx@redhat.com>
2023-10-30 19:33:44 +03:00
|
|
|
int64_t start_ts_each, end_ts_each;
|
2008-11-12 00:33:36 +03:00
|
|
|
SaveStateEntry *se;
|
2011-10-19 17:22:18 +04:00
|
|
|
int ret;
|
2010-03-01 21:10:30 +03:00
|
|
|
|
2015-05-13 14:37:04 +03:00
|
|
|
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
|
2015-11-05 21:11:00 +03:00
|
|
|
if (!se->ops ||
|
2017-07-10 19:30:14 +03:00
|
|
|
(in_postcopy && se->ops->has_postcopy &&
|
|
|
|
se->ops->has_postcopy(se->opaque)) ||
|
2015-11-05 21:11:00 +03:00
|
|
|
!se->ops->save_live_complete_precopy) {
|
2008-11-12 00:33:36 +03:00
|
|
|
continue;
|
2012-06-26 19:19:10 +04:00
|
|
|
}
|
2015-11-11 17:02:27 +03:00
|
|
|
|
2019-08-19 06:28:04 +03:00
|
|
|
if (se->ops->is_active) {
|
2012-06-27 12:59:15 +04:00
|
|
|
if (!se->ops->is_active(se->opaque)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
migration: Add per vmstate downtime tracepoints
We have a bunch of savevm_section* tracepoints, they're good to analyze
migration stream, but not always suitable if someone would like to analyze
the migration downtime. Two major problems:
- savevm_section* tracepoints are dumping all sections, we only care
about the sections that contribute to the downtime
- They don't have an identifier to show the type of sections, so no way
to filter downtime information either easily.
We can add type into the tracepoints, but instead of doing so, this patch
kept them untouched, instead of adding a bunch of downtime specific
tracepoints, so one can enable "vmstate_downtime*" tracepoints and get a
full picture of how the downtime is distributed across iterative and
non-iterative vmstate save/load.
Note that here both save() and load() need to be traced, because both of
them may contribute to the downtime. The contribution is not a simple "add
them together", though: consider when the src is doing a save() of device1
while the dest can be load()ing for device2, so they can happen
concurrently.
Tracking both sides make sense because device load() and save() can be
imbalanced, one device can save() super fast, but load() super slow, vice
versa. We can't figure that out without tracing both.
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Message-ID: <20231030163346.765724-4-peterx@redhat.com>
2023-10-30 19:33:44 +03:00
|
|
|
|
|
|
|
start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
|
2014-03-07 00:03:37 +04:00
|
|
|
trace_savevm_section_start(se->idstr, se->section_id);
|
2015-05-19 14:29:50 +03:00
|
|
|
|
|
|
|
save_section_header(f, se, QEMU_VM_SECTION_END);
|
2008-11-12 00:33:36 +03:00
|
|
|
|
2015-11-05 21:10:41 +03:00
|
|
|
ret = se->ops->save_live_complete_precopy(f, se->opaque);
|
2015-01-21 13:14:48 +03:00
|
|
|
trace_savevm_section_end(se->idstr, se->section_id, ret);
|
2015-05-19 14:29:52 +03:00
|
|
|
save_section_footer(f, se);
|
2011-10-19 17:22:18 +04:00
|
|
|
if (ret < 0) {
|
2013-02-22 20:36:13 +04:00
|
|
|
qemu_file_set_error(f, ret);
|
2017-06-16 19:06:58 +03:00
|
|
|
return -1;
|
2011-10-19 17:22:18 +04:00
|
|
|
}
|
migration: Add per vmstate downtime tracepoints
We have a bunch of savevm_section* tracepoints, they're good to analyze
migration stream, but not always suitable if someone would like to analyze
the migration downtime. Two major problems:
- savevm_section* tracepoints are dumping all sections, we only care
about the sections that contribute to the downtime
- They don't have an identifier to show the type of sections, so no way
to filter downtime information either easily.
We can add type into the tracepoints, but instead of doing so, this patch
kept them untouched, instead of adding a bunch of downtime specific
tracepoints, so one can enable "vmstate_downtime*" tracepoints and get a
full picture of how the downtime is distributed across iterative and
non-iterative vmstate save/load.
Note that here both save() and load() need to be traced, because both of
them may contribute to the downtime. The contribution is not a simple "add
them together", though: consider when the src is doing a save() of device1
while the dest can be load()ing for device2, so they can happen
concurrently.
Tracking both sides make sense because device load() and save() can be
imbalanced, one device can save() super fast, but load() super slow, vice
versa. We can't figure that out without tracing both.
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Message-ID: <20231030163346.765724-4-peterx@redhat.com>
2023-10-30 19:33:44 +03:00
|
|
|
end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
|
|
|
|
trace_vmstate_downtime_save("iterable", se->idstr, se->instance_id,
|
|
|
|
end_ts_each - start_ts_each);
|
2008-11-12 00:33:36 +03:00
|
|
|
}
|
|
|
|
|
2023-10-30 19:33:46 +03:00
|
|
|
trace_vmstate_downtime_checkpoint("src-iterable-saved");
|
|
|
|
|
2019-07-09 17:09:23 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Save every handler that was not already saved during setup, then finish
 * the stream's device description.
 *
 * Steps, in order:
 *  1. vmstate_save() each entry whose VMSD is not flagged early_setup,
 *     emitting a "non-iterable" downtime tracepoint per entry.
 *  2. If @inactivate_disks is set, call bdrv_inactivate_all().
 *  3. Unless @in_postcopy, write QEMU_VM_EOF.
 *  4. Close the "devices" array and the outer object of the vmdesc JSON
 *     writer, and send it when should_send_vmdesc() says so.
 *  5. Free the writer and clear ms->vmdesc.
 *
 * Returns 0 on success.  On failure of step 1 or 2 the error is stored in
 * the migration state, reported, recorded on @f, and the failing value is
 * returned; the later steps are not executed.
 */
int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
                                                    bool in_postcopy,
                                                    bool inactivate_disks)
{
    MigrationState *ms = migrate_get_current();
    JSONWriter *vmdesc = ms->vmdesc;
    Error *local_err = NULL;
    SaveStateEntry *cur;
    int vmdesc_len;
    int rc;

    QTAILQ_FOREACH(cur, &savevm_state.handlers, entry) {
        int64_t begin_us, finish_us;

        if (cur->vmsd && cur->vmsd->early_setup) {
            /* Already saved during qemu_savevm_state_setup(). */
            continue;
        }

        begin_us = qemu_clock_get_us(QEMU_CLOCK_REALTIME);

        rc = vmstate_save(f, cur, vmdesc, &local_err);
        if (rc) {
            migrate_set_error(ms, local_err);
            error_report_err(local_err);
            qemu_file_set_error(f, rc);
            return rc;
        }

        finish_us = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
        trace_vmstate_downtime_save("non-iterable", cur->idstr,
                                    cur->instance_id, finish_us - begin_us);
    }

    if (inactivate_disks) {
        /*
         * Inactivate before sending QEMU_VM_EOF so that the
         * bdrv_activate_all() on the other end won't fail.
         */
        rc = bdrv_inactivate_all();
        if (rc) {
            error_setg(&local_err, "%s: bdrv_inactivate_all() failed (%d)",
                       __func__, rc);
            migrate_set_error(ms, local_err);
            error_report_err(local_err);
            qemu_file_set_error(f, rc);
            return rc;
        }
    }

    if (!in_postcopy) {
        /* Postcopy stream will still be going */
        qemu_put_byte(f, QEMU_VM_EOF);
    }

    /* Closes the "devices" array, then the main JSON object. */
    json_writer_end_array(vmdesc);
    json_writer_end_object(vmdesc);
    vmdesc_len = strlen(json_writer_get(vmdesc));

    if (should_send_vmdesc()) {
        qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
        qemu_put_be32(f, vmdesc_len);
        qemu_put_buffer(f, (uint8_t *)json_writer_get(vmdesc), vmdesc_len);
    }

    /* Free it now to detect any inconsistencies. */
    json_writer_free(vmdesc);
    ms->vmdesc = NULL;

    trace_vmstate_downtime_checkpoint("src-non-iterable-saved");

    return 0;
}
|
|
|
|
|
|
|
|
/*
 * Run the completion steps of a precopy save on @f.
 *
 * @f: stream the state is written to
 * @iterable_only: when true, only the iterable completion step is run and
 *                 the non-iterable step is skipped
 * @inactivate_disks: passed through to the non-iterable completion step
 *
 * Returns the first non-zero result of a completion step, otherwise the
 * result of qemu_fflush() on @f.
 */
int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
                                       bool inactivate_disks)
{
    bool in_postcopy = migration_in_postcopy();
    Error *err = NULL;
    int rc;

    /* A failing notifier is reported but does not stop the completion. */
    if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &err)) {
        error_report_err(err);
    }

    trace_savevm_state_complete_precopy();

    cpu_synchronize_all_states();

    /* The iterable step is skipped only for a full completion in postcopy. */
    if (iterable_only || !in_postcopy) {
        rc = qemu_savevm_state_complete_precopy_iterable(f, in_postcopy);
        if (rc) {
            return rc;
        }
    }

    if (!iterable_only) {
        rc = qemu_savevm_state_complete_precopy_non_iterable(f, in_postcopy,
                                                             inactivate_disks);
        if (rc) {
            return rc;
        }
    }

    return qemu_fflush(f);
}
|
|
|
|
|
2015-11-05 21:10:54 +03:00
|
|
|
/* Give an estimate of the amount left to be transferred,
|
|
|
|
* the result is split into the amount for units that can and
|
|
|
|
* for units that can't do postcopy.
|
|
|
|
*/
|
2023-02-08 16:48:02 +03:00
|
|
|
void qemu_savevm_state_pending_estimate(uint64_t *must_precopy,
|
|
|
|
uint64_t *can_postcopy)
|
2012-09-21 13:18:18 +04:00
|
|
|
{
|
|
|
|
SaveStateEntry *se;
|
2015-11-05 21:10:54 +03:00
|
|
|
|
2023-02-08 16:48:02 +03:00
|
|
|
*must_precopy = 0;
|
|
|
|
*can_postcopy = 0;
|
2015-11-05 21:10:54 +03:00
|
|
|
|
2015-05-13 14:37:04 +03:00
|
|
|
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
|
2023-02-09 16:29:45 +03:00
|
|
|
if (!se->ops || !se->ops->state_pending_estimate) {
|
2012-09-21 13:18:18 +04:00
|
|
|
continue;
|
|
|
|
}
|
2019-08-19 06:28:04 +03:00
|
|
|
if (se->ops->is_active) {
|
2012-09-21 13:18:18 +04:00
|
|
|
if (!se->ops->is_active(se->opaque)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
2023-02-08 16:48:02 +03:00
|
|
|
se->ops->state_pending_estimate(se->opaque, must_precopy, can_postcopy);
|
2022-10-03 03:00:03 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-02-08 16:48:02 +03:00
|
|
|
void qemu_savevm_state_pending_exact(uint64_t *must_precopy,
|
|
|
|
uint64_t *can_postcopy)
|
2022-10-03 03:00:03 +03:00
|
|
|
{
|
|
|
|
SaveStateEntry *se;
|
|
|
|
|
2023-02-08 16:48:02 +03:00
|
|
|
*must_precopy = 0;
|
|
|
|
*can_postcopy = 0;
|
2022-10-03 03:00:03 +03:00
|
|
|
|
|
|
|
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
|
2023-02-09 16:29:45 +03:00
|
|
|
if (!se->ops || !se->ops->state_pending_exact) {
|
2022-10-03 03:00:03 +03:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (se->ops->is_active) {
|
|
|
|
if (!se->ops->is_active(se->opaque)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
2023-02-08 16:48:02 +03:00
|
|
|
se->ops->state_pending_exact(se->opaque, must_precopy, can_postcopy);
|
2012-09-21 13:18:18 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-11-02 10:37:01 +03:00
|
|
|
void qemu_savevm_state_cleanup(void)
|
2009-11-30 20:21:21 +03:00
|
|
|
{
|
|
|
|
SaveStateEntry *se;
|
2018-12-11 11:24:51 +03:00
|
|
|
Error *local_err = NULL;
|
|
|
|
|
|
|
|
if (precopy_notify(PRECOPY_NOTIFY_CLEANUP, &local_err)) {
|
|
|
|
error_report_err(local_err);
|
|
|
|
}
|
2009-11-30 20:21:21 +03:00
|
|
|
|
2015-11-02 10:37:01 +03:00
|
|
|
trace_savevm_state_cleanup();
|
2015-05-13 14:37:04 +03:00
|
|
|
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
|
2017-06-28 12:52:25 +03:00
|
|
|
if (se->ops && se->ops->save_cleanup) {
|
|
|
|
se->ops->save_cleanup(se->opaque);
|
2009-11-30 20:21:21 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-02-10 16:25:02 +03:00
|
|
|
/*
 * Save the complete VM state to @f in one go.
 *
 * @f: destination stream; it is installed as ms->to_dst_file for the
 *     duration of the call and removed again before returning
 * @errp: receives the error on failure
 *
 * Refuses to run (-EINVAL) while a migration is running.  Otherwise the
 * stream header is written, setup is performed, the iterate step is
 * repeated until it reports completion or the file reports an error, and
 * finally the precopy completion is run.
 *
 * Returns 0 on success, non-zero on failure.
 */
static int qemu_savevm_state(QEMUFile *f, Error **errp)
{
    MigrationState *ms = migrate_get_current();
    MigrationStatus status;
    int ret;

    if (migration_is_running()) {
        error_setg(errp, "There's a migration process in progress");
        return -EINVAL;
    }

    ret = migrate_init(ms, errp);
    if (ret) {
        return ret;
    }
    ms->to_dst_file = f;

    qemu_savevm_state_header(f);
    ret = qemu_savevm_state_setup(f, errp);
    if (ret) {
        /* errp was filled in by the setup call */
        goto cleanup;
    }

    /* Iterate until a pass reports completion or the stream breaks. */
    for (;;) {
        if (qemu_file_get_error(f) != 0) {
            break;
        }
        if (qemu_savevm_state_iterate(f, false) > 0) {
            break;
        }
    }

    ret = qemu_file_get_error(f);
    if (ret == 0) {
        /* The outcome is taken from the file error state afterwards. */
        qemu_savevm_state_complete_precopy(f, false, false);
        ret = qemu_file_get_error(f);
    }
    if (ret != 0) {
        error_setg_errno(errp, -ret, "Error while writing VM state");
    }

cleanup:
    qemu_savevm_state_cleanup();

    status = (ret != 0) ? MIGRATION_STATUS_FAILED
                        : MIGRATION_STATUS_COMPLETED;
    migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status);

    /*
     * f belongs to the caller, so it must not remain in the global
     * migration state once this function is done.
     */
    ms->to_dst_file = NULL;

    return ret;
}
|
|
|
|
|
2018-09-03 07:38:54 +03:00
|
|
|
/*
 * Emit the final part of the live (iterable) state to @f: the
 * QEMU_VM_SECTION_END sections followed by a QEMU_VM_EOF marker.
 */
void qemu_savevm_live_state(QEMUFile *f)
{
    /*
     * Iterable-only completion writes the QEMU_VM_SECTION_END sections;
     * disks are not inactivated.
     */
    qemu_savevm_state_complete_precopy(f, true, false);

    qemu_put_byte(f, QEMU_VM_EOF);
}
|
2012-01-25 16:24:51 +04:00
|
|
|
|
2018-09-03 07:38:54 +03:00
|
|
|
/*
 * Write the state of all non-RAM handlers to @f.
 *
 * The file magic and version are emitted first unless the migration is in
 * COLO state.  Entries flagged is_ram are skipped; every other entry is
 * saved through vmstate_save().  The stream is terminated with QEMU_VM_EOF.
 *
 * Returns the result of the first failing vmstate_save() (the error is
 * also recorded in the migration state and reported), otherwise the error
 * state of @f.
 */
int qemu_save_device_state(QEMUFile *f)
{
    MigrationState *ms = migrate_get_current();
    SaveStateEntry *se;
    Error *err = NULL;

    if (!migration_in_colo_state()) {
        qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
        qemu_put_be32(f, QEMU_VM_FILE_VERSION);
    }
    cpu_synchronize_all_states();

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        int rc;

        if (se->is_ram) {
            continue;
        }

        rc = vmstate_save(f, se, NULL, &err);
        if (rc) {
            migrate_set_error(ms, err);
            error_report_err(err);
            return rc;
        }
    }

    qemu_put_byte(f, QEMU_VM_EOF);

    return qemu_file_get_error(f);
}
|
|
|
|
|
2019-10-16 05:29:31 +03:00
|
|
|
static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id)
|
2008-11-12 00:33:36 +03:00
|
|
|
{
|
|
|
|
SaveStateEntry *se;
|
|
|
|
|
2015-05-13 14:37:04 +03:00
|
|
|
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
|
2008-11-12 00:33:36 +03:00
|
|
|
if (!strcmp(se->idstr, idstr) &&
|
2010-05-15 15:32:40 +04:00
|
|
|
(instance_id == se->instance_id ||
|
|
|
|
instance_id == se->alias_id))
|
2008-11-12 00:33:36 +03:00
|
|
|
return se;
|
2010-06-25 21:09:14 +04:00
|
|
|
/* Migrating from an older version? */
|
|
|
|
if (strstr(se->idstr, idstr) && se->compat) {
|
|
|
|
if (!strcmp(se->compat->idstr, idstr) &&
|
|
|
|
(instance_id == se->compat->instance_id ||
|
|
|
|
instance_id == se->alias_id))
|
|
|
|
return se;
|
|
|
|
}
|
2008-11-12 00:33:36 +03:00
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2015-11-05 21:10:50 +03:00
|
|
|
/* Positive return codes used by the loadvm command handlers. */
enum LoadVMExitCodes {
    /* Lets a command unwind every layer of nested loadvm loops at once. */
    LOADVM_QUIT = 1,
};
|
|
|
|
|
2015-11-05 21:10:52 +03:00
|
|
|
/* ------ incoming postcopy messages ------ */
|
|
|
|
/* 'advise' arrives before any transfers just to tell us that a postcopy
|
|
|
|
* *might* happen - it might be skipped if precopy transferred everything
|
|
|
|
* quickly.
|
|
|
|
*/
|
2018-02-06 14:23:30 +03:00
|
|
|
static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
|
|
|
|
uint16_t len)
|
2015-11-05 21:10:52 +03:00
|
|
|
{
|
|
|
|
PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
|
2017-02-24 21:28:29 +03:00
|
|
|
uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
|
2021-11-22 18:21:16 +03:00
|
|
|
size_t page_size = qemu_target_page_size();
|
2018-03-12 20:21:01 +03:00
|
|
|
Error *local_err = NULL;
|
2015-11-05 21:10:52 +03:00
|
|
|
|
|
|
|
trace_loadvm_postcopy_handle_advise();
|
|
|
|
if (ps != POSTCOPY_INCOMING_NONE) {
|
|
|
|
error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2018-02-06 14:23:30 +03:00
|
|
|
switch (len) {
|
|
|
|
case 0:
|
|
|
|
if (migrate_postcopy_ram()) {
|
|
|
|
error_report("RAM postcopy is enabled but have 0 byte advise");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2017-07-10 19:30:16 +03:00
|
|
|
return 0;
|
2018-02-06 14:23:30 +03:00
|
|
|
case 8 + 8:
|
|
|
|
if (!migrate_postcopy_ram()) {
|
|
|
|
error_report("RAM postcopy is disabled but have 16 byte advise");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
error_report("CMD_POSTCOPY_ADVISE invalid length (%d)", len);
|
|
|
|
return -EINVAL;
|
2017-07-10 19:30:16 +03:00
|
|
|
}
|
|
|
|
|
2023-04-26 04:15:14 +03:00
|
|
|
if (!postcopy_ram_supported_by_host(mis, &local_err)) {
|
|
|
|
error_report_err(local_err);
|
2017-02-02 18:59:08 +03:00
|
|
|
postcopy_state_set(POSTCOPY_INCOMING_NONE);
|
2015-11-05 21:10:55 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2017-02-24 21:28:29 +03:00
|
|
|
remote_pagesize_summary = qemu_get_be64(mis->from_src_file);
|
|
|
|
local_pagesize_summary = ram_pagesize_summary();
|
|
|
|
|
|
|
|
if (remote_pagesize_summary != local_pagesize_summary) {
|
2015-11-05 21:10:52 +03:00
|
|
|
/*
|
2017-02-24 21:28:29 +03:00
|
|
|
* This detects two potential causes of mismatch:
|
|
|
|
* a) A mismatch in host page sizes
|
|
|
|
* Some combinations of mismatch are probably possible but it gets
|
|
|
|
* a bit more complicated. In particular we need to place whole
|
|
|
|
* host pages on the dest at once, and we need to ensure that we
|
|
|
|
* handle dirtying to make sure we never end up sending part of
|
|
|
|
* a hostpage on it's own.
|
|
|
|
* b) The use of different huge page sizes on source/destination
|
|
|
|
* a more fine grain test is performed during RAM block migration
|
|
|
|
* but this test here causes a nice early clear failure, and
|
|
|
|
* also fails when passed to an older qemu that doesn't
|
|
|
|
* do huge pages.
|
2015-11-05 21:10:52 +03:00
|
|
|
*/
|
2017-02-24 21:28:29 +03:00
|
|
|
error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64
|
|
|
|
" d=%" PRIx64 ")",
|
|
|
|
remote_pagesize_summary, local_pagesize_summary);
|
2015-11-05 21:10:52 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
remote_tps = qemu_get_be64(mis->from_src_file);
|
2021-11-22 18:21:16 +03:00
|
|
|
if (remote_tps != page_size) {
|
2015-11-05 21:10:52 +03:00
|
|
|
/*
|
|
|
|
* Again, some differences could be dealt with, but for now keep it
|
|
|
|
* simple.
|
|
|
|
*/
|
2017-03-21 11:09:14 +03:00
|
|
|
error_report("Postcopy needs matching target page sizes (s=%d d=%zd)",
|
2021-11-22 18:21:16 +03:00
|
|
|
(int)remote_tps, page_size);
|
2015-11-05 21:10:52 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2018-03-12 20:21:01 +03:00
|
|
|
if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_ADVISE, &local_err)) {
|
|
|
|
error_report_err(local_err);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2015-11-05 21:11:03 +03:00
|
|
|
if (ram_postcopy_incoming_init(mis)) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2015-11-05 21:10:52 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* After postcopy we will be told to throw some pages away since they're
|
|
|
|
* dirty and will have to be demand fetched. Must happen before CPU is
|
|
|
|
* started.
|
|
|
|
* There can be 0..many of these messages, each encoding multiple pages.
|
|
|
|
*/
|
|
|
|
static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
|
|
|
|
uint16_t len)
|
|
|
|
{
|
|
|
|
int tmp;
|
|
|
|
char ramid[256];
|
|
|
|
PostcopyState ps = postcopy_state_get();
|
|
|
|
|
|
|
|
trace_loadvm_postcopy_ram_handle_discard();
|
|
|
|
|
|
|
|
switch (ps) {
|
|
|
|
case POSTCOPY_INCOMING_ADVISE:
|
|
|
|
/* 1st discard */
|
2015-11-05 21:11:20 +03:00
|
|
|
tmp = postcopy_ram_prepare_discard(mis);
|
2015-11-05 21:10:52 +03:00
|
|
|
if (tmp) {
|
|
|
|
return tmp;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case POSTCOPY_INCOMING_DISCARD:
|
|
|
|
/* Expected state */
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)",
|
|
|
|
ps);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
/* We're expecting a
|
|
|
|
* Version (0)
|
|
|
|
* a RAM ID string (length byte, name, 0 term)
|
|
|
|
* then at least 1 16 byte chunk
|
|
|
|
*/
|
|
|
|
if (len < (1 + 1 + 1 + 1 + 2 * 8)) {
|
|
|
|
error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
tmp = qemu_get_byte(mis->from_src_file);
|
|
|
|
if (tmp != postcopy_ram_discard_version) {
|
|
|
|
error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!qemu_get_counted_string(mis->from_src_file, ramid)) {
|
|
|
|
error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
tmp = qemu_get_byte(mis->from_src_file);
|
|
|
|
if (tmp != 0) {
|
|
|
|
error_report("CMD_POSTCOPY_RAM_DISCARD missing nil (%d)", tmp);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
len -= 3 + strlen(ramid);
|
|
|
|
if (len % 16) {
|
|
|
|
error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
|
|
|
|
while (len) {
|
|
|
|
uint64_t start_addr, block_length;
|
|
|
|
start_addr = qemu_get_be64(mis->from_src_file);
|
|
|
|
block_length = qemu_get_be64(mis->from_src_file);
|
|
|
|
|
|
|
|
len -= 16;
|
2017-03-21 13:35:24 +03:00
|
|
|
int ret = ram_discard_range(ramid, start_addr, block_length);
|
2015-11-05 21:10:52 +03:00
|
|
|
if (ret) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
trace_loadvm_postcopy_ram_handle_discard_end();
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-11-05 21:11:18 +03:00
|
|
|
/*
|
|
|
|
* Triggered by a postcopy_listen command; this thread takes over reading
|
|
|
|
* the input stream, leaving the main thread free to carry on loading the rest
|
|
|
|
* of the device state (from RAM).
|
|
|
|
* (TODO:This could do with being in a postcopy file - but there again it's
|
|
|
|
* just another input loop, not that postcopy specific)
|
|
|
|
*/
|
|
|
|
static void *postcopy_ram_listen_thread(void *opaque)
|
|
|
|
{
|
|
|
|
MigrationIncomingState *mis = migration_incoming_get_current();
|
2018-05-02 13:47:20 +03:00
|
|
|
QEMUFile *f = mis->from_src_file;
|
2015-11-05 21:11:18 +03:00
|
|
|
int load_res;
|
2020-07-27 22:42:32 +03:00
|
|
|
MigrationState *migr = migrate_get_current();
|
|
|
|
|
|
|
|
object_ref(OBJECT(migr));
|
2015-11-05 21:11:18 +03:00
|
|
|
|
2015-12-16 14:47:35 +03:00
|
|
|
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
|
|
|
|
MIGRATION_STATUS_POSTCOPY_ACTIVE);
|
2022-03-01 11:39:06 +03:00
|
|
|
qemu_sem_post(&mis->thread_sync_sem);
|
2015-11-05 21:11:18 +03:00
|
|
|
trace_postcopy_ram_listen_thread_start();
|
|
|
|
|
2018-08-06 16:29:29 +03:00
|
|
|
rcu_register_thread();
|
2015-11-05 21:11:18 +03:00
|
|
|
/*
|
|
|
|
* Because we're a thread and not a coroutine we can't yield
|
|
|
|
* in qemu_file, and thus we must be blocking now.
|
|
|
|
*/
|
|
|
|
qemu_file_set_blocking(f, true);
|
|
|
|
load_res = qemu_loadvm_state_main(f, mis);
|
2018-05-02 13:47:20 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This is tricky, but, mis->from_src_file can change after it
|
|
|
|
* returns, when postcopy recovery happened. In the future, we may
|
|
|
|
* want a wrapper for the QEMUFile handle.
|
|
|
|
*/
|
|
|
|
f = mis->from_src_file;
|
|
|
|
|
2015-11-05 21:11:18 +03:00
|
|
|
/* And non-blocking again so we don't block in any cleanup */
|
|
|
|
qemu_file_set_blocking(f, false);
|
|
|
|
|
|
|
|
trace_postcopy_ram_listen_thread_exit();
|
|
|
|
if (load_res < 0) {
|
|
|
|
qemu_file_set_error(f, load_res);
|
2020-07-27 22:42:32 +03:00
|
|
|
dirty_bitmap_mig_cancel_incoming();
|
|
|
|
if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
|
|
|
|
!migrate_postcopy_ram() && migrate_dirty_bitmaps())
|
|
|
|
{
|
|
|
|
error_report("%s: loadvm failed during postcopy: %d. All states "
|
|
|
|
"are migrated except dirty bitmaps. Some dirty "
|
|
|
|
"bitmaps may be lost, and present migrated dirty "
|
|
|
|
"bitmaps are correctly migrated and valid.",
|
|
|
|
__func__, load_res);
|
|
|
|
load_res = 0; /* prevent further exit() */
|
|
|
|
} else {
|
|
|
|
error_report("%s: loadvm failed: %d", __func__, load_res);
|
|
|
|
migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
|
|
|
|
MIGRATION_STATUS_FAILED);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (load_res >= 0) {
|
2015-11-05 21:11:18 +03:00
|
|
|
/*
|
|
|
|
* This looks good, but it's possible that the device loading in the
|
|
|
|
* main thread hasn't finished yet, and so we might not be in 'RUN'
|
|
|
|
* state yet; wait for the end of the main thread.
|
|
|
|
*/
|
|
|
|
qemu_event_wait(&mis->main_thread_load_event);
|
|
|
|
}
|
|
|
|
postcopy_ram_incoming_cleanup(mis);
|
|
|
|
|
|
|
|
if (load_res < 0) {
|
|
|
|
/*
|
|
|
|
* If something went wrong then we have a bad state so exit;
|
|
|
|
* depending how far we got it might be possible at this point
|
|
|
|
* to leave the guest running and fire MCEs for pages that never
|
|
|
|
* arrived as a desperate recovery step.
|
|
|
|
*/
|
2018-08-06 16:29:29 +03:00
|
|
|
rcu_unregister_thread();
|
2015-11-05 21:11:18 +03:00
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
2015-12-16 14:47:35 +03:00
|
|
|
migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
|
|
|
|
MIGRATION_STATUS_COMPLETED);
|
|
|
|
/*
|
|
|
|
* If everything has worked fine, then the main thread has waited
|
|
|
|
* for us to start, and we're the last use of the mis.
|
|
|
|
* (If something broke then qemu will have to exit anyway since it's
|
|
|
|
* got a bad migration state).
|
|
|
|
*/
|
|
|
|
migration_incoming_state_destroy();
|
2017-06-28 12:52:26 +03:00
|
|
|
qemu_loadvm_state_cleanup();
|
2015-12-16 14:47:35 +03:00
|
|
|
|
2018-08-06 16:29:29 +03:00
|
|
|
rcu_unregister_thread();
|
2018-09-14 20:04:29 +03:00
|
|
|
mis->have_listen_thread = false;
|
2019-10-06 03:02:48 +03:00
|
|
|
postcopy_state_set(POSTCOPY_INCOMING_END);
|
|
|
|
|
2020-07-27 22:42:32 +03:00
|
|
|
object_unref(OBJECT(migr));
|
|
|
|
|
2015-11-05 21:11:18 +03:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2015-11-05 21:10:52 +03:00
|
|
|
/* After this message we must be able to immediately receive postcopy data */
|
|
|
|
static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
|
|
|
|
{
|
|
|
|
PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
|
2018-03-12 20:21:06 +03:00
|
|
|
Error *local_err = NULL;
|
|
|
|
|
2022-03-01 11:39:02 +03:00
|
|
|
trace_loadvm_postcopy_handle_listen("enter");
|
|
|
|
|
2015-11-05 21:10:52 +03:00
|
|
|
if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
|
|
|
|
error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
|
|
|
|
return -1;
|
|
|
|
}
|
2015-11-05 21:11:20 +03:00
|
|
|
if (ps == POSTCOPY_INCOMING_ADVISE) {
|
|
|
|
/*
|
|
|
|
* A rare case, we entered listen without having to do any discards,
|
|
|
|
* so do the setup that's normally done at the time of the 1st discard.
|
|
|
|
*/
|
2017-07-10 19:30:16 +03:00
|
|
|
if (migrate_postcopy_ram()) {
|
|
|
|
postcopy_ram_prepare_discard(mis);
|
|
|
|
}
|
2015-11-05 21:11:20 +03:00
|
|
|
}
|
2015-11-05 21:10:52 +03:00
|
|
|
|
2022-03-01 11:39:02 +03:00
|
|
|
trace_loadvm_postcopy_handle_listen("after discard");
|
|
|
|
|
2015-11-05 21:11:04 +03:00
|
|
|
/*
|
|
|
|
* Sensitise RAM - can now generate requests for blocks that don't exist
|
|
|
|
* However, at this point the CPU shouldn't be running, and the IO
|
|
|
|
* shouldn't be doing anything yet so don't actually expect requests
|
|
|
|
*/
|
2017-07-10 19:30:16 +03:00
|
|
|
if (migrate_postcopy_ram()) {
|
2019-10-10 04:13:15 +03:00
|
|
|
if (postcopy_ram_incoming_setup(mis)) {
|
2019-01-13 17:08:48 +03:00
|
|
|
postcopy_ram_incoming_cleanup(mis);
|
2017-07-10 19:30:16 +03:00
|
|
|
return -1;
|
|
|
|
}
|
2015-11-05 21:11:04 +03:00
|
|
|
}
|
|
|
|
|
2022-03-01 11:39:02 +03:00
|
|
|
trace_loadvm_postcopy_handle_listen("after uffd");
|
|
|
|
|
2018-03-12 20:21:06 +03:00
|
|
|
if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) {
|
|
|
|
error_report_err(local_err);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2015-11-05 21:11:18 +03:00
|
|
|
mis->have_listen_thread = true;
|
migration: Rename thread debug names
The postcopy thread names on dest QEMU are slightly confusing, partly I'll
need to blame myself on 36f62f11e4 ("migration: Postcopy preemption
preparation on channel creation"). E.g., "fault-fast" reads like a fast
version of "fault-default", but it's actually the fast version of
"postcopy/listen".
Taking this chance, rename all the migration threads with proper rules.
Considering we only have 15 chars usable, prefix all threads with "mig/",
meanwhile identify src/dst threads properly this time. So now most thread
names will look like "mig/DIR/xxx", where DIR will be "src"/"dst", except
the bg-snapshot thread which doesn't have a direction.
For multifd threads, making them "mig/{src|dst}/{send|recv}_%d".
We used to have "live_migration" thread for a very long time, now it's
called "mig/src/main". We may hope to have "mig/dst/main" soon but not
yet.
Reviewed-by: Fabiano Rosas <farosas@suse.de>
Reviewed-by: Zhijian Li (Fujitsu) <lizhijian@fujitsu.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Fabiano Rosas <farosas@suse.de>
2024-06-20 01:30:37 +03:00
|
|
|
postcopy_thread_create(mis, &mis->listen_thread, "mig/dst/listen",
|
2022-03-01 11:39:06 +03:00
|
|
|
postcopy_ram_listen_thread, QEMU_THREAD_DETACHED);
|
2022-03-01 11:39:02 +03:00
|
|
|
trace_loadvm_postcopy_handle_listen("return");
|
|
|
|
|
2015-11-05 21:10:52 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-02-24 11:53:39 +03:00
|
|
|
static void loadvm_postcopy_handle_run_bh(void *opaque)
|
2015-11-05 21:10:52 +03:00
|
|
|
{
|
2015-11-05 21:11:19 +03:00
|
|
|
Error *local_err = NULL;
|
2019-08-05 08:31:46 +03:00
|
|
|
MigrationIncomingState *mis = opaque;
|
2015-11-05 21:10:52 +03:00
|
|
|
|
2023-10-30 19:33:46 +03:00
|
|
|
trace_vmstate_downtime_checkpoint("dst-postcopy-bh-enter");
|
2022-03-01 11:39:03 +03:00
|
|
|
|
2015-11-05 21:11:19 +03:00
|
|
|
/* TODO we should move all of this lot into postcopy_ram.c or a shared code
|
|
|
|
* in migration.c
|
|
|
|
*/
|
|
|
|
cpu_synchronize_all_post_init();
|
|
|
|
|
2023-10-30 19:33:46 +03:00
|
|
|
trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cpu-synced");
|
2022-03-01 11:39:03 +03:00
|
|
|
|
2019-02-27 16:24:08 +03:00
|
|
|
qemu_announce_self(&mis->announce_timer, migrate_announce_params());
|
2015-11-05 21:11:19 +03:00
|
|
|
|
2023-10-30 19:33:46 +03:00
|
|
|
trace_vmstate_downtime_checkpoint("dst-postcopy-bh-announced");
|
2022-03-01 11:39:03 +03:00
|
|
|
|
2022-02-09 13:54:51 +03:00
|
|
|
/* Make sure all file formats throw away their mutable metadata.
|
2017-05-04 19:52:36 +03:00
|
|
|
* If we get an error here, just don't restart the VM yet. */
|
2022-02-09 13:54:51 +03:00
|
|
|
bdrv_activate_all(&local_err);
|
2017-04-13 19:38:28 +03:00
|
|
|
if (local_err) {
|
2017-05-04 19:52:36 +03:00
|
|
|
error_report_err(local_err);
|
2017-04-13 19:38:28 +03:00
|
|
|
local_err = NULL;
|
|
|
|
autostart = false;
|
|
|
|
}
|
|
|
|
|
2023-10-30 19:33:46 +03:00
|
|
|
trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cache-invalidated");
|
2015-11-05 21:11:19 +03:00
|
|
|
|
2018-03-13 22:34:01 +03:00
|
|
|
dirty_bitmap_mig_before_vm_start();
|
|
|
|
|
2015-11-05 21:10:52 +03:00
|
|
|
if (autostart) {
|
|
|
|
/* Hold onto your hats, starting the CPU */
|
|
|
|
vm_start();
|
|
|
|
} else {
|
|
|
|
/* leave it paused and let management decide when to start the CPU */
|
|
|
|
runstate_set(RUN_STATE_PAUSED);
|
|
|
|
}
|
|
|
|
|
2023-10-30 19:33:46 +03:00
|
|
|
trace_vmstate_downtime_checkpoint("dst-postcopy-bh-vm-started");
|
2016-02-24 11:53:39 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* After all discards we can start running and asking for pages */
|
|
|
|
static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
|
|
|
|
{
|
2019-10-10 04:13:16 +03:00
|
|
|
PostcopyState ps = postcopy_state_get();
|
2016-02-24 11:53:39 +03:00
|
|
|
|
|
|
|
trace_loadvm_postcopy_handle_run();
|
|
|
|
if (ps != POSTCOPY_INCOMING_LISTENING) {
|
|
|
|
error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2019-10-10 04:13:16 +03:00
|
|
|
postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
|
2024-01-20 02:39:22 +03:00
|
|
|
migration_bh_schedule(loadvm_postcopy_handle_run_bh, mis);
|
2016-02-24 11:53:39 +03:00
|
|
|
|
2015-11-05 21:11:19 +03:00
|
|
|
/* We need to finish reading the stream from the package
|
|
|
|
* and also stop reading anything more from the stream that loaded the
|
|
|
|
* package (since it's now being read by the listener thread).
|
|
|
|
* LOADVM_QUIT will quit all the layers of nested loadvm loops.
|
|
|
|
*/
|
|
|
|
return LOADVM_QUIT;
|
2015-11-05 21:10:52 +03:00
|
|
|
}
|
|
|
|
|
2020-10-22 00:27:19 +03:00
|
|
|
/* We must be with page_request_mutex held */
|
|
|
|
static gboolean postcopy_sync_page_req(gpointer key, gpointer value,
|
|
|
|
gpointer data)
|
|
|
|
{
|
|
|
|
MigrationIncomingState *mis = data;
|
|
|
|
void *host_addr = (void *) key;
|
|
|
|
ram_addr_t rb_offset;
|
|
|
|
RAMBlock *rb;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
rb = qemu_ram_block_from_host(host_addr, true, &rb_offset);
|
|
|
|
if (!rb) {
|
|
|
|
/*
|
|
|
|
* This should _never_ happen. However be nice for a migrating VM to
|
|
|
|
* not crash/assert. Post an error (note: intended to not use *_once
|
|
|
|
* because we do want to see all the illegal addresses; and this can
|
|
|
|
* never be triggered by the guest so we're safe) and move on next.
|
|
|
|
*/
|
|
|
|
error_report("%s: illegal host addr %p", __func__, host_addr);
|
|
|
|
/* Try the next entry */
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = migrate_send_rp_message_req_pages(mis, rb, rb_offset);
|
|
|
|
if (ret) {
|
|
|
|
/* Please refer to above comment. */
|
|
|
|
error_report("%s: send rp message failed for addr %p",
|
|
|
|
__func__, host_addr);
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
trace_postcopy_page_req_sync(host_addr);
|
|
|
|
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Re-send a page request for every entry of mis->page_requested, with
 * mis->page_request_mutex held for the whole walk.
 */
static void migrate_send_rp_req_pages_pending(MigrationIncomingState *mis)
{
    WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
        /* postcopy_sync_page_req() is invoked once per tree entry. */
        g_tree_foreach(mis->page_requested, postcopy_sync_page_req, mis);
    }
}
|
|
|
|
|
2018-05-02 13:47:29 +03:00
|
|
|
/*
 * Handle the postcopy 'resume' command from the source.
 *
 * Only acted upon in the POSTCOPY_RECOVER state; in any other state the
 * command is reported and otherwise ignored.  Switches back to
 * POSTCOPY_ACTIVE, acknowledges the resume, re-sends all pending page
 * requests and then releases the paused threads.
 *
 * Always returns 0.
 */
static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
{
    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: illegal resume received", __func__);
        /* Don't fail the load, only for this. */
        return 0;
    }

    /*
     * Reset the last_rb before we resend any page req to source again, since
     * the source should have it reset already.
     */
    mis->last_rb = NULL;

    /* The source VM is ready to resume the postcopy migration. */
    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
                      MIGRATION_STATUS_POSTCOPY_ACTIVE);

    trace_loadvm_postcopy_handle_resume();

    /* Tell source that "we are ready" */
    migrate_send_rp_resume_ack(mis, MIGRATION_RESUME_ACK_VALUE);

    /*
     * After a postcopy recovery, the source should have lost the postcopy
     * queue, or potentially the requested pages could have been lost during
     * the network down phase.  Let's re-sync with the source VM by re-sending
     * all the pending pages that we eagerly need, so these threads won't get
     * blocked too long due to the recovery.
     *
     * Without this procedure, the faulted destination VM threads (waiting for
     * page requests right before the postcopy is interrupted) can keep hanging
     * until the pages are sent by the source during the background copying of
     * pages, or another thread faulted on the same address accidentally.
     */
    migrate_send_rp_req_pages_pending(mis);

    /*
     * It's time to switch state and release the fault thread to continue
     * service page faults.  Note that this should be explicitly after the
     * above call to migrate_send_rp_req_pages_pending().  In short:
     * migrate_send_rp_message_req_pages() is not thread safe, yet.
     */
    qemu_sem_post(&mis->postcopy_pause_sem_fault);

    if (!migrate_postcopy_preempt()) {
        return 0;
    }

    /*
     * The preempt channel will be created in async manner, now let's
     * wait for it and make sure it's created.
     */
    qemu_sem_wait(&mis->postcopy_qemufile_dst_done);
    assert(mis->postcopy_qemufile_dst);
    /* Kick the fast ram load thread too */
    qemu_sem_post(&mis->postcopy_pause_sem_fast_load);

    return 0;
}
|
|
|
|
|
2015-11-05 21:10:45 +03:00
|
|
|
/**
|
2015-11-05 21:10:53 +03:00
|
|
|
* Immediately following this command is a blob of data containing an embedded
|
|
|
|
* chunk of migration stream; read it and load it.
|
|
|
|
*
|
|
|
|
* @mis: Incoming state
|
|
|
|
* @length: Length of packaged data to read
|
2015-11-05 21:10:45 +03:00
|
|
|
*
|
2015-11-05 21:10:53 +03:00
|
|
|
* Returns: Negative values on error
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
|
|
|
|
{
|
|
|
|
int ret;
|
2016-04-27 13:05:01 +03:00
|
|
|
size_t length;
|
|
|
|
QIOChannelBuffer *bioc;
|
2015-11-05 21:10:53 +03:00
|
|
|
|
|
|
|
length = qemu_get_be32(mis->from_src_file);
|
|
|
|
trace_loadvm_handle_cmd_packaged(length);
|
|
|
|
|
|
|
|
if (length > MAX_VM_CMD_PACKAGED_SIZE) {
|
2016-04-27 13:05:01 +03:00
|
|
|
error_report("Unreasonably large packaged state: %zu", length);
|
2015-11-05 21:10:53 +03:00
|
|
|
return -1;
|
|
|
|
}
|
2016-04-27 13:05:01 +03:00
|
|
|
|
|
|
|
bioc = qio_channel_buffer_new(length);
|
2016-09-30 13:57:14 +03:00
|
|
|
qio_channel_set_name(QIO_CHANNEL(bioc), "migration-loadvm-buffer");
|
2016-04-27 13:05:01 +03:00
|
|
|
ret = qemu_get_buffer(mis->from_src_file,
|
|
|
|
bioc->data,
|
|
|
|
length);
|
2015-11-05 21:10:53 +03:00
|
|
|
if (ret != length) {
|
2016-04-27 13:05:01 +03:00
|
|
|
object_unref(OBJECT(bioc));
|
|
|
|
error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%zu",
|
2015-12-18 18:35:19 +03:00
|
|
|
ret, length);
|
2015-11-05 21:10:53 +03:00
|
|
|
return (ret < 0) ? ret : -EAGAIN;
|
|
|
|
}
|
2016-04-27 13:05:01 +03:00
|
|
|
bioc->usage += length;
|
2015-11-05 21:10:53 +03:00
|
|
|
trace_loadvm_handle_cmd_packaged_received(ret);
|
|
|
|
|
2022-06-20 14:02:05 +03:00
|
|
|
QEMUFile *packf = qemu_file_new_input(QIO_CHANNEL(bioc));
|
2015-11-05 21:10:53 +03:00
|
|
|
|
2024-04-05 06:40:56 +03:00
|
|
|
/*
|
|
|
|
* Before loading the guest states, ensure that the preempt channel has
|
|
|
|
* been ready to use, as some of the states (e.g. via virtio_load) might
|
|
|
|
* trigger page faults that will be handled through the preempt channel.
|
|
|
|
* So yield to the main thread in the case that the channel create event
|
|
|
|
* hasn't been dispatched.
|
|
|
|
*
|
|
|
|
* TODO: if we can move migration loadvm out of main thread, then we
|
|
|
|
* won't block main thread from polling the accept() fds. We can drop
|
|
|
|
* this as a whole when that is done.
|
|
|
|
*/
|
|
|
|
do {
|
|
|
|
if (!migrate_postcopy_preempt() || !qemu_in_coroutine() ||
|
|
|
|
mis->postcopy_qemufile_dst) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
|
|
|
|
qemu_coroutine_yield();
|
|
|
|
} while (1);
|
|
|
|
|
2015-11-05 21:10:53 +03:00
|
|
|
ret = qemu_loadvm_state_main(packf, mis);
|
|
|
|
trace_loadvm_handle_cmd_packaged_main(ret);
|
|
|
|
qemu_fclose(packf);
|
2016-04-27 13:05:01 +03:00
|
|
|
object_unref(OBJECT(bioc));
|
2015-11-05 21:10:53 +03:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2018-05-02 13:47:27 +03:00
|
|
|
/*
|
|
|
|
* Handle request that source requests for recved_bitmap on
|
|
|
|
* destination. Payload format:
|
|
|
|
*
|
|
|
|
* len (1 byte) + ramblock_name (<255 bytes)
|
|
|
|
*/
|
|
|
|
static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
|
|
|
|
uint16_t len)
|
|
|
|
{
|
|
|
|
QEMUFile *file = mis->from_src_file;
|
|
|
|
RAMBlock *rb;
|
|
|
|
char block_name[256];
|
|
|
|
size_t cnt;
|
|
|
|
|
|
|
|
cnt = qemu_get_counted_string(file, block_name);
|
|
|
|
if (!cnt) {
|
|
|
|
error_report("%s: failed to read block name", __func__);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Validate before using the data */
|
|
|
|
if (qemu_file_get_error(file)) {
|
|
|
|
return qemu_file_get_error(file);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (len != cnt + 1) {
|
|
|
|
error_report("%s: invalid payload length (%d)", __func__, len);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
rb = qemu_ram_block_by_name(block_name);
|
|
|
|
if (!rb) {
|
|
|
|
error_report("%s: block '%s' not found", __func__, block_name);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-05-02 13:47:28 +03:00
|
|
|
migrate_send_rp_recv_bitmap(mis, block_name);
|
2018-05-02 13:47:27 +03:00
|
|
|
|
|
|
|
trace_loadvm_handle_recv_bitmap(block_name);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-09-03 07:38:47 +03:00
|
|
|
/*
 * Enable COLO on the incoming side and then set up the COLO RAM cache.
 * If the cache cannot be initialised, COLO is disabled again.
 *
 * Returns: 0 on success, otherwise the non-zero result of whichever step
 *          failed.
 */
static int loadvm_process_enable_colo(MigrationIncomingState *mis)
{
    int rc = migration_incoming_enable_colo();

    if (rc) {
        return rc;
    }

    rc = colo_init_ram_cache();
    if (rc) {
        /* Undo the enable above so we do not stay half configured */
        migration_incoming_disable_colo();
    }
    return rc;
}
|
|
|
|
|
2015-11-05 21:10:53 +03:00
|
|
|
/*
|
|
|
|
* Process an incoming 'QEMU_VM_COMMAND'
|
|
|
|
* 0 just a normal return
|
|
|
|
* LOADVM_QUIT All good, but exit the loop
|
|
|
|
* <0 Error
|
2015-11-05 21:10:45 +03:00
|
|
|
*/
|
|
|
|
static int loadvm_process_command(QEMUFile *f)
|
|
|
|
{
|
2015-11-05 21:10:46 +03:00
|
|
|
MigrationIncomingState *mis = migration_incoming_get_current();
|
2015-11-05 21:10:45 +03:00
|
|
|
uint16_t cmd;
|
|
|
|
uint16_t len;
|
2015-11-05 21:10:46 +03:00
|
|
|
uint32_t tmp32;
|
2015-11-05 21:10:45 +03:00
|
|
|
|
|
|
|
cmd = qemu_get_be16(f);
|
|
|
|
len = qemu_get_be16(f);
|
|
|
|
|
2018-02-08 13:31:05 +03:00
|
|
|
/* Check validity before continue processing of cmds */
|
|
|
|
if (qemu_file_get_error(f)) {
|
|
|
|
return qemu_file_get_error(f);
|
|
|
|
}
|
|
|
|
|
2015-11-05 21:10:45 +03:00
|
|
|
if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) {
|
|
|
|
error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2022-03-01 11:39:01 +03:00
|
|
|
trace_loadvm_process_command(mig_cmd_args[cmd].name, len);
|
|
|
|
|
2015-11-05 21:10:45 +03:00
|
|
|
if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) {
|
|
|
|
error_report("%s received with bad length - expecting %zu, got %d",
|
|
|
|
mig_cmd_args[cmd].name,
|
|
|
|
(size_t)mig_cmd_args[cmd].len, len);
|
|
|
|
return -ERANGE;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (cmd) {
|
2015-11-05 21:10:46 +03:00
|
|
|
case MIG_CMD_OPEN_RETURN_PATH:
|
|
|
|
if (mis->to_src_file) {
|
|
|
|
error_report("CMD_OPEN_RETURN_PATH called when RP already open");
|
|
|
|
/* Not really a problem, so don't give up */
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
mis->to_src_file = qemu_file_get_return_path(f);
|
|
|
|
if (!mis->to_src_file) {
|
|
|
|
error_report("CMD_OPEN_RETURN_PATH failed");
|
|
|
|
return -1;
|
|
|
|
}
|
2023-06-21 14:11:55 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Switchover ack is enabled but no device uses it, so send an ACK to
|
|
|
|
* source that it's OK to switchover. Do it here, after return path has
|
|
|
|
* been created.
|
|
|
|
*/
|
|
|
|
if (migrate_switchover_ack() && !mis->switchover_ack_pending_num) {
|
|
|
|
int ret = migrate_send_rp_switchover_ack(mis);
|
|
|
|
if (ret) {
|
|
|
|
error_report(
|
|
|
|
"Could not send switchover ack RP MSG, err %d (%s)", ret,
|
|
|
|
strerror(-ret));
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
2015-11-05 21:10:46 +03:00
|
|
|
break;
|
|
|
|
|
|
|
|
case MIG_CMD_PING:
|
|
|
|
tmp32 = qemu_get_be32(f);
|
|
|
|
trace_loadvm_process_command_ping(tmp32);
|
|
|
|
if (!mis->to_src_file) {
|
|
|
|
error_report("CMD_PING (0x%x) received with no return path",
|
|
|
|
tmp32);
|
|
|
|
return -1;
|
|
|
|
}
|
2015-11-05 21:10:47 +03:00
|
|
|
migrate_send_rp_pong(mis, tmp32);
|
2015-11-05 21:10:46 +03:00
|
|
|
break;
|
2015-11-05 21:10:52 +03:00
|
|
|
|
2015-11-05 21:10:53 +03:00
|
|
|
case MIG_CMD_PACKAGED:
|
|
|
|
return loadvm_handle_cmd_packaged(mis);
|
|
|
|
|
2015-11-05 21:10:52 +03:00
|
|
|
case MIG_CMD_POSTCOPY_ADVISE:
|
2018-02-06 14:23:30 +03:00
|
|
|
return loadvm_postcopy_handle_advise(mis, len);
|
2015-11-05 21:10:52 +03:00
|
|
|
|
|
|
|
case MIG_CMD_POSTCOPY_LISTEN:
|
|
|
|
return loadvm_postcopy_handle_listen(mis);
|
|
|
|
|
|
|
|
case MIG_CMD_POSTCOPY_RUN:
|
|
|
|
return loadvm_postcopy_handle_run(mis);
|
|
|
|
|
|
|
|
case MIG_CMD_POSTCOPY_RAM_DISCARD:
|
|
|
|
return loadvm_postcopy_ram_handle_discard(mis, len);
|
2018-05-02 13:47:27 +03:00
|
|
|
|
2018-05-02 13:47:29 +03:00
|
|
|
case MIG_CMD_POSTCOPY_RESUME:
|
|
|
|
return loadvm_postcopy_handle_resume(mis);
|
|
|
|
|
2018-05-02 13:47:27 +03:00
|
|
|
case MIG_CMD_RECV_BITMAP:
|
|
|
|
return loadvm_handle_recv_bitmap(mis, len);
|
2018-09-03 07:38:47 +03:00
|
|
|
|
|
|
|
case MIG_CMD_ENABLE_COLO:
|
|
|
|
return loadvm_process_enable_colo(mis);
|
2015-11-05 21:10:45 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-07-02 11:22:03 +03:00
|
|
|
/*
|
|
|
|
* Read a footer off the wire and check that it matches the expected section
|
|
|
|
*
|
|
|
|
* Returns: true if the footer was good
|
|
|
|
* false if there is a problem (and calls error_report to say why)
|
|
|
|
*/
|
2017-05-24 10:09:58 +03:00
|
|
|
static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
|
2015-07-02 11:22:03 +03:00
|
|
|
{
|
2018-02-08 13:31:05 +03:00
|
|
|
int ret;
|
2015-07-02 11:22:03 +03:00
|
|
|
uint8_t read_mark;
|
|
|
|
uint32_t read_section_id;
|
|
|
|
|
2017-06-27 07:10:17 +03:00
|
|
|
if (!migrate_get_current()->send_section_footer) {
|
2015-07-02 11:22:03 +03:00
|
|
|
/* No footer to check */
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
read_mark = qemu_get_byte(f);
|
|
|
|
|
2018-02-08 13:31:05 +03:00
|
|
|
ret = qemu_file_get_error(f);
|
|
|
|
if (ret) {
|
|
|
|
error_report("%s: Read section footer failed: %d",
|
|
|
|
__func__, ret);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-07-02 11:22:03 +03:00
|
|
|
if (read_mark != QEMU_VM_SECTION_FOOTER) {
|
2017-05-24 10:09:58 +03:00
|
|
|
error_report("Missing section footer for %s", se->idstr);
|
2015-07-02 11:22:03 +03:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
read_section_id = qemu_get_be32(f);
|
2017-05-24 10:09:58 +03:00
|
|
|
if (read_section_id != se->load_section_id) {
|
2015-07-02 11:22:03 +03:00
|
|
|
error_report("Mismatched section id in footer for %s -"
|
|
|
|
" read 0x%x expected 0x%x",
|
2017-05-24 10:09:58 +03:00
|
|
|
se->idstr, read_section_id, se->load_section_id);
|
2015-07-02 11:22:03 +03:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* All good */
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-01-15 06:37:43 +03:00
|
|
|
static int
|
migration: Add per vmstate downtime tracepoints
We have a bunch of savevm_section* tracepoints, they're good to analyze
migration stream, but not always suitable if someone would like to analyze
the migration downtime. Two major problems:
- savevm_section* tracepoints are dumping all sections, we only care
about the sections that contribute to the downtime
- They don't have an identifier to show the type of sections, so no way
to filter downtime information either easily.
We can add type into the tracepoints, but instead of doing so, this patch
kept them untouched, instead of adding a bunch of downtime specific
tracepoints, so one can enable "vmstate_downtime*" tracepoints and get a
full picture of how the downtime is distributed across iterative and
non-iterative vmstate save/load.
Note that here both save() and load() need to be traced, because both of
them may contribute to the downtime. The contribution is not a simple "add
them together", though: consider when the src is doing a save() of device1
while the dest can be load()ing for device2, so they can happen
concurrently.
Tracking both sides make sense because device load() and save() can be
imbalanced, one device can save() super fast, but load() super slow, vice
versa. We can't figure that out without tracing both.
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Message-ID: <20231030163346.765724-4-peterx@redhat.com>
2023-10-30 19:33:44 +03:00
|
|
|
qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis,
|
|
|
|
uint8_t type)
|
2016-01-15 06:37:43 +03:00
|
|
|
{
|
migration: Add per vmstate downtime tracepoints
We have a bunch of savevm_section* tracepoints, they're good to analyze
migration stream, but not always suitable if someone would like to analyze
the migration downtime. Two major problems:
- savevm_section* tracepoints are dumping all sections, we only care
about the sections that contribute to the downtime
- They don't have an identifier to show the type of sections, so no way
to filter downtime information either easily.
We can add type into the tracepoints, but instead of doing so, this patch
kept them untouched, instead of adding a bunch of downtime specific
tracepoints, so one can enable "vmstate_downtime*" tracepoints and get a
full picture of how the downtime is distributed across iterative and
non-iterative vmstate save/load.
Note that here both save() and load() need to be traced, because both of
them may contribute to the downtime. The contribution is not a simple "add
them together", though: consider when the src is doing a save() of device1
while the dest can be load()ing for device2, so they can happen
concurrently.
Tracking both sides make sense because device load() and save() can be
imbalanced, one device can save() super fast, but load() super slow, vice
versa. We can't figure that out without tracing both.
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Message-ID: <20231030163346.765724-4-peterx@redhat.com>
2023-10-30 19:33:44 +03:00
|
|
|
bool trace_downtime = (type == QEMU_VM_SECTION_FULL);
|
2016-01-15 06:37:43 +03:00
|
|
|
uint32_t instance_id, version_id, section_id;
|
migration: Add per vmstate downtime tracepoints
We have a bunch of savevm_section* tracepoints, they're good to analyze
migration stream, but not always suitable if someone would like to analyze
the migration downtime. Two major problems:
- savevm_section* tracepoints are dumping all sections, we only care
about the sections that contribute to the downtime
- They don't have an identifier to show the type of sections, so no way
to filter downtime information either easily.
We can add type into the tracepoints, but instead of doing so, this patch
kept them untouched, instead of adding a bunch of downtime specific
tracepoints, so one can enable "vmstate_downtime*" tracepoints and get a
full picture of how the downtime is distributed across iterative and
non-iterative vmstate save/load.
Note that here both save() and load() need to be traced, because both of
them may contribute to the downtime. The contribution is not a simple "add
them together", though: consider when the src is doing a save() of device1
while the dest can be load()ing for device2, so they can happen
concurrently.
Tracking both sides make sense because device load() and save() can be
imbalanced, one device can save() super fast, but load() super slow, vice
versa. We can't figure that out without tracing both.
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Message-ID: <20231030163346.765724-4-peterx@redhat.com>
2023-10-30 19:33:44 +03:00
|
|
|
int64_t start_ts, end_ts;
|
2016-01-15 06:37:43 +03:00
|
|
|
SaveStateEntry *se;
|
|
|
|
char idstr[256];
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* Read section start */
|
|
|
|
section_id = qemu_get_be32(f);
|
|
|
|
if (!qemu_get_counted_string(f, idstr)) {
|
|
|
|
error_report("Unable to read ID string for section %u",
|
|
|
|
section_id);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
instance_id = qemu_get_be32(f);
|
|
|
|
version_id = qemu_get_be32(f);
|
|
|
|
|
2018-02-08 13:31:05 +03:00
|
|
|
ret = qemu_file_get_error(f);
|
|
|
|
if (ret) {
|
|
|
|
error_report("%s: Failed to read instance/version ID: %d",
|
|
|
|
__func__, ret);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-01-15 06:37:43 +03:00
|
|
|
trace_qemu_loadvm_state_section_startfull(section_id, idstr,
|
|
|
|
instance_id, version_id);
|
|
|
|
/* Find savevm section */
|
|
|
|
se = find_se(idstr, instance_id);
|
|
|
|
if (se == NULL) {
|
2019-10-16 05:29:31 +03:00
|
|
|
error_report("Unknown savevm section or instance '%s' %"PRIu32". "
|
2018-09-03 19:26:13 +03:00
|
|
|
"Make sure that your current VM setup matches your "
|
|
|
|
"saved VM setup, including any hotplugged devices",
|
2016-01-15 06:37:43 +03:00
|
|
|
idstr, instance_id);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Validate version */
|
|
|
|
if (version_id > se->version_id) {
|
|
|
|
error_report("savevm: unsupported version %d for '%s' v%d",
|
|
|
|
version_id, idstr, se->version_id);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2017-05-24 10:09:58 +03:00
|
|
|
se->load_version_id = version_id;
|
|
|
|
se->load_section_id = section_id;
|
2016-01-15 06:37:43 +03:00
|
|
|
|
2016-06-03 12:58:34 +03:00
|
|
|
/* Validate if it is a device's state */
|
|
|
|
if (xen_enabled() && se->is_ram) {
|
|
|
|
error_report("loadvm: %s RAM loading not allowed on Xen", idstr);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
migration: Add per vmstate downtime tracepoints
We have a bunch of savevm_section* tracepoints, they're good to analyze
migration stream, but not always suitable if someone would like to analyze
the migration downtime. Two major problems:
- savevm_section* tracepoints are dumping all sections, we only care
about the sections that contribute to the downtime
- They don't have an identifier to show the type of sections, so no way
to filter downtime information either easily.
We can add type into the tracepoints, but instead of doing so, this patch
kept them untouched, instead of adding a bunch of downtime specific
tracepoints, so one can enable "vmstate_downtime*" tracepoints and get a
full picture of how the downtime is distributed across iterative and
non-iterative vmstate save/load.
Note that here both save() and load() need to be traced, because both of
them may contribute to the downtime. The contribution is not a simple "add
them together", though: consider when the src is doing a save() of device1
while the dest can be load()ing for device2, so they can happen
concurrently.
Tracking both sides make sense because device load() and save() can be
imbalanced, one device can save() super fast, but load() super slow, vice
versa. We can't figure that out without tracing both.
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Message-ID: <20231030163346.765724-4-peterx@redhat.com>
2023-10-30 19:33:44 +03:00
|
|
|
if (trace_downtime) {
|
|
|
|
start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
|
|
|
|
}
|
|
|
|
|
2017-05-24 10:28:47 +03:00
|
|
|
ret = vmstate_load(f, se);
|
2016-01-15 06:37:43 +03:00
|
|
|
if (ret < 0) {
|
2019-10-16 05:29:31 +03:00
|
|
|
error_report("error while loading state for instance 0x%"PRIx32" of"
|
2016-01-15 06:37:43 +03:00
|
|
|
" device '%s'", instance_id, idstr);
|
|
|
|
return ret;
|
|
|
|
}
|
migration: Add per vmstate downtime tracepoints
We have a bunch of savevm_section* tracepoints, they're good to analyze
migration stream, but not always suitable if someone would like to analyze
the migration downtime. Two major problems:
- savevm_section* tracepoints are dumping all sections, we only care
about the sections that contribute to the downtime
- They don't have an identifier to show the type of sections, so no way
to filter downtime information either easily.
We can add type into the tracepoints, but instead of doing so, this patch
kept them untouched, instead of adding a bunch of downtime specific
tracepoints, so one can enable "vmstate_downtime*" tracepoints and get a
full picture of how the downtime is distributed across iterative and
non-iterative vmstate save/load.
Note that here both save() and load() need to be traced, because both of
them may contribute to the downtime. The contribution is not a simple "add
them together", though: consider when the src is doing a save() of device1
while the dest can be load()ing for device2, so they can happen
concurrently.
Tracking both sides make sense because device load() and save() can be
imbalanced, one device can save() super fast, but load() super slow, vice
versa. We can't figure that out without tracing both.
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Message-ID: <20231030163346.765724-4-peterx@redhat.com>
2023-10-30 19:33:44 +03:00
|
|
|
|
|
|
|
if (trace_downtime) {
|
|
|
|
end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
|
|
|
|
trace_vmstate_downtime_load("non-iterable", se->idstr,
|
|
|
|
se->instance_id, end_ts - start_ts);
|
|
|
|
}
|
|
|
|
|
2017-05-24 10:09:58 +03:00
|
|
|
if (!check_section_footer(f, se)) {
|
2016-01-15 06:37:43 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
migration: Add per vmstate downtime tracepoints
We have a bunch of savevm_section* tracepoints, they're good to analyze
migration stream, but not always suitable if someone would like to analyze
the migration downtime. Two major problems:
- savevm_section* tracepoints are dumping all sections, we only care
about the sections that contribute to the downtime
- They don't have an identifier to show the type of sections, so no way
to filter downtime information either easily.
We can add type into the tracepoints, but instead of doing so, this patch
kept them untouched, instead of adding a bunch of downtime specific
tracepoints, so one can enable "vmstate_downtime*" tracepoints and get a
full picture of how the downtime is distributed across iterative and
non-iterative vmstate save/load.
Note that here both save() and load() need to be traced, because both of
them may contribute to the downtime. The contribution is not a simple "add
them together", though: consider when the src is doing a save() of device1
while the dest can be load()ing for device2, so they can happen
concurrently.
Tracking both sides make sense because device load() and save() can be
imbalanced, one device can save() super fast, but load() super slow, vice
versa. We can't figure that out without tracing both.
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Message-ID: <20231030163346.765724-4-peterx@redhat.com>
2023-10-30 19:33:44 +03:00
|
|
|
qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis,
|
|
|
|
uint8_t type)
|
2016-01-15 06:37:43 +03:00
|
|
|
{
|
migration: Add per vmstate downtime tracepoints
We have a bunch of savevm_section* tracepoints, they're good to analyze
migration stream, but not always suitable if someone would like to analyze
the migration downtime. Two major problems:
- savevm_section* tracepoints are dumping all sections, we only care
about the sections that contribute to the downtime
- They don't have an identifier to show the type of sections, so no way
to filter downtime information either easily.
We can add type into the tracepoints, but instead of doing so, this patch
kept them untouched, instead of adding a bunch of downtime specific
tracepoints, so one can enable "vmstate_downtime*" tracepoints and get a
full picture of how the downtime is distributed across iterative and
non-iterative vmstate save/load.
Note that here both save() and load() need to be traced, because both of
them may contribute to the downtime. The contribution is not a simple "add
them together", though: consider when the src is doing a save() of device1
while the dest can be load()ing for device2, so they can happen
concurrently.
Tracking both sides make sense because device load() and save() can be
imbalanced, one device can save() super fast, but load() super slow, vice
versa. We can't figure that out without tracing both.
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Message-ID: <20231030163346.765724-4-peterx@redhat.com>
2023-10-30 19:33:44 +03:00
|
|
|
bool trace_downtime = (type == QEMU_VM_SECTION_END);
|
|
|
|
int64_t start_ts, end_ts;
|
2016-01-15 06:37:43 +03:00
|
|
|
uint32_t section_id;
|
2017-05-24 10:09:58 +03:00
|
|
|
SaveStateEntry *se;
|
2016-01-15 06:37:43 +03:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
section_id = qemu_get_be32(f);
|
|
|
|
|
2018-02-08 13:31:05 +03:00
|
|
|
ret = qemu_file_get_error(f);
|
|
|
|
if (ret) {
|
|
|
|
error_report("%s: Failed to read section ID: %d",
|
|
|
|
__func__, ret);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-01-15 06:37:43 +03:00
|
|
|
trace_qemu_loadvm_state_section_partend(section_id);
|
2017-05-24 10:09:58 +03:00
|
|
|
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
|
|
|
|
if (se->load_section_id == section_id) {
|
2016-01-15 06:37:43 +03:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2017-05-24 10:09:58 +03:00
|
|
|
if (se == NULL) {
|
2016-01-15 06:37:43 +03:00
|
|
|
error_report("Unknown savevm section %d", section_id);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
migration: Add per vmstate downtime tracepoints
We have a bunch of savevm_section* tracepoints, they're good to analyze
migration stream, but not always suitable if someone would like to analyze
the migration downtime. Two major problems:
- savevm_section* tracepoints are dumping all sections, we only care
about the sections that contribute to the downtime
- They don't have an identifier to show the type of sections, so no way
to filter downtime information either easily.
We can add type into the tracepoints, but instead of doing so, this patch
kept them untouched, instead of adding a bunch of downtime specific
tracepoints, so one can enable "vmstate_downtime*" tracepoints and get a
full picture of how the downtime is distributed across iterative and
non-iterative vmstate save/load.
Note that here both save() and load() need to be traced, because both of
them may contribute to the downtime. The contribution is not a simple "add
them together", though: consider when the src is doing a save() of device1
while the dest can be load()ing for device2, so they can happen
concurrently.
Tracking both sides make sense because device load() and save() can be
imbalanced, one device can save() super fast, but load() super slow, vice
versa. We can't figure that out without tracing both.
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Message-ID: <20231030163346.765724-4-peterx@redhat.com>
2023-10-30 19:33:44 +03:00
|
|
|
if (trace_downtime) {
|
|
|
|
start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
|
|
|
|
}
|
|
|
|
|
2017-05-24 10:28:47 +03:00
|
|
|
ret = vmstate_load(f, se);
|
2016-01-15 06:37:43 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
error_report("error while loading state section id %d(%s)",
|
2017-05-24 10:09:58 +03:00
|
|
|
section_id, se->idstr);
|
2016-01-15 06:37:43 +03:00
|
|
|
return ret;
|
|
|
|
}
|
migration: Add per vmstate downtime tracepoints
We have a bunch of savevm_section* tracepoints, they're good to analyze
migration stream, but not always suitable if someone would like to analyze
the migration downtime. Two major problems:
- savevm_section* tracepoints are dumping all sections, we only care
about the sections that contribute to the downtime
- They don't have an identifier to show the type of sections, so no way
to filter downtime information either easily.
We can add type into the tracepoints, but instead of doing so, this patch
kept them untouched, instead of adding a bunch of downtime specific
tracepoints, so one can enable "vmstate_downtime*" tracepoints and get a
full picture of how the downtime is distributed across iterative and
non-iterative vmstate save/load.
Note that here both save() and load() need to be traced, because both of
them may contribute to the downtime. The contribution is not a simple "add
them together", though: consider when the src is doing a save() of device1
while the dest can be load()ing for device2, so they can happen
concurrently.
Tracking both sides make sense because device load() and save() can be
imbalanced, one device can save() super fast, but load() super slow, vice
versa. We can't figure that out without tracing both.
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Message-ID: <20231030163346.765724-4-peterx@redhat.com>
2023-10-30 19:33:44 +03:00
|
|
|
|
|
|
|
if (trace_downtime) {
|
|
|
|
end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
|
|
|
|
trace_vmstate_downtime_load("iterable", se->idstr,
|
|
|
|
se->instance_id, end_ts - start_ts);
|
|
|
|
}
|
|
|
|
|
2017-05-24 10:09:58 +03:00
|
|
|
if (!check_section_footer(f, se)) {
|
2016-01-15 06:37:43 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-04-24 03:47:00 +03:00
|
|
|
/*
 * Read and validate the header of a migration stream: the file magic,
 * the stream version and, when the current migration sends one, the
 * configuration section.
 *
 * Returns: 0 on success, -EINVAL for a bad magic or a missing
 *          configuration section, -ENOTSUP for an unsupported version,
 *          or the error from loading the configuration section.
 */
static int qemu_loadvm_state_header(QEMUFile *f)
{
    unsigned int magic;
    unsigned int version;
    int rc;

    magic = qemu_get_be32(f);
    if (magic != QEMU_VM_FILE_MAGIC) {
        error_report("Not a migration stream");
        return -EINVAL;
    }

    version = qemu_get_be32(f);
    if (version == QEMU_VM_FILE_VERSION_COMPAT) {
        error_report("SaveVM v2 format is obsolete and don't work anymore");
        return -ENOTSUP;
    }
    if (version != QEMU_VM_FILE_VERSION) {
        error_report("Unsupported migration stream version");
        return -ENOTSUP;
    }

    if (!migrate_get_current()->send_configuration) {
        /* No configuration section in this stream */
        return 0;
    }

    if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
        error_report("Configuration section missing");
        qemu_loadvm_state_cleanup();
        return -EINVAL;
    }

    rc = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);
    if (rc) {
        qemu_loadvm_state_cleanup();
        return rc;
    }

    return 0;
}
|
|
|
|
|
2023-06-21 14:11:55 +03:00
|
|
|
/*
 * Walk the registered handlers and, for each one whose
 * switchover_ack_needed() callback returns true, increment
 * mis->switchover_ack_pending_num.
 */
static void qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis)
{
    SaveStateEntry *handler;

    QTAILQ_FOREACH(handler, &savevm_state.handlers, entry) {
        /* Handlers without ops or without the callback are skipped */
        if (handler->ops && handler->ops->switchover_ack_needed &&
            handler->ops->switchover_ack_needed(handler->opaque)) {
            mis->switchover_ack_pending_num++;
        }
    }

    trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num);
}
|
|
|
|
|
2024-03-20 09:49:04 +03:00
|
|
|
/*
 * Call the load_setup() callback of every registered handler that
 * provides one and whose is_active() callback, if it has one, returns
 * true.  Stops at the first failure.
 *
 * @f:    incoming stream; the failing return code is also recorded on it
 * @errp: set by the failing callback and prefixed with the device name
 *
 * Returns: 0 on success, or the negative value returned by the failing
 *          load_setup() callback.
 */
static int qemu_loadvm_state_setup(QEMUFile *f, Error **errp)
{
    ERRP_GUARD();
    SaveStateEntry *handler;
    int rc;

    trace_loadvm_state_setup();
    QTAILQ_FOREACH(handler, &savevm_state.handlers, entry) {
        if (!handler->ops || !handler->ops->load_setup) {
            continue;
        }
        /* Handlers that report themselves inactive are skipped */
        if (handler->ops->is_active &&
            !handler->ops->is_active(handler->opaque)) {
            continue;
        }

        rc = handler->ops->load_setup(f, handler->opaque, errp);
        if (rc < 0) {
            error_prepend(errp, "Load state of device %s failed: ",
                          handler->idstr);
            qemu_file_set_error(f, rc);
            return rc;
        }
    }
    return 0;
}
|
|
|
|
|
|
|
|
void qemu_loadvm_state_cleanup(void)
|
|
|
|
{
|
|
|
|
SaveStateEntry *se;
|
|
|
|
|
|
|
|
trace_loadvm_state_cleanup();
|
|
|
|
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
|
|
|
|
if (se->ops && se->ops->load_cleanup) {
|
|
|
|
se->ops->load_cleanup(se->opaque);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-05-02 13:47:20 +03:00
|
|
|
/* Return true if we should continue the migration, or false. */
|
|
|
|
static bool postcopy_pause_incoming(MigrationIncomingState *mis)
|
|
|
|
{
|
migration: Introduce postcopy channels on dest node
Postcopy handles huge pages in a special way that currently we can only have
one "channel" to transfer the page.
It's because when we install pages using UFFDIO_COPY, we need to have the whole
huge page ready, it also means we need to have a temp huge page when trying to
receive the whole content of the page.
Currently all maintainance around this tmp page is global: firstly we'll
allocate a temp huge page, then we maintain its status mostly within
ram_load_postcopy().
To enable multiple channels for postcopy, the first thing we need to do is to
prepare N temp huge pages as caching, one for each channel.
Meanwhile we need to maintain the tmp huge page status per-channel too.
To give some example, some local variables maintained in ram_load_postcopy()
are listed; they are responsible for maintaining temp huge page status:
- all_zero: this keeps whether this huge page contains all zeros
- target_pages: this counts how many target pages have been copied
- host_page: this keeps the host ptr for the page to install
Move all these fields to be together with the temp huge pages to form a new
structure called PostcopyTmpPage. Then for each (future) postcopy channel, we
need one structure to keep the state around.
For vanilla postcopy, obviously there's only one channel. It contains both
precopy and postcopy pages.
This patch teaches the dest migration node to start realize the possible number
of postcopy channels by introducing the "postcopy_channels" variable. Its
value is calculated when setup postcopy on dest node (during POSTCOPY_LISTEN
phase).
Vanilla postcopy will have channels=1, but when postcopy-preempt capability is
enabled (in the future), we will boost it to 2 because even during partial
sending of a precopy huge page we still want to preempt it and start sending
the postcopy requested page right away (so we start to keep two temp huge
pages; more if we want to enable multifd). In this patch there's a TODO marked
for that; so far the channels is always set to 1.
We need to send one "host huge page" on one channel only and we cannot split
them, because otherwise the data upon the same huge page can locate on more
than one channel so we need more complicated logic to manage. One temp host
huge page for each channel will be enough for us for now.
Postcopy will still always use the index=0 huge page even after this patch.
However it prepares for the latter patches where it can start to use multiple
channels (which needs src intervention, because only src knows which channel we
should use).
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20220301083925.33483-5-peterx@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
dgilbert: Fixed up long line
2022-03-01 11:39:04 +03:00
|
|
|
int i;
|
|
|
|
|
2018-05-02 13:47:20 +03:00
|
|
|
trace_postcopy_pause_incoming();
|
|
|
|
|
2020-07-27 22:42:32 +03:00
|
|
|
assert(migrate_postcopy_ram());
|
|
|
|
|
2021-07-22 20:58:41 +03:00
|
|
|
/*
|
|
|
|
* Unregister yank with either from/to src would work, since ioc behind it
|
|
|
|
* is the same
|
|
|
|
*/
|
|
|
|
migration_ioc_unregister_yank_from_file(mis->from_src_file);
|
|
|
|
|
2018-05-02 13:47:20 +03:00
|
|
|
assert(mis->from_src_file);
|
|
|
|
qemu_file_shutdown(mis->from_src_file);
|
|
|
|
qemu_fclose(mis->from_src_file);
|
|
|
|
mis->from_src_file = NULL;
|
|
|
|
|
|
|
|
assert(mis->to_src_file);
|
|
|
|
qemu_file_shutdown(mis->to_src_file);
|
|
|
|
qemu_mutex_lock(&mis->rp_mutex);
|
|
|
|
qemu_fclose(mis->to_src_file);
|
|
|
|
mis->to_src_file = NULL;
|
|
|
|
qemu_mutex_unlock(&mis->rp_mutex);
|
|
|
|
|
2022-07-07 21:55:06 +03:00
|
|
|
/*
|
|
|
|
* NOTE: this must happen before reset the PostcopyTmpPages below,
|
|
|
|
* otherwise it's racy to reset those fields when the fast load thread
|
|
|
|
* can be accessing it in parallel.
|
|
|
|
*/
|
|
|
|
if (mis->postcopy_qemufile_dst) {
|
|
|
|
qemu_file_shutdown(mis->postcopy_qemufile_dst);
|
|
|
|
/* Take the mutex to make sure the fast ram load thread halted */
|
|
|
|
qemu_mutex_lock(&mis->postcopy_prio_thread_mutex);
|
|
|
|
migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
|
|
|
|
qemu_fclose(mis->postcopy_qemufile_dst);
|
|
|
|
mis->postcopy_qemufile_dst = NULL;
|
|
|
|
qemu_mutex_unlock(&mis->postcopy_prio_thread_mutex);
|
|
|
|
}
|
|
|
|
|
2023-10-05 01:02:39 +03:00
|
|
|
/* Current state can be either ACTIVE or RECOVER */
|
|
|
|
migrate_set_state(&mis->state, mis->state,
|
2018-06-27 16:22:43 +03:00
|
|
|
MIGRATION_STATUS_POSTCOPY_PAUSED);
|
|
|
|
|
2018-05-02 13:47:22 +03:00
|
|
|
/* Notify the fault thread for the invalidated file handle */
|
|
|
|
postcopy_fault_thread_notify(mis);
|
|
|
|
|
2022-07-07 21:55:02 +03:00
|
|
|
/*
|
|
|
|
* If network is interrupted, any temp page we received will be useless
|
|
|
|
* because we didn't mark them as "received" in receivedmap. After a
|
|
|
|
* proper recovery later (which will sync src dirty bitmap with receivedmap
|
|
|
|
* on dest) these cached small pages will be resent again.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < mis->postcopy_channels; i++) {
|
|
|
|
postcopy_temp_page_reset(&mis->postcopy_tmp_pages[i]);
|
|
|
|
}
|
|
|
|
|
2018-05-02 13:47:20 +03:00
|
|
|
error_report("Detected IO failure for postcopy. "
|
|
|
|
"Migration paused.");
|
|
|
|
|
migration/postcopy: Add postcopy-recover-setup phase
This patch adds a migration state on src called "postcopy-recover-setup".
The new state will describe the intermediate step starting from when the
src QEMU received a postcopy recovery request, until the migration channels
are properly established, but before the recovery process take place.
The request came from Libvirt where Libvirt currently rely on the migration
state events to detect migration state changes. That works for most of the
migration process but except postcopy recovery failures at the beginning.
Currently postcopy recovery only has two major states:
- postcopy-paused: this is the state that both sides of QEMU will be in
for a long time as long as the migration channel was interrupted.
- postcopy-recover: this is the state where both sides of QEMU handshake
with each other, preparing for a continuation of postcopy which used to
be interrupted.
The issue here is when the recovery port is invalid, the src QEMU will take
the URI/channels, noticing the ports are not valid, and it'll silently keep
in the postcopy-paused state, with no event sent to Libvirt. In this case,
the only thing Libvirt can do is to poll the migration status with a proper
interval, however that's less optimal.
Considering that this is the only case where Libvirt won't get a
notification from QEMU on such events, let's add postcopy-recover-setup
state to mimic what we have with the "setup" state of a newly initialized
migration, describing the phase of connection establishment.
With that, postcopy recovery will have two paths to go now, and either path
will guarantee an event generated. Now the events will look like this
during a recovery process on src QEMU:
- Initially when the recovery is initiated on src, QEMU will go from
"postcopy-paused" -> "postcopy-recover-setup". Old QEMUs don't have
this event.
- Depending on whether the channel re-establishment is succeeded:
- In succeeded case, src QEMU will move from "postcopy-recover-setup"
to "postcopy-recover". Old QEMUs also have this event.
- In failure case, src QEMU will move from "postcopy-recover-setup" to
"postcopy-paused" again. Old QEMUs don't have this event.
This guarantees that Libvirt will always receive a notification for
recovery process properly.
One thing to mention is, such new status is only needed on src QEMU not
both. On dest QEMU, the state machine doesn't change. Hence the events
don't change either. It's done like so because dest QEMU may not have an
explicit point of setup start. E.g., it can happen that when dest QEMUs
doesn't use migrate-recover command to use a new URI/channel, but the old
URI/channels can be reused in recovery, in which case the old ports simply
can work again after the network routes are fixed up.
Add a new helper postcopy_is_paused() detecting whether postcopy is still
paused, taking RECOVER_SETUP into account too. When using it on both
src/dst, a slight change is done altogether to always wait for the
semaphore before checking the status, because for both sides a sem_post()
will be required for a recovery.
Cc: Jiri Denemark <jdenemar@redhat.com>
Cc: Prasad Pandit <ppandit@redhat.com>
Reviewed-by: Fabiano Rosas <farosas@suse.de>
Buglink: https://issues.redhat.com/browse/RHEL-38485
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Fabiano Rosas <farosas@suse.de>
2024-06-20 01:30:40 +03:00
|
|
|
do {
|
2018-05-02 13:47:20 +03:00
|
|
|
qemu_sem_wait(&mis->postcopy_pause_sem_dst);
|
migration/postcopy: Add postcopy-recover-setup phase
This patch adds a migration state on src called "postcopy-recover-setup".
The new state will describe the intermediate step starting from when the
src QEMU received a postcopy recovery request, until the migration channels
are properly established, but before the recovery process take place.
The request came from Libvirt where Libvirt currently rely on the migration
state events to detect migration state changes. That works for most of the
migration process but except postcopy recovery failures at the beginning.
Currently postcopy recovery only has two major states:
- postcopy-paused: this is the state that both sides of QEMU will be in
for a long time as long as the migration channel was interrupted.
- postcopy-recover: this is the state where both sides of QEMU handshake
with each other, preparing for a continuation of postcopy which used to
be interrupted.
The issue here is when the recovery port is invalid, the src QEMU will take
the URI/channels, noticing the ports are not valid, and it'll silently keep
in the postcopy-paused state, with no event sent to Libvirt. In this case,
the only thing Libvirt can do is to poll the migration status with a proper
interval, however that's less optimal.
Considering that this is the only case where Libvirt won't get a
notification from QEMU on such events, let's add postcopy-recover-setup
state to mimic what we have with the "setup" state of a newly initialized
migration, describing the phase of connection establishment.
With that, postcopy recovery will have two paths to go now, and either path
will guarantee an event generated. Now the events will look like this
during a recovery process on src QEMU:
- Initially when the recovery is initiated on src, QEMU will go from
"postcopy-paused" -> "postcopy-recover-setup". Old QEMUs don't have
this event.
- Depending on whether the channel re-establishment is succeeded:
- In succeeded case, src QEMU will move from "postcopy-recover-setup"
to "postcopy-recover". Old QEMUs also have this event.
- In failure case, src QEMU will move from "postcopy-recover-setup" to
"postcopy-paused" again. Old QEMUs don't have this event.
This guarantees that Libvirt will always receive a notification for
recovery process properly.
One thing to mention is, such new status is only needed on src QEMU not
both. On dest QEMU, the state machine doesn't change. Hence the events
don't change either. It's done like so because dest QEMU may not have an
explicit point of setup start. E.g., it can happen that when dest QEMUs
doesn't use migrate-recover command to use a new URI/channel, but the old
URI/channels can be reused in recovery, in which case the old ports simply
can work again after the network routes are fixed up.
Add a new helper postcopy_is_paused() detecting whether postcopy is still
paused, taking RECOVER_SETUP into account too. When using it on both
src/dst, a slight change is done altogether to always wait for the
semaphore before checking the status, because for both sides a sem_post()
will be required for a recovery.
Cc: Jiri Denemark <jdenemar@redhat.com>
Cc: Prasad Pandit <ppandit@redhat.com>
Reviewed-by: Fabiano Rosas <farosas@suse.de>
Buglink: https://issues.redhat.com/browse/RHEL-38485
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Fabiano Rosas <farosas@suse.de>
2024-06-20 01:30:40 +03:00
|
|
|
} while (postcopy_is_paused(mis->state));
|
2018-05-02 13:47:20 +03:00
|
|
|
|
|
|
|
trace_postcopy_pause_incoming_continued();
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-09-03 07:38:54 +03:00
|
|
|
/*
 * Main incoming-stream loop: read one section type byte at a time from @f
 * and dispatch it until the stream ends, a command asks to quit, or an
 * error occurs.
 *
 * On a negative result the error is recorded on the file and incoming
 * dirty-bitmap migration is cancelled.  If postcopy is running with RAM
 * postcopy enabled, the function pauses via postcopy_pause_incoming() and,
 * once that returns true, restarts the loop on mis->from_src_file.
 *
 * Returns the last result code: 0 at QEMU_VM_EOF, LOADVM_QUIT when a command
 * requested it, the file error if one was detected, or a negative value on
 * failure.
 */
int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
{
    int ret = 0;

    for (;;) {
        bool stream_done = false;

        while (!stream_done) {
            uint8_t section_type = qemu_get_byte(f);

            ret = qemu_file_get_error_obj_any(f, mis->postcopy_qemufile_dst,
                                              NULL);
            if (ret) {
                break;
            }

            trace_qemu_loadvm_state_section(section_type);
            switch (section_type) {
            case QEMU_VM_SECTION_START:
            case QEMU_VM_SECTION_FULL:
                ret = qemu_loadvm_section_start_full(f, mis, section_type);
                stream_done = ret < 0;
                break;
            case QEMU_VM_SECTION_PART:
            case QEMU_VM_SECTION_END:
                ret = qemu_loadvm_section_part_end(f, mis, section_type);
                stream_done = ret < 0;
                break;
            case QEMU_VM_COMMAND:
                ret = loadvm_process_command(f);
                trace_qemu_loadvm_state_section_command(ret);
                stream_done = (ret < 0) || (ret == LOADVM_QUIT);
                break;
            case QEMU_VM_EOF:
                /* This is the end of migration */
                stream_done = true;
                break;
            default:
                error_report("Unknown savevm section type %d", section_type);
                ret = -EINVAL;
                stream_done = true;
                break;
            }
        }

        if (ret >= 0) {
            return ret;
        }

        qemu_file_set_error(f, ret);

        /* Cancel bitmaps incoming regardless of recovery */
        dirty_bitmap_mig_cancel_incoming();

        /*
         * If we are during an active postcopy, then we pause instead
         * of bail out to at least keep the VM's dirty data.  Note
         * that POSTCOPY_INCOMING_LISTENING stage is still not enough,
         * during which we're still receiving device states and we
         * still haven't yet started the VM on destination.
         *
         * Only RAM postcopy supports recovery. Still, if RAM postcopy is
         * enabled, canceled bitmaps postcopy will not affect RAM postcopy
         * recovering.
         */
        if (postcopy_state_get() != POSTCOPY_INCOMING_RUNNING ||
            !migrate_postcopy_ram() || !postcopy_pause_incoming(mis)) {
            return ret;
        }

        /* Reset f to point to the newly created channel */
        f = mis->from_src_file;
    }
}
|
|
|
|
|
|
|
|
/*
 * Load a complete VM state from @f.
 *
 * Steps, in order: refuse if saving is blocked, read the stream header, run
 * the handlers' load setup, optionally count switchover acks, synchronize
 * CPUs before load, run the main section loop, then (unless a listen thread
 * is still active) drain the optional VMDESC section, clean up the handlers
 * and synchronize CPUs after init.
 *
 * Returns 0 on success; -EINVAL when blocked or when setup fails; otherwise
 * the result of the header read, the main loop, or the file error.
 */
int qemu_loadvm_state(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *err = NULL;
    int ret;

    if (qemu_savevm_state_blocked(&err)) {
        error_report_err(err);
        return -EINVAL;
    }

    ret = qemu_loadvm_state_header(f);
    if (ret != 0) {
        return ret;
    }

    if (qemu_loadvm_state_setup(f, &err) != 0) {
        error_report_err(err);
        return -EINVAL;
    }

    if (migrate_switchover_ack()) {
        qemu_loadvm_state_switchover_ack_needed(mis);
    }

    cpu_synchronize_all_pre_loadvm();

    ret = qemu_loadvm_state_main(f, mis);
    qemu_event_set(&mis->main_thread_load_event);

    trace_qemu_loadvm_state_post_main(ret);

    if (mis->have_listen_thread) {
        /* Listen thread still going, can't clean up yet */
        return ret;
    }

    if (ret == 0) {
        ret = qemu_file_get_error(f);
    }

    /*
     * Try to read in the VMDESC section as well, so that dumping tools that
     * intercept our migration stream have the chance to see it.
     */

    /* We've got to be careful; if we don't read the data and just shut the fd
     * then the sender can error if we close while it's still sending.
     * We also mustn't read data that isn't there; some transports (RDMA)
     * will stall waiting for that data when the source has already closed.
     */
    if (ret == 0 && should_send_vmdesc()) {
        uint8_t marker = qemu_get_byte(f);

        if (marker == QEMU_VM_VMDESCRIPTION) {
            uint8_t *scratch = g_malloc(0x1000);
            uint32_t remaining;

            /* Read and discard the description in chunks of at most 4 KiB. */
            for (remaining = qemu_get_be32(f); remaining > 0; ) {
                uint32_t chunk = MIN(remaining, 0x1000);

                qemu_get_buffer(f, scratch, chunk);
                remaining -= chunk;
            }
            g_free(scratch);
        } else {
            error_report("Expected vmdescription section, but got %d",
                         marker);
            /*
             * It doesn't seem worth failing at this point since
             * we apparently have an otherwise valid VM state
             */
        }
    }

    qemu_loadvm_state_cleanup();
    cpu_synchronize_all_post_init();

    return ret;
}
|
|
|
|
|
2018-09-03 07:38:54 +03:00
|
|
|
/*
 * Load device state from @f by running the main section loop against the
 * current incoming migration state.
 *
 * Returns 0 after a successful load followed by
 * cpu_synchronize_all_post_init(); on a negative result from the loop the
 * error is reported and that negative value is returned.
 */
int qemu_load_device_state(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    /* Load QEMU_VM_SECTION_FULL section */
    int rc = qemu_loadvm_state_main(f, mis);

    if (rc >= 0) {
        cpu_synchronize_all_post_init();
        return 0;
    }

    error_report("Failed to load device state: %d", rc);
    return rc;
}
|
|
|
|
|
2023-06-21 14:11:55 +03:00
|
|
|
int qemu_loadvm_approve_switchover(void)
|
|
|
|
{
|
|
|
|
MigrationIncomingState *mis = migration_incoming_get_current();
|
|
|
|
|
|
|
|
if (!mis->switchover_ack_pending_num) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
mis->switchover_ack_pending_num--;
|
|
|
|
trace_loadvm_approve_switchover(mis->switchover_ack_pending_num);
|
|
|
|
|
|
|
|
if (mis->switchover_ack_pending_num) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return migrate_send_rp_switchover_ack(mis);
|
|
|
|
}
|
|
|
|
|
2021-02-04 15:48:30 +03:00
|
|
|
/*
 * Create a VM snapshot: write the VM state to the vmstate block device and
 * create a snapshot on the selected devices.
 *
 * @name:        snapshot name; when NULL a name of the form
 *               "vm-YYYYmmddHHMMSS" is generated from the current local time
 * @overwrite:   when @name is given, delete existing snapshots of that name
 *               first; otherwise fail if such a snapshot already exists
 * @vmstate:     passed to bdrv_all_find_vmstate_bs() to select the device
 *               that stores the VM state
 * @has_devices: passed together with @devices to the bdrv_all_*() helpers
 * @devices:     device list for the bdrv_all_*() helpers
 * @errp:        set on every failure path
 *
 * The VM is stopped and block devices are drained while the snapshot is
 * taken; on exit the drain is ended and the VM is resumed into the run
 * state it had on entry.
 *
 * Returns true on success, false on failure.
 */
bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
                  bool has_devices, strList *devices, Error **errp)
{
    BlockDriverState *bs;
    QEMUSnapshotInfo sn1, *sn = &sn1;
    int ret = -1, ret2;
    QEMUFile *f;
    RunState saved_state = runstate_get();
    uint64_t vm_state_size;
    g_autoptr(GDateTime) now = g_date_time_new_now_local();

    GLOBAL_STATE_CODE();

    if (migration_is_blocked(errp)) {
        return false;
    }

    if (!replay_can_snapshot()) {
        error_setg(errp, "Record/replay does not allow making snapshot "
                   "right now. Try once more later.");
        return false;
    }

    if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
        return false;
    }

    /* Delete old snapshots of the same name */
    if (name) {
        if (overwrite) {
            if (bdrv_all_delete_snapshot(name, has_devices,
                                         devices, errp) < 0) {
                return false;
            }
        } else {
            ret2 = bdrv_all_has_snapshot(name, has_devices, devices, errp);
            if (ret2 < 0) {
                return false;
            }
            if (ret2 == 1) {
                error_setg(errp,
                           "Snapshot '%s' already exists in one or more devices",
                           name);
                return false;
            }
        }
    }

    bs = bdrv_all_find_vmstate_bs(vmstate, has_devices, devices, errp);
    if (bs == NULL) {
        return false;
    }

    global_state_store();
    vm_stop(RUN_STATE_SAVE_VM);

    bdrv_drain_all_begin();

    memset(sn, 0, sizeof(*sn));

    /* fill auxiliary fields */
    sn->date_sec = g_date_time_to_unix(now);
    sn->date_nsec = g_date_time_get_microsecond(now) * 1000;
    sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    if (replay_mode != REPLAY_MODE_NONE) {
        sn->icount = replay_get_current_icount();
    } else {
        sn->icount = -1ULL;
    }

    if (name) {
        pstrcpy(sn->name, sizeof(sn->name), name);
    } else {
        g_autofree char *autoname = g_date_time_format(now, "vm-%Y%m%d%H%M%S");
        pstrcpy(sn->name, sizeof(sn->name), autoname);
    }

    /* save the VM state */
    f = qemu_fopen_bdrv(bs, 1);
    if (!f) {
        error_setg(errp, "Could not open VM state file");
        goto the_end;
    }
    ret = qemu_savevm_state(f, errp);
    /* The size must be read before the file is closed. */
    vm_state_size = qemu_file_transferred(f);
    ret2 = qemu_fclose(f);
    if (ret < 0) {
        goto the_end;
    }
    if (ret2 < 0) {
        /*
         * Saving succeeded but closing the file failed.  Nothing has set
         * @errp on this path yet, so report the failure here; returning
         * false with no error would leave callers with nothing to show.
         */
        error_setg(errp, "Could not close VM state file (error %d)", ret2);
        ret = ret2;
        goto the_end;
    }

    ret = bdrv_all_create_snapshot(sn, bs, vm_state_size,
                                   has_devices, devices, errp);
    if (ret < 0) {
        /* Best-effort removal of any partially created snapshots. */
        bdrv_all_delete_snapshot(sn->name, has_devices, devices, NULL);
        goto the_end;
    }

    ret = 0;

the_end:
    bdrv_drain_all_end();

    vm_resume(saved_state);
    return ret == 0;
}
|
|
|
|
|
2017-11-16 18:14:19 +03:00
|
|
|
void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live,
|
|
|
|
Error **errp)
|
2012-01-25 16:24:51 +04:00
|
|
|
{
|
|
|
|
QEMUFile *f;
|
2016-04-27 13:05:08 +03:00
|
|
|
QIOChannelFile *ioc;
|
2012-01-25 16:24:51 +04:00
|
|
|
int saved_vm_running;
|
|
|
|
int ret;
|
|
|
|
|
2017-11-16 18:14:19 +03:00
|
|
|
if (!has_live) {
|
|
|
|
/* live default to true so old version of Xen tool stack can have a
|
2020-09-17 10:50:21 +03:00
|
|
|
* successful live migration */
|
2017-11-16 18:14:19 +03:00
|
|
|
live = true;
|
|
|
|
}
|
|
|
|
|
2012-01-25 16:24:51 +04:00
|
|
|
saved_vm_running = runstate_is_running();
|
|
|
|
vm_stop(RUN_STATE_SAVE_VM);
|
2015-08-03 17:29:19 +03:00
|
|
|
global_state_store_running();
|
2012-01-25 16:24:51 +04:00
|
|
|
|
2020-09-21 12:48:30 +03:00
|
|
|
ioc = qio_channel_file_new_path(filename, O_WRONLY | O_CREAT | O_TRUNC,
|
|
|
|
0660, errp);
|
2016-04-27 13:05:08 +03:00
|
|
|
if (!ioc) {
|
2012-01-25 16:24:51 +04:00
|
|
|
goto the_end;
|
|
|
|
}
|
2016-09-30 13:57:14 +03:00
|
|
|
qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-save-state");
|
2022-06-20 14:02:05 +03:00
|
|
|
f = qemu_file_new_output(QIO_CHANNEL(ioc));
|
2017-11-01 17:25:23 +03:00
|
|
|
object_unref(OBJECT(ioc));
|
2012-01-25 16:24:51 +04:00
|
|
|
ret = qemu_save_device_state(f);
|
2018-02-06 19:30:39 +03:00
|
|
|
if (ret < 0 || qemu_fclose(f) < 0) {
|
2024-05-13 17:17:02 +03:00
|
|
|
error_setg(errp, "saving Xen device state failed");
|
2017-11-16 18:14:19 +03:00
|
|
|
} else {
|
|
|
|
/* libxl calls the QMP command "stop" before calling
|
|
|
|
* "xen-save-devices-state" and in case of migration failure, libxl
|
|
|
|
* would call "cont".
|
|
|
|
* So call bdrv_inactivate_all (release locks) here to let the other
|
2020-09-17 10:50:21 +03:00
|
|
|
* side of the migration take control of the images.
|
2017-11-16 18:14:19 +03:00
|
|
|
*/
|
|
|
|
if (live && !saved_vm_running) {
|
|
|
|
ret = bdrv_inactivate_all();
|
|
|
|
if (ret) {
|
|
|
|
error_setg(errp, "%s: bdrv_inactivate_all() failed (%d)",
|
|
|
|
__func__, ret);
|
|
|
|
}
|
|
|
|
}
|
2012-01-25 16:24:51 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
the_end:
|
2013-11-28 18:01:13 +04:00
|
|
|
if (saved_vm_running) {
|
2012-01-25 16:24:51 +04:00
|
|
|
vm_start();
|
2013-11-28 18:01:13 +04:00
|
|
|
}
|
2012-01-25 16:24:51 +04:00
|
|
|
}
|
|
|
|
|
2016-06-03 12:58:34 +03:00
|
|
|
/*
 * QMP handler that reads device state back from @filename for Xen.
 *
 * Refuses to run while the VM is running.  On any failure @errp is set.
 */
void qmp_xen_load_devices_state(const char *filename, Error **errp)
{
    QIOChannelFile *chan;
    QEMUFile *in;
    int rc;

    /*
     * The guest has to be paused before its device state is replaced;
     * the RAM state will already have been loaded by xc.
     */
    if (runstate_is_running()) {
        error_setg(errp, "Cannot update device state while vm is running");
        return;
    }
    vm_stop(RUN_STATE_RESTORE_VM);

    chan = qio_channel_file_new_path(filename, O_RDONLY | O_BINARY, 0, errp);
    if (chan == NULL) {
        return;
    }
    qio_channel_set_name(QIO_CHANNEL(chan), "migration-xen-load-state");
    in = qemu_file_new_input(QIO_CHANNEL(chan));
    /* Drop the local reference now that the QEMUFile has been created */
    object_unref(OBJECT(chan));

    rc = qemu_loadvm_state(in);
    qemu_fclose(in);
    if (rc < 0) {
        error_setg(errp, "loading Xen device state failed");
    }
    migration_incoming_state_destroy();
}
|
|
|
|
|
2021-02-04 15:48:30 +03:00
|
|
|
/*
 * Revert the selected block devices to snapshot @name and load the VM
 * state stored with it.
 *
 * @name:        snapshot tag to load
 * @vmstate:     passed to bdrv_all_find_vmstate_bs() to pick the device
 *               that holds the VM state
 * @has_devices: whether @devices is to be used
 * @devices:     list of block devices taking part in the snapshot
 * @errp:        set whenever false is returned
 *
 * Returns: true on success, false on failure.
 */
bool load_snapshot(const char *name, const char *vmstate,
                   bool has_devices, strList *devices, Error **errp)
{
    BlockDriverState *bs_vm_state;
    QEMUSnapshotInfo sn;
    QEMUFile *f;
    int ret;
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
        return false;
    }
    ret = bdrv_all_has_snapshot(name, has_devices, devices, errp);
    if (ret < 0) {
        return false;
    }
    if (ret == 0) {
        error_setg(errp, "Snapshot '%s' does not exist in one or more devices",
                   name);
        return false;
    }

    bs_vm_state = bdrv_all_find_vmstate_bs(vmstate, has_devices, devices, errp);
    if (!bs_vm_state) {
        return false;
    }

    /* Don't even try to load empty VM states */
    ret = bdrv_snapshot_find(bs_vm_state, &sn, name);
    if (ret < 0) {
        /*
         * bdrv_snapshot_find() is not given errp, so report the failure
         * here; returning false with errp unset would leave the caller
         * with a failure and no error to show.
         */
        error_setg(errp, "Could not find snapshot '%s' on the vmstate device",
                   name);
        return false;
    } else if (sn.vm_state_size == 0) {
        error_setg(errp, "This is a disk-only snapshot. Revert to it "
                   " offline using qemu-img");
        return false;
    }

    /*
     * Flush the record/replay queue. Now the VM state is going
     * to change. Therefore we don't need to preserve its consistency
     */
    replay_flush_events();

    /* Flush all IO requests so they don't interfere with the new state.  */
    bdrv_drain_all_begin();

    ret = bdrv_all_goto_snapshot(name, has_devices, devices, errp);
    if (ret < 0) {
        goto err_drain;
    }

    /* restore the VM state */
    f = qemu_fopen_bdrv(bs_vm_state, 0);
    if (!f) {
        error_setg(errp, "Could not open VM state file");
        goto err_drain;
    }

    qemu_system_reset(SHUTDOWN_CAUSE_SNAPSHOT_LOAD);
    mis->from_src_file = f;

    if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
        /*
         * This path skips migration_incoming_state_destroy(), which is
         * presumably what disposes of mis->from_src_file on the normal
         * path (nothing else in this function closes f).  Take the file
         * back and close it here so it is neither leaked nor left
         * behind in the incoming state.
         */
        mis->from_src_file = NULL;
        qemu_fclose(f);
        goto err_drain;
    }
    ret = qemu_loadvm_state(f);
    migration_incoming_state_destroy();

    bdrv_drain_all_end();

    if (ret < 0) {
        error_setg(errp, "Error %d while loading VM state", ret);
        return false;
    }

    return true;

err_drain:
    bdrv_drain_all_end();
    return false;
}
|
|
|
|
|
migration: preserve suspended for snapshot
Restoring a snapshot can break a suspended guest. Snapshots suffer from
the same suspended-state issues that affect live migration, plus they must
handle an additional problematic scenario, which is that a running vm must
remain running if it loads a suspended snapshot.
To save, the existing vm_stop call now completely stops the suspended
state. Finish with vm_resume to leave the vm in the state it had prior
to the save, correctly restoring the suspended state.
To load, if the snapshot is not suspended, then vm_stop + vm_resume
correctly handles all states, and leaves the vm in the state it had prior
to the load. However, if the snapshot is suspended, restoration is
trickier. First, call vm_resume to restore the state to suspended so the
current state matches the saved state. Then, if the pre-load state is
running, call wakeup to resume running.
Prior to these changes, the vm_stop to RUN_STATE_SAVE_VM and
RUN_STATE_RESTORE_VM did not change runstate if the current state was
suspended, but now it does, so allow these transitions.
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Link: https://lore.kernel.org/r/1704312341-66640-8-git-send-email-steven.sistare@oracle.com
Signed-off-by: Peter Xu <peterx@redhat.com>
2024-01-03 23:05:36 +03:00
|
|
|
/*
 * Put the VM back into @state after a snapshot has been loaded.
 *
 * If the caller wants the VM running but it is in the suspended run state
 * after vm_resume(), request a wakeup as well.
 */
void load_snapshot_resume(RunState state)
{
    vm_resume(state);

    if (state != RUN_STATE_RUNNING) {
        return;
    }
    if (runstate_get() != RUN_STATE_SUSPENDED) {
        return;
    }

    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, &error_abort);
}
|
|
|
|
|
2021-02-04 15:48:31 +03:00
|
|
|
bool delete_snapshot(const char *name, bool has_devices,
|
|
|
|
strList *devices, Error **errp)
|
|
|
|
{
|
|
|
|
if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bdrv_all_delete_snapshot(name, has_devices, devices, errp) < 0) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2011-12-20 17:59:12 +04:00
|
|
|
/*
 * Give the RAM block behind @mr an id string built from the region's name
 * and @dev, then flag the block as migratable.
 */
void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
{
    const char *region_name = memory_region_name(mr);

    qemu_ram_set_idstr(mr->ram_block, region_name, dev);
    qemu_ram_set_migratable(mr->ram_block);
}
|
|
|
|
|
|
|
|
/*
 * Undo vmstate_register_ram(): clear the id string of the RAM block behind
 * @mr and remove its migratable flag.  @dev is not used here.
 */
void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
{
    /* Drop the identifier first, then the migratable marking */
    qemu_ram_unset_idstr(mr->ram_block);

    qemu_ram_unset_migratable(mr->ram_block);
}
|
|
|
|
|
|
|
|
/* Register the RAM of @mr without an owning device (dev == NULL). */
void vmstate_register_ram_global(MemoryRegion *mr)
{
    DeviceState *no_owner = NULL;

    vmstate_register_ram(mr, no_owner);
}
|
2017-04-17 15:57:54 +03:00
|
|
|
|
|
|
|
bool vmstate_check_only_migratable(const VMStateDescription *vmsd)
|
|
|
|
{
|
|
|
|
/* check needed if --only-migratable is specified */
|
Revert "migration: move only_migratable to MigrationState"
This reverts commit 3df663e575f1876d7f3bc684f80e72fca0703d39.
This reverts commit b605c47b57b58e61a901a50a0762dccf43d94783.
Command line option --only-migratable is for disallowing any
configuration that can block migration.
Initially, --only-migratable set global variable @only_migratable.
Commit 3df663e575 "migration: move only_migratable to MigrationState"
replaced it by MigrationState member @only_migratable. That was a
mistake.
First, it doesn't make sense on the design level. MigrationState
captures the state of an individual migration, but --only-migratable
isn't a property of an individual migration, it's a restriction on
QEMU configuration. With fault tolerance, we could have several
migrations at once. --only-migratable would certainly protect all of
them. Storing it in MigrationState feels inappropriate.
Second, it contributes to a dependency cycle that manifests itself as
a bug now.
Putting @only_migratable into MigrationState means its available only
after migration_object_init().
We can't set it before migration_object_init(), so we delay setting it
with a global property (this is fixup commit b605c47b57 "migration:
fix handling for --only-migratable").
We can't get it before migration_object_init(), so anything that uses
it can only run afterwards.
Since migrate_add_blocker() needs to obey --only-migratable, any code
adding migration blockers can run only afterwards. This contributes
to the following dependency cycle:
* configure_blockdev() must run before machine_set_property()
so machine properties can refer to block backends
* machine_set_property() before configure_accelerator()
so machine properties like kvm-irqchip get applied
* configure_accelerator() before migration_object_init()
so that Xen's accelerator compat properties get applied.
* migration_object_init() before configure_blockdev()
so configure_blockdev() can add migration blockers
The cycle was closed when recent commit cda4aa9a5a0 "Create block
backends before setting machine properties" added the first
dependency, and satisfied it by violating the last one. Broke block
backends that add migration blockers.
Moving @only_migratable into MigrationState was a mistake. Revert it.
This doesn't quite break the "migration_object_init() before
configure_blockdev() dependency, since migrate_add_blocker() still has
another dependency on migration_object_init(). To be addressed the
next commit.
Note that the reverted commit made -only-migratable sugar for -global
migration.only-migratable=on below the hood. Documentation has only
ever mentioned -only-migratable. This commit removes the arcane &
undocumented alternative to -only-migratable again. Nobody should be
using it.
Conflicts:
include/migration/misc.h
migration/migration.c
migration/migration.h
vl.c
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20190401090827.20793-3-armbru@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
2019-04-01 12:08:24 +03:00
|
|
|
if (!only_migratable) {
|
2017-04-17 15:57:54 +03:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return !(vmsd && vmsd->unmigratable);
|
|
|
|
}
|
migration: introduce snapshot-{save, load, delete} QMP commands
savevm, loadvm and delvm are some of the few HMP commands that have never
been converted to use QMP. The reasons for the lack of conversion are
that they blocked execution of the event thread, and the semantics
around choice of disks were ill-defined.
Despite this downside, however, libvirt and applications using libvirt
have used these commands for as long as QMP has existed, via the
"human-monitor-command" passthrough command. IOW, while it is clearly
desirable to be able to fix the problems, they are not a blocker to
all real world usage.
Meanwhile there is a need for other features which involve adding new
parameters to the commands. This is possible with HMP passthrough, but
it provides no reliable way for apps to introspect features, so using
QAPI modelling is highly desirable.
This patch thus introduces new snapshot-{load,save,delete} commands to
QMP that are intended to replace the old HMP counterparts. The new
commands are given different names, because they will be using the new
QEMU job framework and thus will have diverging behaviour from the HMP
originals. It would thus be misleading to keep the same name.
While this design uses the generic job framework, the current impl is
still blocking. The intention is that the blocking problem is fixed later.
None the less applications using these new commands should assume that
they are asynchronous and thus wait for the job status change event to
indicate completion.
In addition to using the job framework, the new commands require the
caller to be explicit about all the block device nodes used in the
snapshot operations, with no built-in default heuristics in use.
Note that the existing "query-named-block-nodes" can be used to query
what snapshots currently exist for block nodes.
Acked-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
Message-Id: <20210204124834.774401-13-berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
dgilbert: removed tests for now, the output ordering isn't
deterministic
2021-02-04 15:48:34 +03:00
|
|
|
|
|
|
|
typedef struct SnapshotJob {
|
|
|
|
Job common;
|
|
|
|
char *tag;
|
|
|
|
char *vmstate;
|
|
|
|
strList *devices;
|
|
|
|
Coroutine *co;
|
|
|
|
Error **errp;
|
|
|
|
bool ret;
|
|
|
|
} SnapshotJob;
|
|
|
|
|
|
|
|
/*
 * Release the argument copies owned by @s.
 *
 * @s itself is not freed and keeps being used by the callers (they read
 * s->co right afterwards), so the freed members are reset to NULL rather
 * than left dangling; this also makes a repeated call harmless.
 */
static void qmp_snapshot_job_free(SnapshotJob *s)
{
    g_free(s->tag);
    s->tag = NULL;

    g_free(s->vmstate);
    s->vmstate = NULL;

    qapi_free_strList(s->devices);
    s->devices = NULL;
}
|
|
|
|
|
|
|
|
|
|
|
|
static void snapshot_load_job_bh(void *opaque)
|
|
|
|
{
|
|
|
|
Job *job = opaque;
|
|
|
|
SnapshotJob *s = container_of(job, SnapshotJob, common);
|
migration: preserve suspended for snapshot
Restoring a snapshot can break a suspended guest. Snapshots suffer from
the same suspended-state issues that affect live migration, plus they must
handle an additional problematic scenario, which is that a running vm must
remain running if it loads a suspended snapshot.
To save, the existing vm_stop call now completely stops the suspended
state. Finish with vm_resume to leave the vm in the state it had prior
to the save, correctly restoring the suspended state.
To load, if the snapshot is not suspended, then vm_stop + vm_resume
correctly handles all states, and leaves the vm in the state it had prior
to the load. However, if the snapshot is suspended, restoration is
trickier. First, call vm_resume to restore the state to suspended so the
current state matches the saved state. Then, if the pre-load state is
running, call wakeup to resume running.
Prior to these changes, the vm_stop to RUN_STATE_SAVE_VM and
RUN_STATE_RESTORE_VM did not change runstate if the current state was
suspended, but now it does, so allow these transitions.
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Link: https://lore.kernel.org/r/1704312341-66640-8-git-send-email-steven.sistare@oracle.com
Signed-off-by: Peter Xu <peterx@redhat.com>
2024-01-03 23:05:36 +03:00
|
|
|
RunState orig_state = runstate_get();
|
migration: introduce snapshot-{save, load, delete} QMP commands
savevm, loadvm and delvm are some of the few HMP commands that have never
been converted to use QMP. The reasons for the lack of conversion are
that they blocked execution of the event thread, and the semantics
around choice of disks were ill-defined.
Despite this downside, however, libvirt and applications using libvirt
have used these commands for as long as QMP has existed, via the
"human-monitor-command" passthrough command. IOW, while it is clearly
desirable to be able to fix the problems, they are not a blocker to
all real world usage.
Meanwhile there is a need for other features which involve adding new
parameters to the commands. This is possible with HMP passthrough, but
it provides no reliable way for apps to introspect features, so using
QAPI modelling is highly desirable.
This patch thus introduces new snapshot-{load,save,delete} commands to
QMP that are intended to replace the old HMP counterparts. The new
commands are given different names, because they will be using the new
QEMU job framework and thus will have diverging behaviour from the HMP
originals. It would thus be misleading to keep the same name.
While this design uses the generic job framework, the current impl is
still blocking. The intention that the blocking problem is fixed later.
None the less applications using these new commands should assume that
they are asynchronous and thus wait for the job status change event to
indicate completion.
In addition to using the job framework, the new commands require the
caller to be explicit about all the block device nodes used in the
snapshot operations, with no built-in default heuristics in use.
Note that the existing "query-named-block-nodes" can be used to query
what snapshots currently exist for block nodes.
Acked-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
Message-Id: <20210204124834.774401-13-berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
dgilbert: removed tests for now, the output ordering isn't
deterministic
2021-02-04 15:48:34 +03:00
|
|
|
|
|
|
|
job_progress_set_remaining(&s->common, 1);
|
|
|
|
|
|
|
|
vm_stop(RUN_STATE_RESTORE_VM);
|
|
|
|
|
|
|
|
s->ret = load_snapshot(s->tag, s->vmstate, true, s->devices, s->errp);
|
migration: preserve suspended for snapshot
Restoring a snapshot can break a suspended guest. Snapshots suffer from
the same suspended-state issues that affect live migration, plus they must
handle an additional problematic scenario, which is that a running vm must
remain running if it loads a suspended snapshot.
To save, the existing vm_stop call now completely stops the suspended
state. Finish with vm_resume to leave the vm in the state it had prior
to the save, correctly restoring the suspended state.
To load, if the snapshot is not suspended, then vm_stop + vm_resume
correctly handles all states, and leaves the vm in the state it had prior
to the load. However, if the snapshot is suspended, restoration is
trickier. First, call vm_resume to restore the state to suspended so the
current state matches the saved state. Then, if the pre-load state is
running, call wakeup to resume running.
Prior to these changes, the vm_stop to RUN_STATE_SAVE_VM and
RUN_STATE_RESTORE_VM did not change runstate if the current state was
suspended, but now it does, so allow these transitions.
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Link: https://lore.kernel.org/r/1704312341-66640-8-git-send-email-steven.sistare@oracle.com
Signed-off-by: Peter Xu <peterx@redhat.com>
2024-01-03 23:05:36 +03:00
|
|
|
if (s->ret) {
|
|
|
|
load_snapshot_resume(orig_state);
|
migration: introduce snapshot-{save, load, delete} QMP commands
savevm, loadvm and delvm are some of the few HMP commands that have never
been converted to use QMP. The reasons for the lack of conversion are
that they blocked execution of the event thread, and the semantics
around choice of disks were ill-defined.
Despite this downside, however, libvirt and applications using libvirt
have used these commands for as long as QMP has existed, via the
"human-monitor-command" passthrough command. IOW, while it is clearly
desirable to be able to fix the problems, they are not a blocker to
all real world usage.
Meanwhile there is a need for other features which involve adding new
parameters to the commands. This is possible with HMP passthrough, but
it provides no reliable way for apps to introspect features, so using
QAPI modelling is highly desirable.
This patch thus introduces new snapshot-{load,save,delete} commands to
QMP that are intended to replace the old HMP counterparts. The new
commands are given different names, because they will be using the new
QEMU job framework and thus will have diverging behaviour from the HMP
originals. It would thus be misleading to keep the same name.
While this design uses the generic job framework, the current impl is
still blocking. The intention that the blocking problem is fixed later.
None the less applications using these new commands should assume that
they are asynchronous and thus wait for the job status change event to
indicate completion.
In addition to using the job framework, the new commands require the
caller to be explicit about all the block device nodes used in the
snapshot operations, with no built-in default heuristics in use.
Note that the existing "query-named-block-nodes" can be used to query
what snapshots currently exist for block nodes.
Acked-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
Message-Id: <20210204124834.774401-13-berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
dgilbert: removed tests for now, the output ordering isn't
deterministic
2021-02-04 15:48:34 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
job_progress_update(&s->common, 1);
|
|
|
|
|
|
|
|
qmp_snapshot_job_free(s);
|
|
|
|
aio_co_wake(s->co);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void snapshot_save_job_bh(void *opaque)
|
|
|
|
{
|
|
|
|
Job *job = opaque;
|
|
|
|
SnapshotJob *s = container_of(job, SnapshotJob, common);
|
|
|
|
|
|
|
|
job_progress_set_remaining(&s->common, 1);
|
|
|
|
s->ret = save_snapshot(s->tag, false, s->vmstate,
|
|
|
|
true, s->devices, s->errp);
|
|
|
|
job_progress_update(&s->common, 1);
|
|
|
|
|
|
|
|
qmp_snapshot_job_free(s);
|
|
|
|
aio_co_wake(s->co);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void snapshot_delete_job_bh(void *opaque)
|
|
|
|
{
|
|
|
|
Job *job = opaque;
|
|
|
|
SnapshotJob *s = container_of(job, SnapshotJob, common);
|
|
|
|
|
|
|
|
job_progress_set_remaining(&s->common, 1);
|
|
|
|
s->ret = delete_snapshot(s->tag, true, s->devices, s->errp);
|
|
|
|
job_progress_update(&s->common, 1);
|
|
|
|
|
|
|
|
qmp_snapshot_job_free(s);
|
|
|
|
aio_co_wake(s->co);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * .run callback of the snapshot-save job.
 *
 * Schedules snapshot_save_job_bh() on the main AioContext and yields until
 * it wakes this coroutine.
 *
 * Returns: 0 if the bottom half reported success, -1 otherwise.
 */
static int coroutine_fn snapshot_save_job_run(Job *job, Error **errp)
{
    SnapshotJob *snap = container_of(job, SnapshotJob, common);

    /* Everything the bottom half needs to report back and wake us */
    snap->errp = errp;
    snap->co = qemu_coroutine_self();

    aio_bh_schedule_oneshot(qemu_get_aio_context(),
                            snapshot_save_job_bh, job);
    qemu_coroutine_yield();

    if (snap->ret) {
        return 0;
    }
    return -1;
}
|
|
|
|
|
|
|
|
/*
 * .run callback of the snapshot-load job.
 *
 * Schedules snapshot_load_job_bh() on the main AioContext and yields until
 * it wakes this coroutine.
 *
 * Returns: 0 if the bottom half reported success, -1 otherwise.
 */
static int coroutine_fn snapshot_load_job_run(Job *job, Error **errp)
{
    SnapshotJob *snap = container_of(job, SnapshotJob, common);

    /* Everything the bottom half needs to report back and wake us */
    snap->errp = errp;
    snap->co = qemu_coroutine_self();

    aio_bh_schedule_oneshot(qemu_get_aio_context(),
                            snapshot_load_job_bh, job);
    qemu_coroutine_yield();

    if (snap->ret) {
        return 0;
    }
    return -1;
}
|
|
|
|
|
|
|
|
/*
 * .run callback of the snapshot-delete job.
 *
 * Schedules snapshot_delete_job_bh() on the main AioContext and yields
 * until it wakes this coroutine.
 *
 * Returns: 0 if the bottom half reported success, -1 otherwise.
 */
static int coroutine_fn snapshot_delete_job_run(Job *job, Error **errp)
{
    SnapshotJob *snap = container_of(job, SnapshotJob, common);

    /* Everything the bottom half needs to report back and wake us */
    snap->errp = errp;
    snap->co = qemu_coroutine_self();

    aio_bh_schedule_oneshot(qemu_get_aio_context(),
                            snapshot_delete_job_bh, job);
    qemu_coroutine_yield();

    if (snap->ret) {
        return 0;
    }
    return -1;
}
|
|
|
|
|
|
|
|
|
|
|
|
static const JobDriver snapshot_load_job_driver = {
|
|
|
|
.instance_size = sizeof(SnapshotJob),
|
|
|
|
.job_type = JOB_TYPE_SNAPSHOT_LOAD,
|
|
|
|
.run = snapshot_load_job_run,
|
|
|
|
};
|
|
|
|
|
|
|
|
static const JobDriver snapshot_save_job_driver = {
|
|
|
|
.instance_size = sizeof(SnapshotJob),
|
|
|
|
.job_type = JOB_TYPE_SNAPSHOT_SAVE,
|
|
|
|
.run = snapshot_save_job_run,
|
|
|
|
};
|
|
|
|
|
|
|
|
static const JobDriver snapshot_delete_job_driver = {
|
|
|
|
.instance_size = sizeof(SnapshotJob),
|
|
|
|
.job_type = JOB_TYPE_SNAPSHOT_DELETE,
|
|
|
|
.run = snapshot_delete_job_run,
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
void qmp_snapshot_save(const char *job_id,
|
|
|
|
const char *tag,
|
|
|
|
const char *vmstate,
|
|
|
|
strList *devices,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
SnapshotJob *s;
|
|
|
|
|
|
|
|
s = job_create(job_id, &snapshot_save_job_driver, NULL,
|
|
|
|
qemu_get_aio_context(), JOB_MANUAL_DISMISS,
|
|
|
|
NULL, NULL, errp);
|
|
|
|
if (!s) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->tag = g_strdup(tag);
|
|
|
|
s->vmstate = g_strdup(vmstate);
|
|
|
|
s->devices = QAPI_CLONE(strList, devices);
|
|
|
|
|
|
|
|
job_start(&s->common);
|
|
|
|
}
|
|
|
|
|
|
|
|
void qmp_snapshot_load(const char *job_id,
|
|
|
|
const char *tag,
|
|
|
|
const char *vmstate,
|
|
|
|
strList *devices,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
SnapshotJob *s;
|
|
|
|
|
|
|
|
s = job_create(job_id, &snapshot_load_job_driver, NULL,
|
|
|
|
qemu_get_aio_context(), JOB_MANUAL_DISMISS,
|
|
|
|
NULL, NULL, errp);
|
|
|
|
if (!s) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->tag = g_strdup(tag);
|
|
|
|
s->vmstate = g_strdup(vmstate);
|
|
|
|
s->devices = QAPI_CLONE(strList, devices);
|
|
|
|
|
|
|
|
job_start(&s->common);
|
|
|
|
}
|
|
|
|
|
|
|
|
void qmp_snapshot_delete(const char *job_id,
|
|
|
|
const char *tag,
|
|
|
|
strList *devices,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
SnapshotJob *s;
|
|
|
|
|
|
|
|
s = job_create(job_id, &snapshot_delete_job_driver, NULL,
|
|
|
|
qemu_get_aio_context(), JOB_MANUAL_DISMISS,
|
|
|
|
NULL, NULL, errp);
|
|
|
|
if (!s) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->tag = g_strdup(tag);
|
|
|
|
s->devices = QAPI_CLONE(strList, devices);
|
|
|
|
|
|
|
|
job_start(&s->common);
|
|
|
|
}
|