Merge remote-tracking branch 'quintela/migration.next' into staging
# By Michael R. Hines (8) and others # Via Juan Quintela * quintela/migration.next: migration: add autoconvergence documentation Fix real mode guest segments dpl value in savevm Fix real mode guest migration rdma: account for the time spent in MIG_STATE_SETUP through QMP rdma: introduce MIG_STATE_NONE and change MIG_STATE_SETUP state transition rdma: allow state transitions between other states besides ACTIVE rdma: send pc.ram rdma: core logic rdma: introduce ram_handle_compressed() rdma: bugfix: ram_control_save_page() rdma: update documentation to reflect new unpin support Message-id: 1374590725-14144-1-git-send-email-quintela@redhat.com Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
This commit is contained in:
commit
f03d07d468
@ -51,6 +51,7 @@ common-obj-$(CONFIG_POSIX) += os-posix.o
|
||||
common-obj-$(CONFIG_LINUX) += fsdev/
|
||||
|
||||
common-obj-y += migration.o migration-tcp.o
|
||||
common-obj-$(CONFIG_RDMA) += migration-rdma.o
|
||||
common-obj-y += qemu-char.o #aio.o
|
||||
common-obj-y += block-migration.o
|
||||
common-obj-y += page_cache.o xbzrle.o
|
||||
|
62
arch_init.c
62
arch_init.c
@ -118,6 +118,7 @@ static void check_guest_throttling(void);
|
||||
#define RAM_SAVE_FLAG_EOS 0x10
|
||||
#define RAM_SAVE_FLAG_CONTINUE 0x20
|
||||
#define RAM_SAVE_FLAG_XBZRLE 0x40
|
||||
/* 0x80 is reserved in migration.h start with 0x100 next */
|
||||
|
||||
|
||||
static struct defconfig_file {
|
||||
@ -475,6 +476,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
|
||||
ram_bulk_stage = false;
|
||||
}
|
||||
} else {
|
||||
int ret;
|
||||
uint8_t *p;
|
||||
int cont = (block == last_sent_block) ?
|
||||
RAM_SAVE_FLAG_CONTINUE : 0;
|
||||
@ -483,7 +485,18 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
|
||||
|
||||
/* When in doubt, send the page as normal */
|
||||
bytes_sent = -1;
|
||||
if (is_zero_page(p)) {
|
||||
ret = ram_control_save_page(f, block->offset,
|
||||
offset, TARGET_PAGE_SIZE, &bytes_sent);
|
||||
|
||||
if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
|
||||
if (ret != RAM_SAVE_CONTROL_DELAYED) {
|
||||
if (bytes_sent > 0) {
|
||||
acct_info.norm_pages++;
|
||||
} else if (bytes_sent == 0) {
|
||||
acct_info.dup_pages++;
|
||||
}
|
||||
}
|
||||
} else if (is_zero_page(p)) {
|
||||
acct_info.dup_pages++;
|
||||
bytes_sent = save_block_hdr(f, block, offset, cont,
|
||||
RAM_SAVE_FLAG_COMPRESS);
|
||||
@ -635,6 +648,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
|
||||
}
|
||||
|
||||
qemu_mutex_unlock_ramlist();
|
||||
|
||||
ram_control_before_iterate(f, RAM_CONTROL_SETUP);
|
||||
ram_control_after_iterate(f, RAM_CONTROL_SETUP);
|
||||
|
||||
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
|
||||
|
||||
return 0;
|
||||
@ -653,6 +670,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
|
||||
reset_ram_globals();
|
||||
}
|
||||
|
||||
ram_control_before_iterate(f, RAM_CONTROL_ROUND);
|
||||
|
||||
t0 = qemu_get_clock_ns(rt_clock);
|
||||
i = 0;
|
||||
while ((ret = qemu_file_rate_limit(f)) == 0) {
|
||||
@ -684,6 +703,12 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
|
||||
|
||||
qemu_mutex_unlock_ramlist();
|
||||
|
||||
/*
|
||||
* Must occur before EOS (or any QEMUFile operation)
|
||||
* because of RDMA protocol.
|
||||
*/
|
||||
ram_control_after_iterate(f, RAM_CONTROL_ROUND);
|
||||
|
||||
if (ret < 0) {
|
||||
bytes_transferred += total_sent;
|
||||
return ret;
|
||||
@ -701,6 +726,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
|
||||
qemu_mutex_lock_ramlist();
|
||||
migration_bitmap_sync();
|
||||
|
||||
ram_control_before_iterate(f, RAM_CONTROL_FINISH);
|
||||
|
||||
/* try transferring iterative blocks of memory */
|
||||
|
||||
/* flush all remaining blocks regardless of rate limiting */
|
||||
@ -714,6 +741,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
|
||||
}
|
||||
bytes_transferred += bytes_sent;
|
||||
}
|
||||
|
||||
ram_control_after_iterate(f, RAM_CONTROL_FINISH);
|
||||
migration_end();
|
||||
|
||||
qemu_mutex_unlock_ramlist();
|
||||
@ -808,6 +837,24 @@ static inline void *host_from_stream_offset(QEMUFile *f,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* If a page (or a whole RDMA chunk) has been
|
||||
* determined to be zero, then zap it.
|
||||
*/
|
||||
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
|
||||
{
|
||||
if (ch != 0 || !is_zero_page(host)) {
|
||||
memset(host, ch, size);
|
||||
#ifndef _WIN32
|
||||
if (ch == 0 &&
|
||||
(!kvm_enabled() || kvm_has_sync_mmu()) &&
|
||||
getpagesize() <= TARGET_PAGE_SIZE) {
|
||||
qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static int ram_load(QEMUFile *f, void *opaque, int version_id)
|
||||
{
|
||||
ram_addr_t addr;
|
||||
@ -879,16 +926,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
|
||||
}
|
||||
|
||||
ch = qemu_get_byte(f);
|
||||
if (ch != 0 || !is_zero_page(host)) {
|
||||
memset(host, ch, TARGET_PAGE_SIZE);
|
||||
#ifndef _WIN32
|
||||
if (ch == 0 &&
|
||||
(!kvm_enabled() || kvm_has_sync_mmu()) &&
|
||||
getpagesize() <= TARGET_PAGE_SIZE) {
|
||||
qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
|
||||
} else if (flags & RAM_SAVE_FLAG_PAGE) {
|
||||
void *host;
|
||||
|
||||
@ -908,6 +946,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
} else if (flags & RAM_SAVE_FLAG_HOOK) {
|
||||
ram_control_load_hook(f, flags);
|
||||
}
|
||||
error = qemu_file_get_error(f);
|
||||
if (error) {
|
||||
|
40
configure
vendored
40
configure
vendored
@ -180,6 +180,7 @@ xfs=""
|
||||
vhost_net="no"
|
||||
vhost_scsi="no"
|
||||
kvm="no"
|
||||
rdma=""
|
||||
gprof="no"
|
||||
debug_tcg="no"
|
||||
debug="no"
|
||||
@ -937,6 +938,10 @@ for opt do
|
||||
;;
|
||||
--enable-gtk) gtk="yes"
|
||||
;;
|
||||
--enable-rdma) rdma="yes"
|
||||
;;
|
||||
--disable-rdma) rdma="no"
|
||||
;;
|
||||
--with-gtkabi=*) gtkabi="$optarg"
|
||||
;;
|
||||
--enable-tpm) tpm="yes"
|
||||
@ -1095,6 +1100,8 @@ echo " --enable-bluez enable bluez stack connectivity"
|
||||
echo " --disable-slirp disable SLIRP userspace network connectivity"
|
||||
echo " --disable-kvm disable KVM acceleration support"
|
||||
echo " --enable-kvm enable KVM acceleration support"
|
||||
echo " --disable-rdma disable RDMA-based migration support"
|
||||
echo " --enable-rdma enable RDMA-based migration support"
|
||||
echo " --enable-tcg-interpreter enable TCG with bytecode interpreter (TCI)"
|
||||
echo " --disable-nptl disable usermode NPTL support"
|
||||
echo " --enable-nptl enable usermode NPTL support"
|
||||
@ -1801,6 +1808,30 @@ EOF
|
||||
libs_softmmu="$sdl_libs $libs_softmmu"
|
||||
fi
|
||||
|
||||
##########################################
|
||||
# RDMA needs OpenFabrics libraries
|
||||
if test "$rdma" != "no" ; then
|
||||
cat > $TMPC <<EOF
|
||||
#include <rdma/rdma_cma.h>
|
||||
int main(void) { return 0; }
|
||||
EOF
|
||||
rdma_libs="-lrdmacm -libverbs"
|
||||
if compile_prog "" "$rdma_libs" ; then
|
||||
rdma="yes"
|
||||
libs_softmmu="$libs_softmmu $rdma_libs"
|
||||
else
|
||||
if test "$rdma" = "yes" ; then
|
||||
error_exit \
|
||||
" OpenFabrics librdmacm/libibverbs not present." \
|
||||
" Your options:" \
|
||||
" (1) Fast: Install infiniband packages from your distro." \
|
||||
" (2) Cleanest: Install libraries from www.openfabrics.org" \
|
||||
" (3) Also: Install softiwarp if you don't have RDMA hardware"
|
||||
fi
|
||||
rdma="no"
|
||||
fi
|
||||
fi
|
||||
|
||||
##########################################
|
||||
# VNC TLS/WS detection
|
||||
if test "$vnc" = "yes" -a \( "$vnc_tls" != "no" -o "$vnc_ws" != "no" \) ; then
|
||||
@ -3558,6 +3589,7 @@ echo "Linux AIO support $linux_aio"
|
||||
echo "ATTR/XATTR support $attr"
|
||||
echo "Install blobs $blobs"
|
||||
echo "KVM support $kvm"
|
||||
echo "RDMA support $rdma"
|
||||
echo "TCG interpreter $tcg_interpreter"
|
||||
echo "fdt support $fdt"
|
||||
echo "preadv support $preadv"
|
||||
@ -4046,6 +4078,10 @@ if test "$trace_default" = "yes"; then
|
||||
echo "CONFIG_TRACE_DEFAULT=y" >> $config_host_mak
|
||||
fi
|
||||
|
||||
if test "$rdma" = "yes" ; then
|
||||
echo "CONFIG_RDMA=y" >> $config_host_mak
|
||||
fi
|
||||
|
||||
if test "$tcg_interpreter" = "yes"; then
|
||||
QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/tci $QEMU_INCLUDES"
|
||||
elif test "$ARCH" = "sparc64" ; then
|
||||
@ -4485,6 +4521,10 @@ if [ "$pixman" = "internal" ]; then
|
||||
echo "config-host.h: subdir-pixman" >> $config_host_mak
|
||||
fi
|
||||
|
||||
if test "$rdma" = "yes" ; then
|
||||
echo "CONFIG_RDMA=y" >> $config_host_mak
|
||||
fi
|
||||
|
||||
if [ "$dtc_internal" = "yes" ]; then
|
||||
echo "config-host.h: subdir-dtc" >> $config_host_mak
|
||||
fi
|
||||
|
@ -35,7 +35,7 @@ memory tracked during each live migration iteration round cannot keep pace
|
||||
with the rate of dirty memory produced by the workload.
|
||||
|
||||
RDMA currently comes in two flavors: both Ethernet based (RoCE, or RDMA
|
||||
over Convered Ethernet) as well as Infiniband-based. This implementation of
|
||||
over Converged Ethernet) as well as Infiniband-based. This implementation of
|
||||
migration using RDMA is capable of using both technologies because of
|
||||
the use of the OpenFabrics OFED software stack that abstracts out the
|
||||
programming model irrespective of the underlying hardware.
|
||||
@ -188,9 +188,9 @@ header portion and a data portion (but together are transmitted
|
||||
as a single SEND message).
|
||||
|
||||
Header:
|
||||
* Length (of the data portion, uint32, network byte order)
|
||||
* Type (what command to perform, uint32, network byte order)
|
||||
* Repeat (Number of commands in data portion, same type only)
|
||||
* Length (of the data portion, uint32, network byte order)
|
||||
* Type (what command to perform, uint32, network byte order)
|
||||
* Repeat (Number of commands in data portion, same type only)
|
||||
|
||||
The 'Repeat' field is here to support future multiple page registrations
|
||||
in a single message without any need to change the protocol itself
|
||||
@ -202,17 +202,19 @@ The maximum number of repeats is hard-coded to 4096. This is a conservative
|
||||
limit based on the maximum size of a SEND message along with empirical
|
||||
observations on the maximum future benefit of simultaneous page registrations.
|
||||
|
||||
The 'type' field has 10 different command values:
|
||||
1. Unused
|
||||
2. Error (sent to the source during bad things)
|
||||
3. Ready (control-channel is available)
|
||||
4. QEMU File (for sending non-live device state)
|
||||
5. RAM Blocks request (used right after connection setup)
|
||||
6. RAM Blocks result (used right after connection setup)
|
||||
7. Compress page (zap zero page and skip registration)
|
||||
8. Register request (dynamic chunk registration)
|
||||
9. Register result ('rkey' to be used by sender)
|
||||
10. Register finished (registration for current iteration finished)
|
||||
The 'type' field has 12 different command values:
|
||||
1. Unused
|
||||
2. Error (sent to the source during bad things)
|
||||
3. Ready (control-channel is available)
|
||||
4. QEMU File (for sending non-live device state)
|
||||
5. RAM Blocks request (used right after connection setup)
|
||||
6. RAM Blocks result (used right after connection setup)
|
||||
7. Compress page (zap zero page and skip registration)
|
||||
8. Register request (dynamic chunk registration)
|
||||
9. Register result ('rkey' to be used by sender)
|
||||
10. Register finished (registration for current iteration finished)
|
||||
11. Unregister request (unpin previously registered memory)
|
||||
12. Unregister finished (confirmation that unpin completed)
|
||||
|
||||
A single control message, as hinted above, can contain within the data
|
||||
portion an array of many commands of the same type. If there is more than
|
||||
@ -243,7 +245,7 @@ qemu_rdma_exchange_send(header, data, optional response header & data):
|
||||
from the receiver to tell us that the receiver
|
||||
is *ready* for us to transmit some new bytes.
|
||||
2. Optionally: if we are expecting a response from the command
|
||||
(that we have no yet transmitted), let's post an RQ
|
||||
(that we have not yet transmitted), let's post an RQ
|
||||
work request to receive that data a few moments later.
|
||||
3. When the READY arrives, librdmacm will
|
||||
unblock us and we immediately post a RQ work request
|
||||
@ -293,8 +295,10 @@ librdmacm provides the user with a 'private data' area to be exchanged
|
||||
at connection-setup time before any infiniband traffic is generated.
|
||||
|
||||
Header:
|
||||
* Version (protocol version validated before send/recv occurs), uint32, network byte order
|
||||
* Flags (bitwise OR of each capability), uint32, network byte order
|
||||
* Version (protocol version validated before send/recv occurs),
|
||||
uint32, network byte order
|
||||
* Flags (bitwise OR of each capability),
|
||||
uint32, network byte order
|
||||
|
||||
There is no data portion of this header right now, so there is
|
||||
no length field. The maximum size of the 'private data' section
|
||||
@ -313,7 +317,7 @@ If the version is invalid, we throw an error.
|
||||
If the version is new, we only negotiate the capabilities that the
|
||||
requested version is able to perform and ignore the rest.
|
||||
|
||||
Currently there is only *one* capability in Version #1: dynamic page registration
|
||||
Currently there is only one capability in Version #1: dynamic page registration
|
||||
|
||||
Finally: Negotiation happens with the Flags field: If the primary-VM
|
||||
sets a flag, but the destination does not support this capability, it
|
||||
@ -326,8 +330,8 @@ QEMUFileRDMA Interface:
|
||||
|
||||
QEMUFileRDMA introduces a couple of new functions:
|
||||
|
||||
1. qemu_rdma_get_buffer() (QEMUFileOps rdma_read_ops)
|
||||
2. qemu_rdma_put_buffer() (QEMUFileOps rdma_write_ops)
|
||||
1. qemu_rdma_get_buffer() (QEMUFileOps rdma_read_ops)
|
||||
2. qemu_rdma_put_buffer() (QEMUFileOps rdma_write_ops)
|
||||
|
||||
These two functions are very short and simply use the protocol
|
||||
described above to deliver bytes without changing the upper-level
|
||||
@ -413,3 +417,8 @@ TODO:
|
||||
the use of KSM and ballooning while using RDMA.
|
||||
4. Also, some form of balloon-device usage tracking would also
|
||||
help alleviate some issues.
|
||||
5. Move UNREGISTER requests to a separate thread.
|
||||
6. Use LRU to provide more fine-grained direction of UNREGISTER
|
||||
requests for unpinning memory in an overcommitted environment.
|
||||
7. Expose UNREGISTER support to the user by way of workload-specific
|
||||
hints about application behavior.
|
||||
|
4
hmp.c
4
hmp.c
@ -164,6 +164,10 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
|
||||
monitor_printf(mon, "downtime: %" PRIu64 " milliseconds\n",
|
||||
info->downtime);
|
||||
}
|
||||
if (info->has_setup_time) {
|
||||
monitor_printf(mon, "setup: %" PRIu64 " milliseconds\n",
|
||||
info->setup_time);
|
||||
}
|
||||
}
|
||||
|
||||
if (info->has_ram) {
|
||||
|
@ -49,6 +49,7 @@ struct MigrationState
|
||||
int64_t dirty_bytes_rate;
|
||||
bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
|
||||
int64_t xbzrle_cache_size;
|
||||
int64_t setup_time;
|
||||
};
|
||||
|
||||
void process_incoming_migration(QEMUFile *f);
|
||||
@ -77,6 +78,10 @@ void fd_start_incoming_migration(const char *path, Error **errp);
|
||||
|
||||
void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp);
|
||||
|
||||
void rdma_start_outgoing_migration(void *opaque, const char *host_port, Error **errp);
|
||||
|
||||
void rdma_start_incoming_migration(const char *host_port, Error **errp);
|
||||
|
||||
void migrate_fd_error(MigrationState *s);
|
||||
|
||||
void migrate_fd_connect(MigrationState *s);
|
||||
@ -109,6 +114,8 @@ uint64_t xbzrle_mig_pages_transferred(void);
|
||||
uint64_t xbzrle_mig_pages_overflow(void);
|
||||
uint64_t xbzrle_mig_pages_cache_miss(void);
|
||||
|
||||
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
|
||||
|
||||
/**
|
||||
* @migrate_add_blocker - prevent migration from proceeding
|
||||
*
|
||||
|
3249
migration-rdma.c
Normal file
3249
migration-rdma.c
Normal file
File diff suppressed because it is too large
Load Diff
50
migration.c
50
migration.c
@ -36,7 +36,8 @@
|
||||
#endif
|
||||
|
||||
enum {
|
||||
MIG_STATE_ERROR,
|
||||
MIG_STATE_ERROR = -1,
|
||||
MIG_STATE_NONE,
|
||||
MIG_STATE_SETUP,
|
||||
MIG_STATE_CANCELLED,
|
||||
MIG_STATE_ACTIVE,
|
||||
@ -63,7 +64,7 @@ static NotifierList migration_state_notifiers =
|
||||
MigrationState *migrate_get_current(void)
|
||||
{
|
||||
static MigrationState current_migration = {
|
||||
.state = MIG_STATE_SETUP,
|
||||
.state = MIG_STATE_NONE,
|
||||
.bandwidth_limit = MAX_THROTTLE,
|
||||
.xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
|
||||
.mbps = -1,
|
||||
@ -78,6 +79,10 @@ void qemu_start_incoming_migration(const char *uri, Error **errp)
|
||||
|
||||
if (strstart(uri, "tcp:", &p))
|
||||
tcp_start_incoming_migration(p, errp);
|
||||
#ifdef CONFIG_RDMA
|
||||
else if (strstart(uri, "x-rdma:", &p))
|
||||
rdma_start_incoming_migration(p, errp);
|
||||
#endif
|
||||
#if !defined(WIN32)
|
||||
else if (strstart(uri, "exec:", &p))
|
||||
exec_start_incoming_migration(p, errp);
|
||||
@ -180,9 +185,14 @@ MigrationInfo *qmp_query_migrate(Error **errp)
|
||||
MigrationState *s = migrate_get_current();
|
||||
|
||||
switch (s->state) {
|
||||
case MIG_STATE_SETUP:
|
||||
case MIG_STATE_NONE:
|
||||
/* no migration has happened ever */
|
||||
break;
|
||||
case MIG_STATE_SETUP:
|
||||
info->has_status = true;
|
||||
info->status = g_strdup("setup");
|
||||
info->has_total_time = false;
|
||||
break;
|
||||
case MIG_STATE_ACTIVE:
|
||||
info->has_status = true;
|
||||
info->status = g_strdup("active");
|
||||
@ -191,6 +201,8 @@ MigrationInfo *qmp_query_migrate(Error **errp)
|
||||
- s->total_time;
|
||||
info->has_expected_downtime = true;
|
||||
info->expected_downtime = s->expected_downtime;
|
||||
info->has_setup_time = true;
|
||||
info->setup_time = s->setup_time;
|
||||
|
||||
info->has_ram = true;
|
||||
info->ram = g_malloc0(sizeof(*info->ram));
|
||||
@ -222,6 +234,8 @@ MigrationInfo *qmp_query_migrate(Error **errp)
|
||||
info->total_time = s->total_time;
|
||||
info->has_downtime = true;
|
||||
info->downtime = s->downtime;
|
||||
info->has_setup_time = true;
|
||||
info->setup_time = s->setup_time;
|
||||
|
||||
info->has_ram = true;
|
||||
info->ram = g_malloc0(sizeof(*info->ram));
|
||||
@ -253,7 +267,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
|
||||
MigrationState *s = migrate_get_current();
|
||||
MigrationCapabilityStatusList *cap;
|
||||
|
||||
if (s->state == MIG_STATE_ACTIVE) {
|
||||
if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP) {
|
||||
error_set(errp, QERR_MIGRATION_ACTIVE);
|
||||
return;
|
||||
}
|
||||
@ -291,9 +305,9 @@ static void migrate_fd_cleanup(void *opaque)
|
||||
notifier_list_notify(&migration_state_notifiers, s);
|
||||
}
|
||||
|
||||
static void migrate_finish_set_state(MigrationState *s, int new_state)
|
||||
static void migrate_set_state(MigrationState *s, int old_state, int new_state)
|
||||
{
|
||||
if (atomic_cmpxchg(&s->state, MIG_STATE_ACTIVE, new_state) == new_state) {
|
||||
if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
|
||||
trace_migrate_set_state(new_state);
|
||||
}
|
||||
}
|
||||
@ -311,7 +325,7 @@ static void migrate_fd_cancel(MigrationState *s)
|
||||
{
|
||||
DPRINTF("cancelling migration\n");
|
||||
|
||||
migrate_finish_set_state(s, MIG_STATE_CANCELLED);
|
||||
migrate_set_state(s, s->state, MIG_STATE_CANCELLED);
|
||||
}
|
||||
|
||||
void add_migration_state_change_notifier(Notifier *notify)
|
||||
@ -388,7 +402,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
|
||||
params.blk = blk;
|
||||
params.shared = inc;
|
||||
|
||||
if (s->state == MIG_STATE_ACTIVE) {
|
||||
if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP) {
|
||||
error_set(errp, QERR_MIGRATION_ACTIVE);
|
||||
return;
|
||||
}
|
||||
@ -406,6 +420,10 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
|
||||
|
||||
if (strstart(uri, "tcp:", &p)) {
|
||||
tcp_start_outgoing_migration(s, p, &local_err);
|
||||
#ifdef CONFIG_RDMA
|
||||
} else if (strstart(uri, "x-rdma:", &p)) {
|
||||
rdma_start_outgoing_migration(s, p, &local_err);
|
||||
#endif
|
||||
#if !defined(WIN32)
|
||||
} else if (strstart(uri, "exec:", &p)) {
|
||||
exec_start_outgoing_migration(s, p, &local_err);
|
||||
@ -526,6 +544,7 @@ static void *migration_thread(void *opaque)
|
||||
{
|
||||
MigrationState *s = opaque;
|
||||
int64_t initial_time = qemu_get_clock_ms(rt_clock);
|
||||
int64_t setup_start = qemu_get_clock_ms(host_clock);
|
||||
int64_t initial_bytes = 0;
|
||||
int64_t max_size = 0;
|
||||
int64_t start_time = initial_time;
|
||||
@ -534,6 +553,11 @@ static void *migration_thread(void *opaque)
|
||||
DPRINTF("beginning savevm\n");
|
||||
qemu_savevm_state_begin(s->file, &s->params);
|
||||
|
||||
s->setup_time = qemu_get_clock_ms(host_clock) - setup_start;
|
||||
migrate_set_state(s, MIG_STATE_SETUP, MIG_STATE_ACTIVE);
|
||||
|
||||
DPRINTF("setup complete\n");
|
||||
|
||||
while (s->state == MIG_STATE_ACTIVE) {
|
||||
int64_t current_time;
|
||||
uint64_t pending_size;
|
||||
@ -561,19 +585,19 @@ static void *migration_thread(void *opaque)
|
||||
qemu_mutex_unlock_iothread();
|
||||
|
||||
if (ret < 0) {
|
||||
migrate_finish_set_state(s, MIG_STATE_ERROR);
|
||||
migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_ERROR);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!qemu_file_get_error(s->file)) {
|
||||
migrate_finish_set_state(s, MIG_STATE_COMPLETED);
|
||||
migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (qemu_file_get_error(s->file)) {
|
||||
migrate_finish_set_state(s, MIG_STATE_ERROR);
|
||||
migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_ERROR);
|
||||
break;
|
||||
}
|
||||
current_time = qemu_get_clock_ms(rt_clock);
|
||||
@ -624,8 +648,8 @@ static void *migration_thread(void *opaque)
|
||||
|
||||
void migrate_fd_connect(MigrationState *s)
|
||||
{
|
||||
s->state = MIG_STATE_ACTIVE;
|
||||
trace_migrate_set_state(MIG_STATE_ACTIVE);
|
||||
s->state = MIG_STATE_SETUP;
|
||||
trace_migrate_set_state(MIG_STATE_SETUP);
|
||||
|
||||
/* This is a best 1st approximation. ns to ms */
|
||||
s->expected_downtime = max_downtime/1000000;
|
||||
|
@ -578,6 +578,12 @@
|
||||
# expected downtime in milliseconds for the guest in last walk
|
||||
# of the dirty bitmap. (since 1.3)
|
||||
#
|
||||
# @setup-time: #optional amount of setup time in milliseconds _before_ the
|
||||
# iterations begin but _after_ the QMP command is issued. This is designed
|
||||
# to provide an accounting of any activities (such as RDMA pinning) which
|
||||
# may be expensive, but do not actually occur during the iterative
|
||||
# migration rounds themselves. (since 1.6)
|
||||
#
|
||||
# Since: 0.14.0
|
||||
##
|
||||
{ 'type': 'MigrationInfo',
|
||||
@ -586,7 +592,8 @@
|
||||
'*xbzrle-cache': 'XBZRLECacheStats',
|
||||
'*total-time': 'int',
|
||||
'*expected-downtime': 'int',
|
||||
'*downtime': 'int'} }
|
||||
'*downtime': 'int',
|
||||
'*setup-time': 'int'} }
|
||||
|
||||
##
|
||||
# @query-migrate
|
||||
@ -619,6 +626,9 @@
|
||||
# to enable the capability on the source VM. The feature is disabled by
|
||||
# default. (since 1.6)
|
||||
#
|
||||
# @auto-converge: If enabled, QEMU will automatically throttle down the guest
|
||||
# to speed up convergence of RAM migration. (since 1.6)
|
||||
#
|
||||
# Since: 1.2
|
||||
##
|
||||
{ 'enum': 'MigrationCapability',
|
||||
|
2
savevm.c
2
savevm.c
@ -662,7 +662,7 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
|
||||
offset, size, bytes_sent);
|
||||
|
||||
if (ret != RAM_SAVE_CONTROL_DELAYED) {
|
||||
if (*bytes_sent > 0) {
|
||||
if (bytes_sent && *bytes_sent > 0) {
|
||||
qemu_update_position(f, *bytes_sent);
|
||||
} else if (ret < 0) {
|
||||
qemu_file_set_error(f, ret);
|
||||
|
@ -252,6 +252,24 @@ static void cpu_pre_save(void *opaque)
|
||||
}
|
||||
|
||||
env->fpregs_format_vmstate = 0;
|
||||
|
||||
/*
|
||||
* Real mode guest segments register DPL should be zero.
|
||||
* Older KVM versions were setting it wrongly.
|
||||
* Fixing it will allow live migration to host with unrestricted guest
|
||||
* support (otherwise the migration will fail with invalid guest state
|
||||
* error).
|
||||
*/
|
||||
if (!(env->cr[0] & CR0_PE_MASK) &&
|
||||
(env->segs[R_CS].flags >> DESC_DPL_SHIFT & 3) != 0) {
|
||||
env->segs[R_CS].flags &= ~(env->segs[R_CS].flags & DESC_DPL_MASK);
|
||||
env->segs[R_DS].flags &= ~(env->segs[R_DS].flags & DESC_DPL_MASK);
|
||||
env->segs[R_ES].flags &= ~(env->segs[R_ES].flags & DESC_DPL_MASK);
|
||||
env->segs[R_FS].flags &= ~(env->segs[R_FS].flags & DESC_DPL_MASK);
|
||||
env->segs[R_GS].flags &= ~(env->segs[R_GS].flags & DESC_DPL_MASK);
|
||||
env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static int cpu_post_load(void *opaque, int version_id)
|
||||
@ -260,6 +278,24 @@ static int cpu_post_load(void *opaque, int version_id)
|
||||
CPUX86State *env = &cpu->env;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Real mode guest segments register DPL should be zero.
|
||||
* Older KVM versions were setting it wrongly.
|
||||
* Fixing it will allow live migration from such host that don't have
|
||||
* restricted guest support to a host with unrestricted guest support
|
||||
* (otherwise the migration will fail with invalid guest state
|
||||
* error).
|
||||
*/
|
||||
if (!(env->cr[0] & CR0_PE_MASK) &&
|
||||
(env->segs[R_CS].flags >> DESC_DPL_SHIFT & 3) != 0) {
|
||||
env->segs[R_CS].flags &= ~(env->segs[R_CS].flags & DESC_DPL_MASK);
|
||||
env->segs[R_DS].flags &= ~(env->segs[R_DS].flags & DESC_DPL_MASK);
|
||||
env->segs[R_ES].flags &= ~(env->segs[R_ES].flags & DESC_DPL_MASK);
|
||||
env->segs[R_FS].flags &= ~(env->segs[R_FS].flags & DESC_DPL_MASK);
|
||||
env->segs[R_GS].flags &= ~(env->segs[R_GS].flags & DESC_DPL_MASK);
|
||||
env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK);
|
||||
}
|
||||
|
||||
/* XXX: restore FPU round state */
|
||||
env->fpstt = (env->fpus_vmstate >> 11) & 7;
|
||||
env->fpus = env->fpus_vmstate & ~0x3800;
|
||||
|
Loading…
Reference in New Issue
Block a user