pseries: savevm support with KVM
At present, the savevm / migration support for the pseries machine will not work when KVM is enabled. That's because KVM manages the guest's hash page table in the host kernel, so qemu has no visibility of it. This patch fixes this by using new kernel interfaces to extract and reinsert the guest's hash table during the migration process. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Message-id: 1374175984-8930-11-git-send-email-aliguori@us.ibm.com Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
This commit is contained in:
parent
1112cf94c8
commit
e68cb8b4fa
@ -735,16 +735,26 @@ static int htab_save_setup(QEMUFile *f, void *opaque)
|
|||||||
{
|
{
|
||||||
sPAPREnvironment *spapr = opaque;
|
sPAPREnvironment *spapr = opaque;
|
||||||
|
|
||||||
spapr->htab_save_index = 0;
|
|
||||||
spapr->htab_first_pass = true;
|
|
||||||
|
|
||||||
/* "Iteration" header */
|
/* "Iteration" header */
|
||||||
qemu_put_be32(f, spapr->htab_shift);
|
qemu_put_be32(f, spapr->htab_shift);
|
||||||
|
|
||||||
return 0;
|
if (spapr->htab) {
|
||||||
|
spapr->htab_save_index = 0;
|
||||||
|
spapr->htab_first_pass = true;
|
||||||
|
} else {
|
||||||
|
assert(kvm_enabled());
|
||||||
|
|
||||||
|
spapr->htab_fd = kvmppc_get_htab_fd(false);
|
||||||
|
if (spapr->htab_fd < 0) {
|
||||||
|
fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n",
|
||||||
|
strerror(errno));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define MAX_ITERATION_NS 5000000 /* 5 ms */
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
|
static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
|
||||||
int64_t max_ns)
|
int64_t max_ns)
|
||||||
@ -796,7 +806,7 @@ static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
|
|||||||
spapr->htab_save_index = index;
|
spapr->htab_save_index = index;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
|
static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
|
||||||
int64_t max_ns)
|
int64_t max_ns)
|
||||||
{
|
{
|
||||||
bool final = max_ns < 0;
|
bool final = max_ns < 0;
|
||||||
@ -870,21 +880,32 @@ static bool htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
|
|||||||
|
|
||||||
spapr->htab_save_index = index;
|
spapr->htab_save_index = index;
|
||||||
|
|
||||||
return (examined >= htabslots) && (sent == 0);
|
return (examined >= htabslots) && (sent == 0) ? 1 : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define MAX_ITERATION_NS 5000000 /* 5 ms */
|
||||||
|
#define MAX_KVM_BUF_SIZE 2048
|
||||||
|
|
||||||
static int htab_save_iterate(QEMUFile *f, void *opaque)
|
static int htab_save_iterate(QEMUFile *f, void *opaque)
|
||||||
{
|
{
|
||||||
sPAPREnvironment *spapr = opaque;
|
sPAPREnvironment *spapr = opaque;
|
||||||
bool nothingleft = false;;
|
int rc = 0;
|
||||||
|
|
||||||
/* Iteration header */
|
/* Iteration header */
|
||||||
qemu_put_be32(f, 0);
|
qemu_put_be32(f, 0);
|
||||||
|
|
||||||
if (spapr->htab_first_pass) {
|
if (!spapr->htab) {
|
||||||
|
assert(kvm_enabled());
|
||||||
|
|
||||||
|
rc = kvmppc_save_htab(f, spapr->htab_fd,
|
||||||
|
MAX_KVM_BUF_SIZE, MAX_ITERATION_NS);
|
||||||
|
if (rc < 0) {
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
} else if (spapr->htab_first_pass) {
|
||||||
htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
|
htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
|
||||||
} else {
|
} else {
|
||||||
nothingleft = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
|
rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* End marker */
|
/* End marker */
|
||||||
@ -892,7 +913,7 @@ static int htab_save_iterate(QEMUFile *f, void *opaque)
|
|||||||
qemu_put_be16(f, 0);
|
qemu_put_be16(f, 0);
|
||||||
qemu_put_be16(f, 0);
|
qemu_put_be16(f, 0);
|
||||||
|
|
||||||
return nothingleft ? 1 : 0;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int htab_save_complete(QEMUFile *f, void *opaque)
|
static int htab_save_complete(QEMUFile *f, void *opaque)
|
||||||
@ -902,7 +923,20 @@ static int htab_save_complete(QEMUFile *f, void *opaque)
|
|||||||
/* Iteration header */
|
/* Iteration header */
|
||||||
qemu_put_be32(f, 0);
|
qemu_put_be32(f, 0);
|
||||||
|
|
||||||
|
if (!spapr->htab) {
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
assert(kvm_enabled());
|
||||||
|
|
||||||
|
rc = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, -1);
|
||||||
|
if (rc < 0) {
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
close(spapr->htab_fd);
|
||||||
|
spapr->htab_fd = -1;
|
||||||
|
} else {
|
||||||
htab_save_later_pass(f, spapr, -1);
|
htab_save_later_pass(f, spapr, -1);
|
||||||
|
}
|
||||||
|
|
||||||
/* End marker */
|
/* End marker */
|
||||||
qemu_put_be32(f, 0);
|
qemu_put_be32(f, 0);
|
||||||
@ -916,6 +950,7 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id)
|
|||||||
{
|
{
|
||||||
sPAPREnvironment *spapr = opaque;
|
sPAPREnvironment *spapr = opaque;
|
||||||
uint32_t section_hdr;
|
uint32_t section_hdr;
|
||||||
|
int fd = -1;
|
||||||
|
|
||||||
if (version_id < 1 || version_id > 1) {
|
if (version_id < 1 || version_id > 1) {
|
||||||
fprintf(stderr, "htab_load() bad version\n");
|
fprintf(stderr, "htab_load() bad version\n");
|
||||||
@ -932,6 +967,16 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!spapr->htab) {
|
||||||
|
assert(kvm_enabled());
|
||||||
|
|
||||||
|
fd = kvmppc_get_htab_fd(true);
|
||||||
|
if (fd < 0) {
|
||||||
|
fprintf(stderr, "Unable to open fd to restore KVM hash table: %s\n",
|
||||||
|
strerror(errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
uint32_t index;
|
uint32_t index;
|
||||||
uint16_t n_valid, n_invalid;
|
uint16_t n_valid, n_invalid;
|
||||||
@ -945,14 +990,16 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((index + n_valid + n_invalid) >=
|
if ((index + n_valid + n_invalid) >
|
||||||
(HTAB_SIZE(spapr) / HASH_PTE_SIZE_64)) {
|
(HTAB_SIZE(spapr) / HASH_PTE_SIZE_64)) {
|
||||||
/* Bad index in stream */
|
/* Bad index in stream */
|
||||||
fprintf(stderr, "htab_load() bad index %d (%hd+%hd entries) "
|
fprintf(stderr, "htab_load() bad index %d (%hd+%hd entries) "
|
||||||
"in htab stream\n", index, n_valid, n_invalid);
|
"in htab stream (htab_shift=%d)\n", index, n_valid, n_invalid,
|
||||||
|
spapr->htab_shift);
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (spapr->htab) {
|
||||||
if (n_valid) {
|
if (n_valid) {
|
||||||
qemu_get_buffer(f, HPTE(spapr->htab, index),
|
qemu_get_buffer(f, HPTE(spapr->htab, index),
|
||||||
HASH_PTE_SIZE_64 * n_valid);
|
HASH_PTE_SIZE_64 * n_valid);
|
||||||
@ -961,6 +1008,21 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id)
|
|||||||
memset(HPTE(spapr->htab, index + n_valid), 0,
|
memset(HPTE(spapr->htab, index + n_valid), 0,
|
||||||
HASH_PTE_SIZE_64 * n_invalid);
|
HASH_PTE_SIZE_64 * n_invalid);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
assert(fd >= 0);
|
||||||
|
|
||||||
|
rc = kvmppc_load_htab_chunk(f, fd, index, n_valid, n_invalid);
|
||||||
|
if (rc < 0) {
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!spapr->htab) {
|
||||||
|
assert(fd >= 0);
|
||||||
|
close(fd);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -37,6 +37,7 @@ typedef struct sPAPREnvironment {
|
|||||||
/* Migration state */
|
/* Migration state */
|
||||||
int htab_save_index;
|
int htab_save_index;
|
||||||
bool htab_first_pass;
|
bool htab_first_pass;
|
||||||
|
int htab_fd;
|
||||||
} sPAPREnvironment;
|
} sPAPREnvironment;
|
||||||
|
|
||||||
#define H_SUCCESS 0
|
#define H_SUCCESS 0
|
||||||
|
@ -65,6 +65,7 @@ static int cap_one_reg;
|
|||||||
static int cap_epr;
|
static int cap_epr;
|
||||||
static int cap_ppc_watchdog;
|
static int cap_ppc_watchdog;
|
||||||
static int cap_papr;
|
static int cap_papr;
|
||||||
|
static int cap_htab_fd;
|
||||||
|
|
||||||
/* XXX We have a race condition where we actually have a level triggered
|
/* XXX We have a race condition where we actually have a level triggered
|
||||||
* interrupt, but the infrastructure can't expose that yet, so the guest
|
* interrupt, but the infrastructure can't expose that yet, so the guest
|
||||||
@ -101,6 +102,7 @@ int kvm_arch_init(KVMState *s)
|
|||||||
cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
|
cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
|
||||||
/* Note: we don't set cap_papr here, because this capability is
|
/* Note: we don't set cap_papr here, because this capability is
|
||||||
* only activated after this by kvmppc_set_papr() */
|
* only activated after this by kvmppc_set_papr() */
|
||||||
|
cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
|
||||||
|
|
||||||
if (!cap_interrupt_level) {
|
if (!cap_interrupt_level) {
|
||||||
fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
|
fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
|
||||||
@ -1788,6 +1790,73 @@ static int kvm_ppc_register_host_cpu_type(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int kvmppc_get_htab_fd(bool write)
|
||||||
|
{
|
||||||
|
struct kvm_get_htab_fd s = {
|
||||||
|
.flags = write ? KVM_GET_HTAB_WRITE : 0,
|
||||||
|
.start_index = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
if (!cap_htab_fd) {
|
||||||
|
fprintf(stderr, "KVM version doesn't support saving the hash table\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Copy hash page table data from a KVM HTAB fd into a migration stream.
 *
 * @f:       migration stream the data is appended to
 * @fd:      descriptor the table data is read() from
 * @bufsize: size in bytes of the on-stack bounce buffer used per read()
 * @max_ns:  time budget in nanoseconds; a negative value disables the
 *           limit, so the loop only stops when read() returns 0
 *
 * Returns 1 when the last read() returned 0 (nothing more to fetch), 0 when
 * the time budget expired first, or a negative value if read() failed.
 */
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_get_clock_ns(rt_clock);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            /* Kernel already returns data in BE format for the file */
            qemu_put_buffer(f, buf, rc);
        }
        /* Keep going while data arrives and the time budget (if any) holds */
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_get_clock_ns(rt_clock) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}
|
||||||
|
|
||||||
|
/*
 * Push one chunk of hash page table entries from a migration stream into
 * KVM through an HTAB fd.
 *
 * @f:         migration stream holding the n_valid entries' contents
 * @fd:        descriptor the assembled chunk is write()n to
 * @index:     value stored in the chunk header's index field
 * @n_valid:   number of entries whose contents follow in the stream
 * @n_invalid: value stored in the chunk header's n_invalid field
 *
 * The chunk is a struct kvm_get_htab_header immediately followed by
 * n_valid * HASH_PTE_SIZE_64 bytes taken from the stream, and is handed to
 * the kernel in a single write().
 *
 * Returns 0 on success, the negative write() result on failure, or -1 if
 * the write was short.
 */
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    /* This is KVM on ppc, so this is all big-endian */
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    /* Entry contents go directly behind the header */
    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    /* rc is known non-negative here, so the conversion is value-preserving */
    if ((size_t)rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}
|
||||||
|
|
||||||
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
|
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
|
@ -38,6 +38,10 @@ uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
|
|||||||
#endif /* !CONFIG_USER_ONLY */
|
#endif /* !CONFIG_USER_ONLY */
|
||||||
int kvmppc_fixup_cpu(PowerPCCPU *cpu);
|
int kvmppc_fixup_cpu(PowerPCCPU *cpu);
|
||||||
bool kvmppc_has_cap_epr(void);
|
bool kvmppc_has_cap_epr(void);
|
||||||
|
int kvmppc_get_htab_fd(bool write);
|
||||||
|
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns);
|
||||||
|
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
|
||||||
|
uint16_t n_valid, uint16_t n_invalid);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
@ -159,6 +163,24 @@ static inline bool kvmppc_has_cap_epr(void)
|
|||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Fallback definition from the #else branch of this header's build
 * conditional (presumably builds without KVM support -- confirm against
 * the matching #if). No hash table fd can be provided, so every request
 * fails with -1 regardless of @write.
 */
static inline int kvmppc_get_htab_fd(bool write)
{
    return -1;
}
|
||||||
|
|
||||||
|
/*
 * Fallback definition from the #else branch of this header's build
 * conditional (presumably builds without KVM support -- confirm against
 * the matching #if). Calling it aborts the process; it never returns.
 */
static inline int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize,
                                   int64_t max_ns)
{
    abort();
}
|
||||||
|
|
||||||
|
/*
 * Fallback definition from the #else branch of this header's build
 * conditional (presumably builds without KVM support -- confirm against
 * the matching #if). Calling it aborts the process; it never returns.
 */
static inline int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                                         uint16_t n_valid, uint16_t n_invalid)
{
    abort();
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef CONFIG_KVM
|
#ifndef CONFIG_KVM
|
||||||
|
Loading…
Reference in New Issue
Block a user