From 79e9ebebbf2a00c46fcedb6dc7dd5e12bbd30216 Mon Sep 17 00:00:00 2001 From: Liu Jinsong Date: Thu, 5 Dec 2013 08:32:12 +0800 Subject: [PATCH 1/9] target-i386: Intel MPX Add some MPX-related definitions and hardcode the sizes and offsets of xsave features 3 and 4. Also add the corresponding parts to kvm_get/put_xsave and to the vmstate. Signed-off-by: Liu Jinsong Signed-off-by: Paolo Bonzini --- target-i386/cpu.c | 4 ++++ target-i386/cpu.h | 25 ++++++++++++++++++--- target-i386/kvm.c | 24 ++++++++++++++++++++ target-i386/machine.c | 51 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 3 deletions(-) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index bb98f6defc..5076a94aa2 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -336,6 +336,10 @@ typedef struct ExtSaveArea { static const ExtSaveArea ext_save_areas[] = { [2] = { .feature = FEAT_1_ECX, .bits = CPUID_EXT_AVX, .offset = 0x240, .size = 0x100 }, + [3] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX, + .offset = 0x3c0, .size = 0x40 }, + [4] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX, + .offset = 0x400, .size = 0x10 }, }; const char *get_register_name_32(unsigned int reg) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index ea373e82dc..bbec228679 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -380,9 +380,14 @@ #define MSR_VM_HSAVE_PA 0xc0010117 -#define XSTATE_FP 1 -#define XSTATE_SSE 2 -#define XSTATE_YMM 4 +#define MSR_IA32_BNDCFGS 0x00000d90 + +#define XSTATE_FP (1ULL << 0) +#define XSTATE_SSE (1ULL << 1) +#define XSTATE_YMM (1ULL << 2) +#define XSTATE_BNDREGS (1ULL << 3) +#define XSTATE_BNDCSR (1ULL << 4) + /* CPUID feature words */ typedef enum FeatureWord { @@ -545,6 +550,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_EBX_ERMS (1 << 9) #define CPUID_7_0_EBX_INVPCID (1 << 10) #define CPUID_7_0_EBX_RTM (1 << 11) +#define CPUID_7_0_EBX_MPX (1 << 14) #define CPUID_7_0_EBX_RDSEED (1 << 18) #define CPUID_7_0_EBX_ADX (1 << 19) #define CPUID_7_0_EBX_SMAP (1 << 20) @@ -695,6 +701,16 @@ typedef union { uint64_t q; } MMXReg; +typedef struct BNDReg { + uint64_t lb; + uint64_t ub; +} BNDReg; + +typedef struct BNDCSReg { + uint64_t cfgu; + uint64_t sts; +} BNDCSReg; + #ifdef HOST_WORDS_BIGENDIAN #define XMM_B(n) _b[15 - (n)] #define XMM_W(n) _w[7 - (n)] @@ -912,6 +928,9 @@ typedef struct CPUX86State { uint64_t xstate_bv; XMMReg ymmh_regs[CPU_NB_REGS]; + BNDReg bnd_regs[4]; + BNDCSReg bndcs_regs; + uint64_t msr_bndcfgs; uint64_t xcr0; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 1188482359..01ebca24f0 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -69,6 +69,7 @@ static bool has_msr_feature_control; static bool has_msr_async_pf_en; static bool has_msr_pv_eoi_en; static bool has_msr_misc_enable; +static bool has_msr_bndcfgs; static bool has_msr_kvm_steal_time; static int lm_capable_kernel; @@ -772,6 +773,10 @@ static int kvm_get_supported_msrs(KVMState *s) has_msr_misc_enable = true; continue; } + if (kvm_msr_list->indices[i] == MSR_IA32_BNDCFGS) { + has_msr_bndcfgs = true; + continue; + } } } @@ -975,6 +980,8 @@ static int kvm_put_fpu(X86CPU *cpu) #define XSAVE_XMM_SPACE 40 #define XSAVE_XSTATE_BV 128 #define XSAVE_YMMH_SPACE 144 +#define XSAVE_BNDREGS 240 +#define XSAVE_BNDCSR 256 static int kvm_put_xsave(X86CPU *cpu) { @@ -1007,6 +1014,10 @@ static int kvm_put_xsave(X86CPU *cpu) *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv; memcpy(&xsave->region[XSAVE_YMMH_SPACE], env->ymmh_regs, sizeof env->ymmh_regs); + 
memcpy(&xsave->region[XSAVE_BNDREGS], env->bnd_regs, + sizeof env->bnd_regs); + memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs, + sizeof(env->bndcs_regs)); r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave); return r; } @@ -1208,6 +1219,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_set(&msrs[n++], MSR_IA32_FEATURE_CONTROL, env->msr_ia32_feature_control); } + if (has_msr_bndcfgs) { + kvm_msr_entry_set(&msrs[n++], MSR_IA32_BNDCFGS, env->msr_bndcfgs); + } } if (env->mcg_cap) { int i; @@ -1289,6 +1303,10 @@ static int kvm_get_xsave(X86CPU *cpu) env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV]; memcpy(env->ymmh_regs, &xsave->region[XSAVE_YMMH_SPACE], sizeof env->ymmh_regs); + memcpy(env->bnd_regs, &xsave->region[XSAVE_BNDREGS], + sizeof env->bnd_regs); + memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR], + sizeof(env->bndcs_regs)); return 0; } @@ -1435,6 +1453,9 @@ static int kvm_get_msrs(X86CPU *cpu) if (has_msr_feature_control) { msrs[n++].index = MSR_IA32_FEATURE_CONTROL; } + if (has_msr_bndcfgs) { + msrs[n++].index = MSR_IA32_BNDCFGS; + } if (!env->tsc_valid) { msrs[n++].index = MSR_IA32_TSC; @@ -1550,6 +1571,9 @@ static int kvm_get_msrs(X86CPU *cpu) case MSR_IA32_FEATURE_CONTROL: env->msr_ia32_feature_control = msrs[i].data; break; + case MSR_IA32_BNDCFGS: + env->msr_bndcfgs = msrs[i].data; + break; default: if (msrs[i].index >= MSR_MC0_CTL && msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) { diff --git a/target-i386/machine.c b/target-i386/machine.c index e568da2ba4..2de196428d 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -63,6 +63,21 @@ static const VMStateDescription vmstate_ymmh_reg = { #define VMSTATE_YMMH_REGS_VARS(_field, _state, _n, _v) \ VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_ymmh_reg, XMMReg) +static const VMStateDescription vmstate_bnd_regs = { + .name = "bnd_regs", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT64(lb, BNDReg), + VMSTATE_UINT64(ub, BNDReg), + VMSTATE_END_OF_LIST() + } +}; + +#define VMSTATE_BND_REGS(_field, _state, _n) \ + VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_bnd_regs, BNDReg) + static const VMStateDescription vmstate_mtrr_var = { .name = "mtrr_var", .version_id = 1, @@ -506,6 +521,39 @@ static const VMStateDescription vmstate_msr_architectural_pmu = { } }; +static bool mpx_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + unsigned int i; + + for (i = 0; i < 4; i++) { + if (env->bnd_regs[i].lb || env->bnd_regs[i].ub) { + return true; + } + } + + if (env->bndcs_regs.cfgu || env->bndcs_regs.sts) { + return true; + } + + return !!env->msr_bndcfgs; +} + +static const VMStateDescription vmstate_mpx = { + .name = "cpu/mpx", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_BND_REGS(env.bnd_regs, X86CPU, 4), + VMSTATE_UINT64(env.bndcs_regs.cfgu, X86CPU), + VMSTATE_UINT64(env.bndcs_regs.sts, X86CPU), + VMSTATE_UINT64(env.msr_bndcfgs, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_x86_cpu = { .name = "cpu", .version_id = 12, @@ -637,6 +685,9 @@ const VMStateDescription vmstate_x86_cpu = { }, { .vmsd = &vmstate_msr_architectural_pmu, .needed = pmu_enable_needed, + } , { + .vmsd = &vmstate_mpx, + .needed = mpx_needed, } , { /* empty */ } From f86746c263753cf7a7e4bdb8829c70272dfcf36c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20V=C3=A1zquez=20Cao?= Date: 
Fri, 6 Dec 2013 17:38:24 +0900 Subject: [PATCH 2/9] target-i386: do not special case TSC writeback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newer kernels are capable of synchronizing TSC values of multiple VCPUs on writeback, but we were excluding the power up case, which is not needed anymore. Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Paolo Bonzini Signed-off-by: Fernando Luis Vázquez Cao --- target-i386/kvm.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 01ebca24f0..312a46bcb9 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1151,15 +1151,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level) } #endif if (level == KVM_PUT_FULL_STATE) { - /* - * KVM is yet unable to synchronize TSC values of multiple VCPUs on - * writeback. Until this is fixed, we only write the offset to SMP - * guests after migration, desynchronizing the VCPUs, but avoiding - * huge jump-backs that would occur without any writeback at all. - */ - if (smp_cpus == 1 || env->tsc != 0) { - kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); - } + kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); } /* * The following MSRs have side effects on the guest or are too heavy From 0522604b09b8cff54ba2450a7478da2a4d084817 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20V=C3=A1zquez=20Cao?= Date: Fri, 6 Dec 2013 17:33:01 +0900 Subject: [PATCH 3/9] target-i386: clear guest TSC on reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VCPU TSC is not cleared by a warm reset (*), which leaves some types of Linux guests (non-pvops guests and those with the kernel parameter no-kvmclock set) vulnerable to the overflow in cyc2ns_offset fixed by upstream commit 9993bc635d01a6ee7f6b833b4ee65ce7c06350b1 ("sched/x86: Fix overflow in cyc2ns_offset"). To put it in a nutshell, if such a Linux guest without the patch above applied has been up more than 208 days and attempts a warm reset chances are that the newly booted kernel will panic or hang. (*) Intel Xeon E5 processors show the same broken behavior due to the errata "TSC is Not Affected by Warm Reset" (Intel® Xeon® Processor E5 Family Specification Update - August 2013): "The TSC (Time Stamp Counter MSR 10H) should be cleared on reset. Due to this erratum the TSC is not affected by warm reset." Cc: Will Auld Cc: Marcelo Tosatti Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Paolo Bonzini Signed-off-by: Fernando Luis Vázquez Cao --- target-i386/cpu.c | 3 +++ target-i386/kvm.c | 4 +--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 5076a94aa2..bc4cb9d220 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -2450,6 +2450,9 @@ static void x86_cpu_reset(CPUState *s) cpu_breakpoint_remove_all(env, BP_CPU); cpu_watchpoint_remove_all(env, BP_CPU); + env->tsc_adjust = 0; + env->tsc = 0; + #if !defined(CONFIG_USER_ONLY) /* We hard-wire the BSP to the first CPU. 
*/ if (s->cpu_index == 0) { diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 312a46bcb9..285e1a3dc8 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1150,14 +1150,12 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar); } #endif - if (level == KVM_PUT_FULL_STATE) { - kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); - } /* * The following MSRs have side effects on the guest or are too heavy * for normal writeback. Limit them to reset or full state updates. */ if (level >= KVM_PUT_RESET_STATE) { + kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr); kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr); From a94b36ddd6af28593c8a5171b5100e6c3dfc063e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 12 Dec 2013 10:29:19 +0100 Subject: [PATCH 4/9] roms: Flush icache when writing roms to guest memory We use the rom infrastructure to write firmware and/or initial kernel blobs into guest address space. So we're basically emulating the cache off phase on very early system bootup. That phase is usually responsible for clearing the instruction cache for anything it writes into cachable memory, to ensure that after reboot we don't happen to execute stale bits from the instruction cache. So we need to invalidate the icache every time we write a rom into guest address space. We do not need to do this for every DMA since the guest expects it has to flush the icache manually in that case. This fixes random reboot issues on e5500 (booke ppc) for me. Signed-off-by: Alexander Graf Signed-off-by: Paolo Bonzini --- exec.c | 44 ++++++++++++++++++++++++++++++++++----- hw/core/loader.c | 7 +++++++ include/exec/cpu-common.h | 1 + 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/exec.c b/exec.c index f4b9ef25f5..896f7b83fc 100644 --- a/exec.c +++ b/exec.c @@ -50,6 +50,7 @@ #include "translate-all.h" #include "exec/memory-internal.h" +#include "qemu/cache-utils.h" //#define DEBUG_SUBPAGE @@ -2010,9 +2011,13 @@ void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf, address_space_rw(&address_space_memory, addr, buf, len, is_write); } -/* used for ROM loading : can write in RAM and ROM */ -void cpu_physical_memory_write_rom(hwaddr addr, - const uint8_t *buf, int len) +enum write_rom_type { + WRITE_DATA, + FLUSH_CACHE, +}; + +static inline void cpu_physical_memory_write_rom_internal( + hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type) { hwaddr l; uint8_t *ptr; @@ -2031,8 +2036,15 @@ void cpu_physical_memory_write_rom(hwaddr addr, addr1 += memory_region_get_ram_addr(mr); /* ROM/RAM case */ ptr = qemu_get_ram_ptr(addr1); - memcpy(ptr, buf, l); - invalidate_and_set_dirty(addr1, l); + switch (type) { + case WRITE_DATA: + memcpy(ptr, buf, l); + invalidate_and_set_dirty(addr1, l); + break; + case FLUSH_CACHE: + flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l); + break; + } } len -= l; buf += l; @@ -2040,6 +2052,28 @@ void cpu_physical_memory_write_rom(hwaddr addr, } } +/* used for ROM loading : can write in RAM and ROM */ +void cpu_physical_memory_write_rom(hwaddr addr, + const uint8_t *buf, int len) +{ + cpu_physical_memory_write_rom_internal(addr, buf, len, WRITE_DATA); +} + +void cpu_flush_icache_range(hwaddr start, int len) +{ + /* + * This function should do the same thing as an icache flush that was + * triggered from within the guest. 
For TCG we are always cache coherent, + * so there is no need to flush anything. For KVM / Xen we need to flush + * the host's instruction cache at least. + */ + if (tcg_enabled()) { + return; + } + + cpu_physical_memory_write_rom_internal(start, NULL, len, FLUSH_CACHE); +} + typedef struct { MemoryRegion *mr; void *buffer; diff --git a/hw/core/loader.c b/hw/core/loader.c index 60d2ebd4ac..0634bee20c 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -785,6 +785,13 @@ static void rom_reset(void *unused) g_free(rom->data); rom->data = NULL; } + /* + * The rom loader is really on the same level as firmware in the guest + * shadowing a ROM into RAM. Such a shadowing mechanism needs to ensure + * that the instruction cache for that new region is clear, so that the + * CPU definitely fetches its instructions from the just written data. + */ + cpu_flush_icache_range(rom->addr, rom->datasize); } } diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index e4996e19c3..8f33122c9f 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -110,6 +110,7 @@ void stq_phys(hwaddr addr, uint64_t val); void cpu_physical_memory_write_rom(hwaddr addr, const uint8_t *buf, int len); +void cpu_flush_icache_range(hwaddr start, int len); extern struct MemoryRegion io_mem_rom; extern struct MemoryRegion io_mem_notdirty; From 6bdf863d942a267f984e4bd82be80cb2ac5b9915 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 17 Dec 2013 20:05:13 +0100 Subject: [PATCH 5/9] kvm: x86: Separately write feature control MSR on reset If the guest is running in nested mode on system reset, clearing the feature MSR signals the kernel to leave this mode. Recent kernels process this properly but leave the VCPU state undefined afterwards. It is the job of userspace to bring it back to a proper shape. Therefore, write this specific MSR first so that no state transfer gets lost. This allows a guest with VMX in use to be reset cleanly. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- target-i386/kvm.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 285e1a3dc8..221c8a036e 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1115,6 +1115,25 @@ static int kvm_put_tscdeadline_msr(X86CPU *cpu) return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); } +/* + * Provide a separate write service for the feature control MSR in order to + * kick the VCPU out of VMXON or even guest mode on reset. This has to be done + * before writing any other state because forcibly leaving nested mode + * invalidates the VCPU state. + */ +static int kvm_put_msr_feature_control(X86CPU *cpu) +{ + struct { + struct kvm_msrs info; + struct kvm_msr_entry entry; + } msr_data; + + kvm_msr_entry_set(&msr_data.entry, MSR_IA32_FEATURE_CONTROL, + cpu->env.msr_ia32_feature_control); + msr_data.info.nmsrs = 1; + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); +} + static int kvm_put_msrs(X86CPU *cpu, int level) { CPUX86State *env = &cpu->env; @@ -1205,13 +1224,12 @@ static int kvm_put_msrs(X86CPU *cpu, int level) if (cpu->hyperv_vapic) { kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE, 0); } - if (has_msr_feature_control) { - kvm_msr_entry_set(&msrs[n++], MSR_IA32_FEATURE_CONTROL, - env->msr_ia32_feature_control); - } if (has_msr_bndcfgs) { kvm_msr_entry_set(&msrs[n++], MSR_IA32_BNDCFGS, env->msr_bndcfgs); } + + /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see + * kvm_put_msr_feature_control. 
*/ } if (env->mcg_cap) { int i; @@ -1815,6 +1833,13 @@ int kvm_arch_put_registers(CPUState *cpu, int level) assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); + if (level >= KVM_PUT_RESET_STATE && has_msr_feature_control) { + ret = kvm_put_msr_feature_control(x86_cpu); + if (ret < 0) { + return ret; + } + } + ret = kvm_getput_regs(x86_cpu, 1); if (ret < 0) { return ret; From 2ba82852894c762299b7d05e9a2be184116b80f0 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 18 Dec 2013 16:42:17 -0200 Subject: [PATCH 6/9] mempath prefault: fix off-by-one error Fix off-by-one error (noticed by Andrea Arcangeli). Reviewed-by: Paolo Bonzini Signed-off-by: Marcelo Tosatti --- exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exec.c b/exec.c index 896f7b83fc..52d451b547 100644 --- a/exec.c +++ b/exec.c @@ -1001,7 +1001,7 @@ static void *file_ram_alloc(RAMBlock *block, } /* MAP_POPULATE silently ignores failures */ - for (i = 0; i < (memory/hpagesize)-1; i++) { + for (i = 0; i < (memory/hpagesize); i++) { memset(area + (hpagesize*i), 0, 1); } From 94ccff133820552a859c0fb95e33a539e0b90a75 Mon Sep 17 00:00:00 2001 From: thomas knych Date: Thu, 9 Jan 2014 13:14:23 -0800 Subject: [PATCH 7/9] KVM: Retry KVM_CREATE_VM on EINTR Upstreaming this change from Android (https://android-review.googlesource.com/54211). On heavily loaded machines with many VM instances we see KVM_CREATE_VM failing with EINTR on this path: kvm_dev_ioctl_create_vm -> kvm_create_vm -> kvm_init_mmu_notifier -> mmu_notifier_register -> do_mmu_notifier_register -> mm_take_all_locks which checks if any signals have been raised while it was attaining locks and returns EINTR. Retrying the system call greatly improves reliability. Cc: qemu-stable@nongnu.org Signed-off-by: thomas knych Signed-off-by: Paolo Bonzini --- kvm-all.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index 393775459d..6df2ee1635 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1442,16 +1442,22 @@ int kvm_init(void) nc++; } - s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0); - if (s->vmfd < 0) { + do { + ret = kvm_ioctl(s, KVM_CREATE_VM, 0); + } while (ret == -EINTR); + + if (ret < 0) { + fprintf(stderr, "ioctl(KVM_CREATE_VM) failed: %d %s\n", -s->vmfd, + strerror(-ret)); + #ifdef TARGET_S390X fprintf(stderr, "Please add the 'switch_amode' kernel parameter to " "your host kernel command line\n"); #endif - ret = s->vmfd; goto err; } + s->vmfd = ret; missing_cap = kvm_check_extension_list(s, kvm_required_capabilites); if (!missing_cap) { missing_cap = From 584f2be79de148b0765a758ac0c1036a29c5e830 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 10 Jan 2014 18:20:18 +1100 Subject: [PATCH 8/9] KVM: fix addr type for KVM_IOEVENTFD The @addr here is a guest physical address and can easily be bigger than 4G. This changes uint32_t to hwaddr. Cc: qemu-stable@nongnu.org Cc: Michael S. 
Tsirkin Signed-off-by: Alexey Kardashevskiy Signed-off-by: Paolo Bonzini --- kvm-all.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kvm-all.c b/kvm-all.c index 6df2ee1635..eb38ee4eb9 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -519,7 +519,7 @@ int kvm_check_extension(KVMState *s, unsigned int extension) return ret; } -static int kvm_set_ioeventfd_mmio(int fd, uint32_t addr, uint32_t val, +static int kvm_set_ioeventfd_mmio(int fd, hwaddr addr, uint32_t val, bool assign, uint32_t size, bool datamatch) { int ret; From 439d19f2922ac409ee224bc1e5522cee7009d829 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 Jan 2014 14:22:25 +0100 Subject: [PATCH 9/9] kvm: always update the MPX model specific register The original patch from Liu Jinsong restricted them to reset or full state updates, but that's unnecessary (and wrong) since the BNDCFGS MSR has no side effects. Cc: Liu Jinsong Signed-off-by: Paolo Bonzini --- target-i386/kvm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 221c8a036e..d34981fd63 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1161,6 +1161,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_set(&msrs[n++], MSR_IA32_MISC_ENABLE, env->msr_ia32_misc_enable); } + if (has_msr_bndcfgs) { + kvm_msr_entry_set(&msrs[n++], MSR_IA32_BNDCFGS, env->msr_bndcfgs); + } #ifdef TARGET_X86_64 if (lm_capable_kernel) { kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); @@ -1224,9 +1227,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) if (cpu->hyperv_vapic) { kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE, 0); } - if (has_msr_bndcfgs) { - kvm_msr_entry_set(&msrs[n++], MSR_IA32_BNDCFGS, env->msr_bndcfgs); - } /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see * kvm_put_msr_feature_control. */