Merge remote-tracking branch 'remotes/kvm/uq/master' into staging

* remotes/kvm/uq/master:
  kvm: Fix eax for cpuid leaf 0x40000000
  kvmclock: Ensure proper env->tsc value for kvmclock_current_nsec calculation
  kvm: Enable -cpu option to hide KVM
  kvm: Ensure negative return value on kvm_init() error handling path
  target-i386: set CC_OP to CC_OP_EFLAGS in cpu_load_eflags
  target-i386: get CPL from SS.DPL
  target-i386: rework CPL checks during task switch, preparing for next patch
  target-i386: fix segment flags for SMM and VM86 mode
  target-i386: Fix vm86 mode regression introduced in fd460606fd.
  kvm_stat: allow choosing between tracepoints and old stats
  kvmclock: Ensure time in migration never goes backward

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2014-06-05 19:16:28 +01:00
commit 9f0355b590
14 changed files with 175 additions and 77 deletions

View File

@ -1004,7 +1004,7 @@ int main(int argc, char **argv)
#if defined(TARGET_I386)
env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
env->hflags |= HF_PE_MASK;
env->hflags |= HF_PE_MASK | HF_CPL_MASK;
if (env->features[FEAT_1_EDX] & CPUID_SSE) {
env->cr[4] |= CR4_OSFXSR_MASK;
env->hflags |= HF_OSFXSR_MASK;

View File

@ -14,8 +14,10 @@
*/
#include "qemu-common.h"
#include "qemu/host-utils.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "sysemu/cpus.h"
#include "hw/sysbus.h"
#include "hw/kvm/clock.h"
@ -34,6 +36,48 @@ typedef struct KVMClockState {
bool clock_valid;
} KVMClockState;
struct pvclock_vcpu_time_info {
uint32_t version;
uint32_t pad0;
uint64_t tsc_timestamp;
uint64_t system_time;
uint32_t tsc_to_system_mul;
int8_t tsc_shift;
uint8_t flags;
uint8_t pad[2];
} __attribute__((__packed__)); /* 32 bytes */
static uint64_t kvmclock_current_nsec(KVMClockState *s)
{
CPUState *cpu = first_cpu;
CPUX86State *env = cpu->env_ptr;
hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL;
uint64_t migration_tsc = env->tsc;
struct pvclock_vcpu_time_info time;
uint64_t delta;
uint64_t nsec_lo;
uint64_t nsec_hi;
uint64_t nsec;
if (!(env->system_time_msr & 1ULL)) {
/* KVM clock not active */
return 0;
}
cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time));
assert(time.tsc_timestamp <= migration_tsc);
delta = migration_tsc - time.tsc_timestamp;
if (time.tsc_shift < 0) {
delta >>= -time.tsc_shift;
} else {
delta <<= time.tsc_shift;
}
mulu64(&nsec_lo, &nsec_hi, delta, time.tsc_to_system_mul);
nsec = (nsec_lo >> 32) | (nsec_hi << 32);
return nsec + time.system_time;
}
static void kvmclock_vm_state_change(void *opaque, int running,
RunState state)
@ -45,9 +89,15 @@ static void kvmclock_vm_state_change(void *opaque, int running,
if (running) {
struct kvm_clock_data data;
uint64_t time_at_migration = kvmclock_current_nsec(s);
s->clock_valid = false;
/* We can't rely on the migrated clock value, just discard it */
if (time_at_migration) {
s->clock = time_at_migration;
}
data.clock = s->clock;
data.flags = 0;
ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
@ -75,6 +125,8 @@ static void kvmclock_vm_state_change(void *opaque, int running,
if (s->clock_valid) {
return;
}
cpu_synchronize_all_states();
ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
if (ret < 0) {
fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));

View File

@ -1410,7 +1410,7 @@ int kvm_init(MachineClass *mc)
ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
if (ret < KVM_API_VERSION) {
if (ret > 0) {
if (ret >= 0) {
ret = -EINVAL;
}
fprintf(stderr, "kvm version too old\n");
@ -1461,6 +1461,7 @@ int kvm_init(MachineClass *mc)
if (mc->kvm_type) {
type = mc->kvm_type(kvm_type);
} else if (kvm_type) {
ret = -EINVAL;
fprintf(stderr, "Invalid argument kvm-type=%s\n", kvm_type);
goto err;
}
@ -1561,6 +1562,7 @@ int kvm_init(MachineClass *mc)
return 0;
err:
assert(ret < 0);
if (s->vmfd >= 0) {
close(s->vmfd);
}

View File

@ -4052,7 +4052,7 @@ int main(int argc, char **argv, char **envp)
#if defined(TARGET_I386)
env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
env->hflags |= HF_PE_MASK;
env->hflags |= HF_PE_MASK | HF_CPL_MASK;
if (env->features[FEAT_1_EDX] & CPUID_SSE) {
env->cr[4] |= CR4_OSFXSR_MASK;
env->hflags |= HF_OSFXSR_MASK;

View File

@ -352,8 +352,8 @@ class TracepointProvider(object):
return ret
class Stats:
def __init__(self, provider, fields = None):
self.provider = provider
def __init__(self, providers, fields = None):
self.providers = providers
self.fields_filter = fields
self._update()
def _update(self):
@ -362,16 +362,19 @@ class Stats:
if not self.fields_filter:
return True
return re.match(self.fields_filter, key) is not None
self.values = dict([(key, None)
for key in provider.fields()
if wanted(key)])
self.provider.select(self.values.keys())
self.values = dict()
for d in providers:
provider_fields = [key for key in d.fields() if wanted(key)]
for key in provider_fields:
self.values[key] = None
d.select(provider_fields)
def set_fields_filter(self, fields_filter):
self.fields_filter = fields_filter
self._update()
def get(self):
new = self.provider.read()
for key in self.provider.fields():
for d in providers:
new = d.read()
for key in d.fields():
oldval = self.values.get(key, (0, 0))
newval = new[key]
newdelta = None
@ -487,6 +490,18 @@ options.add_option('-l', '--log',
dest = 'log',
help = 'run in logging mode (like vmstat)',
)
options.add_option('-t', '--tracepoints',
action = 'store_true',
default = False,
dest = 'tracepoints',
help = 'retrieve statistics from tracepoints',
)
options.add_option('-d', '--debugfs',
action = 'store_true',
default = False,
dest = 'debugfs',
help = 'retrieve statistics from debugfs',
)
options.add_option('-f', '--fields',
action = 'store',
default = None,
@ -495,12 +510,19 @@ options.add_option('-f', '--fields',
)
(options, args) = options.parse_args(sys.argv)
try:
provider = TracepointProvider()
except:
provider = DebugfsProvider()
providers = []
if options.tracepoints:
providers.append(TracepointProvider())
if options.debugfs:
providers.append(DebugfsProvider())
stats = Stats(provider, fields = options.fields)
if len(providers) == 0:
try:
providers = [TracepointProvider()]
except:
providers = [DebugfsProvider()]
stats = Stats(providers, fields = options.fields)
if options.log:
log(stats)

View File

@ -87,6 +87,7 @@ typedef struct X86CPU {
bool hyperv_time;
bool check_cpuid;
bool enforce_cpuid;
bool expose_kvm;
/* if true the CPUID code directly forward host cache leaves to the guest */
bool cache_info_passthrough;

View File

@ -2792,6 +2792,7 @@ static Property x86_cpu_properties[] = {
DEFINE_PROP_BOOL("hv-time", X86CPU, hyperv_time, false),
DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, false),
DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),
DEFINE_PROP_END_OF_LIST()
};

View File

@ -986,7 +986,6 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env,
/* update the hidden flags */
{
if (seg_reg == R_CS) {
int cpl = selector & 3;
#ifdef TARGET_X86_64
if ((env->hflags & HF_LMA_MASK) && (flags & DESC_L_MASK)) {
/* long mode */
@ -996,15 +995,14 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env,
#endif
{
/* legacy / compatibility case */
if (!(env->cr[0] & CR0_PE_MASK))
cpl = 0;
else if (env->eflags & VM_MASK)
cpl = 3;
new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
>> (DESC_B_SHIFT - HF_CS32_SHIFT);
env->hflags = (env->hflags & ~(HF_CS32_MASK | HF_CS64_MASK)) |
new_hflags;
}
}
if (seg_reg == R_SS) {
int cpl = (flags >> DESC_DPL_SHIFT) & 3;
#if HF_CPL_MASK != 3
#error HF_CPL_MASK is hardcoded
#endif
@ -1234,11 +1232,14 @@ static inline uint32_t cpu_compute_eflags(CPUX86State *env)
return env->eflags | cpu_cc_compute_all(env, CC_OP) | (env->df & DF_MASK);
}
/* NOTE: CC_OP must be modified manually to CC_OP_EFLAGS */
/* NOTE: the translator must set DisasContext.cc_op to CC_OP_EFLAGS
* after generating a call to a helper that uses this.
*/
static inline void cpu_load_eflags(CPUX86State *env, int eflags,
int update_mask)
{
CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
CC_OP = CC_OP_EFLAGS;
env->df = 1 - (2 * ((eflags >> 10) & 1));
env->eflags = (env->eflags & ~update_mask) |
(eflags & update_mask) | 0x2;

View File

@ -127,9 +127,11 @@ static int x86_cpu_gdb_load_seg(X86CPU *cpu, int sreg, uint8_t *mem_buf)
target_ulong base;
if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) {
int dpl = (env->eflags & VM_MASK) ? 3 : 0;
base = selector << 4;
limit = 0xffff;
flags = 0;
flags = DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
DESC_A_MASK | (dpl << DESC_DPL_SHIFT);
} else {
if (!cpu_x86_get_descr_debug(env, selector, &base, &limit,
&flags)) {

View File

@ -528,10 +528,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
has_msr_hv_hypercall = true;
}
if (cpu->expose_kvm) {
memcpy(signature, "KVMKVMKVM\0\0\0", 12);
c = &cpuid_data.entries[cpuid_i++];
c->function = KVM_CPUID_SIGNATURE | kvm_base;
c->eax = 0;
c->eax = KVM_CPUID_FEATURES | kvm_base;
c->ebx = signature[0];
c->ecx = signature[1];
c->edx = signature[2];
@ -545,6 +546,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
has_msr_pv_eoi_en = c->eax & (1 << KVM_FEATURE_PV_EOI);
has_msr_kvm_steal_time = c->eax & (1 << KVM_FEATURE_STEAL_TIME);
}
cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
@ -1430,7 +1432,7 @@ static int kvm_get_sregs(X86CPU *cpu)
HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
hflags = (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);

View File

@ -312,6 +312,14 @@ static int cpu_post_load(void *opaque, int version_id)
env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK);
}
/* Older versions of QEMU incorrectly used CS.DPL as the CPL when
* running under KVM. This is wrong for conforming code segments.
* Luckily, in our implementation the CPL field of hflags is redundant
* and we can get the right value from the SS descriptor privilege level.
*/
env->hflags &= ~HF_CPL_MASK;
env->hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
/* XXX: restore FPU round state */
env->fpstt = (env->fpus_vmstate >> 11) & 7;
env->fpus = env->fpus_vmstate & ~0x3800;

View File

@ -88,8 +88,10 @@ static inline void load_seg_cache_raw_dt(SegmentCache *sc, uint32_t e1,
static inline void load_seg_vm(CPUX86State *env, int seg, int selector)
{
selector &= 0xffff;
cpu_x86_load_seg_cache(env, seg, selector,
(selector << 4), 0xffff, 0);
cpu_x86_load_seg_cache(env, seg, selector, (selector << 4), 0xffff,
DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
DESC_A_MASK | (3 << DESC_DPL_SHIFT));
}
static inline void get_ss_esp_from_tss(CPUX86State *env, uint32_t *ss_ptr,
@ -133,11 +135,10 @@ static inline void get_ss_esp_from_tss(CPUX86State *env, uint32_t *ss_ptr,
}
}
/* XXX: merge with load_seg() */
static void tss_load_seg(CPUX86State *env, int seg_reg, int selector)
static void tss_load_seg(CPUX86State *env, int seg_reg, int selector, int cpl)
{
uint32_t e1, e2;
int rpl, dpl, cpl;
int rpl, dpl;
if ((selector & 0xfffc) != 0) {
if (load_segment(env, &e1, &e2, selector) != 0) {
@ -148,18 +149,13 @@ static void tss_load_seg(CPUX86State *env, int seg_reg, int selector)
}
rpl = selector & 3;
dpl = (e2 >> DESC_DPL_SHIFT) & 3;
cpl = env->hflags & HF_CPL_MASK;
if (seg_reg == R_CS) {
if (!(e2 & DESC_CS_MASK)) {
raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
}
/* XXX: is it correct? */
if (dpl != rpl) {
raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
}
if ((e2 & DESC_C_MASK) && dpl > rpl) {
raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
}
} else if (seg_reg == R_SS) {
/* SS must be writable data */
if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK)) {
@ -446,12 +442,13 @@ static void switch_tss(CPUX86State *env, int tss_selector,
/* load the segments */
if (!(new_eflags & VM_MASK)) {
tss_load_seg(env, R_CS, new_segs[R_CS]);
tss_load_seg(env, R_SS, new_segs[R_SS]);
tss_load_seg(env, R_ES, new_segs[R_ES]);
tss_load_seg(env, R_DS, new_segs[R_DS]);
tss_load_seg(env, R_FS, new_segs[R_FS]);
tss_load_seg(env, R_GS, new_segs[R_GS]);
int cpl = new_segs[R_CS] & 3;
tss_load_seg(env, R_CS, new_segs[R_CS], cpl);
tss_load_seg(env, R_SS, new_segs[R_SS], cpl);
tss_load_seg(env, R_ES, new_segs[R_ES], cpl);
tss_load_seg(env, R_DS, new_segs[R_DS], cpl);
tss_load_seg(env, R_FS, new_segs[R_FS], cpl);
tss_load_seg(env, R_GS, new_segs[R_GS], cpl);
}
/* check that env->eip is in the CS segment limits */
@ -558,6 +555,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
int has_error_code, new_stack, shift;
uint32_t e1, e2, offset, ss = 0, esp, ss_e1 = 0, ss_e2 = 0;
uint32_t old_eip, sp_mask;
int vm86 = env->eflags & VM_MASK;
has_error_code = 0;
if (!is_int && !is_hw) {
@ -673,7 +671,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
ssp = get_seg_base(ss_e1, ss_e2);
} else if ((e2 & DESC_C_MASK) || dpl == cpl) {
/* to same privilege */
if (env->eflags & VM_MASK) {
if (vm86) {
raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
}
new_stack = 0;
@ -694,14 +692,14 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
#if 0
/* XXX: check that enough room is available */
push_size = 6 + (new_stack << 2) + (has_error_code << 1);
if (env->eflags & VM_MASK) {
if (vm86) {
push_size += 8;
}
push_size <<= shift;
#endif
if (shift == 1) {
if (new_stack) {
if (env->eflags & VM_MASK) {
if (vm86) {
PUSHL(ssp, esp, sp_mask, env->segs[R_GS].selector);
PUSHL(ssp, esp, sp_mask, env->segs[R_FS].selector);
PUSHL(ssp, esp, sp_mask, env->segs[R_DS].selector);
@ -718,7 +716,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
}
} else {
if (new_stack) {
if (env->eflags & VM_MASK) {
if (vm86) {
PUSHW(ssp, esp, sp_mask, env->segs[R_GS].selector);
PUSHW(ssp, esp, sp_mask, env->segs[R_FS].selector);
PUSHW(ssp, esp, sp_mask, env->segs[R_DS].selector);
@ -742,7 +740,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
if (new_stack) {
if (env->eflags & VM_MASK) {
if (vm86) {
cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0, 0);
cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0, 0);
cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0, 0);
@ -1600,7 +1598,6 @@ void helper_ljmp_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
}
next_eip = env->eip + next_eip_addend;
switch_tss(env, new_cs, e1, e2, SWITCH_TSS_JMP, next_eip);
CC_OP = CC_OP_EFLAGS;
break;
case 4: /* 286 call gate */
case 12: /* 386 call gate */
@ -1769,7 +1766,6 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
}
switch_tss(env, new_cs, e1, e2, SWITCH_TSS_CALL, next_eip);
CC_OP = CC_OP_EFLAGS;
return;
case 4: /* 286 call gate */
case 12: /* 386 call gate */
@ -2464,9 +2460,12 @@ void helper_verw(CPUX86State *env, target_ulong selector1)
void cpu_x86_load_seg(CPUX86State *env, int seg_reg, int selector)
{
if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) {
int dpl = (env->eflags & VM_MASK) ? 3 : 0;
selector &= 0xffff;
cpu_x86_load_seg_cache(env, seg_reg, selector,
(selector << 4), 0xffff, 0);
(selector << 4), 0xffff,
DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
DESC_A_MASK | (dpl << DESC_DPL_SHIFT));
} else {
helper_load_seg(env, seg_reg, selector);
}

View File

@ -168,15 +168,26 @@ void do_smm_enter(X86CPU *cpu)
CR0_PG_MASK));
cpu_x86_update_cr4(env, 0);
env->dr[7] = 0x00000400;
CC_OP = CC_OP_EFLAGS;
cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase,
0xffffffff, 0);
cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, 0);
cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff, 0);
cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, 0);
cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, 0);
cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, 0);
0xffffffff,
DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
DESC_A_MASK);
cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff,
DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
DESC_A_MASK);
cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff,
DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
DESC_A_MASK);
cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff,
DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
DESC_A_MASK);
cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff,
DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
DESC_A_MASK);
cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff,
DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
DESC_A_MASK);
}
void helper_rsm(CPUX86State *env)
@ -296,7 +307,6 @@ void helper_rsm(CPUX86State *env)
env->smbase = ldl_phys(cs->as, sm_state + 0x7ef8) & ~0x7fff;
}
#endif
CC_OP = CC_OP_EFLAGS;
env->hflags &= ~HF_SMM_MASK;
cpu_smm_update(env);

View File

@ -260,7 +260,6 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
env->vm_vmcb + offsetof(struct vmcb,
save.rflags)),
~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
CC_OP = CC_OP_EFLAGS;
svm_load_seg_cache(env, env->vm_vmcb + offsetof(struct vmcb, save.es),
R_ES);
@ -702,7 +701,6 @@ void helper_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1)
save.rflags)),
~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK |
VM_MASK));
CC_OP = CC_OP_EFLAGS;
svm_load_seg_cache(env, env->vm_hsave + offsetof(struct vmcb, save.es),
R_ES);