* VMX feature fix (myself)

* HVF fixes (Cameron)
 -----BEGIN PGP SIGNATURE-----
 
 iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAl3c6UMUHHBib256aW5p
 QHJlZGhhdC5jb20ACgkQv/vSX3jHroNPxgf9FteAWW0LfRiCa1Rw4iujvqwiNLiM
 VrxlSqmQ65n+0Ao2CA6/qJ4KqDQMDZKmDIX4PHi39BtkWskIDbl6fl4Gwbxw7FYu
 7VbmCNnFB+/osz/w4n5jHkomlGJJSj32qWh830GIF5a0+F8Mey9KZl2PHLespLeB
 Dmbge8cYP3am96gzLXEgB+FvpdUjYWaSgynjmhl4p3tv3xDzTJSG6E53Ksd4wiSX
 4u5plnVjdVZqKdcKrU+9c1h65jlwheHGBr5q7paUuZ93h6/eAdC9h8Nimn5I2/9Z
 fy6FHFCYTvCWQnFFHxvrlesxJRGFmKS0LXNOcM8zUdsx/Mr6Hk8HGITyrQ==
 =fulk
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging

* VMX feature fix (myself)
* HVF fixes (Cameron)

# gpg: Signature made Tue 26 Nov 2019 08:58:43 GMT
# gpg:                using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg:                issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream:
  hvf: more accurately match SDM when setting CR0 and PDPTE registers
  hvf: correctly handle REX prefix in relation to legacy prefixes
  hvf: remove TSC synchronization code because it isn't fully complete
  hvf: non-RAM, non-ROMD memory ranges are now correctly mapped in
  target/i386: add two missing VMX features for Skylake and CascadeLake Server

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2019-11-26 19:48:01 +00:00
commit e0d79c9435
7 changed files with 104 additions and 72 deletions

View File

@@ -3006,7 +3006,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT |
VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID |
VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS,
VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS |
VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML,
.xlevel = 0x80000008,
.model_id = "Intel Xeon Processor (Skylake)",
.versions = (X86CPUVersionDefinition[]) {
@@ -3131,7 +3132,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT |
VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID |
VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS,
VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS |
VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML,
.xlevel = 0x80000008,
.model_id = "Intel Xeon Processor (Cascadelake)",
.versions = (X86CPUVersionDefinition[]) {

View File

@@ -107,14 +107,14 @@ static void assert_hvf_ok(hv_return_t ret)
}
/* Memory slots */
hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t end)
hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size)
{
hvf_slot *slot;
int x;
for (x = 0; x < hvf_state->num_slots; ++x) {
slot = &hvf_state->slots[x];
if (slot->size && start < (slot->start + slot->size) &&
end > slot->start) {
(start + size) > slot->start) {
return slot;
}
}
@@ -129,12 +129,10 @@ struct mac_slot {
};
struct mac_slot mac_slots[32];
#define ALIGN(x, y) (((x) + (y) - 1) & ~((y) - 1))
static int do_hvf_set_memory(hvf_slot *slot)
static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags)
{
struct mac_slot *macslot;
hv_memory_flags_t flags;
hv_return_t ret;
macslot = &mac_slots[slot->slot_id];
@@ -151,8 +149,6 @@ static int do_hvf_set_memory(hvf_slot *slot)
return 0;
}
flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;
macslot->present = 1;
macslot->gpa_start = slot->start;
macslot->size = slot->size;
@@ -165,14 +161,24 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
{
hvf_slot *mem;
MemoryRegion *area = section->mr;
bool writeable = !area->readonly && !area->rom_device;
hv_memory_flags_t flags;
if (!memory_region_is_ram(area)) {
return;
if (writeable) {
return;
} else if (!memory_region_is_romd(area)) {
/*
* If the memory device is not in romd_mode, then we actually want
* to remove the hvf memory slot so all accesses will trap.
*/
add = false;
}
}
mem = hvf_find_overlap_slot(
section->offset_within_address_space,
section->offset_within_address_space + int128_get64(section->size));
int128_get64(section->size));
if (mem && add) {
if (mem->size == int128_get64(section->size) &&
@@ -186,7 +192,7 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
/* Region needs to be reset. set the size to 0 and remap it. */
if (mem) {
mem->size = 0;
if (do_hvf_set_memory(mem)) {
if (do_hvf_set_memory(mem, 0)) {
error_report("Failed to reset overlapping slot");
abort();
}
@@ -196,6 +202,13 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
return;
}
if (area->readonly ||
(!memory_region_is_ram(area) && memory_region_is_romd(area))) {
flags = HV_MEMORY_READ | HV_MEMORY_EXEC;
} else {
flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;
}
/* Now make a new slot. */
int x;
@@ -216,7 +229,7 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
mem->start = section->offset_within_address_space;
mem->region = area;
if (do_hvf_set_memory(mem)) {
if (do_hvf_set_memory(mem, flags)) {
error_report("Error registering new memory slot");
abort();
}
@@ -345,7 +358,14 @@ static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
return false;
}
return !slot;
if (!slot) {
return true;
}
if (!memory_region_is_ram(slot->region) &&
!(read && memory_region_is_romd(slot->region))) {
return true;
}
return false;
}
static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
@@ -354,7 +374,7 @@ static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
slot = hvf_find_overlap_slot(
section->offset_within_address_space,
section->offset_within_address_space + int128_get64(section->size));
int128_get64(section->size));
/* protect region against writes; begin tracking it */
if (on) {
@@ -421,12 +441,20 @@ static MemoryListener hvf_memory_listener = {
};
void hvf_reset_vcpu(CPUState *cpu) {
uint64_t pdpte[4] = {0, 0, 0, 0};
int i;
/* TODO: this shouldn't be needed; there is already a call to
* cpu_synchronize_all_post_reset in vl.c
*/
wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 0);
wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, 0);
/* Initialize PDPTE */
for (i = 0; i < 4; i++) {
wvmcs(cpu->hvf_fd, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]);
}
macvm_set_cr0(cpu->hvf_fd, 0x60000010);
wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK);
@@ -498,7 +526,6 @@ void hvf_reset_vcpu(CPUState *cpu) {
wreg(cpu->hvf_fd, HV_X86_R8 + i, 0x0);
}
hv_vm_sync_tsc(0);
hv_vcpu_invalidate_tlb(cpu->hvf_fd);
hv_vcpu_flush(cpu->hvf_fd);
}
@@ -592,7 +619,7 @@ int hvf_init_vcpu(CPUState *cpu)
hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1);
hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1);
hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1);
/*hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);*/
hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);
hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1);
hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1);
hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1);
@@ -720,7 +747,7 @@ int hvf_vcpu_exec(CPUState *cpu)
ret = EXCP_INTERRUPT;
break;
}
/* Need to check if MMIO or unmmaped fault */
/* Need to check if MMIO or unmapped fault */
case EXIT_REASON_EPT_FAULT:
{
hvf_slot *slot;
@@ -731,7 +758,7 @@ int hvf_vcpu_exec(CPUState *cpu)
vmx_set_nmi_blocking(cpu);
}
slot = hvf_find_overlap_slot(gpa, gpa);
slot = hvf_find_overlap_slot(gpa, 1);
/* mmio */
if (ept_emulation_fault(slot, gpa, exit_qual)) {
struct x86_decode decode;

View File

@@ -121,6 +121,7 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0)
uint64_t pdpte[4] = {0, 0, 0, 0};
uint64_t efer = rvmcs(vcpu, VMCS_GUEST_IA32_EFER);
uint64_t old_cr0 = rvmcs(vcpu, VMCS_GUEST_CR0);
uint64_t mask = CR0_PG | CR0_CD | CR0_NW | CR0_NE | CR0_ET;
if ((cr0 & CR0_PG) && (rvmcs(vcpu, VMCS_GUEST_CR4) & CR4_PAE) &&
!(efer & MSR_EFER_LME)) {
@@ -128,18 +129,15 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0)
rvmcs(vcpu, VMCS_GUEST_CR3) & ~0x1f,
MEMTXATTRS_UNSPECIFIED,
(uint8_t *)pdpte, 32, 0);
/* Only set PDPTE when appropriate. */
for (i = 0; i < 4; i++) {
wvmcs(vcpu, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]);
}
}
for (i = 0; i < 4; i++) {
wvmcs(vcpu, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]);
}
wvmcs(vcpu, VMCS_CR0_MASK, CR0_CD | CR0_NE | CR0_PG);
wvmcs(vcpu, VMCS_CR0_MASK, mask);
wvmcs(vcpu, VMCS_CR0_SHADOW, cr0);
cr0 &= ~CR0_CD;
wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE | CR0_ET);
if (efer & MSR_EFER_LME) {
if (!(old_cr0 & CR0_PG) && (cr0 & CR0_PG)) {
enter_long_mode(vcpu, cr0, efer);
@@ -149,6 +147,10 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0)
}
}
/* Filter new CR0 after we are finished examining it above. */
cr0 = (cr0 & ~(mask & ~CR0_PG));
wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE | CR0_ET);
hv_vcpu_invalidate_tlb(vcpu);
hv_vcpu_flush(vcpu);
}

View File

@@ -122,7 +122,8 @@ static void decode_rax(CPUX86State *env, struct x86_decode *decode,
{
op->type = X86_VAR_REG;
op->reg = R_EAX;
op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, 0,
/* Since reg is always AX, REX prefix has no impact. */
op->ptr = get_reg_ref(env, op->reg, false, 0,
decode->operand_size);
}
@@ -1687,40 +1688,37 @@ calc_addr:
}
}
target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended,
int size)
target_ulong get_reg_ref(CPUX86State *env, int reg, int rex_present,
int is_extended, int size)
{
target_ulong ptr = 0;
int which = 0;
if (is_extended) {
reg |= R_R8;
}
switch (size) {
case 1:
if (is_extended || reg < 4 || rex) {
which = 1;
if (is_extended || reg < 4 || rex_present) {
ptr = (target_ulong)&RL(env, reg);
} else {
which = 2;
ptr = (target_ulong)&RH(env, reg - 4);
}
break;
default:
which = 3;
ptr = (target_ulong)&RRX(env, reg);
break;
}
return ptr;
}
target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended,
int size)
target_ulong get_reg_val(CPUX86State *env, int reg, int rex_present,
int is_extended, int size)
{
target_ulong val = 0;
memcpy(&val, (void *)get_reg_ref(env, reg, rex, is_extended, size), size);
memcpy(&val,
(void *)get_reg_ref(env, reg, rex_present, is_extended, size),
size);
return val;
}
@@ -1853,28 +1851,38 @@ void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode,
static void decode_prefix(CPUX86State *env, struct x86_decode *decode)
{
while (1) {
/*
* REX prefix must come after legacy prefixes.
* REX before legacy is ignored.
* Clear rex to simulate this.
*/
uint8_t byte = decode_byte(env, decode);
switch (byte) {
case PREFIX_LOCK:
decode->lock = byte;
decode->rex.rex = 0;
break;
case PREFIX_REPN:
case PREFIX_REP:
decode->rep = byte;
decode->rex.rex = 0;
break;
case PREFIX_CS_SEG_OVEERIDE:
case PREFIX_SS_SEG_OVEERIDE:
case PREFIX_DS_SEG_OVEERIDE:
case PREFIX_ES_SEG_OVEERIDE:
case PREFIX_FS_SEG_OVEERIDE:
case PREFIX_GS_SEG_OVEERIDE:
case PREFIX_CS_SEG_OVERRIDE:
case PREFIX_SS_SEG_OVERRIDE:
case PREFIX_DS_SEG_OVERRIDE:
case PREFIX_ES_SEG_OVERRIDE:
case PREFIX_FS_SEG_OVERRIDE:
case PREFIX_GS_SEG_OVERRIDE:
decode->segment_override = byte;
decode->rex.rex = 0;
break;
case PREFIX_OP_SIZE_OVERRIDE:
decode->op_size_override = byte;
decode->rex.rex = 0;
break;
case PREFIX_ADDR_SIZE_OVERRIDE:
decode->addr_size_override = byte;
decode->rex.rex = 0;
break;
case PREFIX_REX ... (PREFIX_REX + 0xf):
if (x86_is_long_mode(env_cpu(env))) {
@@ -2111,14 +2119,14 @@ void init_decoder()
{
int i;
for (i = 0; i < ARRAY_SIZE(_decode_tbl2); i++) {
memcpy(_decode_tbl1, &invl_inst, sizeof(invl_inst));
for (i = 0; i < ARRAY_SIZE(_decode_tbl1); i++) {
memcpy(&_decode_tbl1[i], &invl_inst, sizeof(invl_inst));
}
for (i = 0; i < ARRAY_SIZE(_decode_tbl2); i++) {
memcpy(_decode_tbl2, &invl_inst, sizeof(invl_inst));
memcpy(&_decode_tbl2[i], &invl_inst, sizeof(invl_inst));
}
for (i = 0; i < ARRAY_SIZE(_decode_tbl3); i++) {
memcpy(_decode_tbl3, &invl_inst, sizeof(invl_inst_x87));
memcpy(&_decode_tbl3[i], &invl_inst_x87, sizeof(invl_inst_x87));
}
for (i = 0; i < ARRAY_SIZE(_1op_inst); i++) {
@@ -2167,22 +2175,22 @@ target_ulong decode_linear_addr(CPUX86State *env, struct x86_decode *decode,
target_ulong addr, X86Seg seg)
{
switch (decode->segment_override) {
case PREFIX_CS_SEG_OVEERIDE:
case PREFIX_CS_SEG_OVERRIDE:
seg = R_CS;
break;
case PREFIX_SS_SEG_OVEERIDE:
case PREFIX_SS_SEG_OVERRIDE:
seg = R_SS;
break;
case PREFIX_DS_SEG_OVEERIDE:
case PREFIX_DS_SEG_OVERRIDE:
seg = R_DS;
break;
case PREFIX_ES_SEG_OVEERIDE:
case PREFIX_ES_SEG_OVERRIDE:
seg = R_ES;
break;
case PREFIX_FS_SEG_OVEERIDE:
case PREFIX_FS_SEG_OVERRIDE:
seg = R_FS;
break;
case PREFIX_GS_SEG_OVEERIDE:
case PREFIX_GS_SEG_OVERRIDE:
seg = R_GS;
break;
default:

View File

@@ -27,12 +27,12 @@ typedef enum x86_prefix {
PREFIX_REPN = 0xf2,
PREFIX_REP = 0xf3,
/* group 2 */
PREFIX_CS_SEG_OVEERIDE = 0x2e,
PREFIX_SS_SEG_OVEERIDE = 0x36,
PREFIX_DS_SEG_OVEERIDE = 0x3e,
PREFIX_ES_SEG_OVEERIDE = 0x26,
PREFIX_FS_SEG_OVEERIDE = 0x64,
PREFIX_GS_SEG_OVEERIDE = 0x65,
PREFIX_CS_SEG_OVERRIDE = 0x2e,
PREFIX_SS_SEG_OVERRIDE = 0x36,
PREFIX_DS_SEG_OVERRIDE = 0x3e,
PREFIX_ES_SEG_OVERRIDE = 0x26,
PREFIX_FS_SEG_OVERRIDE = 0x64,
PREFIX_GS_SEG_OVERRIDE = 0x65,
/* group 3 */
PREFIX_OP_SIZE_OVERRIDE = 0x66,
/* group 4 */
@@ -303,10 +303,10 @@ uint64_t sign(uint64_t val, int size);
uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode);
target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended,
int size);
target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended,
int size);
target_ulong get_reg_ref(CPUX86State *env, int reg, int rex_present,
int is_extended, int size);
target_ulong get_reg_val(CPUX86State *env, int reg, int rex_present,
int is_extended, int size);
void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode,
struct x86_decode_op *op);
target_ulong decode_linear_addr(CPUX86State *env, struct x86_decode *decode,

View File

@@ -772,9 +772,6 @@ void simulate_wrmsr(struct CPUState *cpu)
switch (msr) {
case MSR_IA32_TSC:
/* if (!osx_is_sierra())
wvmcs(cpu->hvf_fd, VMCS_TSC_OFFSET, data - rdtscp());
hv_vm_sync_tsc(data);*/
break;
case MSR_IA32_APICBASE:
cpu_set_apic_base(X86_CPU(cpu)->apic_state, data);

View File

@@ -152,10 +152,6 @@ void hvf_put_msrs(CPUState *cpu_state)
hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_GSBASE, env->segs[R_GS].base);
hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_FSBASE, env->segs[R_FS].base);
/* if (!osx_is_sierra())
wvmcs(cpu_state->hvf_fd, VMCS_TSC_OFFSET, env->tsc - rdtscp());*/
hv_vm_sync_tsc(env->tsc);
}