qemu/target/arm/kvm32.c
Peter Maydell b698e4eef5 arm: Allow system registers for KVM guests to be changed by QEMU code
At the moment the Arm implementations of kvm_arch_{get,put}_registers()
don't support having QEMU change the values of system registers
(aka coprocessor registers for AArch32). This is because although
kvm_arch_get_registers() calls write_list_to_cpustate() to
update the CPU state struct fields (so QEMU code can read the
values in the usual way), kvm_arch_put_registers() does not
call write_cpustate_to_list(), meaning that any changes to
the CPU state struct fields will not be passed back to KVM.

The rationale for this design is documented in a comment in the
AArch32 kvm_arch_put_registers() -- writing the values in the
cpregs list into the CPU state struct is "lossy" because the
write of a register might not succeed, and so if we blindly
copy the CPU state values back again we will incorrectly
change register values for the guest. The assumption was that
no QEMU code would need to write to the registers.

However, when we implemented debug support for KVM guests, we
broke that assumption: the code to handle "set the guest up
to take a breakpoint exception" does so by updating various
guest registers including ESR_EL1.

Support this by making kvm_arch_put_registers() synchronize
CPU state back into the list. We sync only those registers
where the initial write succeeds, which should be sufficient.

This commit is the same as commit 823e1b3818 which we
had to revert in commit 942f99c825, except that the bug
which was preventing EDK2 guest firmware running has been fixed:
kvm_arm_reset_vcpu() now calls write_list_to_cpustate().

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Tested-by: Eric Auger <eric.auger@redhat.com>
2019-05-07 12:55:02 +01:00

532 lines
16 KiB
C

/*
* ARM implementation of KVM hooks, 32 bit specific code.
*
* Copyright Christoffer Dall 2009-2010
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <linux/kvm.h>
#include "qemu-common.h"
#include "cpu.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_arm.h"
#include "internals.h"
#include "hw/arm/arm.h"
#include "qemu/log.h"
static inline void set_feature(uint64_t *features, int feature)
{
*features |= 1ULL << feature;
}
static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id)
{
struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)pret };
assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U32);
return ioctl(fd, KVM_GET_ONE_REG, &idreg);
}
bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
{
/* Identify the feature bits corresponding to the host CPU, and
* fill out the ARMHostCPUClass fields accordingly. To do this
* we have to create a scratch VM, create a single CPU inside it,
* and then query that CPU for the relevant ID registers.
*/
int err = 0, fdarray[3];
uint32_t midr, id_pfr0;
uint64_t features = 0;
/* Old kernels may not know about the PREFERRED_TARGET ioctl: however
* we know these will only support creating one kind of guest CPU,
* which is its preferred CPU type.
*/
static const uint32_t cpus_to_try[] = {
QEMU_KVM_ARM_TARGET_CORTEX_A15,
QEMU_KVM_ARM_TARGET_NONE
};
struct kvm_vcpu_init init;
if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
return false;
}
ahcf->target = init.target;
/* This is not strictly blessed by the device tree binding docs yet,
* but in practice the kernel does not care about this string so
* there is no point maintaining an KVM_ARM_TARGET_* -> string table.
*/
ahcf->dtb_compatible = "arm,arm-v7";
err |= read_sys_reg32(fdarray[2], &midr, ARM_CP15_REG32(0, 0, 0, 0));
err |= read_sys_reg32(fdarray[2], &id_pfr0, ARM_CP15_REG32(0, 0, 1, 0));
err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0,
ARM_CP15_REG32(0, 0, 2, 0));
err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1,
ARM_CP15_REG32(0, 0, 2, 1));
err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2,
ARM_CP15_REG32(0, 0, 2, 2));
err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3,
ARM_CP15_REG32(0, 0, 2, 3));
err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4,
ARM_CP15_REG32(0, 0, 2, 4));
err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5,
ARM_CP15_REG32(0, 0, 2, 5));
if (read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6,
ARM_CP15_REG32(0, 0, 2, 7))) {
/*
* Older kernels don't support reading ID_ISAR6. This register was
* only introduced in ARMv8, so we can assume that it is zero on a
* CPU that a kernel this old is running on.
*/
ahcf->isar.id_isar6 = 0;
}
err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0,
KVM_REG_ARM | KVM_REG_SIZE_U32 |
KVM_REG_ARM_VFP | KVM_REG_ARM_VFP_MVFR0);
err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1,
KVM_REG_ARM | KVM_REG_SIZE_U32 |
KVM_REG_ARM_VFP | KVM_REG_ARM_VFP_MVFR1);
/*
* FIXME: There is not yet a way to read MVFR2.
* Fortunately there is not yet anything in there that affects migration.
*/
kvm_arm_destroy_scratch_host_vcpu(fdarray);
if (err < 0) {
return false;
}
/* Now we've retrieved all the register information we can
* set the feature bits based on the ID register fields.
* We can assume any KVM supporting CPU is at least a v7
* with VFPv3, virtualization extensions, and the generic
* timers; this in turn implies most of the other feature
* bits, but a few must be tested.
*/
set_feature(&features, ARM_FEATURE_V7VE);
set_feature(&features, ARM_FEATURE_VFP3);
set_feature(&features, ARM_FEATURE_GENERIC_TIMER);
if (extract32(id_pfr0, 12, 4) == 1) {
set_feature(&features, ARM_FEATURE_THUMB2EE);
}
if (extract32(ahcf->isar.mvfr1, 12, 4) == 1) {
set_feature(&features, ARM_FEATURE_NEON);
}
if (extract32(ahcf->isar.mvfr1, 28, 4) == 1) {
/* FMAC support implies VFPv4 */
set_feature(&features, ARM_FEATURE_VFP4);
}
ahcf->features = features;
return true;
}
bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
{
/* Return true if the regidx is a register we should synchronize
* via the cpreg_tuples array (ie is not a core reg we sync by
* hand in kvm_arch_get/put_registers())
*/
switch (regidx & KVM_REG_ARM_COPROC_MASK) {
case KVM_REG_ARM_CORE:
case KVM_REG_ARM_VFP:
return false;
default:
return true;
}
}
typedef struct CPRegStateLevel {
uint64_t regidx;
int level;
} CPRegStateLevel;
/* All coprocessor registers not listed in the following table are assumed to
* be of the level KVM_PUT_RUNTIME_STATE. If a register should be written less
* often, you must add it to this table with a state of either
* KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE.
*/
static const CPRegStateLevel non_runtime_cpregs[] = {
{ KVM_REG_ARM_TIMER_CNT, KVM_PUT_FULL_STATE },
};
int kvm_arm_cpreg_level(uint64_t regidx)
{
int i;
for (i = 0; i < ARRAY_SIZE(non_runtime_cpregs); i++) {
const CPRegStateLevel *l = &non_runtime_cpregs[i];
if (l->regidx == regidx) {
return l->level;
}
}
return KVM_PUT_RUNTIME_STATE;
}
#define ARM_CPU_ID_MPIDR 0, 0, 0, 5
int kvm_arch_init_vcpu(CPUState *cs)
{
int ret;
uint64_t v;
uint32_t mpidr;
struct kvm_one_reg r;
ARMCPU *cpu = ARM_CPU(cs);
if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE) {
fprintf(stderr, "KVM is not supported for this guest CPU type\n");
return -EINVAL;
}
/* Determine init features for this CPU */
memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
if (cpu->start_powered_off) {
cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
}
if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
cpu->psci_version = 2;
cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
}
/* Do KVM_ARM_VCPU_INIT ioctl */
ret = kvm_arm_vcpu_init(cs);
if (ret) {
return ret;
}
/* Query the kernel to make sure it supports 32 VFP
* registers: QEMU's "cortex-a15" CPU is always a
* VFP-D32 core. The simplest way to do this is just
* to attempt to read register d31.
*/
r.id = KVM_REG_ARM | KVM_REG_SIZE_U64 | KVM_REG_ARM_VFP | 31;
r.addr = (uintptr_t)(&v);
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
if (ret == -ENOENT) {
return -EINVAL;
}
/*
* When KVM is in use, PSCI is emulated in-kernel and not by qemu.
* Currently KVM has its own idea about MPIDR assignment, so we
* override our defaults with what we get from KVM.
*/
ret = kvm_get_one_reg(cs, ARM_CP15_REG32(ARM_CPU_ID_MPIDR), &mpidr);
if (ret) {
return ret;
}
cpu->mp_affinity = mpidr & ARM32_AFFINITY_MASK;
/* Check whether userspace can specify guest syndrome value */
kvm_arm_init_serror_injection(cs);
return kvm_arm_init_cpreg_list(cpu);
}
typedef struct Reg {
uint64_t id;
int offset;
} Reg;
#define COREREG(KERNELNAME, QEMUFIELD) \
{ \
KVM_REG_ARM | KVM_REG_SIZE_U32 | \
KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(KERNELNAME), \
offsetof(CPUARMState, QEMUFIELD) \
}
#define VFPSYSREG(R) \
{ \
KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_VFP | \
KVM_REG_ARM_VFP_##R, \
offsetof(CPUARMState, vfp.xregs[ARM_VFP_##R]) \
}
/* Like COREREG, but handle fields which are in a uint64_t in CPUARMState. */
#define COREREG64(KERNELNAME, QEMUFIELD) \
{ \
KVM_REG_ARM | KVM_REG_SIZE_U32 | \
KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(KERNELNAME), \
offsetoflow32(CPUARMState, QEMUFIELD) \
}
static const Reg regs[] = {
/* R0_usr .. R14_usr */
COREREG(usr_regs.uregs[0], regs[0]),
COREREG(usr_regs.uregs[1], regs[1]),
COREREG(usr_regs.uregs[2], regs[2]),
COREREG(usr_regs.uregs[3], regs[3]),
COREREG(usr_regs.uregs[4], regs[4]),
COREREG(usr_regs.uregs[5], regs[5]),
COREREG(usr_regs.uregs[6], regs[6]),
COREREG(usr_regs.uregs[7], regs[7]),
COREREG(usr_regs.uregs[8], usr_regs[0]),
COREREG(usr_regs.uregs[9], usr_regs[1]),
COREREG(usr_regs.uregs[10], usr_regs[2]),
COREREG(usr_regs.uregs[11], usr_regs[3]),
COREREG(usr_regs.uregs[12], usr_regs[4]),
COREREG(usr_regs.uregs[13], banked_r13[BANK_USRSYS]),
COREREG(usr_regs.uregs[14], banked_r14[BANK_USRSYS]),
/* R13, R14, SPSR for SVC, ABT, UND, IRQ banks */
COREREG(svc_regs[0], banked_r13[BANK_SVC]),
COREREG(svc_regs[1], banked_r14[BANK_SVC]),
COREREG64(svc_regs[2], banked_spsr[BANK_SVC]),
COREREG(abt_regs[0], banked_r13[BANK_ABT]),
COREREG(abt_regs[1], banked_r14[BANK_ABT]),
COREREG64(abt_regs[2], banked_spsr[BANK_ABT]),
COREREG(und_regs[0], banked_r13[BANK_UND]),
COREREG(und_regs[1], banked_r14[BANK_UND]),
COREREG64(und_regs[2], banked_spsr[BANK_UND]),
COREREG(irq_regs[0], banked_r13[BANK_IRQ]),
COREREG(irq_regs[1], banked_r14[BANK_IRQ]),
COREREG64(irq_regs[2], banked_spsr[BANK_IRQ]),
/* R8_fiq .. R14_fiq and SPSR_fiq */
COREREG(fiq_regs[0], fiq_regs[0]),
COREREG(fiq_regs[1], fiq_regs[1]),
COREREG(fiq_regs[2], fiq_regs[2]),
COREREG(fiq_regs[3], fiq_regs[3]),
COREREG(fiq_regs[4], fiq_regs[4]),
COREREG(fiq_regs[5], banked_r13[BANK_FIQ]),
COREREG(fiq_regs[6], banked_r14[BANK_FIQ]),
COREREG64(fiq_regs[7], banked_spsr[BANK_FIQ]),
/* R15 */
COREREG(usr_regs.uregs[15], regs[15]),
/* VFP system registers */
VFPSYSREG(FPSID),
VFPSYSREG(MVFR1),
VFPSYSREG(MVFR0),
VFPSYSREG(FPEXC),
VFPSYSREG(FPINST),
VFPSYSREG(FPINST2),
};
int kvm_arch_put_registers(CPUState *cs, int level)
{
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
struct kvm_one_reg r;
int mode, bn;
int ret, i;
uint32_t cpsr, fpscr;
/* Make sure the banked regs are properly set */
mode = env->uncached_cpsr & CPSR_M;
bn = bank_number(mode);
if (mode == ARM_CPU_MODE_FIQ) {
memcpy(env->fiq_regs, env->regs + 8, 5 * sizeof(uint32_t));
} else {
memcpy(env->usr_regs, env->regs + 8, 5 * sizeof(uint32_t));
}
env->banked_r13[bn] = env->regs[13];
env->banked_spsr[bn] = env->spsr;
env->banked_r14[r14_bank_number(mode)] = env->regs[14];
/* Now we can safely copy stuff down to the kernel */
for (i = 0; i < ARRAY_SIZE(regs); i++) {
r.id = regs[i].id;
r.addr = (uintptr_t)(env) + regs[i].offset;
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
if (ret) {
return ret;
}
}
/* Special cases which aren't a single CPUARMState field */
cpsr = cpsr_read(env);
r.id = KVM_REG_ARM | KVM_REG_SIZE_U32 |
KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(usr_regs.ARM_cpsr);
r.addr = (uintptr_t)(&cpsr);
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
if (ret) {
return ret;
}
/* VFP registers */
r.id = KVM_REG_ARM | KVM_REG_SIZE_U64 | KVM_REG_ARM_VFP;
for (i = 0; i < 32; i++) {
r.addr = (uintptr_t)aa32_vfp_dreg(env, i);
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
if (ret) {
return ret;
}
r.id++;
}
r.id = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_VFP |
KVM_REG_ARM_VFP_FPSCR;
fpscr = vfp_get_fpscr(env);
r.addr = (uintptr_t)&fpscr;
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
if (ret) {
return ret;
}
ret = kvm_put_vcpu_events(cpu);
if (ret) {
return ret;
}
write_cpustate_to_list(cpu, true);
if (!write_list_to_kvmstate(cpu, level)) {
return EINVAL;
}
kvm_arm_sync_mpstate_to_kvm(cpu);
return ret;
}
int kvm_arch_get_registers(CPUState *cs)
{
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
struct kvm_one_reg r;
int mode, bn;
int ret, i;
uint32_t cpsr, fpscr;
for (i = 0; i < ARRAY_SIZE(regs); i++) {
r.id = regs[i].id;
r.addr = (uintptr_t)(env) + regs[i].offset;
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
if (ret) {
return ret;
}
}
/* Special cases which aren't a single CPUARMState field */
r.id = KVM_REG_ARM | KVM_REG_SIZE_U32 |
KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(usr_regs.ARM_cpsr);
r.addr = (uintptr_t)(&cpsr);
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
if (ret) {
return ret;
}
cpsr_write(env, cpsr, 0xffffffff, CPSRWriteRaw);
/* Make sure the current mode regs are properly set */
mode = env->uncached_cpsr & CPSR_M;
bn = bank_number(mode);
if (mode == ARM_CPU_MODE_FIQ) {
memcpy(env->regs + 8, env->fiq_regs, 5 * sizeof(uint32_t));
} else {
memcpy(env->regs + 8, env->usr_regs, 5 * sizeof(uint32_t));
}
env->regs[13] = env->banked_r13[bn];
env->spsr = env->banked_spsr[bn];
env->regs[14] = env->banked_r14[r14_bank_number(mode)];
/* VFP registers */
r.id = KVM_REG_ARM | KVM_REG_SIZE_U64 | KVM_REG_ARM_VFP;
for (i = 0; i < 32; i++) {
r.addr = (uintptr_t)aa32_vfp_dreg(env, i);
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
if (ret) {
return ret;
}
r.id++;
}
r.id = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_VFP |
KVM_REG_ARM_VFP_FPSCR;
r.addr = (uintptr_t)&fpscr;
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
if (ret) {
return ret;
}
vfp_set_fpscr(env, fpscr);
ret = kvm_get_vcpu_events(cpu);
if (ret) {
return ret;
}
if (!write_kvmstate_to_list(cpu)) {
return EINVAL;
}
/* Note that it's OK to have registers which aren't in CPUState,
* so we can ignore a failure return here.
*/
write_list_to_cpustate(cpu);
kvm_arm_sync_mpstate_to_qemu(cpu);
return 0;
}
int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
qemu_log_mask(LOG_UNIMP, "%s: guest debug not yet implemented\n", __func__);
return -EINVAL;
}
int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
qemu_log_mask(LOG_UNIMP, "%s: guest debug not yet implemented\n", __func__);
return -EINVAL;
}
bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit)
{
qemu_log_mask(LOG_UNIMP, "%s: guest debug not yet implemented\n", __func__);
return false;
}
int kvm_arch_insert_hw_breakpoint(target_ulong addr,
target_ulong len, int type)
{
qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
return -EINVAL;
}
int kvm_arch_remove_hw_breakpoint(target_ulong addr,
target_ulong len, int type)
{
qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
return -EINVAL;
}
void kvm_arch_remove_all_hw_breakpoints(void)
{
qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
}
void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr)
{
qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
}
bool kvm_arm_hw_debug_active(CPUState *cs)
{
return false;
}
void kvm_arm_pmu_set_irq(CPUState *cs, int irq)
{
qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
}
void kvm_arm_pmu_init(CPUState *cs)
{
qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
}