qemu/target/ppc/kvm_ppc.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

480 lines
11 KiB
C
Raw Normal View History

/*
* Copyright 2008 IBM Corporation.
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*
* This work is licensed under the GNU GPL license version 2 or later.
*
*/
#ifndef KVM_PPC_H
#define KVM_PPC_H
#include "sysemu/kvm.h"
#include "exec/hwaddr.h"
#include "cpu.h"
#ifdef CONFIG_USER_ONLY
#error Cannot include kvm_ppc.h from user emulation
#endif
#ifdef CONFIG_KVM
uint32_t kvmppc_get_tbfreq(void);
uint64_t kvmppc_get_clockfreq(void);
bool kvmppc_get_host_model(char **buf);
bool kvmppc_get_host_serial(char **buf);
int kvmppc_get_hasidle(CPUPPCState *env);
int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len);
int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level);
void kvmppc_enable_logical_ci_hcalls(void);
void kvmppc_enable_set_mode_hcall(void);
void kvmppc_enable_clear_ref_mod_hcalls(void);
void kvmppc_enable_h_page_init(void);
void kvmppc_enable_h_rpt_invalidate(void);
void kvmppc_set_papr(PowerPCCPU *cpu);
int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
bool kvmppc_get_fwnmi(void);
pseries: fix kvmppc_set_fwnmi() QEMU issues the ioctl(KVM_CAP_PPC_FWNMI) on the first vCPU. If the first vCPU is currently running, the vCPU mutex is held and the ioctl() cannot be done and waits until the mutex is released. This never happens and the VM is stuck. To avoid this deadlock, issue the ioctl on the same vCPU doing the RTAS call. The problem can be reproduced by booting a guest with several vCPUs (the probability to have the problem is (n - 1) / n, n = # of CPUs), and then by triggering a kernel crash with "echo c >/proc/sysrq-trigger". On the reboot, the kernel hangs after: ... [ 0.000000] ----------------------------------------------------- [ 0.000000] ppc64_pft_size = 0x0 [ 0.000000] phys_mem_size = 0x48000000 [ 0.000000] dcache_bsize = 0x80 [ 0.000000] icache_bsize = 0x80 [ 0.000000] cpu_features = 0x0001c06f8f4f91a7 [ 0.000000] possible = 0x0003fbffcf5fb1a7 [ 0.000000] always = 0x00000003800081a1 [ 0.000000] cpu_user_features = 0xdc0065c2 0xaee00000 [ 0.000000] mmu_features = 0x3c006041 [ 0.000000] firmware_features = 0x00000085455a445f [ 0.000000] physical_start = 0x8000000 [ 0.000000] ----------------------------------------------------- [ 0.000000] numa: NODE_DATA [mem 0x47f33c80-0x47f3ffff] Fixes: ec010c00665b ("ppc/spapr: KVM FWNMI should not be enabled until guest requests it") Cc: npiggin@gmail.com Signed-off-by: Laurent Vivier <lvivier@redhat.com> Message-Id: <20200724083533.281700-1-lvivier@redhat.com> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2020-07-24 11:35:33 +03:00
int kvmppc_set_fwnmi(PowerPCCPU *cpu);
int kvmppc_smt_threads(void);
void kvmppc_error_append_smt_possible_hint(Error *const *errp);
int kvmppc_set_smt_threads(int smt);
int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits);
int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits);
int kvmppc_set_tcr(PowerPCCPU *cpu);
int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
bool radix, bool gtse,
uint64_t proc_tbl);
bool kvmppc_spapr_use_multitce(void);
int kvmppc_spapr_enable_inkernel_multitce(void);
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
uint64_t bus_offset, uint32_t nb_table,
int *pfd, bool need_vfio);
int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
int kvmppc_reset_htab(int shift_hint);
uint64_t kvmppc_vrma_limit(unsigned int hash_shift);
bool kvmppc_has_cap_spapr_vfio(void);
bool kvmppc_has_cap_epr(void);
int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function);
int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp);
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns);
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
uint16_t n_valid, uint16_t n_invalid, Error **errp);
target/ppc: Fix KVM-HV HPTE accessors When a 'pseries' guest is running with KVM-HV, the guest's hashed page table (HPT) is stored within the host kernel, so it is not directly accessible to qemu. Most of the time, qemu doesn't need to access it: we're using the hardware MMU, and KVM itself implements the guest hypercalls for manipulating the HPT. However, qemu does need access to the in-KVM HPT to implement get_phys_page_debug() for the benefit of the gdbstub, and maybe for other debug operations. To allow this, 7c43bca "target-ppc: Fix page table lookup with kvm enabled" added kvmppc_hash64_read_pteg() to target/ppc/kvm.c to read in a batch of HPTEs from the KVM table. Unfortunately, there are a couple of problems with this: First, the name of the function implies it always reads a whole PTEG from the HPT, but in fact in some cases it's used to grab individual HPTEs (which ends up pulling 8 HPTEs, not aligned to a PTEG from the kernel). Second, and more importantly, the code to read the HPTEs from KVM is simply wrong, in general. The data from the fd that KVM provides is designed mostly for compact migration rather than this sort of one-off access, and so needs some decoding for this purpose. The current code will work in some cases, but if there are invalid HPTEs then it will not get sane results. This patch rewrite the HPTE reading function to have a simpler interface (just read n HPTEs into a caller provided buffer), and to correctly decode the stream from the kernel. For consistency we also clean up the similar function for altering HPTEs within KVM (introduced in c138593 "target-ppc: Update ppc_hash64_store_hpte to support updating in-kernel htab"). Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-27 07:34:19 +03:00
void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n);
void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1);
bool kvmppc_has_cap_fixup_hcalls(void);
bool kvmppc_has_cap_htm(void);
bool kvmppc_has_cap_mmu_radix(void);
bool kvmppc_has_cap_mmu_hash_v3(void);
bool kvmppc_has_cap_xive(void);
int kvmppc_get_cap_safe_cache(void);
int kvmppc_get_cap_safe_bounds_check(void);
int kvmppc_get_cap_safe_indirect_branch(void);
int kvmppc_get_cap_count_cache_flush_assist(void);
bool kvmppc_has_cap_nested_kvm_hv(void);
int kvmppc_set_cap_nested_kvm_hv(int enable);
int kvmppc_get_cap_large_decr(void);
int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable);
int kvmppc_has_cap_rpt_invalidate(void);
spapr: Add SPAPR_CAP_AIL_MODE_3 for AIL mode 3 support for H_SET_MODE hcall The behaviour of the Address Translation Mode on Interrupt resource is not consistently supported by all CPU versions or all KVM versions: KVM HV does not support mode 2, and does not support mode 3 on POWER7 or early POWER9 processesors. KVM PR only supports mode 0. TCG supports all modes (0, 2, 3) on CPUs with support for the corresonding LPCR[AIL] mode. This leads to inconsistencies in guest behaviour and could cause problems migrating guests. This was not noticable for Linux guests for a long time because the kernel only uses modes 0 and 3, and it used to consider AIL-3 to be advisory in that it would always keep the AIL-0 vectors around, so it did not matter whether or not interrupts were delivered according to the AIL mode. Recent Linux guests depend on AIL mode 3 working as specified in order to support the SCV facility interrupt. If AIL-3 can not be provided, then H_SET_MODE must return an error to Linux so it can disable the SCV facility (failure to do so can lead to userspace being able to crash the guest kernel). Add the ail-mode-3 capability to specify that AIL-3 is supported. AIL-0 is implied as the baseline, and AIL-2 is no longer supported by spapr. AIL-2 is not known to be used by any software, but support in TCG could be restored with an ail-mode-2 capability quite easily if a regression is reported. Modify the H_SET_MODE Address Translation Mode on Interrupt resource handler to check capabilities and correctly return error if not supported. KVM has a cap to advertise support for AIL-3. Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Message-Id: <20230515160216.394612-1-npiggin@gmail.com> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
2023-05-15 19:02:16 +03:00
bool kvmppc_supports_ail_3(void);
int kvmppc_enable_hwrng(void);
int kvmppc_put_books_sregs(PowerPCCPU *cpu);
PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void);
void kvmppc_check_papr_resize_hpt(Error **errp);
int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift);
int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift);
target/ppc: 'PVR != host PVR' in KVM_SET_SREGS workaround Commit d5fc133eed ("ppc: Rework CPU compatibility testing across migration") changed the way cpu_post_load behaves with the PVR setting, causing an unexpected bug in KVM-HV migrations between hosts that are compatible (POWER8 and POWER8E, for example). Even with pvr_match() returning true, the guest freezes right after cpu_post_load. The reason is that the guest kernel can't handle a different PVR value other that the running host in KVM_SET_SREGS. In [1] it was discussed the possibility of a new KVM capability that would indicate that the guest kernel can handle a different PVR in KVM_SET_SREGS. Even if such feature is implemented, there is still the problem with older kernels that will not have this capability and will fail to migrate. This patch implements a workaround for that scenario. If running with KVM, check if the guest kernel does not have the capability (named here as 'cap_ppc_pvr_compat'). If it doesn't, calls kvmppc_is_pr() to see if the guest is running in KVM-HV. If all this happens, set env->spr[SPR_PVR] to the same value as the current host PVR. This ensures that we allow migrations with 'close enough' PVRs to still work in KVM-HV but also makes the code ready for this new KVM capability when it is done. A new function called 'kvmppc_pvr_workaround_required' was created to encapsulate the conditions said above and to avoid calling too many kvm.c internals inside cpu_post_load. [1] https://lists.gnu.org/archive/html/qemu-ppc/2017-06/msg00503.html Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com> [dwg: Fix for the case of using TCG on a PPC host] Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-08-09 23:43:46 +03:00
bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu);
bool kvmppc_hpt_needs_host_contiguous_pages(void);
2018-04-16 09:19:52 +03:00
void kvm_check_mmu(PowerPCCPU *cpu, Error **errp);
void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online);
void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset);
int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run);
#define kvmppc_eieio() \
do { \
if (kvm_enabled()) { \
asm volatile("eieio" : : : "memory"); \
} \
} while (0)
/* Store data cache blocks back to memory */
static inline void kvmppc_dcbst_range(PowerPCCPU *cpu, uint8_t *addr, int len)
{
uint8_t *p;
for (p = addr; p < addr + len; p += cpu->env.dcache_line_size) {
asm volatile("dcbst 0,%0" : : "r"(p) : "memory");
}
}
/* Invalidate instruction cache blocks */
static inline void kvmppc_icbi_range(PowerPCCPU *cpu, uint8_t *addr, int len)
{
uint8_t *p;
for (p = addr; p < addr + len; p += cpu->env.icache_line_size) {
asm volatile("icbi 0,%0" : : "r"(p));
}
}
#else /* !CONFIG_KVM */
static inline uint32_t kvmppc_get_tbfreq(void)
{
return 0;
}
static inline bool kvmppc_get_host_model(char **buf)
{
return false;
}
static inline bool kvmppc_get_host_serial(char **buf)
{
return false;
}
static inline uint64_t kvmppc_get_clockfreq(void)
{
return 0;
}
static inline uint32_t kvmppc_get_vmx(void)
{
return 0;
}
static inline uint32_t kvmppc_get_dfp(void)
{
return 0;
}
static inline int kvmppc_get_hasidle(CPUPPCState *env)
{
return 0;
}
static inline int kvmppc_get_hypercall(CPUPPCState *env,
uint8_t *buf, int buf_len)
{
return -1;
}
static inline int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
return -1;
}
static inline void kvmppc_enable_logical_ci_hcalls(void)
{
}
static inline void kvmppc_enable_set_mode_hcall(void)
{
}
static inline void kvmppc_enable_clear_ref_mod_hcalls(void)
{
}
static inline void kvmppc_enable_h_page_init(void)
{
}
static inline void kvmppc_enable_h_rpt_invalidate(void)
{
g_assert_not_reached();
}
static inline void kvmppc_set_papr(PowerPCCPU *cpu)
{
}
static inline int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
{
return 0;
}
static inline void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
}
static inline bool kvmppc_get_fwnmi(void)
{
return false;
}
pseries: fix kvmppc_set_fwnmi() QEMU issues the ioctl(KVM_CAP_PPC_FWNMI) on the first vCPU. If the first vCPU is currently running, the vCPU mutex is held and the ioctl() cannot be done and waits until the mutex is released. This never happens and the VM is stuck. To avoid this deadlock, issue the ioctl on the same vCPU doing the RTAS call. The problem can be reproduced by booting a guest with several vCPUs (the probability to have the problem is (n - 1) / n, n = # of CPUs), and then by triggering a kernel crash with "echo c >/proc/sysrq-trigger". On the reboot, the kernel hangs after: ... [ 0.000000] ----------------------------------------------------- [ 0.000000] ppc64_pft_size = 0x0 [ 0.000000] phys_mem_size = 0x48000000 [ 0.000000] dcache_bsize = 0x80 [ 0.000000] icache_bsize = 0x80 [ 0.000000] cpu_features = 0x0001c06f8f4f91a7 [ 0.000000] possible = 0x0003fbffcf5fb1a7 [ 0.000000] always = 0x00000003800081a1 [ 0.000000] cpu_user_features = 0xdc0065c2 0xaee00000 [ 0.000000] mmu_features = 0x3c006041 [ 0.000000] firmware_features = 0x00000085455a445f [ 0.000000] physical_start = 0x8000000 [ 0.000000] ----------------------------------------------------- [ 0.000000] numa: NODE_DATA [mem 0x47f33c80-0x47f3ffff] Fixes: ec010c00665b ("ppc/spapr: KVM FWNMI should not be enabled until guest requests it") Cc: npiggin@gmail.com Signed-off-by: Laurent Vivier <lvivier@redhat.com> Message-Id: <20200724083533.281700-1-lvivier@redhat.com> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2020-07-24 11:35:33 +03:00
static inline int kvmppc_set_fwnmi(PowerPCCPU *cpu)
{
return -1;
}
static inline int kvmppc_smt_threads(void)
{
return 1;
}
static inline void kvmppc_error_append_smt_possible_hint(Error *const *errp)
{
return;
}
static inline int kvmppc_set_smt_threads(int smt)
{
return 0;
}
static inline int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
return 0;
}
static inline int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
return 0;
}
static inline int kvmppc_set_tcr(PowerPCCPU *cpu)
{
return 0;
}
static inline int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
{
return -1;
}
static inline target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
bool radix, bool gtse,
uint64_t proc_tbl)
{
return 0;
}
static inline void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu,
unsigned int online)
{
return;
}
static inline void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset)
{
}
static inline bool kvmppc_spapr_use_multitce(void)
{
return false;
}
static inline int kvmppc_spapr_enable_inkernel_multitce(void)
{
return -1;
}
static inline void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
uint64_t bus_offset,
uint32_t nb_table,
int *pfd, bool need_vfio)
{
return NULL;
}
static inline int kvmppc_remove_spapr_tce(void *table, int pfd,
uint32_t nb_table)
{
return -1;
}
static inline int kvmppc_reset_htab(int shift_hint)
{
return 0;
}
static inline uint64_t kvmppc_vrma_limit(unsigned int hash_shift)
{
g_assert_not_reached();
}
static inline bool kvmppc_hpt_needs_host_contiguous_pages(void)
{
return false;
}
2018-04-16 09:19:52 +03:00
static inline void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
{
}
static inline bool kvmppc_has_cap_spapr_vfio(void)
{
return false;
}
static inline void kvmppc_read_hptes(ppc_hash_pte64_t *hptes,
hwaddr ptex, int n)
{
abort();
}
static inline void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
{
abort();
}
static inline bool kvmppc_has_cap_epr(void)
{
return false;
}
static inline int kvmppc_define_rtas_kernel_token(uint32_t token,
const char *function)
{
return -1;
}
static inline int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
{
return -1;
}
static inline int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize,
int64_t max_ns)
{
abort();
}
static inline int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
uint16_t n_valid, uint16_t n_invalid,
Error **errp)
{
abort();
}
static inline bool kvmppc_has_cap_fixup_hcalls(void)
{
abort();
}
static inline bool kvmppc_has_cap_htm(void)
{
return false;
}
static inline bool kvmppc_has_cap_mmu_radix(void)
{
return false;
}
static inline bool kvmppc_has_cap_mmu_hash_v3(void)
{
return false;
}
static inline bool kvmppc_has_cap_xive(void)
{
return false;
}
static inline int kvmppc_get_cap_safe_cache(void)
{
return 0;
}
static inline int kvmppc_get_cap_safe_bounds_check(void)
{
return 0;
}
static inline int kvmppc_get_cap_safe_indirect_branch(void)
{
return 0;
}
static inline int kvmppc_get_cap_count_cache_flush_assist(void)
{
return 0;
}
static inline bool kvmppc_has_cap_nested_kvm_hv(void)
{
return false;
}
static inline int kvmppc_set_cap_nested_kvm_hv(int enable)
{
return -1;
}
static inline int kvmppc_get_cap_large_decr(void)
{
return 0;
}
static inline int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable)
{
return -1;
}
static inline int kvmppc_has_cap_rpt_invalidate(void)
{
return false;
}
spapr: Add SPAPR_CAP_AIL_MODE_3 for AIL mode 3 support for H_SET_MODE hcall The behaviour of the Address Translation Mode on Interrupt resource is not consistently supported by all CPU versions or all KVM versions: KVM HV does not support mode 2, and does not support mode 3 on POWER7 or early POWER9 processesors. KVM PR only supports mode 0. TCG supports all modes (0, 2, 3) on CPUs with support for the corresonding LPCR[AIL] mode. This leads to inconsistencies in guest behaviour and could cause problems migrating guests. This was not noticable for Linux guests for a long time because the kernel only uses modes 0 and 3, and it used to consider AIL-3 to be advisory in that it would always keep the AIL-0 vectors around, so it did not matter whether or not interrupts were delivered according to the AIL mode. Recent Linux guests depend on AIL mode 3 working as specified in order to support the SCV facility interrupt. If AIL-3 can not be provided, then H_SET_MODE must return an error to Linux so it can disable the SCV facility (failure to do so can lead to userspace being able to crash the guest kernel). Add the ail-mode-3 capability to specify that AIL-3 is supported. AIL-0 is implied as the baseline, and AIL-2 is no longer supported by spapr. AIL-2 is not known to be used by any software, but support in TCG could be restored with an ail-mode-2 capability quite easily if a regression is reported. Modify the H_SET_MODE Address Translation Mode on Interrupt resource handler to check capabilities and correctly return error if not supported. KVM has a cap to advertise support for AIL-3. Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Message-Id: <20230515160216.394612-1-npiggin@gmail.com> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
2023-05-15 19:02:16 +03:00
static inline bool kvmppc_supports_ail_3(void)
{
return false;
}
static inline int kvmppc_enable_hwrng(void)
{
return -1;
}
static inline int kvmppc_put_books_sregs(PowerPCCPU *cpu)
{
abort();
}
static inline PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
return NULL;
}
static inline void kvmppc_check_papr_resize_hpt(Error **errp)
{
return;
}
static inline int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu,
target_ulong flags, int shift)
{
return -ENOSYS;
}
static inline int kvmppc_resize_hpt_commit(PowerPCCPU *cpu,
target_ulong flags, int shift)
{
return -ENOSYS;
}
static inline bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
{
return false;
}
#define kvmppc_eieio() do { } while (0)
static inline void kvmppc_dcbst_range(PowerPCCPU *cpu, uint8_t *addr, int len)
{
}
static inline void kvmppc_icbi_range(PowerPCCPU *cpu, uint8_t *addr, int len)
{
}
#endif /* CONFIG_KVM */
#endif /* KVM_PPC_H */