2008-11-05 19:29:27 +03:00
|
|
|
/*
|
|
|
|
* QEMU KVM support
|
|
|
|
*
|
|
|
|
* Copyright IBM, Corp. 2008
|
2008-11-24 22:36:26 +03:00
|
|
|
* Red Hat, Inc. 2008
|
2008-11-05 19:29:27 +03:00
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Anthony Liguori <aliguori@us.ibm.com>
|
2008-11-24 22:36:26 +03:00
|
|
|
* Glauber Costa <gcosta@redhat.com>
|
2008-11-05 19:29:27 +03:00
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
|
|
|
* See the COPYING file in the top-level directory.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2016-01-29 20:50:05 +03:00
|
|
|
#include "qemu/osdep.h"
|
2008-11-05 19:29:27 +03:00
|
|
|
#include <sys/ioctl.h>
|
|
|
|
|
|
|
|
#include <linux/kvm.h>
|
|
|
|
|
|
|
|
#include "qemu-common.h"
|
2012-12-17 21:20:00 +04:00
|
|
|
#include "qemu/atomic.h"
|
|
|
|
#include "qemu/option.h"
|
|
|
|
#include "qemu/config-file.h"
|
2015-09-24 03:29:36 +03:00
|
|
|
#include "qemu/error-report.h"
|
2017-06-13 16:57:00 +03:00
|
|
|
#include "qapi/error.h"
|
2009-05-02 02:29:37 +04:00
|
|
|
#include "hw/hw.h"
|
2012-12-12 16:24:50 +04:00
|
|
|
#include "hw/pci/msi.h"
|
2016-07-14 08:56:30 +03:00
|
|
|
#include "hw/pci/msix.h"
|
2013-07-15 19:45:03 +04:00
|
|
|
#include "hw/s390x/adapter.h"
|
2012-12-17 21:19:49 +04:00
|
|
|
#include "exec/gdbstub.h"
|
2015-06-18 19:28:45 +03:00
|
|
|
#include "sysemu/kvm_int.h"
|
2017-03-03 14:01:16 +03:00
|
|
|
#include "sysemu/cpus.h"
|
2012-12-17 21:20:00 +04:00
|
|
|
#include "qemu/bswap.h"
|
2012-12-17 21:19:49 +04:00
|
|
|
#include "exec/memory.h"
|
2013-11-04 15:59:02 +04:00
|
|
|
#include "exec/ram_addr.h"
|
2012-12-17 21:19:49 +04:00
|
|
|
#include "exec/address-spaces.h"
|
2012-12-17 21:20:00 +04:00
|
|
|
#include "qemu/event_notifier.h"
|
2017-06-02 09:06:46 +03:00
|
|
|
#include "trace.h"
|
2015-07-06 21:15:13 +03:00
|
|
|
#include "hw/irq.h"
|
2018-03-08 15:48:44 +03:00
|
|
|
#include "sysemu/sev.h"
|
2008-11-05 19:29:27 +03:00
|
|
|
|
2013-12-23 19:40:40 +04:00
|
|
|
#include "hw/boards.h"
|
|
|
|
|
2011-01-10 14:50:05 +03:00
|
|
|
/* This check must be after config-host.h is included */
|
|
|
|
#ifdef CONFIG_EVENTFD
|
|
|
|
#include <sys/eventfd.h>
|
|
|
|
#endif
|
|
|
|
|
2015-11-10 03:23:42 +03:00
|
|
|
/*
 * KVM_COALESCED_MMIO_MAX is defined by KVM in terms of PAGE_SIZE.  Map
 * PAGE_SIZE onto the real host page size, because that is the value KVM
 * itself will use.
 */
#define PAGE_SIZE getpagesize()
|
2008-12-09 23:09:57 +03:00
|
|
|
|
2008-11-05 19:29:27 +03:00
|
|
|
//#define DEBUG_KVM

/*
 * Debug tracing helper.  With DEBUG_KVM defined, DPRINTF() forwards its
 * arguments to fprintf(stderr, ...).  Without it, DPRINTF() expands to an
 * empty statement and its arguments are not evaluated.
 */
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
|
|
|
|
|
2012-05-16 22:41:10 +04:00
|
|
|
/* Number of list heads in the KVMState.msi_hashtab[] array. */
#define KVM_MSI_HASHTAB_SIZE 256
|
|
|
|
|
2016-05-12 06:48:13 +03:00
|
|
|
struct KVMParkedVcpu {
|
|
|
|
unsigned long vcpu_id;
|
|
|
|
int kvm_fd;
|
|
|
|
QLIST_ENTRY(KVMParkedVcpu) node;
|
|
|
|
};
|
|
|
|
|
2014-10-10 14:23:35 +04:00
|
|
|
struct KVMState
|
2008-11-05 19:29:27 +03:00
|
|
|
{
|
2014-09-27 00:45:32 +04:00
|
|
|
AccelState parent_obj;
|
|
|
|
|
2013-11-22 23:12:44 +04:00
|
|
|
int nr_slots;
|
2008-11-05 19:29:27 +03:00
|
|
|
int fd;
|
|
|
|
int vmfd;
|
2008-12-09 23:09:57 +03:00
|
|
|
int coalesced_mmio;
|
2010-01-26 14:21:16 +03:00
|
|
|
struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
|
2011-10-18 21:43:12 +04:00
|
|
|
bool coalesced_flush_in_progress;
|
2009-11-25 02:33:03 +03:00
|
|
|
int vcpu_events;
|
2010-03-01 21:10:29 +03:00
|
|
|
int robust_singlestep;
|
2010-03-12 17:20:49 +03:00
|
|
|
int debugregs;
|
2009-03-12 23:12:48 +03:00
|
|
|
#ifdef KVM_CAP_SET_GUEST_DEBUG
|
|
|
|
struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
|
|
|
|
#endif
|
2011-01-10 14:50:05 +03:00
|
|
|
int many_ioeventfds;
|
2012-08-27 10:28:39 +04:00
|
|
|
int intx_set_mask;
|
kvm: check KVM_CAP_SYNC_MMU with kvm_vm_check_extension()
On a server-class ppc host, this capability depends on the KVM type,
ie, HV or PR. If both KVM are present in the kernel, we will always
get the HV specific value, even if we explicitely requested PR on
the command line.
This can have an impact if we're using hugepages or a balloon device.
Since we've already created the VM at the time any user calls
kvm_has_sync_mmu(), switching to kvm_vm_check_extension() is
enough to fix any potential issue.
It is okay for the other archs that also implement KVM_CAP_SYNC_MMU,
ie, mips, s390, x86 and arm, because they don't depend on the VM being
created or not.
While here, let's cache the state of this extension in a bool variable,
since it has several users in the code, as suggested by Thomas Huth.
Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <150600965332.30533.14702405809647835716.stgit@bahia.lan>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-09-21 19:00:53 +03:00
|
|
|
bool sync_mmu;
|
kvm: Comparison with ioctl number macros needs to be unsigned
In kvm-all.c we store an ioctl cmd number in the irqchip_inject_ioctl field
of KVMState, which has type 'int'. This seems to make sense since the
ioctl() man page says that the cmd parameter has type int.
However, the kernel treats ioctl numbers as unsigned - sys_ioctl() takes an
unsigned int, and the macros which generate ioctl numbers expand to
unsigned expressions. Furthermore, some ioctls (IOC_READ ioctls on x86
and IOC_WRITE ioctls on powerpc) have bit 31 set, and so would be negative
if interpreted as an int. This has the surprising and compile-breaking
consequence that in kvm_irqchip_set_irq() where we do:
return (s->irqchip_inject_ioctl == KVM_IRQ_LINE) ? 1 : event.status;
We will get a "comparison is always false due to limited range of data
type" warning from gcc if KVM_IRQ_LINE is one of the bit-31-set ioctls,
which it is on powerpc.
So, despite the fact that the man page and posix say ioctl numbers are
signed, they're actually unsigned. The kernel uses unsigned, the glibc
header uses unsigned long, and FreeBSD, NetBSD and OSX also use unsigned
long ioctl numbers in the code.
Therefore, this patch changes the variable to be unsigned, fixing the
compile.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2012-03-07 18:41:09 +04:00
|
|
|
/* The man page (and posix) say ioctl numbers are signed int, but
|
|
|
|
* they're not. Linux, glibc and *BSD all treat ioctl numbers as
|
|
|
|
* unsigned, and treating them as signed here can break things */
|
2012-08-24 15:34:47 +04:00
|
|
|
unsigned irq_set_ioctl;
|
2014-06-18 02:10:31 +04:00
|
|
|
unsigned int sigmask_len;
|
2015-07-06 21:15:13 +03:00
|
|
|
GHashTable *gsimap;
|
2011-10-15 13:49:47 +04:00
|
|
|
#ifdef KVM_CAP_IRQ_ROUTING
|
|
|
|
struct kvm_irq_routing *irq_routes;
|
|
|
|
int nr_allocated_irq_routes;
|
2016-03-06 04:57:25 +03:00
|
|
|
unsigned long *used_gsi_bitmap;
|
2012-05-16 22:41:08 +04:00
|
|
|
unsigned int gsi_count;
|
2012-05-16 22:41:10 +04:00
|
|
|
QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE];
|
2011-10-15 13:49:47 +04:00
|
|
|
#endif
|
2015-06-18 19:30:13 +03:00
|
|
|
KVMMemoryListener memory_listener;
|
2016-05-12 06:48:13 +03:00
|
|
|
QLIST_HEAD(, KVMParkedVcpu) kvm_parked_vcpus;
|
2018-03-08 15:48:44 +03:00
|
|
|
|
|
|
|
/* memory encryption */
|
|
|
|
void *memcrypt_handle;
|
2014-10-10 14:23:35 +04:00
|
|
|
};
|
2008-11-05 19:29:27 +03:00
|
|
|
|
2011-02-07 14:19:25 +03:00
|
|
|
/*
 * Global KVM state pointer and feature flags.  None of these objects has an
 * initialiser, so they start out as NULL / false.
 */
KVMState *kvm_state;
bool kvm_kernel_irqchip;
bool kvm_split_irqchip;
bool kvm_async_interrupts_allowed;
bool kvm_halt_in_kernel_allowed;
bool kvm_eventfds_allowed;
bool kvm_irqfds_allowed;
bool kvm_resamplefds_allowed;
bool kvm_msi_via_irqfd_allowed;
bool kvm_gsi_routing_allowed;
bool kvm_gsi_direct_mapping;
bool kvm_allowed;
/* Consulted by kvm_mem_flags() before requesting KVM_MEM_READONLY. */
bool kvm_readonly_mem_allowed;
bool kvm_vm_attributes_allowed;
bool kvm_direct_msi_allowed;
bool kvm_ioeventfd_any_length_allowed;
bool kvm_msi_use_devid;
/* Private to this file, unlike the flags above. */
static bool kvm_immediate_exit;
|
2008-11-05 19:29:27 +03:00
|
|
|
|
2011-01-21 23:48:17 +03:00
|
|
|
static const KVMCapabilityInfo kvm_required_capabilites[] = {
|
|
|
|
KVM_CAP_INFO(USER_MEMORY),
|
|
|
|
KVM_CAP_INFO(DESTROY_MEMORY_REGION_WORKS),
|
2017-09-11 20:49:28 +03:00
|
|
|
KVM_CAP_INFO(JOIN_MEMORY_REGIONS_WORKS),
|
2011-01-21 23:48:17 +03:00
|
|
|
KVM_CAP_LAST_INFO
|
|
|
|
};
|
|
|
|
|
2016-06-01 12:51:24 +03:00
|
|
|
int kvm_get_max_memslots(void)
|
|
|
|
{
|
|
|
|
KVMState *s = KVM_STATE(current_machine->accelerator);
|
|
|
|
|
|
|
|
return s->nr_slots;
|
|
|
|
}
|
|
|
|
|
2018-03-08 15:48:44 +03:00
|
|
|
bool kvm_memcrypt_enabled(void)
|
|
|
|
{
|
|
|
|
if (kvm_state && kvm_state->memcrypt_handle) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-06-18 19:30:13 +03:00
|
|
|
/*
 * Find the first slot of @kml whose memory_size is zero, which marks it as
 * unused.
 *
 * Returns a pointer to that slot, or NULL when all nr_slots entries are
 * occupied.
 */
static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml)
{
    KVMState *state = kvm_state;
    int idx;

    for (idx = 0; idx < state->nr_slots; idx++) {
        KVMSlot *candidate = &kml->slots[idx];

        if (candidate->memory_size != 0) {
            continue;
        }
        return candidate;
    }

    return NULL;
}
|
|
|
|
|
|
|
|
/*
 * Tell whether the memory listener embedded in @ms's KVM accelerator state
 * still has an unused slot.
 */
bool kvm_has_free_slot(MachineState *ms)
{
    KVMState *state = KVM_STATE(ms->accelerator);
    KVMSlot *unused = kvm_get_free_slot(&state->memory_listener);

    return unused != NULL;
}
|
|
|
|
|
2015-06-18 19:30:13 +03:00
|
|
|
/*
 * Like kvm_get_free_slot(), but never returns NULL: running out of slots is
 * treated as fatal and the process is aborted after printing a message.
 */
static KVMSlot *kvm_alloc_slot(KVMMemoryListener *kml)
{
    KVMSlot *unused = kvm_get_free_slot(kml);

    if (!unused) {
        fprintf(stderr, "%s: no free slot available\n", __func__);
        abort();
    }

    return unused;
}
|
|
|
|
|
2015-06-18 19:30:13 +03:00
|
|
|
static KVMSlot *kvm_lookup_matching_slot(KVMMemoryListener *kml,
|
2012-10-23 14:30:10 +04:00
|
|
|
hwaddr start_addr,
|
2017-09-11 20:49:30 +03:00
|
|
|
hwaddr size)
|
2009-04-17 18:26:29 +04:00
|
|
|
{
|
2015-06-18 19:30:13 +03:00
|
|
|
KVMState *s = kvm_state;
|
2009-04-17 18:26:29 +04:00
|
|
|
int i;
|
|
|
|
|
2013-11-22 23:12:44 +04:00
|
|
|
for (i = 0; i < s->nr_slots; i++) {
|
2015-06-18 19:30:13 +03:00
|
|
|
KVMSlot *mem = &kml->slots[i];
|
2009-04-17 18:26:29 +04:00
|
|
|
|
2017-09-11 20:49:30 +03:00
|
|
|
if (start_addr == mem->start_addr && size == mem->memory_size) {
|
2009-04-17 18:26:29 +04:00
|
|
|
return mem;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-11-05 19:29:27 +03:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2017-09-11 20:49:29 +03:00
|
|
|
/*
|
|
|
|
* Calculate and align the start address and the size of the section.
|
|
|
|
* Return the size. If the size is 0, the aligned section is empty.
|
|
|
|
*/
|
|
|
|
static hwaddr kvm_align_section(MemoryRegionSection *section,
|
|
|
|
hwaddr *start)
|
|
|
|
{
|
|
|
|
hwaddr size = int128_get64(section->size);
|
2017-10-16 17:43:01 +03:00
|
|
|
hwaddr delta, aligned;
|
2017-09-11 20:49:29 +03:00
|
|
|
|
|
|
|
/* kvm works in page size chunks, but the function may be called
|
|
|
|
with sub-page size and unaligned start address. Pad the start
|
|
|
|
address to next and truncate size to previous page boundary. */
|
2017-10-16 17:43:01 +03:00
|
|
|
aligned = ROUND_UP(section->offset_within_address_space,
|
|
|
|
qemu_real_host_page_size);
|
|
|
|
delta = aligned - section->offset_within_address_space;
|
|
|
|
*start = aligned;
|
2017-09-11 20:49:29 +03:00
|
|
|
if (delta > size) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-10-16 17:43:01 +03:00
|
|
|
return (size - delta) & qemu_real_host_page_mask;
|
2017-09-11 20:49:29 +03:00
|
|
|
}
|
|
|
|
|
2011-12-15 21:55:26 +04:00
|
|
|
int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
|
2012-10-23 14:30:10 +04:00
|
|
|
hwaddr *phys_addr)
|
2010-10-11 22:31:20 +04:00
|
|
|
{
|
2015-06-18 19:30:13 +03:00
|
|
|
KVMMemoryListener *kml = &s->memory_listener;
|
2010-10-11 22:31:20 +04:00
|
|
|
int i;
|
|
|
|
|
2013-11-22 23:12:44 +04:00
|
|
|
for (i = 0; i < s->nr_slots; i++) {
|
2015-06-18 19:30:13 +03:00
|
|
|
KVMSlot *mem = &kml->slots[i];
|
2010-10-11 22:31:20 +04:00
|
|
|
|
2011-12-15 21:55:26 +04:00
|
|
|
if (ram >= mem->ram && ram < mem->ram + mem->memory_size) {
|
|
|
|
*phys_addr = mem->start_addr + (ram - mem->ram);
|
2010-10-11 22:31:20 +04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-06-18 19:30:13 +03:00
|
|
|
/*
 * Push the description of @slot to KVM with KVM_SET_USER_MEMORY_REGION.
 *
 * The slot number passed to KVM combines slot->slot with @kml's address
 * space id in bits 16 and up.  Returns the result of the final ioctl, which
 * is also handed to the trace point.
 */
static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot)
{
    KVMState *state = kvm_state;
    struct kvm_userspace_memory_region region;
    int rc;

    region.slot = slot->slot | (kml->as_id << 16);
    region.flags = slot->flags;
    region.guest_phys_addr = slot->start_addr;
    region.userspace_addr = (unsigned long)slot->ram;

    if (slot->memory_size != 0 && (region.flags & KVM_MEM_READONLY) != 0) {
        /* Set the slot size to 0 before setting the slot to the desired
         * value. This is needed based on KVM commit 75d61fbc.
         * The result of this preliminary call is intentionally ignored. */
        region.memory_size = 0;
        kvm_vm_ioctl(state, KVM_SET_USER_MEMORY_REGION, &region);
    }

    region.memory_size = slot->memory_size;
    rc = kvm_vm_ioctl(state, KVM_SET_USER_MEMORY_REGION, &region);
    trace_kvm_set_user_memory(region.slot, region.flags,
                              region.guest_phys_addr, region.memory_size,
                              region.userspace_addr, rc);
    return rc;
}
|
|
|
|
|
2016-05-12 06:48:13 +03:00
|
|
|
/*
 * Release the userspace resources of @cpu and park its KVM descriptor.
 *
 * The vCPU's kvm_run mapping is unmapped, then a KVMParkedVcpu record
 * holding the vCPU id and cpu->kvm_fd is put on kvm_state's
 * kvm_parked_vcpus list so that kvm_get_vcpu() can reuse the descriptor.
 *
 * Returns 0 on success and a negative value on failure: the result of the
 * KVM_GET_VCPU_MMAP_SIZE query if that fails, or -errno if munmap() fails.
 * Nothing is parked on failure.
 */
int kvm_destroy_vcpu(CPUState *cpu)
{
    KVMState *s = kvm_state;
    long mmap_size;
    struct KVMParkedVcpu *vcpu = NULL;
    int ret = 0;

    DPRINTF("kvm_destroy_vcpu\n");

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        ret = mmap_size;
        DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    if (munmap(cpu->kvm_run, mmap_size) < 0) {
        /* munmap() itself only reports -1; return a real error code, as
         * kvm_init_vcpu() does when mmap() fails. */
        ret = -errno;
        goto err;
    }

    vcpu = g_malloc0(sizeof(*vcpu));
    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
    vcpu->kvm_fd = cpu->kvm_fd;
    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
err:
    return ret;
}
|
|
|
|
|
|
|
|
/*
 * Obtain a KVM descriptor for the vCPU identified by @vcpu_id.
 *
 * If a matching record exists on @s's kvm_parked_vcpus list, it is removed
 * and freed and its saved descriptor is returned.  Otherwise the result of
 * a KVM_CREATE_VCPU ioctl is returned.
 */
static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
{
    struct KVMParkedVcpu *parked;
    int fd;

    QLIST_FOREACH(parked, &s->kvm_parked_vcpus, node) {
        if (parked->vcpu_id != vcpu_id) {
            continue;
        }

        /* Read the descriptor before the record is released. */
        fd = parked->kvm_fd;
        QLIST_REMOVE(parked, node);
        g_free(parked);
        return fd;
    }

    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
}
|
|
|
|
|
2012-12-17 09:38:45 +04:00
|
|
|
/*
 * Set up the KVM side of @cpu.
 *
 * Obtains a vCPU descriptor via kvm_get_vcpu(), records it and the KVM
 * state in @cpu, maps the vCPU's kvm_run area, locates the coalesced MMIO
 * ring on first use, and finally runs kvm_arch_init_vcpu().
 *
 * Returns the result of kvm_arch_init_vcpu() on the normal path, or a
 * negative value from whichever earlier step failed.
 */
int kvm_init_vcpu(CPUState *cpu)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    DPRINTF("kvm_init_vcpu\n");

    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
    if (ret < 0) {
        DPRINTF("kvm_create_vcpu failed\n");
        return ret;
    }

    cpu->kvm_fd = ret;
    cpu->kvm_state = s;
    cpu->vcpu_dirty = true;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        ret = mmap_size;
        DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
        return ret;
    }

    cpu->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        cpu->kvm_fd, 0);
    if (cpu->kvm_run == MAP_FAILED) {
        /* Capture errno before anything else can overwrite it. */
        ret = -errno;
        DPRINTF("mmap'ing vcpu state failed\n");
        return ret;
    }

    /* The ring pointer is set only once; s->coalesced_mmio is its page
     * offset within the kvm_run mapping. */
    if (s->coalesced_mmio && !s->coalesced_mmio_ring) {
        s->coalesced_mmio_ring =
            (void *)cpu->kvm_run + s->coalesced_mmio * PAGE_SIZE;
    }

    return kvm_arch_init_vcpu(cpu);
}
|
|
|
|
|
2008-11-24 22:36:26 +03:00
|
|
|
/*
|
|
|
|
* dirty pages logging control
|
|
|
|
*/
|
2011-04-06 23:09:54 +04:00
|
|
|
|
2015-06-18 19:28:43 +03:00
|
|
|
/*
 * Compute the KVM memory slot flags for @mr:
 *  - KVM_MEM_LOG_DIRTY_PAGES when the region's dirty log mask is non-zero;
 *  - KVM_MEM_READONLY when the region is read-only or ROMD and
 *    kvm_readonly_mem_allowed is set.
 */
static int kvm_mem_flags(MemoryRegion *mr)
{
    bool wants_readonly = mr->readonly || memory_region_is_romd(mr);
    bool wants_dirty_log = memory_region_get_dirty_log_mask(mr) != 0;
    int result = 0;

    if (wants_dirty_log) {
        result |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if (wants_readonly && kvm_readonly_mem_allowed) {
        result |= KVM_MEM_READONLY;
    }

    return result;
}
|
|
|
|
|
2015-06-18 19:30:13 +03:00
|
|
|
static int kvm_slot_update_flags(KVMMemoryListener *kml, KVMSlot *mem,
|
|
|
|
MemoryRegion *mr)
|
2008-11-24 22:36:26 +03:00
|
|
|
{
|
2009-05-01 22:52:46 +04:00
|
|
|
int old_flags;
|
|
|
|
|
|
|
|
old_flags = mem->flags;
|
2015-06-18 19:28:43 +03:00
|
|
|
mem->flags = kvm_mem_flags(mr);
|
2008-11-24 22:36:26 +03:00
|
|
|
|
2009-05-01 22:52:46 +04:00
|
|
|
/* If nothing changed effectively, no need to issue ioctl */
|
2015-06-18 19:28:43 +03:00
|
|
|
if (mem->flags == old_flags) {
|
2011-04-06 23:09:54 +04:00
|
|
|
return 0;
|
2009-05-01 22:52:46 +04:00
|
|
|
}
|
|
|
|
|
2015-06-18 19:30:13 +03:00
|
|
|
return kvm_set_user_memory_region(kml, mem);
|
2008-11-24 22:36:26 +03:00
|
|
|
}
|
|
|
|
|
2015-06-18 19:30:13 +03:00
|
|
|
static int kvm_section_update_flags(KVMMemoryListener *kml,
|
|
|
|
MemoryRegionSection *section)
|
2011-04-06 23:09:54 +04:00
|
|
|
{
|
2017-09-11 20:49:32 +03:00
|
|
|
hwaddr start_addr, size;
|
|
|
|
KVMSlot *mem;
|
2011-04-06 23:09:54 +04:00
|
|
|
|
2017-09-11 20:49:32 +03:00
|
|
|
size = kvm_align_section(section, &start_addr);
|
|
|
|
if (!size) {
|
2015-04-27 15:51:31 +03:00
|
|
|
return 0;
|
2011-04-06 23:09:54 +04:00
|
|
|
}
|
2017-09-11 20:49:32 +03:00
|
|
|
|
|
|
|
mem = kvm_lookup_matching_slot(kml, start_addr, size);
|
|
|
|
if (!mem) {
|
2017-10-16 17:42:58 +03:00
|
|
|
/* We don't have a slot if we want to trap every access. */
|
|
|
|
return 0;
|
2017-09-11 20:49:32 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return kvm_slot_update_flags(kml, mem, section->mr);
|
2011-04-06 23:09:54 +04:00
|
|
|
}
|
|
|
|
|
2011-12-18 16:06:05 +04:00
|
|
|
static void kvm_log_start(MemoryListener *listener,
|
2015-04-25 15:38:30 +03:00
|
|
|
MemoryRegionSection *section,
|
|
|
|
int old, int new)
|
2008-11-24 22:36:26 +03:00
|
|
|
{
|
2015-06-18 19:30:13 +03:00
|
|
|
KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
|
2011-12-18 16:06:05 +04:00
|
|
|
int r;
|
|
|
|
|
2015-04-25 15:38:30 +03:00
|
|
|
if (old != 0) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2015-06-18 19:30:13 +03:00
|
|
|
r = kvm_section_update_flags(kml, section);
|
2011-12-18 16:06:05 +04:00
|
|
|
if (r < 0) {
|
|
|
|
abort();
|
|
|
|
}
|
2008-11-24 22:36:26 +03:00
|
|
|
}
|
|
|
|
|
2011-12-18 16:06:05 +04:00
|
|
|
static void kvm_log_stop(MemoryListener *listener,
|
2015-04-25 15:38:30 +03:00
|
|
|
MemoryRegionSection *section,
|
|
|
|
int old, int new)
|
2008-11-24 22:36:26 +03:00
|
|
|
{
|
2015-06-18 19:30:13 +03:00
|
|
|
KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
|
2011-12-18 16:06:05 +04:00
|
|
|
int r;
|
|
|
|
|
2015-04-25 15:38:30 +03:00
|
|
|
if (new != 0) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2015-06-18 19:30:13 +03:00
|
|
|
r = kvm_section_update_flags(kml, section);
|
2011-12-18 16:06:05 +04:00
|
|
|
if (r < 0) {
|
|
|
|
abort();
|
|
|
|
}
|
2008-11-24 22:36:26 +03:00
|
|
|
}
|
|
|
|
|
2010-04-23 21:04:14 +04:00
|
|
|
/* get kvm's dirty pages bitmap and update qemu's */
|
2011-12-19 15:18:13 +04:00
|
|
|
static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section,
|
|
|
|
unsigned long *bitmap)
|
2009-07-27 14:49:56 +04:00
|
|
|
{
|
2016-03-01 09:18:21 +03:00
|
|
|
ram_addr_t start = section->offset_within_region +
|
|
|
|
memory_region_get_ram_addr(section->mr);
|
2013-11-05 18:52:54 +04:00
|
|
|
ram_addr_t pages = int128_get64(section->size) / getpagesize();
|
|
|
|
|
|
|
|
cpu_physical_memory_set_dirty_lebitmap(bitmap, start, pages);
|
2010-04-23 21:04:14 +04:00
|
|
|
return 0;
|
2009-07-27 14:49:56 +04:00
|
|
|
}
|
|
|
|
|
2010-04-23 21:04:14 +04:00
|
|
|
/* Round x up to a multiple of y.  The mask arithmetic is only correct when
 * y is a power of two. */
#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
|
|
|
|
|
2008-11-24 22:36:26 +03:00
|
|
|
/**
|
|
|
|
* kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
|
2011-10-16 20:04:59 +04:00
|
|
|
* This function updates qemu's dirty bitmap using
|
|
|
|
* memory_region_set_dirty(). This means all bits are set
|
|
|
|
* to dirty.
|
2008-11-24 22:36:26 +03:00
|
|
|
*
|
2009-04-17 18:26:29 +04:00
|
|
|
* @start_add: start of logged region.
|
2008-11-24 22:36:26 +03:00
|
|
|
* @end_addr: end of logged region.
|
|
|
|
*/
|
2015-06-18 19:30:13 +03:00
|
|
|
static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml,
|
|
|
|
MemoryRegionSection *section)
|
2008-11-24 22:36:26 +03:00
|
|
|
{
|
|
|
|
KVMState *s = kvm_state;
|
2015-06-18 19:28:44 +03:00
|
|
|
struct kvm_dirty_log d = {};
|
2009-05-01 22:52:47 +04:00
|
|
|
KVMSlot *mem;
|
2017-09-11 20:49:33 +03:00
|
|
|
hwaddr start_addr, size;
|
|
|
|
|
|
|
|
size = kvm_align_section(section, &start_addr);
|
|
|
|
if (size) {
|
|
|
|
mem = kvm_lookup_matching_slot(kml, start_addr, size);
|
|
|
|
if (!mem) {
|
2017-10-16 17:42:58 +03:00
|
|
|
/* We don't have a slot if we want to trap every access. */
|
|
|
|
return 0;
|
2009-05-01 22:52:47 +04:00
|
|
|
}
|
2008-11-24 22:36:26 +03:00
|
|
|
|
fix crash in migration, 32-bit userspace on 64-bit host
This change fixes a long-standing immediate crash (memory corruption
and abort in glibc malloc code) in migration on 32bits.
The bug is present since this commit:
commit 692d9aca97b865b0f7903565274a52606910f129
Author: Bruce Rogers <brogers@novell.com>
Date: Wed Sep 23 16:13:18 2009 -0600
qemu-kvm: allocate correct size for dirty bitmap
The dirty bitmap copied out to userspace is stored in a long array,
and gets copied out to userspace accordingly. This patch accounts
for that correctly. Currently I'm seeing kvm crashing due to writing
beyond the end of the alloc'd dirty bitmap memory, because the buffer
has the wrong size.
Signed-off-by: Bruce Rogers
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
- buf = qemu_malloc((slots[i].len / 4096 + 7) / 8 + 2);
+ buf = qemu_malloc(BITMAP_SIZE(slots[i].len));
r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
BITMAP_SIZE is now open-coded in that function, like this:
size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), HOST_LONG_BITS) / 8;
The problem is that HOST_LONG_BITS in 32bit userspace is 32
but it's 64 in 64bit kernel. So userspace aligns this to
32, and kernel to 64, but since no length is passed from
userspace to kernel on ioctl, kernel uses its size calculation
and copies 4 extra bytes to userspace, corrupting memory.
Here's how it looks like during migrate execution:
our=20, kern=24
our=4, kern=8
...
our=4, kern=8
our=4064, kern=4064
our=512, kern=512
our=4, kern=8
our=20, kern=24
our=4, kern=8
...
our=4, kern=8
our=4064, kern=4064
*** glibc detected *** ./x86_64-softmmu/qemu-system-x86_64: realloc(): invalid next size: 0x08f20528 ***
(our is userspace size above, kern is the size as calculated
by the kernel).
Fix this by always aligning to 64 in a hope that no platform will
have sizeof(long)>8 any time soon, and add a comment describing it
all. It's a small price to pay for bad kernel design.
Alternatively it's possible to fix that in the kernel by using
different size calculation depending on the current process.
But this becomes quite ugly.
Special thanks goes to Stefan Hajnoczi for spotting the fundamental
cause of the issue, and to Alexander Graf for his support in #qemu.
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
CC: Bruce Rogers <brogers@novell.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
2011-04-26 20:13:49 +04:00
|
|
|
/* XXX bad kernel interface alert
|
|
|
|
* For dirty bitmap, kernel allocates array of size aligned to
|
|
|
|
* bits-per-long. But for case when the kernel is 64bits and
|
|
|
|
* the userspace is 32bits, userspace can't align to the same
|
|
|
|
* bits-per-long, since sizeof(long) is different between kernel
|
|
|
|
* and user space. This way, userspace will provide buffer which
|
|
|
|
* may be 4 bytes less than the kernel will use, resulting in
|
|
|
|
* userspace memory corruption (which is not detectable by valgrind
|
|
|
|
* too, in most cases).
|
|
|
|
* So for now, let's align to 64 instead of HOST_LONG_BITS here, in
|
2016-03-23 17:59:57 +03:00
|
|
|
* a hope that sizeof(long) won't become >8 any time soon.
|
fix crash in migration, 32-bit userspace on 64-bit host
This change fixes a long-standing immediate crash (memory corruption
and abort in glibc malloc code) in migration on 32bits.
The bug is present since this commit:
commit 692d9aca97b865b0f7903565274a52606910f129
Author: Bruce Rogers <brogers@novell.com>
Date: Wed Sep 23 16:13:18 2009 -0600
qemu-kvm: allocate correct size for dirty bitmap
The dirty bitmap copied out to userspace is stored in a long array,
and gets copied out to userspace accordingly. This patch accounts
for that correctly. Currently I'm seeing kvm crashing due to writing
beyond the end of the alloc'd dirty bitmap memory, because the buffer
has the wrong size.
Signed-off-by: Bruce Rogers
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
- buf = qemu_malloc((slots[i].len / 4096 + 7) / 8 + 2);
+ buf = qemu_malloc(BITMAP_SIZE(slots[i].len));
r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
BITMAP_SIZE is now open-coded in that function, like this:
size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), HOST_LONG_BITS) / 8;
The problem is that HOST_LONG_BITS in 32bit userspace is 32
but it's 64 in 64bit kernel. So userspace aligns this to
32, and kernel to 64, but since no length is passed from
userspace to kernel on ioctl, kernel uses its size calculation
and copies 4 extra bytes to userspace, corrupting memory.
Here's how it looks like during migrate execution:
our=20, kern=24
our=4, kern=8
...
our=4, kern=8
our=4064, kern=4064
our=512, kern=512
our=4, kern=8
our=20, kern=24
our=4, kern=8
...
our=4, kern=8
our=4064, kern=4064
*** glibc detected *** ./x86_64-softmmu/qemu-system-x86_64: realloc(): invalid next size: 0x08f20528 ***
(our is userspace size above, kern is the size as calculated
by the kernel).
Fix this by always aligning to 64 in a hope that no platform will
have sizeof(long)>8 any time soon, and add a comment describing it
all. It's a small price to pay for bad kernel design.
Alternatively it's possible to fix that in the kernel by using
different size calculation depending on the current process.
But this becomes quite ugly.
Special thanks goes to Stefan Hajnoczi for spotting the fundamental
cause of the issue, and to Alexander Graf for his support in #qemu.
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
CC: Bruce Rogers <brogers@novell.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
2011-04-26 20:13:49 +04:00
|
|
|
*/
|
|
|
|
size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS),
|
|
|
|
/*HOST_LONG_BITS*/ 64) / 8;
|
2017-09-11 20:49:33 +03:00
|
|
|
d.dirty_bitmap = g_malloc0(size);
|
2008-11-24 22:36:26 +03:00
|
|
|
|
2015-06-18 19:30:14 +03:00
|
|
|
d.slot = mem->slot | (kml->as_id << 16);
|
2014-04-14 16:14:04 +04:00
|
|
|
if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
|
2010-04-18 18:22:14 +04:00
|
|
|
DPRINTF("ioctl failed %d\n", errno);
|
2017-09-11 20:49:33 +03:00
|
|
|
g_free(d.dirty_bitmap);
|
|
|
|
return -1;
|
2009-05-01 22:52:47 +04:00
|
|
|
}
|
2008-11-24 22:36:26 +03:00
|
|
|
|
2011-12-19 15:18:13 +04:00
|
|
|
kvm_get_dirty_pages_log_range(section, d.dirty_bitmap);
|
2017-09-11 20:49:33 +03:00
|
|
|
g_free(d.dirty_bitmap);
|
2008-11-24 22:36:26 +03:00
|
|
|
}
|
2009-05-01 22:52:47 +04:00
|
|
|
|
2017-09-11 20:49:33 +03:00
|
|
|
return 0;
|
2008-11-24 22:36:26 +03:00
|
|
|
}
|
|
|
|
|
2012-10-02 20:21:54 +04:00
|
|
|
static void kvm_coalesce_mmio_region(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *secion,
|
2012-10-23 14:30:10 +04:00
|
|
|
hwaddr start, hwaddr size)
|
2008-12-09 23:09:57 +03:00
|
|
|
{
|
|
|
|
KVMState *s = kvm_state;
|
|
|
|
|
|
|
|
if (s->coalesced_mmio) {
|
|
|
|
struct kvm_coalesced_mmio_zone zone;
|
|
|
|
|
|
|
|
zone.addr = start;
|
|
|
|
zone.size = size;
|
2012-02-29 19:54:29 +04:00
|
|
|
zone.pad = 0;
|
2008-12-09 23:09:57 +03:00
|
|
|
|
2012-10-02 20:21:54 +04:00
|
|
|
(void)kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
|
2008-12-09 23:09:57 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-10-02 20:21:54 +04:00
|
|
|
static void kvm_uncoalesce_mmio_region(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *secion,
|
2012-10-23 14:30:10 +04:00
|
|
|
hwaddr start, hwaddr size)
|
2008-12-09 23:09:57 +03:00
|
|
|
{
|
|
|
|
KVMState *s = kvm_state;
|
|
|
|
|
|
|
|
if (s->coalesced_mmio) {
|
|
|
|
struct kvm_coalesced_mmio_zone zone;
|
|
|
|
|
|
|
|
zone.addr = start;
|
|
|
|
zone.size = size;
|
2012-02-29 19:54:29 +04:00
|
|
|
zone.pad = 0;
|
2008-12-09 23:09:57 +03:00
|
|
|
|
2012-10-02 20:21:54 +04:00
|
|
|
(void)kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
|
2008-12-09 23:09:57 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-05-09 00:33:24 +04:00
|
|
|
/*
 * Query @extension with KVM_CHECK_EXTENSION through kvm_ioctl().
 *
 * Returns the ioctl's result, except that a negative result is reported
 * as 0.
 */
int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int answer = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);

    return answer < 0 ? 0 : answer;
}
|
|
|
|
|
2014-07-14 21:15:15 +04:00
|
|
|
/*
 * Query @extension with KVM_CHECK_EXTENSION through kvm_vm_ioctl().
 *
 * If that call yields a negative result, the answer of
 * kvm_check_extension() is returned instead.
 */
int kvm_vm_check_extension(KVMState *s, unsigned int extension)
{
    int answer = kvm_vm_ioctl(s, KVM_CHECK_EXTENSION, extension);

    if (answer >= 0) {
        return answer;
    }

    /* VM wide version not implemented, use global one instead */
    return kvm_check_extension(s, extension);
}
|
|
|
|
|
2015-03-14 00:23:37 +03:00
|
|
|
/*
 * Byte-swap an ioeventfd match value when exactly one of
 * HOST_WORDS_BIGENDIAN and TARGET_WORDS_BIGENDIAN is defined.
 *
 * Only sizes 2 and 4 are swapped; every other size, and every build where
 * the two macros agree, returns @val unchanged.
 */
static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size)
{
#if defined(HOST_WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN)
    /*
     * The kernel expects ioeventfd values in host endianness, but the
     * memory core hands them in target endianness.  For example, PPC is
     * always treated as big-endian even if running on KVM and on PPC64LE.
     * Correct here.
     */
    if (size == 2) {
        return bswap16(val);
    }
    if (size == 4) {
        return bswap32(val);
    }
#endif
    return val;
}
|
|
|
|
|
2014-01-10 11:20:18 +04:00
|
|
|
/*
 * Assign or deassign an ioeventfd with the KVM_IOEVENTFD ioctl (no PIO
 * flag is set, in contrast to kvm_set_ioeventfd_pio()).
 *
 * @fd:        file descriptor placed in the request
 * @addr:      address placed in the request
 * @val:       value to match; only used when @datamatch is true
 * @assign:    false adds KVM_IOEVENTFD_FLAG_DEASSIGN to the request
 * @size:      length placed in the request
 * @datamatch: true adds KVM_IOEVENTFD_FLAG_DATAMATCH to the request
 *
 * Returns 0 on success, -ENOSYS when KVM is not enabled, otherwise the
 * negative value returned by kvm_vm_ioctl().
 */
static int kvm_set_ioeventfd_mmio(int fd, hwaddr addr, uint32_t val,
                                  bool assign, uint32_t size, bool datamatch)
{
    int ret;
    struct kvm_ioeventfd iofd = {
        .datamatch = datamatch ? adjust_ioeventfd_endianness(val, size) : 0,
        .addr = addr,
        .len = size,
        .flags = 0,
        .fd = fd,
    };

    if (!kvm_enabled()) {
        return -ENOSYS;
    }

    if (datamatch) {
        iofd.flags |= KVM_IOEVENTFD_FLAG_DATAMATCH;
    }
    if (!assign) {
        iofd.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
    }

    ret = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &iofd);

    /*
     * Propagate the ioctl wrapper's own negative result, exactly as
     * kvm_set_ioeventfd_pio() does, rather than re-reading errno after the
     * call has returned.
     */
    if (ret < 0) {
        return ret;
    }

    return 0;
}
|
|
|
|
|
2013-04-02 01:54:45 +04:00
|
|
|
static int kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint16_t val,
|
2013-04-02 17:52:25 +04:00
|
|
|
bool assign, uint32_t size, bool datamatch)
|
2013-04-02 01:05:21 +04:00
|
|
|
{
|
|
|
|
struct kvm_ioeventfd kick = {
|
2015-03-14 00:23:37 +03:00
|
|
|
.datamatch = datamatch ? adjust_ioeventfd_endianness(val, size) : 0,
|
2013-04-02 01:05:21 +04:00
|
|
|
.addr = addr,
|
2013-04-02 17:52:25 +04:00
|
|
|
.flags = KVM_IOEVENTFD_FLAG_PIO,
|
2013-04-02 01:54:45 +04:00
|
|
|
.len = size,
|
2013-04-02 01:05:21 +04:00
|
|
|
.fd = fd,
|
|
|
|
};
|
|
|
|
int r;
|
|
|
|
if (!kvm_enabled()) {
|
|
|
|
return -ENOSYS;
|
|
|
|
}
|
2013-04-02 17:52:25 +04:00
|
|
|
if (datamatch) {
|
|
|
|
kick.flags |= KVM_IOEVENTFD_FLAG_DATAMATCH;
|
|
|
|
}
|
2013-04-02 01:05:21 +04:00
|
|
|
if (!assign) {
|
|
|
|
kick.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
|
|
|
|
}
|
|
|
|
r = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
|
|
|
|
if (r < 0) {
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-01-10 14:50:05 +03:00
|
|
|
/*
 * Probe whether seven PIO ioeventfds can be assigned at once.
 *
 * Returns 1 when all seven assignments succeed, 0 otherwise (and always 0
 * when CONFIG_EVENTFD is not defined).  Every eventfd that was assigned is
 * deassigned and closed again before returning.
 */
static int kvm_check_many_ioeventfds(void)
{
    /* Userspace can use ioeventfd for io notification.  This requires a host
     * that supports eventfd(2) and an I/O thread; since eventfd does not
     * support SIGIO it cannot interrupt the vcpu.
     *
     * Older kernels have a 6 device limit on the KVM io bus.  Find out so we
     * can avoid creating too many ioeventfds.
     */
#if defined(CONFIG_EVENTFD)
    int fds[7];
    int nr_assigned = 0;
    int many;

    while (nr_assigned < ARRAY_SIZE(fds)) {
        int fd = eventfd(0, EFD_CLOEXEC);

        fds[nr_assigned] = fd;
        if (fd < 0) {
            break;
        }
        if (kvm_set_ioeventfd_pio(fd, 0, nr_assigned, true, 2, true) < 0) {
            close(fd);
            break;
        }
        nr_assigned++;
    }

    /* Decide whether many devices are supported or not */
    many = (nr_assigned == ARRAY_SIZE(fds));

    /* Undo the successful assignments in reverse order. */
    while (nr_assigned-- > 0) {
        kvm_set_ioeventfd_pio(fds[nr_assigned], 0, nr_assigned, false, 2,
                              true);
        close(fds[nr_assigned]);
    }
    return many;
#else
    return 0;
#endif
}
|
|
|
|
|
2011-01-21 23:48:17 +03:00
|
|
|
static const KVMCapabilityInfo *
|
|
|
|
kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list)
|
|
|
|
{
|
|
|
|
while (list->name) {
|
|
|
|
if (!kvm_check_extension(s, list->value)) {
|
|
|
|
return list;
|
|
|
|
}
|
|
|
|
list++;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2015-06-18 19:30:13 +03:00
|
|
|
static void kvm_set_phys_mem(KVMMemoryListener *kml,
|
|
|
|
MemoryRegionSection *section, bool add)
|
2010-01-27 23:07:08 +03:00
|
|
|
{
|
2017-09-11 20:49:31 +03:00
|
|
|
KVMSlot *mem;
|
2010-01-27 23:07:08 +03:00
|
|
|
int err;
|
2011-12-18 16:06:05 +04:00
|
|
|
MemoryRegion *mr = section->mr;
|
2013-05-29 12:27:26 +04:00
|
|
|
bool writeable = !mr->readonly && !mr->rom_device;
|
2017-09-11 20:49:29 +03:00
|
|
|
hwaddr start_addr, size;
|
|
|
|
void *ram;
|
2010-01-27 23:07:08 +03:00
|
|
|
|
2011-12-18 16:06:05 +04:00
|
|
|
if (!memory_region_is_ram(mr)) {
|
2013-05-29 12:27:26 +04:00
|
|
|
if (writeable || !kvm_readonly_mem_allowed) {
|
|
|
|
return;
|
|
|
|
} else if (!mr->romd_mode) {
|
|
|
|
/* If the memory device is not in romd_mode, then we actually want
|
|
|
|
* to remove the kvm memory slot so all accesses will trap. */
|
|
|
|
add = false;
|
|
|
|
}
|
2011-12-15 21:55:26 +04:00
|
|
|
}
|
|
|
|
|
2017-09-11 20:49:29 +03:00
|
|
|
size = kvm_align_section(section, &start_addr);
|
|
|
|
if (!size) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2017-10-16 17:42:57 +03:00
|
|
|
/* use aligned delta to align the ram address */
|
2017-09-11 20:49:29 +03:00
|
|
|
ram = memory_region_get_ram_ptr(mr) + section->offset_within_region +
|
2017-10-16 17:42:57 +03:00
|
|
|
(start_addr - section->offset_within_address_space);
|
2011-12-18 16:06:05 +04:00
|
|
|
|
2017-09-11 20:49:31 +03:00
|
|
|
if (!add) {
|
2017-10-16 17:43:00 +03:00
|
|
|
mem = kvm_lookup_matching_slot(kml, start_addr, size);
|
2010-01-27 23:07:08 +03:00
|
|
|
if (!mem) {
|
|
|
|
return;
|
|
|
|
}
|
2015-03-23 12:57:21 +03:00
|
|
|
if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
|
2015-06-18 19:30:13 +03:00
|
|
|
kvm_physical_sync_dirty_bitmap(kml, section);
|
2012-01-15 18:13:59 +04:00
|
|
|
}
|
|
|
|
|
2017-09-11 20:49:31 +03:00
|
|
|
/* unregister the slot */
|
2010-01-27 23:07:08 +03:00
|
|
|
mem->memory_size = 0;
|
2015-06-18 19:30:13 +03:00
|
|
|
err = kvm_set_user_memory_region(kml, mem);
|
2010-01-27 23:07:08 +03:00
|
|
|
if (err) {
|
2017-10-16 17:42:59 +03:00
|
|
|
fprintf(stderr, "%s: error unregistering slot: %s\n",
|
2010-01-27 23:07:08 +03:00
|
|
|
__func__, strerror(-err));
|
|
|
|
abort();
|
|
|
|
}
|
2017-09-11 20:49:31 +03:00
|
|
|
return;
|
2010-01-27 23:07:08 +03:00
|
|
|
}
|
|
|
|
|
2017-09-11 20:49:31 +03:00
|
|
|
/* register the new slot */
|
2015-06-18 19:30:13 +03:00
|
|
|
mem = kvm_alloc_slot(kml);
|
2010-01-27 23:07:08 +03:00
|
|
|
mem->memory_size = size;
|
|
|
|
mem->start_addr = start_addr;
|
2011-12-15 21:55:26 +04:00
|
|
|
mem->ram = ram;
|
2015-06-18 19:28:43 +03:00
|
|
|
mem->flags = kvm_mem_flags(mr);
|
2010-01-27 23:07:08 +03:00
|
|
|
|
2015-06-18 19:30:13 +03:00
|
|
|
err = kvm_set_user_memory_region(kml, mem);
|
2010-01-27 23:07:08 +03:00
|
|
|
if (err) {
|
|
|
|
fprintf(stderr, "%s: error registering slot: %s\n", __func__,
|
|
|
|
strerror(-err));
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-12-18 16:06:05 +04:00
|
|
|
static void kvm_region_add(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section)
|
|
|
|
{
|
2015-06-18 19:30:13 +03:00
|
|
|
KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
|
|
|
|
|
2013-05-06 12:46:11 +04:00
|
|
|
memory_region_ref(section->mr);
|
2015-06-18 19:30:13 +03:00
|
|
|
kvm_set_phys_mem(kml, section, true);
|
2011-12-18 16:06:05 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static void kvm_region_del(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section)
|
|
|
|
{
|
2015-06-18 19:30:13 +03:00
|
|
|
KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
|
|
|
|
|
|
|
|
kvm_set_phys_mem(kml, section, false);
|
2013-05-06 12:46:11 +04:00
|
|
|
memory_region_unref(section->mr);
|
2011-12-18 16:06:05 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static void kvm_log_sync(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section)
|
2010-01-27 23:07:21 +03:00
|
|
|
{
|
2015-06-18 19:30:13 +03:00
|
|
|
KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
|
2011-12-18 16:06:05 +04:00
|
|
|
int r;
|
|
|
|
|
2015-06-18 19:30:13 +03:00
|
|
|
r = kvm_physical_sync_dirty_bitmap(kml, section);
|
2011-12-18 16:06:05 +04:00
|
|
|
if (r < 0) {
|
|
|
|
abort();
|
|
|
|
}
|
2010-01-27 23:07:21 +03:00
|
|
|
}
|
|
|
|
|
2012-10-01 00:21:11 +04:00
|
|
|
static void kvm_mem_ioeventfd_add(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section,
|
|
|
|
bool match_data, uint64_t data,
|
|
|
|
EventNotifier *e)
|
|
|
|
{
|
|
|
|
int fd = event_notifier_get_fd(e);
|
2012-02-08 18:39:06 +04:00
|
|
|
int r;
|
|
|
|
|
2012-03-20 16:31:38 +04:00
|
|
|
r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space,
|
2013-05-27 12:08:27 +04:00
|
|
|
data, true, int128_get64(section->size),
|
|
|
|
match_data);
|
2012-02-08 18:39:06 +04:00
|
|
|
if (r < 0) {
|
2013-05-22 08:57:35 +04:00
|
|
|
fprintf(stderr, "%s: error adding ioeventfd: %s\n",
|
|
|
|
__func__, strerror(-r));
|
2012-02-08 18:39:06 +04:00
|
|
|
abort();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-10-01 00:21:11 +04:00
|
|
|
static void kvm_mem_ioeventfd_del(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section,
|
|
|
|
bool match_data, uint64_t data,
|
|
|
|
EventNotifier *e)
|
2012-02-08 18:39:06 +04:00
|
|
|
{
|
2012-10-01 00:21:11 +04:00
|
|
|
int fd = event_notifier_get_fd(e);
|
2012-02-08 18:39:06 +04:00
|
|
|
int r;
|
|
|
|
|
2012-03-20 16:31:38 +04:00
|
|
|
r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space,
|
2013-05-27 12:08:27 +04:00
|
|
|
data, false, int128_get64(section->size),
|
|
|
|
match_data);
|
2012-02-08 18:39:06 +04:00
|
|
|
if (r < 0) {
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-10-01 00:21:11 +04:00
|
|
|
static void kvm_io_ioeventfd_add(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section,
|
|
|
|
bool match_data, uint64_t data,
|
|
|
|
EventNotifier *e)
|
2012-02-08 18:39:06 +04:00
|
|
|
{
|
2012-10-01 00:21:11 +04:00
|
|
|
int fd = event_notifier_get_fd(e);
|
2012-02-08 18:39:06 +04:00
|
|
|
int r;
|
|
|
|
|
2013-04-02 01:54:45 +04:00
|
|
|
r = kvm_set_ioeventfd_pio(fd, section->offset_within_address_space,
|
2013-05-27 12:08:27 +04:00
|
|
|
data, true, int128_get64(section->size),
|
|
|
|
match_data);
|
2012-02-08 18:39:06 +04:00
|
|
|
if (r < 0) {
|
2013-05-22 08:57:35 +04:00
|
|
|
fprintf(stderr, "%s: error adding ioeventfd: %s\n",
|
|
|
|
__func__, strerror(-r));
|
2012-02-08 18:39:06 +04:00
|
|
|
abort();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-10-01 00:21:11 +04:00
|
|
|
static void kvm_io_ioeventfd_del(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section,
|
|
|
|
bool match_data, uint64_t data,
|
|
|
|
EventNotifier *e)
|
2012-02-08 18:39:06 +04:00
|
|
|
|
|
|
|
{
|
2012-10-01 00:21:11 +04:00
|
|
|
int fd = event_notifier_get_fd(e);
|
2012-02-08 18:39:06 +04:00
|
|
|
int r;
|
|
|
|
|
2013-04-02 01:54:45 +04:00
|
|
|
r = kvm_set_ioeventfd_pio(fd, section->offset_within_address_space,
|
2013-05-27 12:08:27 +04:00
|
|
|
data, false, int128_get64(section->size),
|
|
|
|
match_data);
|
2012-02-08 18:39:06 +04:00
|
|
|
if (r < 0) {
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-06-18 19:30:14 +03:00
|
|
|
void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
|
|
|
|
AddressSpace *as, int as_id)
|
2015-06-18 19:30:13 +03:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
kml->slots = g_malloc0(s->nr_slots * sizeof(KVMSlot));
|
2015-06-18 19:30:14 +03:00
|
|
|
kml->as_id = as_id;
|
2015-06-18 19:30:13 +03:00
|
|
|
|
|
|
|
for (i = 0; i < s->nr_slots; i++) {
|
|
|
|
kml->slots[i].slot = i;
|
|
|
|
}
|
|
|
|
|
|
|
|
kml->listener.region_add = kvm_region_add;
|
|
|
|
kml->listener.region_del = kvm_region_del;
|
|
|
|
kml->listener.log_start = kvm_log_start;
|
|
|
|
kml->listener.log_stop = kvm_log_stop;
|
|
|
|
kml->listener.log_sync = kvm_log_sync;
|
|
|
|
kml->listener.priority = 10;
|
|
|
|
|
|
|
|
memory_listener_register(&kml->listener, as);
|
|
|
|
}
|
2012-10-01 00:21:11 +04:00
|
|
|
|
|
|
|
static MemoryListener kvm_io_listener = {
|
|
|
|
.eventfd_add = kvm_io_ioeventfd_add,
|
|
|
|
.eventfd_del = kvm_io_ioeventfd_del,
|
2012-02-08 17:05:50 +04:00
|
|
|
.priority = 10,
|
2010-01-27 23:07:21 +03:00
|
|
|
};
|
|
|
|
|
2012-07-26 18:35:12 +04:00
|
|
|
/*
 * Set interrupt line @irq to @level using the ioctl selected by
 * s->irq_set_ioctl.
 *
 * Asserts that asynchronous interrupts are enabled.  An ioctl failure is
 * reported with perror() and aborts the process.
 *
 * Returns 1 when the ioctl in use is KVM_IRQ_LINE, otherwise the status
 * value the ioctl wrote back into the request.
 */
int kvm_set_irq(KVMState *s, int irq, int level)
{
    struct kvm_irq_level event;

    assert(kvm_async_interrupts_enabled());

    event.irq = irq;
    event.level = level;

    if (kvm_vm_ioctl(s, s->irq_set_ioctl, &event) < 0) {
        perror("kvm_set_irq");
        abort();
    }

    if (s->irq_set_ioctl == KVM_IRQ_LINE) {
        return 1;
    }
    return event.status;
}
|
|
|
|
|
|
|
|
#ifdef KVM_CAP_IRQ_ROUTING
|
2012-06-05 23:03:57 +04:00
|
|
|
typedef struct KVMMSIRoute {
|
|
|
|
struct kvm_irq_routing_entry kroute;
|
|
|
|
QTAILQ_ENTRY(KVMMSIRoute) entry;
|
|
|
|
} KVMMSIRoute;
|
|
|
|
|
2011-10-15 13:49:47 +04:00
|
|
|
/* Mark @gsi as in use by setting its bit in s->used_gsi_bitmap. */
static void set_gsi(KVMState *s, unsigned int gsi)
{
    set_bit(gsi, s->used_gsi_bitmap);
}
|
|
|
|
|
2012-05-16 22:41:10 +04:00
|
|
|
/* Mark @gsi as free by clearing its bit in s->used_gsi_bitmap. */
static void clear_gsi(KVMState *s, unsigned int gsi)
{
    clear_bit(gsi, s->used_gsi_bitmap);
}
|
|
|
|
|
2013-04-16 17:58:13 +04:00
|
|
|
/*
 * Set up the IRQ routing state in @s.
 *
 * The GSI count is the KVM_CAP_IRQ_ROUTING extension value minus one; when
 * it is positive, a bitmap of that many bits is allocated to track used
 * GSIs.  An empty, zeroed routing table is allocated, the MSI hash table
 * buckets are initialised when direct MSI injection is not allowed, and
 * finally the architecture hook is called.
 */
void kvm_init_irq_routing(KVMState *s)
{
    int nr_gsi = kvm_check_extension(s, KVM_CAP_IRQ_ROUTING) - 1;
    int bucket;

    if (nr_gsi > 0) {
        /* One bit per GSI. */
        s->used_gsi_bitmap = bitmap_new(nr_gsi);
        s->gsi_count = nr_gsi;
    }

    s->irq_routes = g_malloc0(sizeof(*s->irq_routes));
    s->nr_allocated_irq_routes = 0;

    if (!kvm_direct_msi_allowed) {
        for (bucket = 0; bucket < KVM_MSI_HASHTAB_SIZE; bucket++) {
            QTAILQ_INIT(&s->msi_hashtab[bucket]);
        }
    }

    kvm_arch_init_irq_routing(s);
}
|
|
|
|
|
2013-04-17 03:11:55 +04:00
|
|
|
/*
 * Push the routing table in s->irq_routes to KVM with KVM_SET_GSI_ROUTING.
 *
 * Does nothing when GSI direct mapping is in use or GSI routing is not
 * enabled.  The ioctl is asserted to return 0.
 */
void kvm_irqchip_commit_routes(KVMState *s)
{
    int rc;

    if (kvm_gsi_direct_mapping() || !kvm_gsi_routing_enabled()) {
        return;
    }

    s->irq_routes->flags = 0;
    trace_kvm_irqchip_commit_routes();
    rc = kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes);
    assert(rc == 0);
}
|
|
|
|
|
2011-10-15 13:49:47 +04:00
|
|
|
/*
 * Append a copy of @entry to the routing table in @s and mark its GSI as
 * used.
 *
 * When the table is full its capacity is doubled, with a minimum of 64
 * entries, before the copy is made.
 */
static void kvm_add_routing_entry(KVMState *s,
                                  struct kvm_irq_routing_entry *entry)
{
    struct kvm_irq_routing_entry *slot;
    int capacity, bytes, idx;

    if (s->irq_routes->nr == s->nr_allocated_irq_routes) {
        capacity = s->nr_allocated_irq_routes * 2;
        if (capacity < 64) {
            capacity = 64;
        }
        bytes = sizeof(struct kvm_irq_routing) + capacity * sizeof(*slot);
        s->irq_routes = g_realloc(s->irq_routes, bytes);
        s->nr_allocated_irq_routes = capacity;
    }

    idx = s->irq_routes->nr++;
    slot = &s->irq_routes->entries[idx];
    *slot = *entry;

    set_gsi(s, entry->gsi);
}
|
|
|
|
|
2012-08-27 10:28:38 +04:00
|
|
|
/*
 * Overwrite the first routing entry whose GSI equals @new_entry->gsi with
 * @new_entry.  The copy is skipped when the two entries already compare
 * equal byte for byte.
 *
 * Returns 0 when a matching entry was found, -ESRCH otherwise.
 */
static int kvm_update_routing_entry(KVMState *s,
                                    struct kvm_irq_routing_entry *new_entry)
{
    struct kvm_irq_routing_entry *cur;
    int idx;

    for (idx = 0; idx < s->irq_routes->nr; idx++) {
        cur = &s->irq_routes->entries[idx];
        if (cur->gsi == new_entry->gsi) {
            if (memcmp(cur, new_entry, sizeof *cur) != 0) {
                *cur = *new_entry;
            }
            return 0;
        }
    }

    return -ESRCH;
}
|
|
|
|
|
2012-05-17 17:32:32 +04:00
|
|
|
/*
 * Add a KVM_IRQ_ROUTING_IRQCHIP entry that routes GSI @irq to @pin of
 * @irqchip.  @pin is asserted to be below s->gsi_count.
 */
void kvm_irqchip_add_irq_route(KVMState *s, int irq, int irqchip, int pin)
{
    /* Start from an all-zero entry; only the fields below are filled in. */
    struct kvm_irq_routing_entry route = {};

    assert(pin < s->gsi_count);

    route.type = KVM_IRQ_ROUTING_IRQCHIP;
    route.gsi = irq;
    route.flags = 0;
    route.u.irqchip.pin = pin;
    route.u.irqchip.irqchip = irqchip;

    kvm_add_routing_entry(s, &route);
}
|
|
|
|
|
2012-05-17 17:32:34 +04:00
|
|
|
/*
 * Remove every routing entry whose GSI equals @virq, mark the GSI as free,
 * then run the architecture post-release hook and the trace point.
 *
 * Does nothing when GSI direct mapping is in use.  The routing table is
 * only changed in memory here; nothing in this function commits it to KVM.
 */
void kvm_irqchip_release_virq(KVMState *s, int virq)
{
    struct kvm_irq_routing_entry *e;
    int i;

    if (kvm_gsi_direct_mapping()) {
        return;
    }

    i = 0;
    while (i < s->irq_routes->nr) {
        e = &s->irq_routes->entries[i];
        if (e->gsi == virq) {
            /*
             * Remove by moving the last entry into this slot.  Do not
             * advance: the entry that was just moved in has not been
             * examined yet and may carry the same GSI (several routes can
             * share one GSI), so slot i must be checked again.
             */
            s->irq_routes->nr--;
            *e = s->irq_routes->entries[s->irq_routes->nr];
            continue;
        }
        i++;
    }
    clear_gsi(s, virq);
    kvm_arch_release_virq_post(virq);
    trace_kvm_irqchip_release_virq(virq);
}
|
|
|
|
|
|
|
|
/*
 * Hash an MSI data word to a bucket index: the low eight bits of @data.
 */
static unsigned int kvm_hash_msi(uint32_t data)
{
    /* This is optimized for IA32 MSI layout. However, no other arch shall
     * repeat the mistake of not providing a direct MSI injection API. */
    const uint32_t low_byte_mask = 0xff;

    return data & low_byte_mask;
}
|
|
|
|
|
|
|
|
/*
 * Empty every bucket of s->msi_hashtab: for each cached route, release its
 * GSI, unlink it from the bucket and free it.
 */
static void kvm_flush_dynamic_msi_routes(KVMState *s)
{
    KVMMSIRoute *victim;
    unsigned int bucket;

    for (bucket = 0; bucket < KVM_MSI_HASHTAB_SIZE; bucket++) {
        /* Keep taking the head until the bucket is empty. */
        while ((victim = QTAILQ_FIRST(&s->msi_hashtab[bucket])) != NULL) {
            kvm_irqchip_release_virq(s, victim->kroute.gsi);
            QTAILQ_REMOVE(&s->msi_hashtab[bucket], victim, entry);
            g_free(victim);
        }
    }
}
|
|
|
|
|
|
|
|
/*
 * Pick the lowest GSI whose bit is clear in s->used_gsi_bitmap.
 *
 * Returns that GSI number, or -ENOSPC when all s->gsi_count GSIs are in
 * use.  The bit is not set here.
 */
static int kvm_irqchip_get_virq(KVMState *s)
{
    int candidate;

    /*
     * PIC and IOAPIC share the first 16 GSI numbers, thus the available
     * GSI numbers are more than the number of IRQ route. Allocating a GSI
     * number can succeed even though a new route entry cannot be added.
     * When this happens, flush dynamic MSI entries to free IRQ route entries.
     */
    if (!kvm_direct_msi_allowed && s->irq_routes->nr == s->gsi_count) {
        kvm_flush_dynamic_msi_routes(s);
    }

    /* Return the lowest unused GSI in the bitmap */
    candidate = find_first_zero_bit(s->used_gsi_bitmap, s->gsi_count);

    return candidate >= s->gsi_count ? -ENOSPC : candidate;
}
|
|
|
|
|
|
|
|
/*
 * Search the hash bucket selected by kvm_hash_msi(msg.data) for a cached
 * route whose MSI address (low and high halves) and data match @msg.
 * Returns the route, or NULL when none matches.
 */
static KVMMSIRoute *kvm_lookup_msi_route(KVMState *s, MSIMessage msg)
{
    unsigned int bucket = kvm_hash_msi(msg.data);
    KVMMSIRoute *cand;

    QTAILQ_FOREACH(cand, &s->msi_hashtab[bucket], entry) {
        if (cand->kroute.u.msi.address_lo != (uint32_t)msg.address) {
            continue;
        }
        if (cand->kroute.u.msi.address_hi != (msg.address >> 32)) {
            continue;
        }
        if (cand->kroute.u.msi.data != le32_to_cpu(msg.data)) {
            continue;
        }
        return cand;
    }
    return NULL;
}
|
|
|
|
|
|
|
|
/*
 * Deliver the MSI described by @msg.
 *
 * When direct MSI injection is allowed, the message is handed to the
 * KVM_SIGNAL_MSI ioctl and its result is returned.  Otherwise a cached
 * route for the message is looked up; if none exists a new GSI is
 * allocated, a route is added, committed and cached.  The route's GSI is
 * then raised through kvm_set_irq() and that result is returned.
 *
 * Returns a negative value when no GSI could be allocated.
 */
int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
{
    KVMMSIRoute *route;

    if (kvm_direct_msi_allowed) {
        struct kvm_msi msi;

        msi.address_lo = (uint32_t)msg.address;
        msi.address_hi = msg.address >> 32;
        msi.data = le32_to_cpu(msg.data);
        msi.flags = 0;
        memset(msi.pad, 0, sizeof(msi.pad));

        return kvm_vm_ioctl(s, KVM_SIGNAL_MSI, &msi);
    }

    route = kvm_lookup_msi_route(s, msg);
    if (route == NULL) {
        int virq = kvm_irqchip_get_virq(s);

        if (virq < 0) {
            return virq;
        }

        route = g_malloc0(sizeof(KVMMSIRoute));
        route->kroute.gsi = virq;
        route->kroute.type = KVM_IRQ_ROUTING_MSI;
        route->kroute.flags = 0;
        route->kroute.u.msi.address_lo = (uint32_t)msg.address;
        route->kroute.u.msi.address_hi = msg.address >> 32;
        route->kroute.u.msi.data = le32_to_cpu(msg.data);

        kvm_add_routing_entry(s, &route->kroute);
        kvm_irqchip_commit_routes(s);

        /* Cache the new route so the next identical message reuses it. */
        QTAILQ_INSERT_TAIL(&s->msi_hashtab[kvm_hash_msi(msg.data)], route,
                           entry);
    }

    assert(route->kroute.type == KVM_IRQ_ROUTING_MSI);

    return kvm_set_irq(s, route->kroute.gsi, 1);
}
|
|
|
|
|
2016-07-14 08:56:30 +03:00
|
|
|
/*
 * Allocate a GSI and add an MSI route for @vector of @dev.
 *
 * The MSI message is read from @dev when PCI is available and @dev is not
 * NULL; otherwise an all-zero message is used.  With GSI direct mapping
 * the GSI is derived from the message data and no route is added.
 *
 * Returns the GSI on success, -ENOSYS when GSI routing is not enabled,
 * -EINVAL when the architecture fix-up rejects the route, or the negative
 * value from kvm_irqchip_get_virq() when no GSI is free.
 */
int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev)
{
    struct kvm_irq_routing_entry kroute = {};
    MSIMessage msg = {0, 0};
    int virq;

    if (pci_available && dev) {
        msg = pci_get_msi_message(dev, vector);
    }

    if (kvm_gsi_direct_mapping()) {
        return kvm_arch_msi_data_to_gsi(msg.data);
    }
    if (!kvm_gsi_routing_enabled()) {
        return -ENOSYS;
    }

    virq = kvm_irqchip_get_virq(s);
    if (virq < 0) {
        return virq;
    }

    kroute.gsi = virq;
    kroute.type = KVM_IRQ_ROUTING_MSI;
    kroute.flags = 0;
    kroute.u.msi.address_lo = (uint32_t)msg.address;
    kroute.u.msi.address_hi = msg.address >> 32;
    kroute.u.msi.data = le32_to_cpu(msg.data);

    /* NOTE(review): @dev is not NULL-checked on this path - confirm callers. */
    if (pci_available && kvm_msi_devid_required()) {
        kroute.flags = KVM_MSI_VALID_DEVID;
        kroute.u.msi.devid = pci_requester_id(dev);
    }

    if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data, dev)) {
        kvm_irqchip_release_virq(s, virq);
        return -EINVAL;
    }

    trace_kvm_irqchip_add_msi_route(dev ? dev->name : (char *)"N/A",
                                    vector, virq);

    kvm_add_routing_entry(s, &kroute);
    kvm_arch_add_msi_route_post(&kroute, vector, dev);
    kvm_irqchip_commit_routes(s);

    return virq;
}
|
|
|
|
|
2015-10-15 16:44:52 +03:00
|
|
|
int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg,
|
|
|
|
PCIDevice *dev)
|
2012-08-27 10:28:38 +04:00
|
|
|
{
|
2013-06-04 15:52:32 +04:00
|
|
|
struct kvm_irq_routing_entry kroute = {};
|
2012-08-27 10:28:38 +04:00
|
|
|
|
2013-09-03 12:08:25 +04:00
|
|
|
if (kvm_gsi_direct_mapping()) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-08-27 10:28:38 +04:00
|
|
|
if (!kvm_irqchip_in_kernel()) {
|
|
|
|
return -ENOSYS;
|
|
|
|
}
|
|
|
|
|
|
|
|
kroute.gsi = virq;
|
|
|
|
kroute.type = KVM_IRQ_ROUTING_MSI;
|
|
|
|
kroute.flags = 0;
|
|
|
|
kroute.u.msi.address_lo = (uint32_t)msg.address;
|
|
|
|
kroute.u.msi.address_hi = msg.address >> 32;
|
2013-04-16 17:05:22 +04:00
|
|
|
kroute.u.msi.data = le32_to_cpu(msg.data);
|
2017-07-07 12:45:26 +03:00
|
|
|
if (pci_available && kvm_msi_devid_required()) {
|
2016-10-04 15:28:09 +03:00
|
|
|
kroute.flags = KVM_MSI_VALID_DEVID;
|
|
|
|
kroute.u.msi.devid = pci_requester_id(dev);
|
|
|
|
}
|
2015-10-15 16:44:52 +03:00
|
|
|
if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data, dev)) {
|
2015-01-09 11:04:40 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2012-08-27 10:28:38 +04:00
|
|
|
|
2016-07-14 08:56:35 +03:00
|
|
|
trace_kvm_irqchip_update_msi_route(virq);
|
|
|
|
|
2012-08-27 10:28:38 +04:00
|
|
|
return kvm_update_routing_entry(s, &kroute);
|
|
|
|
}
|
|
|
|
|
2013-07-22 13:51:33 +04:00
|
|
|
/*
 * Assign (@assign true) or deassign @fd as an irqfd for GSI @virq with the
 * KVM_IRQFD ioctl.  When @rfd is not -1 it is passed as the resample fd
 * and KVM_IRQFD_FLAG_RESAMPLE is set.
 *
 * Returns -ENOSYS when irqfds are not enabled, otherwise the ioctl result.
 */
static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int rfd, int virq,
                                    bool assign)
{
    struct kvm_irqfd irqfd = {
        .fd = fd,
        .gsi = virq,
        .flags = assign ? 0 : KVM_IRQFD_FLAG_DEASSIGN,
    };

    if (!kvm_irqfds_enabled()) {
        return -ENOSYS;
    }

    if (rfd != -1) {
        irqfd.resamplefd = rfd;
        irqfd.flags |= KVM_IRQFD_FLAG_RESAMPLE;
    }

    return kvm_vm_ioctl(s, KVM_IRQFD, &irqfd);
}
|
|
|
|
|
2013-07-15 19:45:03 +04:00
|
|
|
/*
 * Allocate a GSI and add a KVM_IRQ_ROUTING_S390_ADAPTER route filled in
 * from @adapter.  The route is added to the in-memory table only; this
 * function does not commit it.
 *
 * Returns the GSI, -ENOSYS when GSI routing is not enabled, or the
 * negative value from kvm_irqchip_get_virq() when no GSI is free.
 */
int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter)
{
    struct kvm_irq_routing_entry kroute = {};
    int virq;

    if (!kvm_gsi_routing_enabled()) {
        return -ENOSYS;
    }

    virq = kvm_irqchip_get_virq(s);
    if (virq < 0) {
        return virq;
    }

    kroute.type = KVM_IRQ_ROUTING_S390_ADAPTER;
    kroute.gsi = virq;
    kroute.flags = 0;
    kroute.u.adapter.adapter_id = adapter->adapter_id;
    kroute.u.adapter.summary_addr = adapter->summary_addr;
    kroute.u.adapter.summary_offset = adapter->summary_offset;
    kroute.u.adapter.ind_addr = adapter->ind_addr;
    kroute.u.adapter.ind_offset = adapter->ind_offset;

    kvm_add_routing_entry(s, &kroute);

    return virq;
}
|
|
|
|
|
2015-11-10 15:52:42 +03:00
|
|
|
/*
 * Allocate a GSI, add a KVM_IRQ_ROUTING_HV_SINT route for (@vcpu, @sint)
 * and commit the routing table.
 *
 * Returns the GSI, -ENOSYS when GSI routing is not enabled or the
 * KVM_CAP_HYPERV_SYNIC extension is absent, or the negative value from
 * kvm_irqchip_get_virq() when no GSI is free.
 */
int kvm_irqchip_add_hv_sint_route(KVMState *s, uint32_t vcpu, uint32_t sint)
{
    struct kvm_irq_routing_entry kroute = {};
    int virq;

    if (!kvm_gsi_routing_enabled() ||
        !kvm_check_extension(s, KVM_CAP_HYPERV_SYNIC)) {
        return -ENOSYS;
    }

    virq = kvm_irqchip_get_virq(s);
    if (virq < 0) {
        return virq;
    }

    kroute.type = KVM_IRQ_ROUTING_HV_SINT;
    kroute.gsi = virq;
    kroute.flags = 0;
    kroute.u.hv_sint.sint = sint;
    kroute.u.hv_sint.vcpu = vcpu;

    kvm_add_routing_entry(s, &kroute);
    kvm_irqchip_commit_routes(s);

    return virq;
}
|
|
|
|
|
2011-10-15 13:49:47 +04:00
|
|
|
#else /* !KVM_CAP_IRQ_ROUTING */
|
|
|
|
|
2013-04-16 17:58:13 +04:00
|
|
|
/* Stub for builds without KVM_CAP_IRQ_ROUTING: there is nothing to set up. */
void kvm_init_irq_routing(KVMState *s)
{
    /* Intentionally empty. */
}
|
2012-05-16 22:41:10 +04:00
|
|
|
|
2012-06-05 23:03:57 +04:00
|
|
|
/* Stub for builds without KVM_CAP_IRQ_ROUTING: releasing a virq is a no-op. */
void kvm_irqchip_release_virq(KVMState *s, int virq)
{
    /* Intentionally empty. */
}
|
|
|
|
|
2012-05-16 22:41:10 +04:00
|
|
|
/*
 * Stub for builds without KVM_CAP_IRQ_ROUTING.  Calling it terminates the
 * process via abort(); it never returns.
 */
int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
{
    abort();
}
|
2012-05-17 17:32:33 +04:00
|
|
|
|
2016-07-14 08:56:30 +03:00
|
|
|
/* Stub for builds without KVM_CAP_IRQ_ROUTING: always fails with -ENOSYS. */
int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev)
{
    /* MSI routes cannot be created without IRQ routing support. */
    return -ENOSYS;
}
|
2012-05-17 17:32:36 +04:00
|
|
|
|
2013-07-15 19:45:03 +04:00
|
|
|
/* Stub for builds without KVM_CAP_IRQ_ROUTING: always fails with -ENOSYS. */
int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter)
{
    /* Adapter routes cannot be created without IRQ routing support. */
    return -ENOSYS;
}
|
|
|
|
|
2015-11-10 15:52:42 +03:00
|
|
|
/* Stub for builds without KVM_CAP_IRQ_ROUTING: always fails with -ENOSYS. */
int kvm_irqchip_add_hv_sint_route(KVMState *s, uint32_t vcpu, uint32_t sint)
{
    /* Hyper-V SINT routes cannot be created without IRQ routing support. */
    return -ENOSYS;
}
|
|
|
|
|
2012-05-17 17:32:36 +04:00
|
|
|
/*
 * Stub for builds without KVM_CAP_IRQ_ROUTING.
 *
 * The parameter list mirrors the five-argument calls made by
 * kvm_irqchip_add_irqfd_notifier_gsi() and
 * kvm_irqchip_remove_irqfd_notifier_gsi() (s, fd, rfd, virq, assign), so
 * that those callers compile in this configuration as well.
 *
 * Calling it terminates the process via abort(); it never returns.
 */
static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int rfd, int virq,
                                    bool assign)
{
    abort();
}
|
2013-01-15 21:50:13 +04:00
|
|
|
|
|
|
|
/* Stub for builds without KVM_CAP_IRQ_ROUTING: always fails with -ENOSYS. */
int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg)
{
    /* MSI routes cannot be updated without IRQ routing support. */
    return -ENOSYS;
}
|
2011-10-15 13:49:47 +04:00
|
|
|
#endif /* !KVM_CAP_IRQ_ROUTING */
|
|
|
|
|
2015-07-06 21:15:13 +03:00
|
|
|
/*
 * Assign the file descriptor of notifier @n as an irqfd for @virq.
 * When @rn is non-NULL its file descriptor is passed as the resample fd,
 * otherwise -1 is passed.
 *
 * Returns whatever kvm_irqchip_assign_irqfd() returns.
 */
int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
                                       EventNotifier *rn, int virq)
{
    int trigger_fd = event_notifier_get_fd(n);
    int resample_fd = -1;

    if (rn) {
        resample_fd = event_notifier_get_fd(rn);
    }

    return kvm_irqchip_assign_irqfd(s, trigger_fd, resample_fd, virq, true);
}
|
|
|
|
|
2015-07-06 21:15:13 +03:00
|
|
|
/*
 * Deassign the file descriptor of notifier @n as an irqfd for @virq.
 * No resample fd is involved, so -1 is passed in its place.
 *
 * Returns whatever kvm_irqchip_assign_irqfd() returns.
 */
int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
                                          int virq)
{
    int trigger_fd = event_notifier_get_fd(n);

    return kvm_irqchip_assign_irqfd(s, trigger_fd, -1, virq, false);
}
|
|
|
|
|
2015-07-06 21:15:13 +03:00
|
|
|
/*
 * Look up the GSI registered for @irq in s->gsimap and add an irqfd
 * notifier for it.
 *
 * Returns -ENXIO when @irq has no entry in the map, otherwise the result
 * of kvm_irqchip_add_irqfd_notifier_gsi().
 */
int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
                                   EventNotifier *rn, qemu_irq irq)
{
    gpointer stored_key, stored_gsi;

    /* The extended lookup distinguishes "absent" from a stored GSI of 0. */
    if (!g_hash_table_lookup_extended(s->gsimap, irq,
                                      &stored_key, &stored_gsi)) {
        return -ENXIO;
    }

    return kvm_irqchip_add_irqfd_notifier_gsi(s, n, rn,
                                              GPOINTER_TO_INT(stored_gsi));
}
|
|
|
|
|
|
|
|
int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n,
|
|
|
|
qemu_irq irq)
|
|
|
|
{
|
|
|
|
gpointer key, gsi;
|
|
|
|
gboolean found = g_hash_table_lookup_extended(s->gsimap, irq, &key, &gsi);
|
|
|
|
|
|
|
|
if (!found) {
|
|
|
|
return -ENXIO;
|
|
|
|
}
|
|
|
|
return kvm_irqchip_remove_irqfd_notifier_gsi(s, n, GPOINTER_TO_INT(gsi));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Record in s->gsimap that @irq corresponds to @gsi.  The GSI is stored
 * directly in the pointer-sized value slot of the hash table.
 */
void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi)
{
    gpointer packed_gsi = GINT_TO_POINTER(gsi);

    g_hash_table_insert(s->gsimap, irq, packed_gsi);
}
|
|
|
|
|
2015-06-18 19:30:15 +03:00
|
|
|
/*
 * Create the in-kernel irqchip, if the host kernel offers one.
 *
 * Returns silently when neither KVM_CAP_IRQCHIP nor KVM_CAP_S390_IRQCHIP
 * is reported.  Any failure after that point prints a diagnostic to stderr
 * and terminates the process with exit(1).
 *
 * On success the kvm_kernel_irqchip, kvm_async_interrupts_allowed and
 * kvm_halt_in_kernel_allowed flags are set, IRQ routing is initialised and
 * s->gsimap is allocated.
 */
static void kvm_irqchip_create(MachineState *machine, KVMState *s)
{
    int ret;

    if (kvm_check_extension(s, KVM_CAP_IRQCHIP)) {
        /* Nothing to enable; fall through to the creation step. */
        ;
    } else if (kvm_check_extension(s, KVM_CAP_S390_IRQCHIP)) {
        ret = kvm_vm_enable_cap(s, KVM_CAP_S390_IRQCHIP, 0);
        if (ret < 0) {
            fprintf(stderr, "Enable kernel irqchip failed: %s\n", strerror(-ret));
            exit(1);
        }
    } else {
        return;
    }

    /* First probe and see if there's a arch-specific hook to create the
     * in-kernel irqchip for us */
    ret = kvm_arch_irqchip_create(machine, s);
    if (ret == 0) {
        if (machine_kernel_irqchip_split(machine)) {
            /*
             * This is a configuration problem, not a failed system call:
             * errno holds nothing meaningful here, so report the message
             * with fprintf() rather than perror(), which would append an
             * unrelated errno description.
             */
            fprintf(stderr, "Split IRQ chip mode not supported.\n");
            exit(1);
        } else {
            ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
        }
    }
    if (ret < 0) {
        fprintf(stderr, "Create kernel irqchip failed: %s\n", strerror(-ret));
        exit(1);
    }

    kvm_kernel_irqchip = true;
    /* If we have an in-kernel IRQ chip then we must have asynchronous
     * interrupt delivery (though the reverse is not necessarily true)
     */
    kvm_async_interrupts_allowed = true;
    kvm_halt_in_kernel_allowed = true;

    kvm_init_irq_routing(s);

    /* Maps qemu_irq pointers to GSI numbers; keys are compared by address. */
    s->gsimap = g_hash_table_new(g_direct_hash, g_direct_equal);
}
|
|
|
|
|
2013-08-23 17:24:37 +04:00
|
|
|
/* Find number of supported CPUs using the recommended
|
|
|
|
* procedure from the kernel API documentation to cope with
|
|
|
|
* older kernels that may be missing capabilities.
|
|
|
|
*/
|
|
|
|
static int kvm_recommended_vcpus(KVMState *s)
|
2012-07-31 15:18:17 +04:00
|
|
|
{
|
kvm: check KVM_CAP_NR_VCPUS with kvm_vm_check_extension()
On a modern server-class ppc host with the following CPU topology:
Architecture: ppc64le
Byte Order: Little Endian
CPU(s): 32
On-line CPU(s) list: 0,8,16,24
Off-line CPU(s) list: 1-7,9-15,17-23,25-31
Thread(s) per core: 1
If both KVM PR and KVM HV loaded and we pass:
-machine pseries,accel=kvm,kvm-type=PR -smp 8
We expect QEMU to warn that this exceeds the number of online CPUs:
Warning: Number of SMP cpus requested (8) exceeds the recommended
cpus supported by KVM (4)
Warning: Number of hotpluggable cpus requested (8) exceeds the
recommended cpus supported by KVM (4)
but nothing is printed...
This happens because on ppc the KVM_CAP_NR_VCPUS capability is VM
specific ndreally depends on the KVM type, but we currently use it
as a global capability. And KVM returns a fallback value based on
KVM HV being present. Maybe KVM on POWER shouldn't presume anything
as long as it doesn't have a VM, but in all cases, we should call
KVM_CREATE_VM first and use KVM_CAP_NR_VCPUS as a VM capability.
This patch hence changes kvm_recommended_vcpus() accordingly and
moves the sanity checking of smp_cpus after the VM creation.
It is okay for the other archs that also implement KVM_CAP_NR_VCPUS,
ie, mips, s390, x86 and arm, because they don't depend on the VM
being created or not.
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <150600966286.30533.10909862523552370889.stgit@bahia.lan>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-09-21 19:01:02 +03:00
|
|
|
int ret = kvm_vm_check_extension(s, KVM_CAP_NR_VCPUS);
|
2013-08-23 17:24:37 +04:00
|
|
|
return (ret) ? ret : 4;
|
|
|
|
}
|
2012-07-31 15:18:17 +04:00
|
|
|
|
2013-08-23 17:24:37 +04:00
|
|
|
/*
 * Return the maximum number of vCPUs: the value of KVM_CAP_MAX_VCPUS, or
 * the recommended vCPU count when that capability reports zero.
 */
static int kvm_max_vcpus(KVMState *s)
{
    int reported = kvm_check_extension(s, KVM_CAP_MAX_VCPUS);

    if (reported == 0) {
        return kvm_recommended_vcpus(s);
    }
    return reported;
}
|
|
|
|
|
2016-05-26 11:02:23 +03:00
|
|
|
/*
 * Return the vCPU id limit: the value of KVM_CAP_MAX_VCPU_ID, or the
 * maximum vCPU count when that capability reports zero.
 */
static int kvm_max_vcpu_id(KVMState *s)
{
    int reported = kvm_check_extension(s, KVM_CAP_MAX_VCPU_ID);

    if (reported == 0) {
        return kvm_max_vcpus(s);
    }
    return reported;
}
|
|
|
|
|
2016-04-26 16:41:04 +03:00
|
|
|
bool kvm_vcpu_id_is_valid(int vcpu_id)
|
|
|
|
{
|
|
|
|
KVMState *s = KVM_STATE(current_machine->accelerator);
|
2016-05-26 11:02:23 +03:00
|
|
|
return vcpu_id >= 0 && vcpu_id < kvm_max_vcpu_id(s);
|
2016-04-26 16:41:04 +03:00
|
|
|
}
|
|
|
|
|
2014-09-27 00:45:30 +04:00
|
|
|
/*
 * Initialise the KVM accelerator state attached to @ms.
 *
 * Opens /dev/kvm, checks the API version, creates the VM, validates the
 * requested vCPU counts against the limits reported by KVM, verifies the
 * required capabilities, caches the optional ones, runs the arch-specific
 * initialisation, optionally creates the in-kernel irqchip and registers
 * the memory listeners.
 *
 * Returns 0 on success.  On failure a diagnostic is printed to stderr, the
 * file descriptors opened so far are closed and a negative value is
 * returned.  Requesting more vCPUs than the hard limit terminates the
 * process with exit(1).
 */
static int kvm_init(MachineState *ms)
{
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    static const char upgrade_note[] =
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
        "(see http://sourceforge.net/projects/kvm).\n";
    struct {
        const char *name;
        int num;
    } num_cpus[] = {
        { "SMP", smp_cpus },
        { "hotpluggable", max_cpus },
        { NULL, }
    }, *nc = num_cpus;
    int soft_vcpus_limit, hard_vcpus_limit;
    KVMState *s;
    const KVMCapabilityInfo *missing_cap;
    int ret;
    int type = 0;
    const char *kvm_type;

    s = KVM_STATE(ms->accelerator);

    /*
     * On systems where the kernel can support different base page
     * sizes, host page size may be different from TARGET_PAGE_SIZE,
     * even with KVM.  TARGET_PAGE_SIZE is assumed to be the minimum
     * page size for the system though.
     */
    assert(TARGET_PAGE_SIZE <= getpagesize());

    s->sigmask_len = 8;

#ifdef KVM_CAP_SET_GUEST_DEBUG
    QTAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    QLIST_INIT(&s->kvm_parked_vcpus);
    /* Mark the VM fd as unopened so the error path knows what to close. */
    s->vmfd = -1;
    s->fd = qemu_open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        /*
         * Capture errno before calling fprintf(), which is allowed to
         * modify it, and format the message with strerror() instead of
         * the glibc-specific %m conversion.
         */
        ret = -errno;
        fprintf(stderr, "Could not access KVM kernel module: %s\n",
                strerror(-ret));
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        /* Keep a negative ioctl error; map an old version to -EINVAL. */
        if (ret >= 0) {
            ret = -EINVAL;
        }
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT);
    s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS);

    /* If unspecified, use the default value */
    if (!s->nr_slots) {
        s->nr_slots = 32;
    }

    kvm_type = qemu_opt_get(qemu_get_machine_opts(), "kvm-type");
    if (mc->kvm_type) {
        type = mc->kvm_type(kvm_type);
    } else if (kvm_type) {
        /* A kvm-type was given but the machine class cannot interpret it. */
        ret = -EINVAL;
        fprintf(stderr, "Invalid argument kvm-type=%s\n", kvm_type);
        goto err;
    }

    /* Retry VM creation for as long as the ioctl is interrupted. */
    do {
        ret = kvm_ioctl(s, KVM_CREATE_VM, type);
    } while (ret == -EINTR);

    if (ret < 0) {
        fprintf(stderr, "ioctl(KVM_CREATE_VM) failed: %d %s\n", -ret,
                strerror(-ret));

#ifdef TARGET_S390X
        if (ret == -EINVAL) {
            fprintf(stderr,
                    "Host kernel setup problem detected. Please verify:\n");
            fprintf(stderr, "- for kernels supporting the switch_amode or"
                    " user_mode parameters, whether\n");
            fprintf(stderr,
                    " user space is running in primary address space\n");
            fprintf(stderr,
                    "- for kernels supporting the vm.allocate_pgste sysctl, "
                    "whether it is enabled\n");
        }
#endif
        goto err;
    }

    s->vmfd = ret;

    /* check the vcpu limits */
    soft_vcpus_limit = kvm_recommended_vcpus(s);
    hard_vcpus_limit = kvm_max_vcpus(s);

    while (nc->name) {
        if (nc->num > soft_vcpus_limit) {
            warn_report("Number of %s cpus requested (%d) exceeds "
                        "the recommended cpus supported by KVM (%d)",
                        nc->name, nc->num, soft_vcpus_limit);

            if (nc->num > hard_vcpus_limit) {
                fprintf(stderr, "Number of %s cpus requested (%d) exceeds "
                        "the maximum cpus supported by KVM (%d)\n",
                        nc->name, nc->num, hard_vcpus_limit);
                exit(1);
            }
        }
        nc++;
    }

    /* Generic requirements first, then the arch-specific ones. */
    missing_cap = kvm_check_extension_list(s, kvm_required_capabilites);
    if (!missing_cap) {
        missing_cap =
            kvm_check_extension_list(s, kvm_arch_required_capabilities);
    }
    if (missing_cap) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support %s\n%s",
                missing_cap->name, upgrade_note);
        goto err;
    }

    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);

#ifdef KVM_CAP_VCPU_EVENTS
    s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
#endif

    s->robust_singlestep =
        kvm_check_extension(s, KVM_CAP_X86_ROBUST_SINGLESTEP);

#ifdef KVM_CAP_DEBUGREGS
    s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
#endif

#ifdef KVM_CAP_IRQ_ROUTING
    kvm_direct_msi_allowed = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0);
#endif

    s->intx_set_mask = kvm_check_extension(s, KVM_CAP_PCI_2_3);

    s->irq_set_ioctl = KVM_IRQ_LINE;
    if (kvm_check_extension(s, KVM_CAP_IRQ_INJECT_STATUS)) {
        s->irq_set_ioctl = KVM_IRQ_LINE_STATUS;
    }

#ifdef KVM_CAP_READONLY_MEM
    kvm_readonly_mem_allowed =
        (kvm_check_extension(s, KVM_CAP_READONLY_MEM) > 0);
#endif

    kvm_eventfds_allowed =
        (kvm_check_extension(s, KVM_CAP_IOEVENTFD) > 0);

    kvm_irqfds_allowed =
        (kvm_check_extension(s, KVM_CAP_IRQFD) > 0);

    kvm_resamplefds_allowed =
        (kvm_check_extension(s, KVM_CAP_IRQFD_RESAMPLE) > 0);

    kvm_vm_attributes_allowed =
        (kvm_check_extension(s, KVM_CAP_VM_ATTRIBUTES) > 0);

    kvm_ioeventfd_any_length_allowed =
        (kvm_check_extension(s, KVM_CAP_IOEVENTFD_ANY_LENGTH) > 0);

    kvm_state = s;

    /*
     * if memory encryption object is specified then initialize the memory
     * encryption context.
     */
    if (ms->memory_encryption) {
        kvm_state->memcrypt_handle = sev_guest_init(ms->memory_encryption);
        if (!kvm_state->memcrypt_handle) {
            ret = -1;
            goto err;
        }
    }

    ret = kvm_arch_init(ms, s);
    if (ret < 0) {
        goto err;
    }

    if (machine_kernel_irqchip_allowed(ms)) {
        kvm_irqchip_create(ms, s);
    }

    /* The ioeventfd hooks are installed only when KVM supports ioeventfds. */
    if (kvm_eventfds_allowed) {
        s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
        s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
    }
    s->memory_listener.listener.coalesced_mmio_add = kvm_coalesce_mmio_region;
    s->memory_listener.listener.coalesced_mmio_del = kvm_uncoalesce_mmio_region;

    kvm_memory_listener_register(s, &s->memory_listener,
                                 &address_space_memory, 0);
    memory_listener_register(&kvm_io_listener,
                             &address_space_io);

    s->many_ioeventfds = kvm_check_many_ioeventfds();

    s->sync_mmu = !!kvm_vm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);

    return 0;

err:
    /* Every jump to this label must have stored a negative error code. */
    assert(ret < 0);
    if (s->vmfd >= 0) {
        close(s->vmfd);
    }
    if (s->fd != -1) {
        close(s->fd);
    }
    g_free(s->memory_listener.slots);

    return ret;
}
|
|
|
|
|
2014-06-18 02:10:31 +04:00
|
|
|
/* Store @sigmask_len in the KVM state, replacing the previous value. */
void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len)
{
    s->sigmask_len = sigmask_len;
}
|
|
|
|
|
2015-04-08 14:30:58 +03:00
|
|
|
/*
 * Perform @count port I/O accesses of @size bytes each on @port.
 *
 * @data points to @count consecutive items of @size bytes; item k is used
 * for the k-th access.  The access is a write to the I/O address space
 * when @direction equals KVM_EXIT_IO_OUT and a read otherwise.
 */
static void kvm_handle_io(uint16_t port, MemTxAttrs attrs, void *data, int direction,
                          int size, uint32_t count)
{
    uint8_t *ptr = data;
    /*
     * Same type as @count: avoids a signed/unsigned comparison and signed
     * overflow of the index for counts above INT_MAX.
     */
    uint32_t i;

    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, attrs,
                         ptr, size,
                         direction == KVM_EXIT_IO_OUT);
        ptr += size;
    }
}
|
|
|
|
|
2013-05-27 03:55:29 +04:00
|
|
|
/*
 * Report a KVM internal error exit on stderr.
 *
 * Prints the suberror code and, when KVM_CAP_INTERNAL_ERROR_DATA is
 * available, each extra data word.  For an emulation failure the CPU state
 * is dumped and EXCP_INTERRUPT is returned, unless the architecture asks
 * to stop on emulation errors.  Every other case returns -1.
 */
static int kvm_handle_internal_error(CPUState *cpu, struct kvm_run *run)
{
    fprintf(stderr, "KVM internal error. Suberror: %d\n",
            run->internal.suberror);

    if (kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA)) {
        int idx = 0;

        while (idx < run->internal.ndata) {
            fprintf(stderr, "extra data[%d]: %"PRIx64"\n",
                    idx, (uint64_t)run->internal.data[idx]);
            ++idx;
        }
    }

    if (run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION) {
        fprintf(stderr, "emulation failure\n");
        if (!kvm_arch_stop_on_emulation_error(cpu)) {
            cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_CODE);
            return EXCP_INTERRUPT;
        }
    }

    /* FIXME: Should trigger a qmp message to let management know
     * something went wrong.
     */
    return -1;
}
|
|
|
|
|
2010-01-26 14:21:16 +03:00
|
|
|
void kvm_flush_coalesced_mmio_buffer(void)
|
2008-12-09 23:09:57 +03:00
|
|
|
{
|
|
|
|
KVMState *s = kvm_state;
|
2011-10-18 21:43:12 +04:00
|
|
|
|
|
|
|
if (s->coalesced_flush_in_progress) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->coalesced_flush_in_progress = true;
|
|
|
|
|
2010-01-26 14:21:16 +03:00
|
|
|
if (s->coalesced_mmio_ring) {
|
|
|
|
struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
|
2008-12-09 23:09:57 +03:00
|
|
|
while (ring->first != ring->last) {
|
|
|
|
struct kvm_coalesced_mmio *ent;
|
|
|
|
|
|
|
|
ent = &ring->coalesced_mmio[ring->first];
|
|
|
|
|
|
|
|
cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
|
2010-02-22 19:57:54 +03:00
|
|
|
smp_wmb();
|
2008-12-09 23:09:57 +03:00
|
|
|
ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
|
|
|
|
}
|
|
|
|
}
|
2011-10-18 21:43:12 +04:00
|
|
|
|
|
|
|
s->coalesced_flush_in_progress = false;
|
2008-12-09 23:09:57 +03:00
|
|
|
}
|
|
|
|
|
2016-10-31 12:36:08 +03:00
|
|
|
/*
 * run_on_cpu() callback: fetch the registers from KVM unless the cached
 * copy is already marked dirty, then mark it dirty.  @arg is unused.
 */
static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    if (cpu->vcpu_dirty) {
        return;
    }

    kvm_arch_get_registers(cpu);
    cpu->vcpu_dirty = true;
}
|
|
|
|
|
2013-05-01 15:45:44 +04:00
|
|
|
/*
 * Schedule do_kvm_cpu_synchronize_state() on @cpu via run_on_cpu(),
 * unless the vCPU is already marked dirty.
 */
void kvm_cpu_synchronize_state(CPUState *cpu)
{
    if (cpu->vcpu_dirty) {
        return;
    }

    run_on_cpu(cpu, do_kvm_cpu_synchronize_state, RUN_ON_CPU_NULL);
}
|
|
|
|
|
2016-10-31 12:36:08 +03:00
|
|
|
/*
 * run_on_cpu() callback: push the registers to KVM at the
 * KVM_PUT_RESET_STATE level and clear the dirty flag.  @arg is unused.
 */
static void do_kvm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
    kvm_arch_put_registers(cpu, KVM_PUT_RESET_STATE);

    /* The cached registers have just been written out. */
    cpu->vcpu_dirty = false;
}
|
|
|
|
|
2014-08-20 16:55:25 +04:00
|
|
|
/* Schedule do_kvm_cpu_synchronize_post_reset() on @cpu via run_on_cpu(). */
void kvm_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_kvm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}
|
|
|
|
|
2016-10-31 12:36:08 +03:00
|
|
|
/*
 * run_on_cpu() callback: push the registers to KVM at the
 * KVM_PUT_FULL_STATE level and clear the dirty flag.  @arg is unused.
 */
static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE);

    /* The cached registers have just been written out. */
    cpu->vcpu_dirty = false;
}
|
|
|
|
|
2014-08-20 16:55:25 +04:00
|
|
|
/* Schedule do_kvm_cpu_synchronize_post_init() on @cpu via run_on_cpu(). */
void kvm_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}
|
|
|
|
|
2017-05-26 07:46:28 +03:00
|
|
|
/*
 * run_on_cpu() callback: mark the vCPU dirty without reading anything
 * back from KVM.  @arg is unused.
 */
static void do_kvm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}
|
|
|
|
|
|
|
|
/* Schedule do_kvm_cpu_synchronize_pre_loadvm() on @cpu via run_on_cpu(). */
void kvm_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_kvm_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}
|
|
|
|
|
2017-02-08 14:48:54 +03:00
|
|
|
#ifdef KVM_HAVE_MCE_INJECTION
/*
 * Per-thread record of a SIGBUS that still has to be handled.
 * kvm_cpu_exec() checks have_sigbus_pending after KVM_RUN returns, passes
 * the code and address to kvm_arch_on_sigbus_vcpu() and clears the flag.
 */
/* Address argument forwarded to kvm_arch_on_sigbus_vcpu(). */
static __thread void *pending_sigbus_addr;
/* Code argument forwarded to kvm_arch_on_sigbus_vcpu(). */
static __thread int pending_sigbus_code;
/* True while the two values above are waiting to be consumed. */
static __thread bool have_sigbus_pending;
#endif
|
|
|
|
|
2017-02-08 15:52:50 +03:00
|
|
|
/* Atomically set the immediate_exit flag in @cpu's kvm_run structure. */
static void kvm_cpu_kick(CPUState *cpu)
{
    atomic_set(&cpu->kvm_run->immediate_exit, 1);
}
|
|
|
|
|
|
|
|
static void kvm_cpu_kick_self(void)
|
|
|
|
{
|
|
|
|
if (kvm_immediate_exit) {
|
|
|
|
kvm_cpu_kick(current_cpu);
|
|
|
|
} else {
|
|
|
|
qemu_cpu_kick_self();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-02-09 11:41:14 +03:00
|
|
|
/*
 * Consume the kick that interrupted KVM_RUN.
 *
 * With kvm_immediate_exit the immediate_exit flag is cleared and nothing
 * else is done.  Otherwise pending SIG_IPI signals are dequeued with a
 * zero-timeout sigtimedwait() until sigpending() no longer reports
 * SIG_IPI.  An unexpected failure of either call prints a message with
 * perror() and terminates the process with exit(1).
 */
static void kvm_eat_signals(CPUState *cpu)
{
    struct timespec no_wait = { 0, 0 };
    siginfo_t info;
    sigset_t ipi_set;
    sigset_t pending;

    if (kvm_immediate_exit) {
        atomic_set(&cpu->kvm_run->immediate_exit, 0);
        /* Write kvm_run->immediate_exit before the cpu->exit_request
         * write in kvm_cpu_exec.
         */
        smp_wmb();
        return;
    }

    sigemptyset(&ipi_set);
    sigaddset(&ipi_set, SIG_IPI);

    for (;;) {
        int rc = sigtimedwait(&ipi_set, &info, &no_wait);

        /* EAGAIN and EINTR are tolerated; anything else is fatal. */
        if (rc == -1 && errno != EAGAIN && errno != EINTR) {
            perror("sigtimedwait");
            exit(1);
        }

        if (sigpending(&pending) == -1) {
            perror("sigpending");
            exit(1);
        }

        if (!sigismember(&pending, SIG_IPI)) {
            break;
        }
    }
}
|
|
|
|
|
2013-05-27 01:46:55 +04:00
|
|
|
int kvm_cpu_exec(CPUState *cpu)
|
2008-11-05 19:29:27 +03:00
|
|
|
{
|
2012-12-01 09:18:14 +04:00
|
|
|
struct kvm_run *run = cpu->kvm_run;
|
2011-03-15 14:26:25 +03:00
|
|
|
int ret, run_ret;
|
2008-11-05 19:29:27 +03:00
|
|
|
|
2010-04-18 18:22:14 +04:00
|
|
|
DPRINTF("kvm_cpu_exec()\n");
|
2008-11-05 19:29:27 +03:00
|
|
|
|
2012-10-31 09:57:49 +04:00
|
|
|
if (kvm_arch_process_async_events(cpu)) {
|
2017-02-15 17:36:11 +03:00
|
|
|
atomic_set(&cpu->exit_request, 0);
|
2011-02-07 14:19:18 +03:00
|
|
|
return EXCP_HLT;
|
2011-02-02 00:16:00 +03:00
|
|
|
}
|
2010-05-04 16:45:27 +04:00
|
|
|
|
2015-06-18 19:47:23 +03:00
|
|
|
qemu_mutex_unlock_iothread();
|
2017-06-06 21:19:39 +03:00
|
|
|
cpu_exec_start(cpu);
|
2015-06-18 19:47:23 +03:00
|
|
|
|
2011-02-02 00:16:00 +03:00
|
|
|
do {
|
2015-04-08 14:30:58 +03:00
|
|
|
MemTxAttrs attrs;
|
|
|
|
|
2017-06-18 22:11:01 +03:00
|
|
|
if (cpu->vcpu_dirty) {
|
2012-10-31 09:57:49 +04:00
|
|
|
kvm_arch_put_registers(cpu, KVM_PUT_RUNTIME_STATE);
|
2017-06-18 22:11:01 +03:00
|
|
|
cpu->vcpu_dirty = false;
|
2009-08-18 00:19:53 +04:00
|
|
|
}
|
|
|
|
|
2012-10-31 09:57:49 +04:00
|
|
|
kvm_arch_pre_run(cpu, run);
|
2017-02-15 17:36:11 +03:00
|
|
|
if (atomic_read(&cpu->exit_request)) {
|
2011-02-02 00:16:00 +03:00
|
|
|
DPRINTF("interrupt exit requested\n");
|
|
|
|
/*
|
|
|
|
* KVM requires us to reenter the kernel after IO exits to complete
|
|
|
|
* instruction emulation. This self-signal will ensure that we
|
|
|
|
* leave ASAP again.
|
|
|
|
*/
|
2017-02-08 15:52:50 +03:00
|
|
|
kvm_cpu_kick_self();
|
2011-02-02 00:16:00 +03:00
|
|
|
}
|
|
|
|
|
2017-02-08 15:52:50 +03:00
|
|
|
/* Read cpu->exit_request before KVM_RUN reads run->immediate_exit.
|
|
|
|
* Matching barrier in kvm_eat_signals.
|
|
|
|
*/
|
|
|
|
smp_rmb();
|
|
|
|
|
2012-10-31 09:06:49 +04:00
|
|
|
run_ret = kvm_vcpu_ioctl(cpu, KVM_RUN, 0);
|
2011-02-02 00:16:00 +03:00
|
|
|
|
2015-04-08 14:30:58 +03:00
|
|
|
attrs = kvm_arch_post_run(cpu, run);
|
2008-11-05 19:29:27 +03:00
|
|
|
|
2017-02-08 14:48:54 +03:00
|
|
|
#ifdef KVM_HAVE_MCE_INJECTION
|
|
|
|
if (unlikely(have_sigbus_pending)) {
|
|
|
|
qemu_mutex_lock_iothread();
|
|
|
|
kvm_arch_on_sigbus_vcpu(cpu, pending_sigbus_code,
|
|
|
|
pending_sigbus_addr);
|
|
|
|
have_sigbus_pending = false;
|
|
|
|
qemu_mutex_unlock_iothread();
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2011-03-15 14:26:25 +03:00
|
|
|
if (run_ret < 0) {
|
2011-03-15 14:26:26 +03:00
|
|
|
if (run_ret == -EINTR || run_ret == -EAGAIN) {
|
|
|
|
DPRINTF("io window exit\n");
|
2017-02-09 11:41:14 +03:00
|
|
|
kvm_eat_signals(cpu);
|
2011-03-15 14:26:27 +03:00
|
|
|
ret = EXCP_INTERRUPT;
|
2011-03-15 14:26:26 +03:00
|
|
|
break;
|
|
|
|
}
|
2011-12-16 04:20:20 +04:00
|
|
|
fprintf(stderr, "error: kvm run failed %s\n",
|
|
|
|
strerror(-run_ret));
|
2015-05-18 22:06:47 +03:00
|
|
|
#ifdef TARGET_PPC
|
|
|
|
if (run_ret == -EBUSY) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"This is probably because your SMT is enabled.\n"
|
|
|
|
"VCPU can only run on primary threads with all "
|
|
|
|
"secondary threads offline.\n");
|
|
|
|
}
|
|
|
|
#endif
|
2014-08-29 17:58:20 +04:00
|
|
|
ret = -1;
|
|
|
|
break;
|
2008-11-05 19:29:27 +03:00
|
|
|
}
|
|
|
|
|
2013-03-29 08:27:52 +04:00
|
|
|
trace_kvm_run_exit(cpu->cpu_index, run->exit_reason);
|
2008-11-05 19:29:27 +03:00
|
|
|
switch (run->exit_reason) {
|
|
|
|
case KVM_EXIT_IO:
|
2010-04-18 18:22:14 +04:00
|
|
|
DPRINTF("handle_io\n");
|
2015-06-18 19:47:24 +03:00
|
|
|
/* Called outside BQL */
|
2015-04-08 14:30:58 +03:00
|
|
|
kvm_handle_io(run->io.port, attrs,
|
2011-02-02 00:16:01 +03:00
|
|
|
(uint8_t *)run + run->io.data_offset,
|
|
|
|
run->io.direction,
|
|
|
|
run->io.size,
|
|
|
|
run->io.count);
|
2011-03-15 14:26:27 +03:00
|
|
|
ret = 0;
|
2008-11-05 19:29:27 +03:00
|
|
|
break;
|
|
|
|
case KVM_EXIT_MMIO:
|
2010-04-18 18:22:14 +04:00
|
|
|
DPRINTF("handle_mmio\n");
|
2015-06-18 19:47:26 +03:00
|
|
|
/* Called outside BQL */
|
2015-04-08 14:30:58 +03:00
|
|
|
address_space_rw(&address_space_memory,
|
|
|
|
run->mmio.phys_addr, attrs,
|
|
|
|
run->mmio.data,
|
|
|
|
run->mmio.len,
|
|
|
|
run->mmio.is_write);
|
2011-03-15 14:26:27 +03:00
|
|
|
ret = 0;
|
2008-11-05 19:29:27 +03:00
|
|
|
break;
|
|
|
|
case KVM_EXIT_IRQ_WINDOW_OPEN:
|
2010-04-18 18:22:14 +04:00
|
|
|
DPRINTF("irq_window_open\n");
|
2011-03-15 14:26:27 +03:00
|
|
|
ret = EXCP_INTERRUPT;
|
2008-11-05 19:29:27 +03:00
|
|
|
break;
|
|
|
|
case KVM_EXIT_SHUTDOWN:
|
2010-04-18 18:22:14 +04:00
|
|
|
DPRINTF("shutdown\n");
|
2017-05-16 00:41:13 +03:00
|
|
|
qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
|
2011-03-15 14:26:27 +03:00
|
|
|
ret = EXCP_INTERRUPT;
|
2008-11-05 19:29:27 +03:00
|
|
|
break;
|
|
|
|
case KVM_EXIT_UNKNOWN:
|
2011-01-21 23:48:07 +03:00
|
|
|
fprintf(stderr, "KVM: unknown exit, hardware reason %" PRIx64 "\n",
|
|
|
|
(uint64_t)run->hw.hardware_exit_reason);
|
2011-01-21 23:48:06 +03:00
|
|
|
ret = -1;
|
2008-11-05 19:29:27 +03:00
|
|
|
break;
|
2010-03-23 19:37:11 +03:00
|
|
|
case KVM_EXIT_INTERNAL_ERROR:
|
2013-05-27 03:55:29 +04:00
|
|
|
ret = kvm_handle_internal_error(cpu, run);
|
2010-03-23 19:37:11 +03:00
|
|
|
break;
|
2014-06-19 21:06:25 +04:00
|
|
|
case KVM_EXIT_SYSTEM_EVENT:
|
|
|
|
switch (run->system_event.type) {
|
|
|
|
case KVM_SYSTEM_EVENT_SHUTDOWN:
|
2017-05-16 00:41:13 +03:00
|
|
|
qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
|
2014-06-19 21:06:25 +04:00
|
|
|
ret = EXCP_INTERRUPT;
|
|
|
|
break;
|
|
|
|
case KVM_SYSTEM_EVENT_RESET:
|
2017-05-16 00:41:13 +03:00
|
|
|
qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
|
2014-06-19 21:06:25 +04:00
|
|
|
ret = EXCP_INTERRUPT;
|
|
|
|
break;
|
2015-07-03 15:01:43 +03:00
|
|
|
case KVM_SYSTEM_EVENT_CRASH:
|
2017-02-14 09:25:22 +03:00
|
|
|
kvm_cpu_synchronize_state(cpu);
|
2015-07-03 15:01:43 +03:00
|
|
|
qemu_mutex_lock_iothread();
|
2017-02-14 09:25:23 +03:00
|
|
|
qemu_system_guest_panicked(cpu_get_crash_info(cpu));
|
2015-07-03 15:01:43 +03:00
|
|
|
qemu_mutex_unlock_iothread();
|
|
|
|
ret = 0;
|
|
|
|
break;
|
2014-06-19 21:06:25 +04:00
|
|
|
default:
|
|
|
|
DPRINTF("kvm_arch_handle_exit\n");
|
|
|
|
ret = kvm_arch_handle_exit(cpu, run);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2008-11-05 19:29:27 +03:00
|
|
|
default:
|
2010-04-18 18:22:14 +04:00
|
|
|
DPRINTF("kvm_arch_handle_exit\n");
|
2012-10-31 09:57:49 +04:00
|
|
|
ret = kvm_arch_handle_exit(cpu, run);
|
2008-11-05 19:29:27 +03:00
|
|
|
break;
|
|
|
|
}
|
2011-03-15 14:26:27 +03:00
|
|
|
} while (ret == 0);
|
2008-11-05 19:29:27 +03:00
|
|
|
|
2017-06-06 21:19:39 +03:00
|
|
|
cpu_exec_end(cpu);
|
2015-06-18 19:47:23 +03:00
|
|
|
qemu_mutex_lock_iothread();
|
|
|
|
|
2011-01-21 23:48:06 +03:00
|
|
|
if (ret < 0) {
|
2013-05-27 03:33:50 +04:00
|
|
|
cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_CODE);
|
2011-09-30 21:45:27 +04:00
|
|
|
vm_stop(RUN_STATE_INTERNAL_ERROR);
|
2008-11-10 18:55:14 +03:00
|
|
|
}
|
|
|
|
|
2017-02-15 17:36:11 +03:00
|
|
|
atomic_set(&cpu->exit_request, 0);
|
2008-11-05 19:29:27 +03:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2008-11-13 22:21:00 +03:00
|
|
|
/*
 * Issue an ioctl on the file descriptor held in s->fd.
 *
 * Exactly one pointer-sized variadic argument is read and forwarded to
 * ioctl().  Returns ioctl()'s result, except that a result of -1 is
 * reported as -errno.
 */
int kvm_ioctl(KVMState *s, int type, ...)
{
    va_list args;
    void *param;
    int rc;

    va_start(args, type);
    param = va_arg(args, void *);
    va_end(args);

    trace_kvm_ioctl(type, param);

    rc = ioctl(s->fd, type, param);
    return rc == -1 ? -errno : rc;
}
|
|
|
|
|
2008-11-13 22:21:00 +03:00
|
|
|
/*
 * Issue an ioctl on the file descriptor held in s->vmfd.
 *
 * Exactly one pointer-sized variadic argument is read and forwarded to
 * ioctl().  Returns ioctl()'s result, except that a result of -1 is
 * reported as -errno.
 */
int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    va_list args;
    void *param;
    int rc;

    va_start(args, type);
    param = va_arg(args, void *);
    va_end(args);

    trace_kvm_vm_ioctl(type, param);

    rc = ioctl(s->vmfd, type, param);
    return rc == -1 ? -errno : rc;
}
|
|
|
|
|
2012-10-31 09:06:49 +04:00
|
|
|
/*
 * Issue an ioctl on the file descriptor held in cpu->kvm_fd.
 *
 * Exactly one pointer-sized variadic argument is read and forwarded to
 * ioctl().  Returns ioctl()'s result, except that a result of -1 is
 * reported as -errno.
 */
int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
{
    va_list args;
    void *param;
    int rc;

    va_start(args, type);
    param = va_arg(args, void *);
    va_end(args);

    trace_kvm_vcpu_ioctl(cpu->cpu_index, type, param);

    rc = ioctl(cpu->kvm_fd, type, param);
    return rc == -1 ? -errno : rc;
}
|
2008-12-04 23:33:06 +03:00
|
|
|
|
2014-02-26 21:20:00 +04:00
|
|
|
/*
 * Issue an ioctl on the caller-supplied file descriptor @fd.
 *
 * Exactly one pointer-sized variadic argument is read and forwarded to
 * ioctl().  Returns ioctl()'s result, except that a result of -1 is
 * reported as -errno.
 */
int kvm_device_ioctl(int fd, int type, ...)
{
    va_list args;
    void *param;
    int rc;

    va_start(args, type);
    param = va_arg(args, void *);
    va_end(args);

    trace_kvm_device_ioctl(fd, type, param);

    rc = ioctl(fd, type, param);
    return rc == -1 ? -errno : rc;
}
|
|
|
|
|
2015-03-12 15:53:49 +03:00
|
|
|
/*
 * Query KVM_HAS_DEVICE_ATTR on the VM for the given (group, attr) pair.
 *
 * Returns 1 when the ioctl reports success (0), and 0 otherwise.  When
 * kvm_vm_attributes_allowed is false the ioctl is not issued and 0 is
 * returned.
 */
int kvm_vm_check_attr(KVMState *s, uint32_t group, uint64_t attr)
{
    struct kvm_device_attr query = {
        .group = group,
        .attr = attr,
    };

    if (!kvm_vm_attributes_allowed) {
        return 0;
    }

    /* KVM_HAS_DEVICE_ATTR yields 0 on success; map that to 1 ("present"). */
    if (kvm_vm_ioctl(s, KVM_HAS_DEVICE_ATTR, &query) != 0) {
        return 0;
    }
    return 1;
}
|
|
|
|
|
2015-09-24 03:29:36 +03:00
|
|
|
int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
|
|
|
|
{
|
|
|
|
struct kvm_device_attr attribute = {
|
|
|
|
.group = group,
|
|
|
|
.attr = attr,
|
|
|
|
.flags = 0,
|
|
|
|
};
|
|
|
|
|
|
|
|
return kvm_device_ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute) ? 0 : 1;
|
|
|
|
}
|
|
|
|
|
2017-06-13 16:57:00 +03:00
|
|
|
/*
 * Read or write one device attribute through @fd.
 *
 * @group / @attr select the attribute, @val is the buffer whose address is
 * passed in the attribute descriptor, and @write picks
 * KVM_SET_DEVICE_ATTR (true) or KVM_GET_DEVICE_ATTR (false).
 *
 * Returns the result of kvm_device_ioctl(); when that is negative, an
 * error describing the failed request is also stored through @errp.
 */
int kvm_device_access(int fd, int group, uint64_t attr,
                      void *val, bool write, Error **errp)
{
    struct kvm_device_attr kvmattr = {
        .flags = 0,
        .group = group,
        .attr = attr,
        .addr = (uintptr_t)val,
    };
    int err;

    err = kvm_device_ioctl(fd,
                           write ? KVM_SET_DEVICE_ATTR : KVM_GET_DEVICE_ATTR,
                           &kvmattr);
    if (err >= 0) {
        return err;
    }

    error_setg_errno(errp, -err,
                     "KVM_%s_DEVICE_ATTR failed: Group %d "
                     "attr 0x%016" PRIx64,
                     write ? "SET" : "GET", group, attr);
    return err;
}
|
|
|
|
|
kvm: check KVM_CAP_SYNC_MMU with kvm_vm_check_extension()
On a server-class ppc host, this capability depends on the KVM type,
ie, HV or PR. If both KVM are present in the kernel, we will always
get the HV specific value, even if we explicitly requested PR on
the command line.
This can have an impact if we're using hugepages or a balloon device.
Since we've already created the VM at the time any user calls
kvm_has_sync_mmu(), switching to kvm_vm_check_extension() is
enough to fix any potential issue.
It is okay for the other archs that also implement KVM_CAP_SYNC_MMU,
ie, mips, s390, x86 and arm, because they don't depend on the VM being
created or not.
While here, let's cache the state of this extension in a bool variable,
since it has several users in the code, as suggested by Thomas Huth.
Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <150600965332.30533.14702405809647835716.stgit@bahia.lan>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-09-21 19:00:53 +03:00
|
|
|
bool kvm_has_sync_mmu(void)
|
2008-12-04 23:33:06 +03:00
|
|
|
{
|
kvm: check KVM_CAP_SYNC_MMU with kvm_vm_check_extension()
On a server-class ppc host, this capability depends on the KVM type,
ie, HV or PR. If both KVM are present in the kernel, we will always
get the HV specific value, even if we explicitely requested PR on
the command line.
This can have an impact if we're using hugepages or a balloon device.
Since we've already created the VM at the time any user calls
kvm_has_sync_mmu(), switching to kvm_vm_check_extension() is
enough to fix any potential issue.
It is okay for the other archs that also implement KVM_CAP_SYNC_MMU,
ie, mips, s390, x86 and arm, because they don't depend on the VM being
created or not.
While here, let's cache the state of this extension in a bool variable,
since it has several users in the code, as suggested by Thomas Huth.
Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <150600965332.30533.14702405809647835716.stgit@bahia.lan>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-09-21 19:00:53 +03:00
|
|
|
return kvm_state->sync_mmu;
|
2008-12-04 23:33:06 +03:00
|
|
|
}
|
2009-03-12 23:12:48 +03:00
|
|
|
|
2009-11-25 02:33:03 +03:00
|
|
|
int kvm_has_vcpu_events(void)
|
|
|
|
{
|
|
|
|
return kvm_state->vcpu_events;
|
|
|
|
}
|
|
|
|
|
2010-03-01 21:10:29 +03:00
|
|
|
int kvm_has_robust_singlestep(void)
|
|
|
|
{
|
|
|
|
return kvm_state->robust_singlestep;
|
|
|
|
}
|
|
|
|
|
2010-03-12 17:20:49 +03:00
|
|
|
int kvm_has_debugregs(void)
|
|
|
|
{
|
|
|
|
return kvm_state->debugregs;
|
|
|
|
}
|
|
|
|
|
2011-01-10 14:50:05 +03:00
|
|
|
int kvm_has_many_ioeventfds(void)
|
|
|
|
{
|
|
|
|
if (!kvm_enabled()) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return kvm_state->many_ioeventfds;
|
|
|
|
}
|
|
|
|
|
2011-10-15 13:49:47 +04:00
|
|
|
/*
 * Report the result of checking the KVM_CAP_IRQ_ROUTING extension.
 * When KVM_CAP_IRQ_ROUTING is not defined at build time, always false.
 */
int kvm_has_gsi_routing(void)
{
#ifndef KVM_CAP_IRQ_ROUTING
    return false;
#else
    return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
#endif
}
|
|
|
|
|
2012-08-27 10:28:39 +04:00
|
|
|
int kvm_has_intx_set_mask(void)
|
|
|
|
{
|
|
|
|
return kvm_state->intx_set_mask;
|
|
|
|
}
|
|
|
|
|
2017-07-11 13:21:26 +03:00
|
|
|
bool kvm_arm_supports_user_irq(void)
|
|
|
|
{
|
|
|
|
return kvm_check_extension(kvm_state, KVM_CAP_ARM_USER_IRQ);
|
|
|
|
}
|
|
|
|
|
2009-03-12 23:12:48 +03:00
|
|
|
#ifdef KVM_CAP_SET_GUEST_DEBUG
|
2012-12-01 08:35:08 +04:00
|
|
|
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu,
|
2009-03-12 23:12:48 +03:00
|
|
|
target_ulong pc)
|
|
|
|
{
|
|
|
|
struct kvm_sw_breakpoint *bp;
|
|
|
|
|
2012-12-01 08:35:08 +04:00
|
|
|
QTAILQ_FOREACH(bp, &cpu->kvm_state->kvm_sw_breakpoints, entry) {
|
2011-01-04 11:32:13 +03:00
|
|
|
if (bp->pc == pc) {
|
2009-03-12 23:12:48 +03:00
|
|
|
return bp;
|
2011-01-04 11:32:13 +03:00
|
|
|
}
|
2009-03-12 23:12:48 +03:00
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2012-12-01 08:35:08 +04:00
|
|
|
/* Non-zero when the software breakpoint list of @cpu's kvm_state is non-empty. */
int kvm_sw_breakpoints_active(CPUState *cpu)
{
    KVMState *s = cpu->kvm_state;

    return !QTAILQ_EMPTY(&s->kvm_sw_breakpoints);
}
|
|
|
|
|
2009-07-17 01:55:28 +04:00
|
|
|
struct kvm_set_guest_debug_data {
|
|
|
|
struct kvm_guest_debug dbg;
|
|
|
|
int err;
|
|
|
|
};
|
|
|
|
|
2016-10-31 12:36:08 +03:00
|
|
|
/*
 * run_on_cpu() callback: apply the debug settings carried in @data to
 * @cpu with KVM_SET_GUEST_DEBUG and store the ioctl result in the
 * request's err field.
 */
static void kvm_invoke_set_guest_debug(CPUState *cpu, run_on_cpu_data data)
{
    struct kvm_set_guest_debug_data *req = data.host_ptr;

    req->err = kvm_vcpu_ioctl(cpu, KVM_SET_GUEST_DEBUG, &req->dbg);
}
|
|
|
|
|
2013-07-25 22:50:21 +04:00
|
|
|
/*
 * Recompute the guest-debug control word for @cpu and push it to KVM.
 *
 * The control word starts from @reinject_trap, gains the enable and
 * single-step flags when cpu->singlestep_enabled is set, and is then
 * handed to kvm_arch_update_guest_debug() before the ioctl is issued via
 * run_on_cpu().
 *
 * Returns the result of the KVM_SET_GUEST_DEBUG ioctl.
 */
int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap)
{
    struct kvm_set_guest_debug_data req;
    unsigned long control = reinject_trap;

    if (cpu->singlestep_enabled) {
        control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
    }
    req.dbg.control = control;

    kvm_arch_update_guest_debug(cpu, &req.dbg);

    run_on_cpu(cpu, kvm_invoke_set_guest_debug, RUN_ON_CPU_HOST_PTR(&req));
    return req.err;
}
|
|
|
|
|
2013-06-27 19:12:06 +04:00
|
|
|
/*
 * Insert a breakpoint of the given @type at @addr.
 *
 * For GDB_BREAKPOINT_SW an existing entry at @addr only has its use count
 * bumped; otherwise a new entry is allocated, installed through
 * kvm_arch_insert_sw_breakpoint() and added to the head of the list.
 * Every other type is delegated to kvm_arch_insert_hw_breakpoint().
 * After a successful insertion the guest-debug state of every CPU is
 * refreshed.
 *
 * Returns 0 on success or the first non-zero error from a helper.
 */
int kvm_insert_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *swbp;
    int rc;

    if (type != GDB_BREAKPOINT_SW) {
        rc = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (rc) {
            return rc;
        }
    } else {
        swbp = kvm_find_sw_breakpoint(cpu, addr);
        if (swbp != NULL) {
            /* Already present: just count the additional user. */
            swbp->use_count++;
            return 0;
        }

        swbp = g_malloc(sizeof(*swbp));
        swbp->pc = addr;
        swbp->use_count = 1;

        rc = kvm_arch_insert_sw_breakpoint(cpu, swbp);
        if (rc) {
            g_free(swbp);
            return rc;
        }

        QTAILQ_INSERT_HEAD(&cpu->kvm_state->kvm_sw_breakpoints, swbp, entry);
    }

    /* Note: the loop reuses the @cpu parameter as its iteration variable. */
    CPU_FOREACH(cpu) {
        rc = kvm_update_guest_debug(cpu, 0);
        if (rc) {
            return rc;
        }
    }
    return 0;
}
|
|
|
|
|
2013-06-27 19:12:06 +04:00
|
|
|
/*
 * Remove a breakpoint of the given @type at @addr.
 *
 * For GDB_BREAKPOINT_SW the entry's use count is decremented while it is
 * above 1; the last user removes it through
 * kvm_arch_remove_sw_breakpoint(), unlinks it and frees it.  Every other
 * type is delegated to kvm_arch_remove_hw_breakpoint().  After an actual
 * removal the guest-debug state of every CPU is refreshed.
 *
 * Returns 0 on success, -ENOENT when no software breakpoint exists at
 * @addr, or the first non-zero error from a helper.
 */
int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *swbp;
    int rc;

    if (type != GDB_BREAKPOINT_SW) {
        rc = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (rc) {
            return rc;
        }
    } else {
        swbp = kvm_find_sw_breakpoint(cpu, addr);
        if (swbp == NULL) {
            return -ENOENT;
        }

        if (swbp->use_count > 1) {
            /* Other users remain: only drop one reference. */
            swbp->use_count--;
            return 0;
        }

        rc = kvm_arch_remove_sw_breakpoint(cpu, swbp);
        if (rc) {
            return rc;
        }

        QTAILQ_REMOVE(&cpu->kvm_state->kvm_sw_breakpoints, swbp, entry);
        g_free(swbp);
    }

    /* Note: the loop reuses the @cpu parameter as its iteration variable. */
    CPU_FOREACH(cpu) {
        rc = kvm_update_guest_debug(cpu, 0);
        if (rc) {
            return rc;
        }
    }
    return 0;
}
|
|
|
|
|
2013-05-27 16:40:48 +04:00
|
|
|
/*
 * Drop every software and hardware breakpoint and refresh the
 * guest-debug state of all CPUs.
 *
 * Each software breakpoint is first removed via @cpu; if that fails, the
 * other CPUs are tried in turn until one succeeds.  The list entry is
 * unlinked and freed regardless of whether any removal succeeded.
 */
void kvm_remove_all_breakpoints(CPUState *cpu)
{
    KVMState *state = cpu->kvm_state;
    struct kvm_sw_breakpoint *cur, *following;
    CPUState *other;

    QTAILQ_FOREACH_SAFE(cur, &state->kvm_sw_breakpoints, entry, following) {
        if (kvm_arch_remove_sw_breakpoint(cpu, cur) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            CPU_FOREACH(other) {
                if (kvm_arch_remove_sw_breakpoint(other, cur) == 0) {
                    break;
                }
            }
        }
        QTAILQ_REMOVE(&state->kvm_sw_breakpoints, cur, entry);
        g_free(cur);
    }

    kvm_arch_remove_all_hw_breakpoints();

    /* Note: the loop reuses the @cpu parameter as its iteration variable. */
    CPU_FOREACH(cpu) {
        kvm_update_guest_debug(cpu, 0);
    }
}
|
|
|
|
|
|
|
|
#else /* !KVM_CAP_SET_GUEST_DEBUG */
|
|
|
|
|
2013-07-25 22:50:21 +04:00
|
|
|
/* Stub used when KVM_CAP_SET_GUEST_DEBUG is not defined: always -EINVAL. */
int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap)
{
    return -EINVAL;
}
|
|
|
|
|
2013-06-27 19:12:06 +04:00
|
|
|
/* Stub used when KVM_CAP_SET_GUEST_DEBUG is not defined: always -EINVAL. */
int kvm_insert_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}
|
|
|
|
|
2013-06-27 19:12:06 +04:00
|
|
|
/* Stub used when KVM_CAP_SET_GUEST_DEBUG is not defined: always -EINVAL. */
int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}
|
|
|
|
|
2013-05-27 16:40:48 +04:00
|
|
|
/* Stub used when KVM_CAP_SET_GUEST_DEBUG is not defined: does nothing. */
void kvm_remove_all_breakpoints(CPUState *cpu)
{
}
|
|
|
|
#endif /* !KVM_CAP_SET_GUEST_DEBUG */
|
2010-02-18 01:14:42 +03:00
|
|
|
|
2017-02-09 11:41:14 +03:00
|
|
|
/*
 * Install @sigset as the signal mask of @cpu via KVM_SET_SIGNAL_MASK.
 *
 * A temporary kvm_signal_mask is allocated with room for a full sigset_t;
 * its len field is taken from kvm_state->sigmask_len.  The buffer is
 * freed before returning.
 *
 * Returns the result of the vcpu ioctl.
 */
static int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset)
{
    struct kvm_signal_mask *mask = g_malloc(sizeof(*mask) + sizeof(*sigset));
    int rc;

    mask->len = kvm_state->sigmask_len;
    memcpy(mask->sigset, sigset, sizeof(*sigset));

    rc = kvm_vcpu_ioctl(cpu, KVM_SET_SIGNAL_MASK, mask);

    g_free(mask);
    return rc;
}
|
2017-02-09 12:04:34 +03:00
|
|
|
|
2017-02-08 15:52:50 +03:00
|
|
|
static void kvm_ipi_signal(int sig)
|
2017-02-09 11:41:14 +03:00
|
|
|
{
|
2017-02-08 15:52:50 +03:00
|
|
|
if (current_cpu) {
|
|
|
|
assert(kvm_immediate_exit);
|
|
|
|
kvm_cpu_kick(current_cpu);
|
|
|
|
}
|
2017-02-09 11:41:14 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Set up signal handling for the calling thread on behalf of @cpu.
 *
 * Installs kvm_ipi_signal() as the SIG_IPI handler, then builds a mask
 * from the thread's current mask with SIG_IPI removed (and SIGBUS too
 * when KVM_HAVE_MCE_INJECTION is defined, in which case SIGBUS is also
 * unblocked for the thread right away).  The mask is applied to the
 * thread itself when kvm_immediate_exit is set, and through
 * kvm_set_signal_mask() otherwise.
 *
 * On failure an error message is printed and the process exits.
 */
void kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = kvm_ipi_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    /* With a NULL new set this only fetches the current mask into 'set'. */
    pthread_sigmask(SIG_BLOCK, NULL, &set);
#if defined KVM_HAVE_MCE_INJECTION
    sigdelset(&set, SIGBUS);
    pthread_sigmask(SIG_SETMASK, &set, NULL);
#endif
    sigdelset(&set, SIG_IPI);
    if (kvm_immediate_exit) {
        /*
         * pthread_sigmask() reports failure as a positive error number,
         * while the error path below expects a negative errno value (as
         * passed to strerror(-r)); negate it so the message is correct.
         */
        r = -pthread_sigmask(SIG_SETMASK, &set, NULL);
    } else {
        r = kvm_set_signal_mask(cpu, &set);
    }
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}
|
|
|
|
|
2017-02-08 14:48:54 +03:00
|
|
|
/* Called asynchronously in VCPU thread. */
|
2013-01-17 12:30:27 +04:00
|
|
|
int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
|
2011-02-02 00:15:51 +03:00
|
|
|
{
|
2017-02-08 14:48:54 +03:00
|
|
|
#ifdef KVM_HAVE_MCE_INJECTION
|
|
|
|
if (have_sigbus_pending) {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
have_sigbus_pending = true;
|
|
|
|
pending_sigbus_addr = addr;
|
|
|
|
pending_sigbus_code = code;
|
|
|
|
atomic_set(&cpu->exit_request, 1);
|
|
|
|
return 0;
|
|
|
|
#else
|
|
|
|
return 1;
|
|
|
|
#endif
|
2011-02-02 00:15:51 +03:00
|
|
|
}
|
|
|
|
|
2017-02-08 14:48:54 +03:00
|
|
|
/*
 * Called synchronously (via signalfd) in main thread.
 *
 * Forwards the SIGBUS (@code, @addr) to kvm_arch_on_sigbus_vcpu() on
 * first_cpu.
 *
 * Returns 0 after forwarding, or 1 when KVM_HAVE_MCE_INJECTION is not
 * defined.
 */
int kvm_on_sigbus(int code, void *addr)
{
#ifndef KVM_HAVE_MCE_INJECTION
    return 1;
#else
    /*
     * An action-required MCE kills the process while SIGBUS is blocked.
     * SIGBUS is blocked in the I/O thread, where MCEs are handled via
     * signalfd, so only action-optional events can arrive here.
     */
    assert(code != BUS_MCEERR_AR);
    kvm_arch_on_sigbus_vcpu(first_cpu, code, addr);
    return 0;
#endif
}
|
2014-02-26 21:20:00 +04:00
|
|
|
|
|
|
|
/*
 * Create (or, with @test set, only probe for) a KVM device of @type.
 *
 * Returns -ENOTSUP when KVM_CAP_DEVICE_CTRL is not available, the
 * non-zero result of KVM_CREATE_DEVICE when that ioctl fails, 0 for a
 * successful probe, and the new device file descriptor otherwise.
 */
int kvm_create_device(KVMState *s, uint64_t type, bool test)
{
    struct kvm_create_device request = {
        .type = type,
        .fd = -1,
        .flags = test ? KVM_CREATE_DEVICE_TEST : 0,
    };
    int rc;

    if (!kvm_check_extension(s, KVM_CAP_DEVICE_CTRL)) {
        return -ENOTSUP;
    }

    rc = kvm_vm_ioctl(s, KVM_CREATE_DEVICE, &request);
    if (rc) {
        return rc;
    }

    if (test) {
        return 0;
    }
    return request.fd;
}
|
2014-05-09 12:06:46 +04:00
|
|
|
|
2016-03-30 19:27:24 +03:00
|
|
|
bool kvm_device_supported(int vmfd, uint64_t type)
|
|
|
|
{
|
|
|
|
struct kvm_create_device create_dev = {
|
|
|
|
.type = type,
|
|
|
|
.fd = -1,
|
|
|
|
.flags = KVM_CREATE_DEVICE_TEST,
|
|
|
|
};
|
|
|
|
|
|
|
|
if (ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_DEVICE_CTRL) <= 0) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
return (ioctl(vmfd, KVM_CREATE_DEVICE, &create_dev) >= 0);
|
|
|
|
}
|
|
|
|
|
2014-05-09 12:06:46 +04:00
|
|
|
/*
 * Write one register of @cs through KVM_SET_ONE_REG.
 *
 * @id identifies the register and @source points at the value to write;
 * its address is passed to the kernel in the request.
 *
 * Returns the result of the vcpu ioctl; a non-zero result is also
 * reported through the kvm_failed_reg_set trace point.
 */
int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source)
{
    struct kvm_one_reg reg;
    int r;

    reg.id = id;
    reg.addr = (uintptr_t) source;
    /* Pass the address of the request structure to the ioctl. */
    r = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (r) {
        trace_kvm_failed_reg_set(id, strerror(-r));
    }
    return r;
}
|
|
|
|
|
|
|
|
/*
 * Read one register of @cs through KVM_GET_ONE_REG.
 *
 * @id identifies the register and @target points at the buffer for the
 * value; its address is passed to the kernel in the request.
 *
 * Returns the result of the vcpu ioctl; a non-zero result is also
 * reported through the kvm_failed_reg_get trace point.
 */
int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target)
{
    struct kvm_one_reg reg;
    int r;

    reg.id = id;
    reg.addr = (uintptr_t) target;
    /* Pass the address of the request structure to the ioctl. */
    r = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (r) {
        trace_kvm_failed_reg_get(id, strerror(-r));
    }
    return r;
}
|
2014-09-27 00:45:24 +04:00
|
|
|
|
|
|
|
/*
 * Class initializer for the KVM accelerator type: sets the accelerator
 * class's name, its init_machine hook (kvm_init) and its 'allowed' flag
 * pointer (&kvm_allowed).
 */
static void kvm_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *accel = ACCEL_CLASS(oc);

    accel->allowed = &kvm_allowed;
    accel->init_machine = kvm_init;
    accel->name = "KVM";
}
|
|
|
|
|
|
|
|
static const TypeInfo kvm_accel_type = {
|
|
|
|
.name = TYPE_KVM_ACCEL,
|
|
|
|
.parent = TYPE_ACCEL,
|
|
|
|
.class_init = kvm_accel_class_init,
|
2014-09-27 00:45:32 +04:00
|
|
|
.instance_size = sizeof(KVMState),
|
2014-09-27 00:45:24 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
static void kvm_type_init(void)
|
|
|
|
{
|
|
|
|
type_register_static(&kvm_accel_type);
|
|
|
|
}
|
|
|
|
|
|
|
|
type_init(kvm_type_init);
|