Merge remote-tracking branch 'qemu-kvm/uq/master' into staging

* qemu-kvm/uq/master:
  virtio/vhost: Add support for KVM in-kernel MSI injection
  msix: Add msix_nr_vectors_allocated
  kvm: Enable use of kvm_irqchip_in_kernel in hwlib code
  kvm: Introduce kvm_irqchip_add/remove_irqfd
  kvm: Make kvm_irqchip_commit_routes an internal service
  kvm: Publicize kvm_irqchip_release_virq
  kvm: Introduce kvm_irqchip_add_msi_route
  kvm: Rename kvm_irqchip_add_route to kvm_irqchip_add_irq_route
  msix: Introduce vector notifiers
  msix: Invoke msix_handle_mask_update on msix_mask_all
  msix: Factor out msix_get_message
  kvm: update vmxcap for EPT A/D, INVPCID, RDRAND, VMFUNC
  kvm: Enable in-kernel irqchip support by default
  kvm: Add support for direct MSI injections
  kvm: Update kernel headers
  kvm: x86: Wire up MSI support for in-kernel irqchip
  pc: Enable MSI support at APIC level
  kvm: Introduce basic MSI support for in-kernel irqchips
  Introduce MSIMessage structure
  kvm: Refactor KVMState::max_gsi to gsi_count
This commit is contained in:
Anthony Liguori 2012-06-03 07:56:23 +08:00
commit 74f4d2279b
18 changed files with 624 additions and 52 deletions

View File

@ -19,6 +19,7 @@
#include "apic_internal.h" #include "apic_internal.h"
#include "apic.h" #include "apic.h"
#include "ioapic.h" #include "ioapic.h"
#include "msi.h"
#include "host-utils.h" #include "host-utils.h"
#include "trace.h" #include "trace.h"
#include "pc.h" #include "pc.h"
@ -862,6 +863,8 @@ static void apic_init(APICCommonState *s)
s->timer = qemu_new_timer_ns(vm_clock, apic_timer, s); s->timer = qemu_new_timer_ns(vm_clock, apic_timer, s);
local_apics[s->idx] = s; local_apics[s->idx] = s;
msi_supported = true;
} }
static void apic_class_init(ObjectClass *klass, void *data) static void apic_class_init(ObjectClass *klass, void *data)

View File

@ -10,6 +10,7 @@
* See the COPYING file in the top-level directory. * See the COPYING file in the top-level directory.
*/ */
#include "hw/apic_internal.h" #include "hw/apic_internal.h"
#include "hw/msi.h"
#include "kvm.h" #include "kvm.h"
static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
@ -145,10 +146,39 @@ static void kvm_apic_external_nmi(APICCommonState *s)
run_on_cpu(s->cpu_env, do_inject_external_nmi, s); run_on_cpu(s->cpu_env, do_inject_external_nmi, s);
} }
/* MMIO read handler for the in-kernel-APIC MSI window.  Reads have no
 * meaning here; return all-ones, the conventional "no response" value. */
static uint64_t kvm_apic_mem_read(void *opaque, target_phys_addr_t addr,
unsigned size)
{
return ~(uint64_t)0;
}
/* MMIO write handler for the MSI window: the written (addr, data) pair is
 * an MSI message, forwarded to KVM for direct injection. */
static void kvm_apic_mem_write(void *opaque, target_phys_addr_t addr,
uint64_t data, unsigned size)
{
MSIMessage msg = { .address = addr, .data = data };
int ret;
/* Negative return is -errno from the kernel. */
ret = kvm_irqchip_send_msi(kvm_state, msg);
if (ret < 0) {
/* Non-fatal: report the failure, the MSI is simply dropped. */
fprintf(stderr, "KVM: injection failed, MSI lost (%s)\n",
strerror(-ret));
}
}
/* Memory region ops backing the "kvm-apic-msi" MMIO window. */
static const MemoryRegionOps kvm_apic_io_ops = {
.read = kvm_apic_mem_read,
.write = kvm_apic_mem_write,
.endianness = DEVICE_NATIVE_ENDIAN,
};
static void kvm_apic_init(APICCommonState *s) static void kvm_apic_init(APICCommonState *s)
{ {
memory_region_init_reservation(&s->io_memory, "kvm-apic-msi", memory_region_init_io(&s->io_memory, &kvm_apic_io_ops, s, "kvm-apic-msi",
MSI_SPACE_SIZE); MSI_SPACE_SIZE);
if (kvm_has_gsi_routing()) {
msi_supported = true;
}
} }
static void kvm_apic_class_init(ObjectClass *klass, void *data) static void kvm_apic_class_init(ObjectClass *klass, void *data)

View File

@ -24,6 +24,11 @@
#include "qemu-common.h" #include "qemu-common.h"
#include "pci.h" #include "pci.h"
struct MSIMessage {
uint64_t address;
uint32_t data;
};
extern bool msi_supported; extern bool msi_supported;
bool msi_enabled(const PCIDevice *dev); bool msi_enabled(const PCIDevice *dev);

121
hw/msix.c
View File

@ -35,6 +35,15 @@
#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2) #define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
#define MSIX_MAX_ENTRIES 32 #define MSIX_MAX_ENTRIES 32
/* Read the MSI message (64-bit address + 32-bit data) for @vector out of
 * the device's MSI-X table.  Caller must ensure @vector is in range. */
static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
{
uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
MSIMessage msg;
msg.address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
msg.data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
return msg;
}
/* Add MSI-X capability to the config space for the device. */ /* Add MSI-X capability to the config space for the device. */
/* Given a bar and its size, add MSI-X table on top of it /* Given a bar and its size, add MSI-X table on top of it
@ -130,13 +139,34 @@ static bool msix_is_masked(PCIDevice *dev, int vector)
return msix_vector_masked(dev, vector, dev->msix_function_masked); return msix_vector_masked(dev, vector, dev->msix_function_masked);
} }
/* Invoke the device's vector use/release notifier after @vector's mask
 * state changed: release on mask, use (with the current message) on unmask. */
static void msix_fire_vector_notifier(PCIDevice *dev,
unsigned int vector, bool is_masked)
{
MSIMessage msg;
int ret;
/* Notifiers are registered as a pair (see msix_set_vector_notifiers),
 * so testing one of them is sufficient. */
if (!dev->msix_vector_use_notifier) {
return;
}
if (is_masked) {
dev->msix_vector_release_notifier(dev, vector);
} else {
msg = msix_get_message(dev, vector);
ret = dev->msix_vector_use_notifier(dev, vector, msg);
assert(ret >= 0);
}
}
static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked) static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
{ {
bool is_masked = msix_is_masked(dev, vector); bool is_masked = msix_is_masked(dev, vector);
if (is_masked == was_masked) { if (is_masked == was_masked) {
return; return;
} }
msix_fire_vector_notifier(dev, vector, is_masked);
if (!is_masked && msix_is_pending(dev, vector)) { if (!is_masked && msix_is_pending(dev, vector)) {
msix_clr_pending(dev, vector); msix_clr_pending(dev, vector);
msix_notify(dev, vector); msix_notify(dev, vector);
@ -222,10 +252,14 @@ static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar)
static void msix_mask_all(struct PCIDevice *dev, unsigned nentries) static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
{ {
int vector; int vector;
for (vector = 0; vector < nentries; ++vector) { for (vector = 0; vector < nentries; ++vector) {
unsigned offset = unsigned offset =
vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
bool was_masked = msix_is_masked(dev, vector);
dev->msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT; dev->msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
msix_handle_mask_update(dev, vector, was_masked);
} }
} }
@ -317,6 +351,7 @@ void msix_save(PCIDevice *dev, QEMUFile *f)
void msix_load(PCIDevice *dev, QEMUFile *f) void msix_load(PCIDevice *dev, QEMUFile *f)
{ {
unsigned n = dev->msix_entries_nr; unsigned n = dev->msix_entries_nr;
unsigned int vector;
if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) { if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
return; return;
@ -326,6 +361,10 @@ void msix_load(PCIDevice *dev, QEMUFile *f)
qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE); qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8); qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
msix_update_function_masked(dev); msix_update_function_masked(dev);
for (vector = 0; vector < n; vector++) {
msix_handle_mask_update(dev, vector, true);
}
} }
/* Does device support MSI-X? */ /* Does device support MSI-X? */
@ -352,9 +391,7 @@ uint32_t msix_bar_size(PCIDevice *dev)
/* Send an MSI-X message */ /* Send an MSI-X message */
void msix_notify(PCIDevice *dev, unsigned vector) void msix_notify(PCIDevice *dev, unsigned vector)
{ {
uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE; MSIMessage msg;
uint64_t address;
uint32_t data;
if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
return; return;
@ -363,9 +400,9 @@ void msix_notify(PCIDevice *dev, unsigned vector)
return; return;
} }
address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); msg = msix_get_message(dev, vector);
data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
stl_le_phys(address, data); stl_le_phys(msg.address, msg.data);
} }
void msix_reset(PCIDevice *dev) void msix_reset(PCIDevice *dev)
@ -414,3 +451,75 @@ void msix_unuse_all_vectors(PCIDevice *dev)
return; return;
msix_free_irq_entries(dev); msix_free_irq_entries(dev);
} }
/* Number of MSI-X table entries allocated for @dev. */
unsigned int msix_nr_vectors_allocated(const PCIDevice *dev)
{
return dev->msix_entries_nr;
}
/* Fire the use notifier for @vector with its current MSI message.
 * Masked vectors are skipped (returns 0); otherwise returns the
 * notifier's result. */
static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector)
{
MSIMessage msg;
if (msix_is_masked(dev, vector)) {
return 0;
}
msg = msix_get_message(dev, vector);
return dev->msix_vector_use_notifier(dev, vector, msg);
}
/* Fire the release notifier for @vector; masked vectors were never
 * "used" (see msix_set_notifier_for_vector), so they are skipped. */
static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector)
{
if (msix_is_masked(dev, vector)) {
return;
}
dev->msix_vector_release_notifier(dev, vector);
}
/* Register use/release notifiers for all MSI-X vectors of @dev and, if
 * MSI-X is currently enabled and not function-masked, fire the use
 * notifier for every unmasked vector.  On failure, already-notified
 * vectors are rolled back and the notifiers are unregistered.
 * Returns 0 on success or the first notifier's negative error. */
int msix_set_vector_notifiers(PCIDevice *dev,
MSIVectorUseNotifier use_notifier,
MSIVectorReleaseNotifier release_notifier)
{
int vector, ret;
assert(use_notifier && release_notifier);
dev->msix_vector_use_notifier = use_notifier;
dev->msix_vector_release_notifier = release_notifier;
/* Only replay current state when MSI-X is enabled without MASKALL. */
if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
(MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
for (vector = 0; vector < dev->msix_entries_nr; vector++) {
ret = msix_set_notifier_for_vector(dev, vector);
if (ret < 0) {
goto undo;
}
}
}
return 0;
undo:
/* Roll back vectors [0, vector) that were already announced. */
while (--vector >= 0) {
msix_unset_notifier_for_vector(dev, vector);
}
dev->msix_vector_use_notifier = NULL;
dev->msix_vector_release_notifier = NULL;
return ret;
}
/* Undo msix_set_vector_notifiers: fire the release notifier for every
 * unmasked vector (only if MSI-X is enabled without MASKALL, mirroring
 * the registration path) and unregister both notifiers. */
void msix_unset_vector_notifiers(PCIDevice *dev)
{
int vector;
assert(dev->msix_vector_use_notifier &&
dev->msix_vector_release_notifier);
if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
(MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
for (vector = 0; vector < dev->msix_entries_nr; vector++) {
msix_unset_notifier_for_vector(dev, vector);
}
}
dev->msix_vector_use_notifier = NULL;
dev->msix_vector_release_notifier = NULL;
}

View File

@ -13,6 +13,8 @@ void msix_write_config(PCIDevice *pci_dev, uint32_t address,
int msix_uninit(PCIDevice *d, MemoryRegion *bar); int msix_uninit(PCIDevice *d, MemoryRegion *bar);
unsigned int msix_nr_vectors_allocated(const PCIDevice *dev);
void msix_save(PCIDevice *dev, QEMUFile *f); void msix_save(PCIDevice *dev, QEMUFile *f);
void msix_load(PCIDevice *dev, QEMUFile *f); void msix_load(PCIDevice *dev, QEMUFile *f);
@ -29,4 +31,8 @@ void msix_notify(PCIDevice *dev, unsigned vector);
void msix_reset(PCIDevice *dev); void msix_reset(PCIDevice *dev);
int msix_set_vector_notifiers(PCIDevice *dev,
MSIVectorUseNotifier use_notifier,
MSIVectorReleaseNotifier release_notifier);
void msix_unset_vector_notifiers(PCIDevice *dev);
#endif #endif

View File

@ -912,15 +912,6 @@ static DeviceState *apic_init(void *env, uint8_t apic_id)
apic_mapped = 1; apic_mapped = 1;
} }
/* KVM does not support MSI yet. */
if (!kvm_irqchip_in_kernel()) {
msi_supported = true;
}
if (xen_msi_support()) {
msi_supported = true;
}
return dev; return dev;
} }

View File

@ -56,31 +56,27 @@ static void kvm_piix3_setup_irq_routing(bool pci_enabled)
{ {
#ifdef CONFIG_KVM #ifdef CONFIG_KVM
KVMState *s = kvm_state; KVMState *s = kvm_state;
int ret, i; int i;
if (kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) { if (kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) {
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
if (i == 2) { if (i == 2) {
continue; continue;
} }
kvm_irqchip_add_route(s, i, KVM_IRQCHIP_PIC_MASTER, i); kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_MASTER, i);
} }
for (i = 8; i < 16; ++i) { for (i = 8; i < 16; ++i) {
kvm_irqchip_add_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8); kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8);
} }
if (pci_enabled) { if (pci_enabled) {
for (i = 0; i < 24; ++i) { for (i = 0; i < 24; ++i) {
if (i == 0) { if (i == 0) {
kvm_irqchip_add_route(s, i, KVM_IRQCHIP_IOAPIC, 2); kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, 2);
} else if (i != 2) { } else if (i != 2) {
kvm_irqchip_add_route(s, i, KVM_IRQCHIP_IOAPIC, i); kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, i);
} }
} }
} }
ret = kvm_irqchip_commit_routes(s);
if (ret < 0) {
hw_error("KVM IRQ routing setup failed");
}
} }
#endif /* CONFIG_KVM */ #endif /* CONFIG_KVM */
} }

View File

@ -173,6 +173,10 @@ typedef struct PCIDeviceClass {
const char *romfile; const char *romfile;
} PCIDeviceClass; } PCIDeviceClass;
typedef int (*MSIVectorUseNotifier)(PCIDevice *dev, unsigned int vector,
MSIMessage msg);
typedef void (*MSIVectorReleaseNotifier)(PCIDevice *dev, unsigned int vector);
struct PCIDevice { struct PCIDevice {
DeviceState qdev; DeviceState qdev;
/* PCI config space */ /* PCI config space */
@ -243,6 +247,10 @@ struct PCIDevice {
bool has_rom; bool has_rom;
MemoryRegion rom; MemoryRegion rom;
uint32_t rom_bar; uint32_t rom_bar;
/* MSI-X notifiers */
MSIVectorUseNotifier msix_vector_use_notifier;
MSIVectorReleaseNotifier msix_vector_release_notifier;
}; };
void pci_register_bar(PCIDevice *pci_dev, int region_num, void pci_register_bar(PCIDevice *pci_dev, int region_num,

View File

@ -24,6 +24,7 @@
#include "virtio-scsi.h" #include "virtio-scsi.h"
#include "pci.h" #include "pci.h"
#include "qemu-error.h" #include "qemu-error.h"
#include "msi.h"
#include "msix.h" #include "msix.h"
#include "net.h" #include "net.h"
#include "loader.h" #include "loader.h"
@ -539,6 +540,107 @@ static void virtio_pci_guest_notifier_read(void *opaque)
} }
} }
/* Bind queue @queue_no's guest notifier to MSI-X @vector via a kvm irqfd.
 * The MSI route (virq) is allocated on first use of the vector and
 * reference-counted in proxy->vector_irqfd, since several queues may
 * share one vector.  Returns 0 or a negative error. */
static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
unsigned int queue_no,
unsigned int vector,
MSIMessage msg)
{
VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
int fd, ret;
fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vq));
/* First user of this vector: allocate a routing entry for the MSI. */
if (irqfd->users == 0) {
ret = kvm_irqchip_add_msi_route(kvm_state, msg);
if (ret < 0) {
return ret;
}
irqfd->virq = ret;
}
irqfd->users++;
ret = kvm_irqchip_add_irqfd(kvm_state, fd, irqfd->virq);
if (ret < 0) {
/* Drop our reference; free the route if we were the only user. */
if (--irqfd->users == 0) {
kvm_irqchip_release_virq(kvm_state, irqfd->virq);
}
return ret;
}
/* The kernel now handles the notifier; remove the userspace handler. */
qemu_set_fd_handler(fd, NULL, NULL, NULL);
return 0;
}
/* Undo kvm_virtio_pci_vq_vector_use: detach the irqfd, drop the vector's
 * route refcount (releasing the virq on last user), and restore the
 * userspace read handler for the guest notifier. */
static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
unsigned int queue_no,
unsigned int vector)
{
VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
int fd, ret;
fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vq));
ret = kvm_irqchip_remove_irqfd(kvm_state, fd, irqfd->virq);
assert(ret == 0);
if (--irqfd->users == 0) {
kvm_irqchip_release_virq(kvm_state, irqfd->virq);
}
/* Hand notification handling back to userspace. */
qemu_set_fd_handler(fd, virtio_pci_guest_notifier_read, NULL, vq);
}
/* MSI-X "use" notifier: wire up irqfds for every virtqueue mapped to
 * @vector.  On failure, queues already wired for this vector are rolled
 * back.  Returns 0 or a negative error. */
static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector,
MSIMessage msg)
{
VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
VirtIODevice *vdev = proxy->vdev;
int ret, queue_no;
for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
/* Queues are allocated contiguously; a zero size marks the end. */
if (!virtio_queue_get_num(vdev, queue_no)) {
break;
}
if (virtio_queue_vector(vdev, queue_no) != vector) {
continue;
}
ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg);
if (ret < 0) {
goto undo;
}
}
return 0;
undo:
/* Release the queues in [0, queue_no) that use this vector. */
while (--queue_no >= 0) {
if (virtio_queue_vector(vdev, queue_no) != vector) {
continue;
}
kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector);
}
return ret;
}
/* MSI-X "release" notifier: tear down the irqfds of every virtqueue
 * mapped to @vector. */
static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector)
{
VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
VirtIODevice *vdev = proxy->vdev;
int queue_no;
for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
/* Zero queue size marks the end of the configured queues. */
if (!virtio_queue_get_num(vdev, queue_no)) {
break;
}
if (virtio_queue_vector(vdev, queue_no) != vector) {
continue;
}
kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector);
}
}
static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign) static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign)
{ {
VirtIOPCIProxy *proxy = opaque; VirtIOPCIProxy *proxy = opaque;
@ -555,6 +657,9 @@ static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign)
} else { } else {
qemu_set_fd_handler(event_notifier_get_fd(notifier), qemu_set_fd_handler(event_notifier_get_fd(notifier),
NULL, NULL, NULL); NULL, NULL, NULL);
/* Test and clear notifier before closing it,
* in case poll callback didn't have time to run. */
virtio_pci_guest_notifier_read(vq);
event_notifier_cleanup(notifier); event_notifier_cleanup(notifier);
} }
@ -573,6 +678,13 @@ static int virtio_pci_set_guest_notifiers(void *opaque, bool assign)
VirtIODevice *vdev = proxy->vdev; VirtIODevice *vdev = proxy->vdev;
int r, n; int r, n;
/* Must unset vector notifier while guest notifier is still assigned */
if (kvm_irqchip_in_kernel() && !assign) {
msix_unset_vector_notifiers(&proxy->pci_dev);
g_free(proxy->vector_irqfd);
proxy->vector_irqfd = NULL;
}
for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) { for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
if (!virtio_queue_get_num(vdev, n)) { if (!virtio_queue_get_num(vdev, n)) {
break; break;
@ -584,10 +696,24 @@ static int virtio_pci_set_guest_notifiers(void *opaque, bool assign)
} }
} }
/* Must set vector notifier after guest notifier has been assigned */
if (kvm_irqchip_in_kernel() && assign) {
proxy->vector_irqfd =
g_malloc0(sizeof(*proxy->vector_irqfd) *
msix_nr_vectors_allocated(&proxy->pci_dev));
r = msix_set_vector_notifiers(&proxy->pci_dev,
kvm_virtio_pci_vector_use,
kvm_virtio_pci_vector_release);
if (r < 0) {
goto assign_error;
}
}
return 0; return 0;
assign_error: assign_error:
/* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */ /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
assert(assign);
while (--n >= 0) { while (--n >= 0) {
virtio_pci_set_guest_notifier(opaque, n, !assign); virtio_pci_set_guest_notifier(opaque, n, !assign);
} }

View File

@ -25,6 +25,11 @@
#define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT 1 #define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT 1
#define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT) #define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
/* Per-MSI-X-vector irqfd state: the allocated routing entry (virq) and a
 * refcount of the virtqueues currently sharing it. */
typedef struct {
int virq;
unsigned int users;
} VirtIOIRQFD;
typedef struct { typedef struct {
PCIDevice pci_dev; PCIDevice pci_dev;
VirtIODevice *vdev; VirtIODevice *vdev;
@ -44,6 +49,7 @@ typedef struct {
VirtIOSCSIConf scsi; VirtIOSCSIConf scsi;
bool ioeventfd_disabled; bool ioeventfd_disabled;
bool ioeventfd_started; bool ioeventfd_started;
VirtIOIRQFD *vector_irqfd;
} VirtIOPCIProxy; } VirtIOPCIProxy;
void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev); void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev);

View File

@ -57,14 +57,4 @@ void xen_register_framebuffer(struct MemoryRegion *mr);
# define HVM_MAX_VCPUS 32 # define HVM_MAX_VCPUS 32
#endif #endif
static inline int xen_msi_support(void)
{
#if defined(CONFIG_XEN_CTRL_INTERFACE_VERSION) \
&& CONFIG_XEN_CTRL_INTERFACE_VERSION >= 420
return xen_enabled();
#else
return 0;
#endif
}
#endif /* QEMU_HW_XEN_H */ #endif /* QEMU_HW_XEN_H */

View File

@ -40,6 +40,11 @@ static void xen_apic_init(APICCommonState *s)
{ {
memory_region_init_io(&s->io_memory, &xen_apic_io_ops, s, "xen-apic-msi", memory_region_init_io(&s->io_memory, &xen_apic_io_ops, s, "xen-apic-msi",
MSI_SPACE_SIZE); MSI_SPACE_SIZE);
#if defined(CONFIG_XEN_CTRL_INTERFACE_VERSION) \
&& CONFIG_XEN_CTRL_INTERFACE_VERSION >= 420
msi_supported = true;
#endif
} }
static void xen_apic_set_base(APICCommonState *s, uint64_t val) static void xen_apic_set_base(APICCommonState *s, uint64_t val)

236
kvm-all.c
View File

@ -24,6 +24,7 @@
#include "qemu-barrier.h" #include "qemu-barrier.h"
#include "sysemu.h" #include "sysemu.h"
#include "hw/hw.h" #include "hw/hw.h"
#include "hw/msi.h"
#include "gdbstub.h" #include "gdbstub.h"
#include "kvm.h" #include "kvm.h"
#include "bswap.h" #include "bswap.h"
@ -48,6 +49,8 @@
do { } while (0) do { } while (0)
#endif #endif
#define KVM_MSI_HASHTAB_SIZE 256
typedef struct KVMSlot typedef struct KVMSlot
{ {
target_phys_addr_t start_addr; target_phys_addr_t start_addr;
@ -59,6 +62,11 @@ typedef struct KVMSlot
typedef struct kvm_dirty_log KVMDirtyLog; typedef struct kvm_dirty_log KVMDirtyLog;
/* A dynamically allocated MSI routing entry, chained into a bucket of
 * KVMState::msi_hashtab. */
typedef struct KVMMSIRoute {
struct kvm_irq_routing_entry kroute;
QTAILQ_ENTRY(KVMMSIRoute) entry;
} KVMMSIRoute;
struct KVMState struct KVMState
{ {
KVMSlot slots[32]; KVMSlot slots[32];
@ -86,7 +94,9 @@ struct KVMState
struct kvm_irq_routing *irq_routes; struct kvm_irq_routing *irq_routes;
int nr_allocated_irq_routes; int nr_allocated_irq_routes;
uint32_t *used_gsi_bitmap; uint32_t *used_gsi_bitmap;
unsigned int max_gsi; unsigned int gsi_count;
QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE];
bool direct_msi;
#endif #endif
}; };
@ -859,14 +869,17 @@ int kvm_irqchip_set_irq(KVMState *s, int irq, int level)
#ifdef KVM_CAP_IRQ_ROUTING #ifdef KVM_CAP_IRQ_ROUTING
static void set_gsi(KVMState *s, unsigned int gsi) static void set_gsi(KVMState *s, unsigned int gsi)
{ {
assert(gsi < s->max_gsi);
s->used_gsi_bitmap[gsi / 32] |= 1U << (gsi % 32); s->used_gsi_bitmap[gsi / 32] |= 1U << (gsi % 32);
} }
/* Mark @gsi as free in the used-GSI bitmap (32 GSIs per word). */
static void clear_gsi(KVMState *s, unsigned int gsi)
{
s->used_gsi_bitmap[gsi / 32] &= ~(1U << (gsi % 32));
}
static void kvm_init_irq_routing(KVMState *s) static void kvm_init_irq_routing(KVMState *s)
{ {
int gsi_count; int gsi_count, i;
gsi_count = kvm_check_extension(s, KVM_CAP_IRQ_ROUTING); gsi_count = kvm_check_extension(s, KVM_CAP_IRQ_ROUTING);
if (gsi_count > 0) { if (gsi_count > 0) {
@ -875,7 +888,7 @@ static void kvm_init_irq_routing(KVMState *s)
/* Round up so we can search ints using ffs */ /* Round up so we can search ints using ffs */
gsi_bits = ALIGN(gsi_count, 32); gsi_bits = ALIGN(gsi_count, 32);
s->used_gsi_bitmap = g_malloc0(gsi_bits / 8); s->used_gsi_bitmap = g_malloc0(gsi_bits / 8);
s->max_gsi = gsi_bits; s->gsi_count = gsi_count;
/* Mark any over-allocated bits as already in use */ /* Mark any over-allocated bits as already in use */
for (i = gsi_count; i < gsi_bits; i++) { for (i = gsi_count; i < gsi_bits; i++) {
@ -886,9 +899,24 @@ static void kvm_init_irq_routing(KVMState *s)
s->irq_routes = g_malloc0(sizeof(*s->irq_routes)); s->irq_routes = g_malloc0(sizeof(*s->irq_routes));
s->nr_allocated_irq_routes = 0; s->nr_allocated_irq_routes = 0;
if (!s->direct_msi) {
for (i = 0; i < KVM_MSI_HASHTAB_SIZE; i++) {
QTAILQ_INIT(&s->msi_hashtab[i]);
}
}
kvm_arch_init_irq_routing(s); kvm_arch_init_irq_routing(s);
} }
/* Push the cached IRQ routing table to the kernel.  A failing
 * KVM_SET_GSI_ROUTING is treated as fatal (assert). */
static void kvm_irqchip_commit_routes(KVMState *s)
{
int ret;
s->irq_routes->flags = 0;
ret = kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes);
assert(ret == 0);
}
static void kvm_add_routing_entry(KVMState *s, static void kvm_add_routing_entry(KVMState *s,
struct kvm_irq_routing_entry *entry) struct kvm_irq_routing_entry *entry)
{ {
@ -914,12 +942,16 @@ static void kvm_add_routing_entry(KVMState *s,
new->u = entry->u; new->u = entry->u;
set_gsi(s, entry->gsi); set_gsi(s, entry->gsi);
kvm_irqchip_commit_routes(s);
} }
void kvm_irqchip_add_route(KVMState *s, int irq, int irqchip, int pin) void kvm_irqchip_add_irq_route(KVMState *s, int irq, int irqchip, int pin)
{ {
struct kvm_irq_routing_entry e; struct kvm_irq_routing_entry e;
assert(pin < s->gsi_count);
e.gsi = irq; e.gsi = irq;
e.type = KVM_IRQ_ROUTING_IRQCHIP; e.type = KVM_IRQ_ROUTING_IRQCHIP;
e.flags = 0; e.flags = 0;
@ -928,10 +960,167 @@ void kvm_irqchip_add_route(KVMState *s, int irq, int irqchip, int pin)
kvm_add_routing_entry(s, &e); kvm_add_routing_entry(s, &e);
} }
int kvm_irqchip_commit_routes(KVMState *s) void kvm_irqchip_release_virq(KVMState *s, int virq)
{ {
s->irq_routes->flags = 0; struct kvm_irq_routing_entry *e;
return kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes); int i;
for (i = 0; i < s->irq_routes->nr; i++) {
e = &s->irq_routes->entries[i];
if (e->gsi == virq) {
s->irq_routes->nr--;
*e = s->irq_routes->entries[s->irq_routes->nr];
}
}
clear_gsi(s, virq);
kvm_irqchip_commit_routes(s);
}
/* Map an MSI data word to a bucket index in msi_hashtab.  Only the low
 * byte is used: in the IA32 MSI layout it is the most distinctive part of
 * the message.  (Archs with a direct MSI injection API do not need this
 * table at all.) */
static unsigned int kvm_hash_msi(uint32_t data)
{
return (unsigned int)(data & 0xffu);
}
/* Free every dynamically allocated MSI route: release each virq, unlink
 * the entry from its hash bucket and free it.  Used to reclaim GSIs when
 * the bitmap is exhausted (see kvm_irqchip_get_virq). */
static void kvm_flush_dynamic_msi_routes(KVMState *s)
{
KVMMSIRoute *route, *next;
unsigned int hash;
for (hash = 0; hash < KVM_MSI_HASHTAB_SIZE; hash++) {
/* _SAFE variant: entries are removed while iterating. */
QTAILQ_FOREACH_SAFE(route, &s->msi_hashtab[hash], entry, next) {
kvm_irqchip_release_virq(s, route->kroute.gsi);
QTAILQ_REMOVE(&s->msi_hashtab[hash], route, entry);
g_free(route);
}
}
}
/* Allocate a virtual IRQ (GSI) number: scan the used-GSI bitmap for the
 * lowest clear bit.  Without direct MSI support, a full bitmap triggers a
 * one-time flush of all dynamic MSI routes and a retry.  Returns the GSI
 * or -ENOSPC.  Note: the returned GSI is not yet marked used; that
 * happens in kvm_add_routing_entry via set_gsi. */
static int kvm_irqchip_get_virq(KVMState *s)
{
uint32_t *word = s->used_gsi_bitmap;
int max_words = ALIGN(s->gsi_count, 32) / 32;
int i, bit;
bool retry = true;
again:
/* Return the lowest unused GSI in the bitmap */
for (i = 0; i < max_words; i++) {
/* ffs() returns the 1-based index of the first set bit, 0 if none;
 * inverting the word finds the first *clear* bit. */
bit = ffs(~word[i]);
if (!bit) {
continue;
}
return bit - 1 + i * 32;
}
if (!s->direct_msi && retry) {
retry = false;
kvm_flush_dynamic_msi_routes(s);
goto again;
}
return -ENOSPC;
}
/* Find a cached dynamic MSI route matching @msg (full address + data
 * comparison within the hash bucket).  Returns NULL if not cached. */
static KVMMSIRoute *kvm_lookup_msi_route(KVMState *s, MSIMessage msg)
{
unsigned int hash = kvm_hash_msi(msg.data);
KVMMSIRoute *route;
QTAILQ_FOREACH(route, &s->msi_hashtab[hash], entry) {
if (route->kroute.u.msi.address_lo == (uint32_t)msg.address &&
route->kroute.u.msi.address_hi == (msg.address >> 32) &&
route->kroute.u.msi.data == msg.data) {
return route;
}
}
return NULL;
}
/* Inject the MSI described by @msg.  With KVM_CAP_SIGNAL_MSI the message
 * is delivered directly via the KVM_SIGNAL_MSI ioctl; otherwise a routing
 * entry is allocated (and cached in msi_hashtab) on first use and the
 * corresponding GSI is raised.  Returns the ioctl result or a negative
 * error from virq allocation. */
int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
{
struct kvm_msi msi;
KVMMSIRoute *route;
if (s->direct_msi) {
msi.address_lo = (uint32_t)msg.address;
msi.address_hi = msg.address >> 32;
msi.data = msg.data;
msi.flags = 0;
memset(msi.pad, 0, sizeof(msi.pad));
return kvm_vm_ioctl(s, KVM_SIGNAL_MSI, &msi);
}
route = kvm_lookup_msi_route(s, msg);
if (!route) {
int virq;
virq = kvm_irqchip_get_virq(s);
if (virq < 0) {
return virq;
}
route = g_malloc(sizeof(KVMMSIRoute));
route->kroute.gsi = virq;
route->kroute.type = KVM_IRQ_ROUTING_MSI;
route->kroute.flags = 0;
route->kroute.u.msi.address_lo = (uint32_t)msg.address;
route->kroute.u.msi.address_hi = msg.address >> 32;
route->kroute.u.msi.data = msg.data;
kvm_add_routing_entry(s, &route->kroute);
QTAILQ_INSERT_TAIL(&s->msi_hashtab[kvm_hash_msi(msg.data)], route,
entry);
}
assert(route->kroute.type == KVM_IRQ_ROUTING_MSI);
/* Edge-triggered: a single set to 1 injects the MSI. */
return kvm_irqchip_set_irq(s, route->kroute.gsi, 1);
}
/* Allocate a GSI and install a permanent MSI routing entry for @msg.
 * Unlike the cache in kvm_irqchip_send_msi, the caller owns the returned
 * virq and must release it with kvm_irqchip_release_virq.  Returns the
 * virq, or a negative error (-ENOSYS without an in-kernel irqchip). */
int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
{
struct kvm_irq_routing_entry kroute;
int virq;
if (!kvm_irqchip_in_kernel()) {
return -ENOSYS;
}
virq = kvm_irqchip_get_virq(s);
if (virq < 0) {
return virq;
}
kroute.gsi = virq;
kroute.type = KVM_IRQ_ROUTING_MSI;
kroute.flags = 0;
kroute.u.msi.address_lo = (uint32_t)msg.address;
kroute.u.msi.address_hi = msg.address >> 32;
kroute.u.msi.data = msg.data;
kvm_add_routing_entry(s, &kroute);
return virq;
}
static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign)
{
struct kvm_irqfd irqfd = {
.fd = fd,
.gsi = virq,
.flags = assign ? 0 : KVM_IRQFD_FLAG_DEASSIGN,
};
if (!kvm_irqchip_in_kernel()) {
return -ENOSYS;
}
return kvm_vm_ioctl(s, KVM_IRQFD, &irqfd);
} }
#else /* !KVM_CAP_IRQ_ROUTING */ #else /* !KVM_CAP_IRQ_ROUTING */
@ -939,8 +1128,33 @@ int kvm_irqchip_commit_routes(KVMState *s)
static void kvm_init_irq_routing(KVMState *s) static void kvm_init_irq_routing(KVMState *s)
{ {
} }
/* Stubs for hosts built without KVM_CAP_IRQ_ROUTING.  These paths are
 * only reachable with an in-kernel irqchip, which such hosts cannot
 * enable, so reaching them is a programming error: abort. */
int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
{
abort();
}
int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
{
abort();
}
static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign)
{
abort();
}
#endif /* !KVM_CAP_IRQ_ROUTING */ #endif /* !KVM_CAP_IRQ_ROUTING */
/* Attach eventfd @fd to @virq so the kernel injects the IRQ directly. */
int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq)
{
return kvm_irqchip_assign_irqfd(s, fd, virq, true);
}
/* Detach eventfd @fd from @virq (KVM_IRQFD_FLAG_DEASSIGN). */
int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq)
{
return kvm_irqchip_assign_irqfd(s, fd, virq, false);
}
static int kvm_irqchip_create(KVMState *s) static int kvm_irqchip_create(KVMState *s)
{ {
QemuOptsList *list = qemu_find_opts("machine"); QemuOptsList *list = qemu_find_opts("machine");
@ -948,7 +1162,7 @@ static int kvm_irqchip_create(KVMState *s)
if (QTAILQ_EMPTY(&list->head) || if (QTAILQ_EMPTY(&list->head) ||
!qemu_opt_get_bool(QTAILQ_FIRST(&list->head), !qemu_opt_get_bool(QTAILQ_FIRST(&list->head),
"kernel_irqchip", false) || "kernel_irqchip", true) ||
!kvm_check_extension(s, KVM_CAP_IRQCHIP)) { !kvm_check_extension(s, KVM_CAP_IRQCHIP)) {
return 0; return 0;
} }
@ -1072,6 +1286,8 @@ int kvm_init(void)
s->pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2); s->pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2);
#endif #endif
s->direct_msi = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0);
ret = kvm_arch_init(s); ret = kvm_arch_init(s);
if (ret < 0) { if (ret < 0) {
goto err; goto err;

View File

@ -12,10 +12,14 @@
#include "qemu-common.h" #include "qemu-common.h"
#include "hw/hw.h" #include "hw/hw.h"
#include "hw/msi.h"
#include "cpu.h" #include "cpu.h"
#include "gdbstub.h" #include "gdbstub.h"
#include "kvm.h" #include "kvm.h"
KVMState *kvm_state;
bool kvm_kernel_irqchip;
int kvm_init_vcpu(CPUArchState *env) int kvm_init_vcpu(CPUArchState *env)
{ {
return -ENOSYS; return -ENOSYS;
@ -128,3 +132,22 @@ int kvm_on_sigbus(int code, void *addr)
{ {
return 1; return 1;
} }
/* kvm-stub fallbacks for builds without KVM support: report -ENOSYS so
 * callers fall back to userspace MSI delivery. */
int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
{
return -ENOSYS;
}
/* Nothing to release without KVM. */
void kvm_irqchip_release_virq(KVMState *s, int virq)
{
}
int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq)
{
return -ENOSYS;
}
int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq)
{
return -ENOSYS;
}

18
kvm.h
View File

@ -44,6 +44,10 @@ typedef struct KVMCapabilityInfo {
#define KVM_CAP_INFO(CAP) { "KVM_CAP_" stringify(CAP), KVM_CAP_##CAP } #define KVM_CAP_INFO(CAP) { "KVM_CAP_" stringify(CAP), KVM_CAP_##CAP }
#define KVM_CAP_LAST_INFO { NULL, 0 } #define KVM_CAP_LAST_INFO { NULL, 0 }
struct KVMState;
typedef struct KVMState KVMState;
extern KVMState *kvm_state;
/* external API */ /* external API */
int kvm_init(void); int kvm_init(void);
@ -88,10 +92,6 @@ int kvm_on_sigbus(int code, void *addr);
/* internal API */ /* internal API */
struct KVMState;
typedef struct KVMState KVMState;
extern KVMState *kvm_state;
int kvm_ioctl(KVMState *s, int type, ...); int kvm_ioctl(KVMState *s, int type, ...);
int kvm_vm_ioctl(KVMState *s, int type, ...); int kvm_vm_ioctl(KVMState *s, int type, ...);
@ -132,9 +132,9 @@ int kvm_arch_on_sigbus(int code, void *addr);
void kvm_arch_init_irq_routing(KVMState *s); void kvm_arch_init_irq_routing(KVMState *s);
int kvm_irqchip_set_irq(KVMState *s, int irq, int level); int kvm_irqchip_set_irq(KVMState *s, int irq, int level);
int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg);
void kvm_irqchip_add_route(KVMState *s, int gsi, int irqchip, int pin); void kvm_irqchip_add_irq_route(KVMState *s, int gsi, int irqchip, int pin);
int kvm_irqchip_commit_routes(KVMState *s);
void kvm_put_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); void kvm_put_apic_state(DeviceState *d, struct kvm_lapic_state *kapic);
void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic);
@ -212,4 +212,10 @@ int kvm_set_ioeventfd_mmio(int fd, uint32_t adr, uint32_t val, bool assign,
uint32_t size); uint32_t size);
int kvm_set_ioeventfd_pio_word(int fd, uint16_t adr, uint16_t val, bool assign); int kvm_set_ioeventfd_pio_word(int fd, uint16_t adr, uint16_t val, bool assign);
int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg);
void kvm_irqchip_release_virq(KVMState *s, int virq);
int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq);
int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq);
#endif #endif

View File

@ -449,6 +449,30 @@ struct kvm_ppc_pvinfo {
__u8 pad[108]; __u8 pad[108];
}; };
/* for KVM_PPC_GET_SMMU_INFO */
#define KVM_PPC_PAGE_SIZES_MAX_SZ 8
struct kvm_ppc_one_page_size {
__u32 page_shift; /* Page shift (or 0) */
__u32 pte_enc; /* Encoding in the HPTE (>>12) */
};
/*
 * One segment base page size class and the actual page sizes it supports.
 * Up to KVM_PPC_PAGE_SIZES_MAX_SZ entries in enc[]; unused entries are
 * presumably marked by page_shift == 0 (per the "(or 0)" field comments) —
 * NOTE(review): confirm against the kernel's KVM_PPC_GET_SMMU_INFO docs.
 */
struct kvm_ppc_one_seg_page_size {
__u32 page_shift; /* Base page shift of segment (or 0) */
__u32 slb_enc; /* SLB encoding for BookS */
struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
};
#define KVM_PPC_PAGE_SIZES_REAL 0x00000001
#define KVM_PPC_1T_SEGMENTS 0x00000002
/*
 * Response payload for the KVM_PPC_GET_SMMU_INFO ioctl
 * (available with KVM_CAP_PPC_GET_SMMU_INFO): describes the MMU
 * capabilities of the host — flags (KVM_PPC_PAGE_SIZES_REAL,
 * KVM_PPC_1T_SEGMENTS), SLB size, and the supported segment/page sizes.
 */
struct kvm_ppc_smmu_info {
__u64 flags; /* KVM_PPC_* capability flags defined above */
__u32 slb_size; /* number of SLB entries */
__u32 pad; /* explicit padding to keep sps[] 64-bit aligned */
struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
};
#define KVMIO 0xAE #define KVMIO 0xAE
/* machine type bits, to be used as argument to KVM_CREATE_VM */ /* machine type bits, to be used as argument to KVM_CREATE_VM */
@ -590,6 +614,8 @@ struct kvm_ppc_pvinfo {
#define KVM_CAP_SYNC_REGS 74 #define KVM_CAP_SYNC_REGS 74
#define KVM_CAP_PCI_2_3 75 #define KVM_CAP_PCI_2_3 75
#define KVM_CAP_KVMCLOCK_CTRL 76 #define KVM_CAP_KVMCLOCK_CTRL 76
#define KVM_CAP_SIGNAL_MSI 77
#define KVM_CAP_PPC_GET_SMMU_INFO 78
#ifdef KVM_CAP_IRQ_ROUTING #ifdef KVM_CAP_IRQ_ROUTING
@ -715,6 +741,14 @@ struct kvm_one_reg {
__u64 addr; __u64 addr;
}; };
/*
 * Argument for the KVM_SIGNAL_MSI ioctl (available with KVM_CAP_SIGNAL_MSI):
 * a single MSI message to be injected directly by the kernel.
 */
struct kvm_msi {
__u32 address_lo; /* low 32 bits of the MSI address */
__u32 address_hi; /* high 32 bits of the MSI address */
__u32 data; /* MSI data payload */
__u32 flags; /* presumably must be zero — no flag bits visible here; verify against kernel ABI docs */
__u8 pad[16]; /* reserved, for future extension of the ABI */
};
/* /*
* ioctls for VM fds * ioctls for VM fds
*/ */
@ -789,6 +823,10 @@ struct kvm_s390_ucas_mapping {
/* Available with KVM_CAP_PCI_2_3 */ /* Available with KVM_CAP_PCI_2_3 */
#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \ #define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \
struct kvm_assigned_pci_dev) struct kvm_assigned_pci_dev)
/* Available with KVM_CAP_SIGNAL_MSI */
#define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi)
/* Available with KVM_CAP_PPC_GET_SMMU_INFO */
#define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info)
/* /*
* ioctls for vcpu fds * ioctls for vcpu fds

View File

@ -251,6 +251,7 @@ typedef struct PCIEAERLog PCIEAERLog;
typedef struct PCIEAERErr PCIEAERErr; typedef struct PCIEAERErr PCIEAERErr;
typedef struct PCIEPort PCIEPort; typedef struct PCIEPort PCIEPort;
typedef struct PCIESlot PCIESlot; typedef struct PCIESlot PCIESlot;
typedef struct MSIMessage MSIMessage;
typedef struct SerialState SerialState; typedef struct SerialState SerialState;
typedef struct IRQState *qemu_irq; typedef struct IRQState *qemu_irq;
typedef struct PCMCIACardState PCMCIACardState; typedef struct PCMCIACardState PCMCIACardState;

View File

@ -22,6 +22,7 @@ MSR_IA32_VMX_TRUE_PINBASED_CTLS = 0x48D
MSR_IA32_VMX_TRUE_PROCBASED_CTLS = 0x48E MSR_IA32_VMX_TRUE_PROCBASED_CTLS = 0x48E
MSR_IA32_VMX_TRUE_EXIT_CTLS = 0x48F MSR_IA32_VMX_TRUE_EXIT_CTLS = 0x48F
MSR_IA32_VMX_TRUE_ENTRY_CTLS = 0x490 MSR_IA32_VMX_TRUE_ENTRY_CTLS = 0x490
MSR_IA32_VMX_VMFUNC = 0x491
class msr(object): class msr(object):
def __init__(self): def __init__(self):
@ -147,6 +148,9 @@ controls = [
6: 'WBINVD exiting', 6: 'WBINVD exiting',
7: 'Unrestricted guest', 7: 'Unrestricted guest',
10: 'PAUSE-loop exiting', 10: 'PAUSE-loop exiting',
11: 'RDRAND exiting',
12: 'Enable INVPCID',
13: 'Enable VM functions',
}, },
cap_msr = MSR_IA32_VMX_PROCBASED_CTLS2, cap_msr = MSR_IA32_VMX_PROCBASED_CTLS2,
), ),
@ -193,6 +197,7 @@ controls = [
8: 'Wait-for-SIPI activity state', 8: 'Wait-for-SIPI activity state',
(16,24): 'Number of CR3-target values', (16,24): 'Number of CR3-target values',
(25,27): 'MSR-load/store count recommenation', (25,27): 'MSR-load/store count recommenation',
28: 'IA32_SMM_MONITOR_CTL[2] can be set to 1',
(32,62): 'MSEG revision identifier', (32,62): 'MSEG revision identifier',
}, },
msr = MSR_IA32_VMX_MISC_CTLS, msr = MSR_IA32_VMX_MISC_CTLS,
@ -208,6 +213,7 @@ controls = [
16: '2MB EPT pages', 16: '2MB EPT pages',
17: '1GB EPT pages', 17: '1GB EPT pages',
20: 'INVEPT supported', 20: 'INVEPT supported',
21: 'EPT accessed and dirty flags',
25: 'Single-context INVEPT', 25: 'Single-context INVEPT',
26: 'All-context INVEPT', 26: 'All-context INVEPT',
32: 'INVVPID supported', 32: 'INVVPID supported',
@ -218,6 +224,13 @@ controls = [
}, },
msr = MSR_IA32_VMX_EPT_VPID_CAP, msr = MSR_IA32_VMX_EPT_VPID_CAP,
), ),
Misc(
name = 'VM Functions',
bits = {
0: 'EPTP Switching',
},
msr = MSR_IA32_VMX_VMFUNC,
),
] ]
for c in controls: for c in controls: