diff --git a/hw/apic.c b/hw/apic.c index 4eeaf8801c..5fbf01c278 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -19,6 +19,7 @@ #include "apic_internal.h" #include "apic.h" #include "ioapic.h" +#include "msi.h" #include "host-utils.h" #include "trace.h" #include "pc.h" @@ -862,6 +863,8 @@ static void apic_init(APICCommonState *s) s->timer = qemu_new_timer_ns(vm_clock, apic_timer, s); local_apics[s->idx] = s; + + msi_supported = true; } static void apic_class_init(ObjectClass *klass, void *data) diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c index ffe7a521b7..8ba4079025 100644 --- a/hw/kvm/apic.c +++ b/hw/kvm/apic.c @@ -10,6 +10,7 @@ * See the COPYING file in the top-level directory. */ #include "hw/apic_internal.h" +#include "hw/msi.h" #include "kvm.h" static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, @@ -145,10 +146,39 @@ static void kvm_apic_external_nmi(APICCommonState *s) run_on_cpu(s->cpu_env, do_inject_external_nmi, s); } +static uint64_t kvm_apic_mem_read(void *opaque, target_phys_addr_t addr, + unsigned size) +{ + return ~(uint64_t)0; +} + +static void kvm_apic_mem_write(void *opaque, target_phys_addr_t addr, + uint64_t data, unsigned size) +{ + MSIMessage msg = { .address = addr, .data = data }; + int ret; + + ret = kvm_irqchip_send_msi(kvm_state, msg); + if (ret < 0) { + fprintf(stderr, "KVM: injection failed, MSI lost (%s)\n", + strerror(-ret)); + } +} + +static const MemoryRegionOps kvm_apic_io_ops = { + .read = kvm_apic_mem_read, + .write = kvm_apic_mem_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + static void kvm_apic_init(APICCommonState *s) { - memory_region_init_reservation(&s->io_memory, "kvm-apic-msi", - MSI_SPACE_SIZE); + memory_region_init_io(&s->io_memory, &kvm_apic_io_ops, s, "kvm-apic-msi", + MSI_SPACE_SIZE); + + if (kvm_has_gsi_routing()) { + msi_supported = true; + } } static void kvm_apic_class_init(ObjectClass *klass, void *data) diff --git a/hw/msi.h b/hw/msi.h index 3040bb0b43..75747abc25 100644 --- a/hw/msi.h +++ b/hw/msi.h @@ -24,6 +24,11 @@ #include "qemu-common.h" #include "pci.h" +struct MSIMessage { + uint64_t address; + uint32_t data; +}; + extern bool msi_supported; bool msi_enabled(const PCIDevice *dev); diff --git a/hw/msix.c b/hw/msix.c index 3835eaaf28..59c7a8388f 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -35,6 +35,15 @@ #define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2) #define MSIX_MAX_ENTRIES 32 +static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) +{ + uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE; + MSIMessage msg; + + msg.address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); + msg.data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); + return msg; +} /* Add MSI-X capability to the config space for the device. */ /* Given a bar and its size, add MSI-X table on top of it @@ -130,13 +139,34 @@ static bool msix_is_masked(PCIDevice *dev, int vector) return msix_vector_masked(dev, vector, dev->msix_function_masked); } +static void msix_fire_vector_notifier(PCIDevice *dev, + unsigned int vector, bool is_masked) +{ + MSIMessage msg; + int ret; + + if (!dev->msix_vector_use_notifier) { + return; + } + if (is_masked) { + dev->msix_vector_release_notifier(dev, vector); + } else { + msg = msix_get_message(dev, vector); + ret = dev->msix_vector_use_notifier(dev, vector, msg); + assert(ret >= 0); + } +} + static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked) { bool is_masked = msix_is_masked(dev, vector); + if (is_masked == was_masked) { return; } + msix_fire_vector_notifier(dev, vector, is_masked); + if (!is_masked && msix_is_pending(dev, vector)) { msix_clr_pending(dev, vector); msix_notify(dev, vector); @@ -222,10 +252,14 @@ static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar) static void msix_mask_all(struct PCIDevice *dev, unsigned nentries) { int vector; + for (vector = 0; vector < nentries; ++vector) { unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; + bool was_masked = msix_is_masked(dev, vector); + dev->msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT; + msix_handle_mask_update(dev, vector, was_masked); } } @@ -317,6 +351,7 @@ void msix_save(PCIDevice *dev, QEMUFile *f) void msix_load(PCIDevice *dev, QEMUFile *f) { unsigned n = dev->msix_entries_nr; + unsigned int vector; if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) { return; @@ -326,6 +361,10 @@ void msix_load(PCIDevice *dev, QEMUFile *f) qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE); qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8); msix_update_function_masked(dev); + + for (vector = 0; vector < n; vector++) { + msix_handle_mask_update(dev, vector, true); + } } /* Does device support MSI-X? */ @@ -352,9 +391,7 @@ uint32_t msix_bar_size(PCIDevice *dev) /* Send an MSI-X message */ void msix_notify(PCIDevice *dev, unsigned vector) { - uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE; - uint64_t address; - uint32_t data; + MSIMessage msg; if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) return; @@ -363,9 +400,9 @@ void msix_notify(PCIDevice *dev, unsigned vector) return; } - address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); - data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); - stl_le_phys(address, data); + msg = msix_get_message(dev, vector); + + stl_le_phys(msg.address, msg.data); } void msix_reset(PCIDevice *dev) @@ -414,3 +451,75 @@ void msix_unuse_all_vectors(PCIDevice *dev) return; msix_free_irq_entries(dev); } + +unsigned int msix_nr_vectors_allocated(const PCIDevice *dev) +{ + return dev->msix_entries_nr; +} + +static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector) +{ + MSIMessage msg; + + if (msix_is_masked(dev, vector)) { + return 0; + } + msg = msix_get_message(dev, vector); + return dev->msix_vector_use_notifier(dev, vector, msg); +} + +static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector) +{ + if (msix_is_masked(dev, vector)) { + return; + } + dev->msix_vector_release_notifier(dev, vector); +} + +int msix_set_vector_notifiers(PCIDevice *dev, + MSIVectorUseNotifier use_notifier, + MSIVectorReleaseNotifier release_notifier) +{ + int vector, ret; + + assert(use_notifier && release_notifier); + + dev->msix_vector_use_notifier = use_notifier; + dev->msix_vector_release_notifier = release_notifier; + + if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & + (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) { + for (vector = 0; vector < dev->msix_entries_nr; vector++) { + ret = msix_set_notifier_for_vector(dev, vector); + if (ret < 0) { + goto undo; + } + } + } + return 0; + +undo: + while (--vector >= 0) { + msix_unset_notifier_for_vector(dev, vector); + } + dev->msix_vector_use_notifier = NULL; + dev->msix_vector_release_notifier = NULL; + return ret; +} + +void msix_unset_vector_notifiers(PCIDevice *dev) +{ + int vector; + + assert(dev->msix_vector_use_notifier && + dev->msix_vector_release_notifier); + + if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & + (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) { + for (vector = 0; vector < dev->msix_entries_nr; vector++) { + msix_unset_notifier_for_vector(dev, vector); + } + } + dev->msix_vector_use_notifier = NULL; + dev->msix_vector_release_notifier = NULL; +} diff --git a/hw/msix.h b/hw/msix.h index 5aba22b858..50aee8221a 100644 --- a/hw/msix.h +++ b/hw/msix.h @@ -13,6 +13,8 @@ void msix_write_config(PCIDevice *pci_dev, uint32_t address, int msix_uninit(PCIDevice *d, MemoryRegion *bar); +unsigned int msix_nr_vectors_allocated(const PCIDevice *dev); + void msix_save(PCIDevice *dev, QEMUFile *f); void msix_load(PCIDevice *dev, QEMUFile *f); @@ -29,4 +31,8 @@ void msix_notify(PCIDevice *dev, unsigned vector); void msix_reset(PCIDevice *dev); +int msix_set_vector_notifiers(PCIDevice *dev, + MSIVectorUseNotifier use_notifier, + MSIVectorReleaseNotifier release_notifier); +void msix_unset_vector_notifiers(PCIDevice *dev); #endif diff --git a/hw/pc.c b/hw/pc.c index e81a06c161..c790bcbfd7 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -912,15 +912,6 @@ static DeviceState *apic_init(void *env, uint8_t apic_id) apic_mapped = 1; } - /* KVM does not support MSI yet. */ - if (!kvm_irqchip_in_kernel()) { - msi_supported = true; - } - - if (xen_msi_support()) { - msi_supported = true; - } - return dev; } diff --git a/hw/pc_piix.c b/hw/pc_piix.c index a7aad4b022..f49b0aaf89 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -56,31 +56,27 @@ static void kvm_piix3_setup_irq_routing(bool pci_enabled) { #ifdef CONFIG_KVM KVMState *s = kvm_state; - int ret, i; + int i; if (kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) { for (i = 0; i < 8; ++i) { if (i == 2) { continue; } - kvm_irqchip_add_route(s, i, KVM_IRQCHIP_PIC_MASTER, i); + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_MASTER, i); } for (i = 8; i < 16; ++i) { - kvm_irqchip_add_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8); + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8); } if (pci_enabled) { for (i = 0; i < 24; ++i) { if (i == 0) { - kvm_irqchip_add_route(s, i, KVM_IRQCHIP_IOAPIC, 2); + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, 2); } else if (i != 2) { - kvm_irqchip_add_route(s, i, KVM_IRQCHIP_IOAPIC, i); + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, i); } } } - ret = kvm_irqchip_commit_routes(s); - if (ret < 0) { - hw_error("KVM IRQ routing setup failed"); - } } #endif /* CONFIG_KVM */ } diff --git a/hw/pci.h b/hw/pci.h index 8d0aa498e5..c3cacce046 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -173,6 +173,10 @@ typedef struct PCIDeviceClass { const char *romfile; } PCIDeviceClass; +typedef int (*MSIVectorUseNotifier)(PCIDevice *dev, unsigned int vector, + MSIMessage msg); +typedef void (*MSIVectorReleaseNotifier)(PCIDevice *dev, unsigned int vector); + struct PCIDevice { DeviceState qdev; /* PCI config space */ @@ -243,6 +247,10 @@ struct PCIDevice { bool has_rom; MemoryRegion rom; uint32_t rom_bar; + + /* MSI-X notifiers */ + MSIVectorUseNotifier msix_vector_use_notifier; + MSIVectorReleaseNotifier msix_vector_release_notifier; }; void pci_register_bar(PCIDevice *pci_dev, int region_num, diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index 79b86f1aad..d08c1590d2 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -24,6 +24,7 @@ #include "virtio-scsi.h" #include "pci.h" #include "qemu-error.h" +#include "msi.h" #include "msix.h" #include "net.h" #include "loader.h" @@ -539,6 +540,107 @@ static void virtio_pci_guest_notifier_read(void *opaque) } } +static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, + unsigned int queue_no, + unsigned int vector, + MSIMessage msg) +{ + VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no); + VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; + int fd, ret; + + fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vq)); + + if (irqfd->users == 0) { + ret = kvm_irqchip_add_msi_route(kvm_state, msg); + if (ret < 0) { + return ret; + } + irqfd->virq = ret; + } + irqfd->users++; + + ret = kvm_irqchip_add_irqfd(kvm_state, fd, irqfd->virq); + if (ret < 0) { + if (--irqfd->users == 0) { + kvm_irqchip_release_virq(kvm_state, irqfd->virq); + } + return ret; + } + + qemu_set_fd_handler(fd, NULL, NULL, NULL); + + return 0; +} + +static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy, + unsigned int queue_no, + unsigned int vector) +{ + VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no); + VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; + int fd, ret; + + fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vq)); + + ret = kvm_irqchip_remove_irqfd(kvm_state, fd, irqfd->virq); + assert(ret == 0); + + if (--irqfd->users == 0) { + kvm_irqchip_release_virq(kvm_state, irqfd->virq); + } + + qemu_set_fd_handler(fd, virtio_pci_guest_notifier_read, NULL, vq); +} + +static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector, + MSIMessage msg) +{ + VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); + VirtIODevice *vdev = proxy->vdev; + int ret, queue_no; + + for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + break; + } + if (virtio_queue_vector(vdev, queue_no) != vector) { + continue; + } + ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg); + if (ret < 0) { + goto undo; + } + } + return 0; + +undo: + while (--queue_no >= 0) { + if (virtio_queue_vector(vdev, queue_no) != vector) { + continue; + } + kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector); + } + return ret; +} + +static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector) +{ + VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); + VirtIODevice *vdev = proxy->vdev; + int queue_no; + + for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + break; + } + if (virtio_queue_vector(vdev, queue_no) != vector) { + continue; + } + kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector); + } +} + static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign) { VirtIOPCIProxy *proxy = opaque; @@ -555,6 +657,9 @@ static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign) } else { qemu_set_fd_handler(event_notifier_get_fd(notifier), NULL, NULL, NULL); + /* Test and clear notifier before closing it, + * in case poll callback didn't have time to run. */ + virtio_pci_guest_notifier_read(vq); event_notifier_cleanup(notifier); } @@ -573,6 +678,13 @@ static int virtio_pci_set_guest_notifiers(void *opaque, bool assign) VirtIODevice *vdev = proxy->vdev; int r, n; + /* Must unset vector notifier while guest notifier is still assigned */ + if (kvm_irqchip_in_kernel() && !assign) { + msix_unset_vector_notifiers(&proxy->pci_dev); + g_free(proxy->vector_irqfd); + proxy->vector_irqfd = NULL; + } + for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) { if (!virtio_queue_get_num(vdev, n)) { break; @@ -584,10 +696,24 @@ static int virtio_pci_set_guest_notifiers(void *opaque, bool assign) } } + /* Must set vector notifier after guest notifier has been assigned */ + if (kvm_irqchip_in_kernel() && assign) { + proxy->vector_irqfd = + g_malloc0(sizeof(*proxy->vector_irqfd) * + msix_nr_vectors_allocated(&proxy->pci_dev)); + r = msix_set_vector_notifiers(&proxy->pci_dev, + kvm_virtio_pci_vector_use, + kvm_virtio_pci_vector_release); + if (r < 0) { + goto assign_error; + } + } + return 0; assign_error: /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */ + assert(assign); while (--n >= 0) { virtio_pci_set_guest_notifier(opaque, n, !assign); } diff --git a/hw/virtio-pci.h b/hw/virtio-pci.h index 889e59e421..91b791ba9d 100644 --- a/hw/virtio-pci.h +++ b/hw/virtio-pci.h @@ -25,6 +25,11 @@ #define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT 1 #define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT) +typedef struct { + int virq; + unsigned int users; +} VirtIOIRQFD; + typedef struct { PCIDevice pci_dev; VirtIODevice *vdev; @@ -44,6 +49,7 @@ typedef struct { VirtIOSCSIConf scsi; bool ioeventfd_disabled; bool ioeventfd_started; + VirtIOIRQFD *vector_irqfd; } VirtIOPCIProxy; void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev); diff --git a/hw/xen.h b/hw/xen.h index 3ae4cd0f5c..e5926b7b8a 100644 --- a/hw/xen.h +++ b/hw/xen.h @@ -57,14 +57,4 @@ void xen_register_framebuffer(struct MemoryRegion *mr); # define HVM_MAX_VCPUS 32 #endif -static inline int xen_msi_support(void) -{ -#if defined(CONFIG_XEN_CTRL_INTERFACE_VERSION) \ - && CONFIG_XEN_CTRL_INTERFACE_VERSION >= 420 - return xen_enabled(); -#else - return 0; -#endif -} - #endif /* QEMU_HW_XEN_H */ diff --git a/hw/xen_apic.c b/hw/xen_apic.c index 1725ff67dd..a9e101f315 100644 --- a/hw/xen_apic.c +++ b/hw/xen_apic.c @@ -40,6 +40,11 @@ static void xen_apic_init(APICCommonState *s) { memory_region_init_io(&s->io_memory, &xen_apic_io_ops, s, "xen-apic-msi", MSI_SPACE_SIZE); + +#if defined(CONFIG_XEN_CTRL_INTERFACE_VERSION) \ + && CONFIG_XEN_CTRL_INTERFACE_VERSION >= 420 + msi_supported = true; +#endif } static void xen_apic_set_base(APICCommonState *s, uint64_t val) diff --git a/kvm-all.c b/kvm-all.c index 9b73ccfbec..489ee53ad2 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -24,6 +24,7 @@ #include "qemu-barrier.h" #include "sysemu.h" #include "hw/hw.h" +#include "hw/msi.h" #include "gdbstub.h" #include "kvm.h" #include "bswap.h" @@ -48,6 +49,8 @@ do { } while (0) #endif +#define KVM_MSI_HASHTAB_SIZE 256 + typedef struct KVMSlot { target_phys_addr_t start_addr; @@ -59,6 +62,11 @@ typedef struct KVMSlot typedef struct kvm_dirty_log KVMDirtyLog; +typedef struct KVMMSIRoute { + struct kvm_irq_routing_entry kroute; + QTAILQ_ENTRY(KVMMSIRoute) entry; +} KVMMSIRoute; + struct KVMState { KVMSlot slots[32]; @@ -86,7 +94,9 @@ struct KVMState struct kvm_irq_routing *irq_routes; int nr_allocated_irq_routes; uint32_t *used_gsi_bitmap; - unsigned int max_gsi; + unsigned int gsi_count; + QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE]; + bool direct_msi; #endif }; @@ -859,14 +869,17 @@ int kvm_irqchip_set_irq(KVMState *s, int irq, int level) #ifdef KVM_CAP_IRQ_ROUTING static void set_gsi(KVMState *s, unsigned int gsi) { - assert(gsi < s->max_gsi); - s->used_gsi_bitmap[gsi / 32] |= 1U << (gsi % 32); } +static void clear_gsi(KVMState *s, unsigned int gsi) +{ + s->used_gsi_bitmap[gsi / 32] &= ~(1U << (gsi % 32)); +} + static void kvm_init_irq_routing(KVMState *s) { - int gsi_count; + int gsi_count, i; gsi_count = kvm_check_extension(s, KVM_CAP_IRQ_ROUTING); if (gsi_count > 0) { @@ -875,7 +888,7 @@ static void kvm_init_irq_routing(KVMState *s) /* Round up so we can search ints using ffs */ gsi_bits = ALIGN(gsi_count, 32); s->used_gsi_bitmap = g_malloc0(gsi_bits / 8); - s->max_gsi = gsi_bits; + s->gsi_count = gsi_count; /* Mark any over-allocated bits as already in use */ for (i = gsi_count; i < gsi_bits; i++) { @@ -886,9 +899,24 @@ static void kvm_init_irq_routing(KVMState *s) s->irq_routes = g_malloc0(sizeof(*s->irq_routes)); s->nr_allocated_irq_routes = 0; + if (!s->direct_msi) { + for (i = 0; i < KVM_MSI_HASHTAB_SIZE; i++) { + QTAILQ_INIT(&s->msi_hashtab[i]); + } + } + kvm_arch_init_irq_routing(s); } +static void kvm_irqchip_commit_routes(KVMState *s) +{ + int ret; + + s->irq_routes->flags = 0; + ret = kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes); + assert(ret == 0); +} + static void kvm_add_routing_entry(KVMState *s, struct kvm_irq_routing_entry *entry) { @@ -914,12 +942,16 @@ static void kvm_add_routing_entry(KVMState *s, new->u = entry->u; set_gsi(s, entry->gsi); + + kvm_irqchip_commit_routes(s); } -void kvm_irqchip_add_route(KVMState *s, int irq, int irqchip, int pin) +void kvm_irqchip_add_irq_route(KVMState *s, int irq, int irqchip, int pin) { struct kvm_irq_routing_entry e; + assert(pin < s->gsi_count); + e.gsi = irq; e.type = KVM_IRQ_ROUTING_IRQCHIP; e.flags = 0; @@ -928,10 +960,167 @@ void kvm_irqchip_add_route(KVMState *s, int irq, int irqchip, int pin) kvm_add_routing_entry(s, &e); } -int kvm_irqchip_commit_routes(KVMState *s) +void kvm_irqchip_release_virq(KVMState *s, int virq) { - s->irq_routes->flags = 0; - return kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes); + struct kvm_irq_routing_entry *e; + int i; + + for (i = 0; i < s->irq_routes->nr; i++) { + e = &s->irq_routes->entries[i]; + if (e->gsi == virq) { + s->irq_routes->nr--; + *e = s->irq_routes->entries[s->irq_routes->nr]; + } + } + clear_gsi(s, virq); + + kvm_irqchip_commit_routes(s); +} + +static unsigned int kvm_hash_msi(uint32_t data) +{ + /* This is optimized for IA32 MSI layout. However, no other arch shall + * repeat the mistake of not providing a direct MSI injection API. */ + return data & 0xff; +} + +static void kvm_flush_dynamic_msi_routes(KVMState *s) +{ + KVMMSIRoute *route, *next; + unsigned int hash; + + for (hash = 0; hash < KVM_MSI_HASHTAB_SIZE; hash++) { + QTAILQ_FOREACH_SAFE(route, &s->msi_hashtab[hash], entry, next) { + kvm_irqchip_release_virq(s, route->kroute.gsi); + QTAILQ_REMOVE(&s->msi_hashtab[hash], route, entry); + g_free(route); + } + } +} + +static int kvm_irqchip_get_virq(KVMState *s) +{ + uint32_t *word = s->used_gsi_bitmap; + int max_words = ALIGN(s->gsi_count, 32) / 32; + int i, bit; + bool retry = true; + +again: + /* Return the lowest unused GSI in the bitmap */ + for (i = 0; i < max_words; i++) { + bit = ffs(~word[i]); + if (!bit) { + continue; + } + + return bit - 1 + i * 32; + } + if (!s->direct_msi && retry) { + retry = false; + kvm_flush_dynamic_msi_routes(s); + goto again; + } + return -ENOSPC; + +} + +static KVMMSIRoute *kvm_lookup_msi_route(KVMState *s, MSIMessage msg) +{ + unsigned int hash = kvm_hash_msi(msg.data); + KVMMSIRoute *route; + + QTAILQ_FOREACH(route, &s->msi_hashtab[hash], entry) { + if (route->kroute.u.msi.address_lo == (uint32_t)msg.address && + route->kroute.u.msi.address_hi == (msg.address >> 32) && + route->kroute.u.msi.data == msg.data) { + return route; + } + } + return NULL; +} + +int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) +{ + struct kvm_msi msi; + KVMMSIRoute *route; + + if (s->direct_msi) { + msi.address_lo = (uint32_t)msg.address; + msi.address_hi = msg.address >> 32; + msi.data = msg.data; + msi.flags = 0; + memset(msi.pad, 0, sizeof(msi.pad)); + + return kvm_vm_ioctl(s, KVM_SIGNAL_MSI, &msi); + } + + route = kvm_lookup_msi_route(s, msg); + if (!route) { + int virq; + + virq = kvm_irqchip_get_virq(s); + if (virq < 0) { + return virq; + } + + route = g_malloc(sizeof(KVMMSIRoute)); + route->kroute.gsi = virq; + route->kroute.type = KVM_IRQ_ROUTING_MSI; + route->kroute.flags = 0; + route->kroute.u.msi.address_lo = (uint32_t)msg.address; + route->kroute.u.msi.address_hi = msg.address >> 32; + route->kroute.u.msi.data = msg.data; + + kvm_add_routing_entry(s, &route->kroute); + + QTAILQ_INSERT_TAIL(&s->msi_hashtab[kvm_hash_msi(msg.data)], route, + entry); + } + + assert(route->kroute.type == KVM_IRQ_ROUTING_MSI); + + return kvm_irqchip_set_irq(s, route->kroute.gsi, 1); +} + +int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg) +{ + struct kvm_irq_routing_entry kroute; + int virq; + + if (!kvm_irqchip_in_kernel()) { + return -ENOSYS; + } + + virq = kvm_irqchip_get_virq(s); + if (virq < 0) { + return virq; + } + + kroute.gsi = virq; + kroute.type = KVM_IRQ_ROUTING_MSI; + kroute.flags = 0; + kroute.u.msi.address_lo = (uint32_t)msg.address; + kroute.u.msi.address_hi = msg.address >> 32; + kroute.u.msi.data = msg.data; + + kvm_add_routing_entry(s, &kroute); + + return virq; +} + +static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign) +{ + struct kvm_irqfd irqfd = { + .fd = fd, + .gsi = virq, + .flags = assign ? 0 : KVM_IRQFD_FLAG_DEASSIGN, + }; + + if (!kvm_irqchip_in_kernel()) { + return -ENOSYS; + } + + return kvm_vm_ioctl(s, KVM_IRQFD, &irqfd); } #else /* !KVM_CAP_IRQ_ROUTING */ @@ -939,8 +1128,33 @@ int kvm_irqchip_commit_routes(KVMState *s) static void kvm_init_irq_routing(KVMState *s) { } + +int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) +{ + abort(); +} + +int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg) +{ + abort(); +} + +static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign) +{ + abort(); +} #endif /* !KVM_CAP_IRQ_ROUTING */ +int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq) +{ + return kvm_irqchip_assign_irqfd(s, fd, virq, true); +} + +int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq) +{ + return kvm_irqchip_assign_irqfd(s, fd, virq, false); +} + static int kvm_irqchip_create(KVMState *s) { QemuOptsList *list = qemu_find_opts("machine"); @@ -948,7 +1162,7 @@ static int kvm_irqchip_create(KVMState *s) if (QTAILQ_EMPTY(&list->head) || !qemu_opt_get_bool(QTAILQ_FIRST(&list->head), - "kernel_irqchip", false) || + "kernel_irqchip", true) || !kvm_check_extension(s, KVM_CAP_IRQCHIP)) { return 0; } @@ -1072,6 +1286,8 @@ int kvm_init(void) s->pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2); #endif + s->direct_msi = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0); + ret = kvm_arch_init(s); if (ret < 0) { goto err; diff --git a/kvm-stub.c b/kvm-stub.c index 47c573d6f3..ec9a36454d 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -12,10 +12,14 @@ #include "qemu-common.h" #include "hw/hw.h" +#include "hw/msi.h" #include "cpu.h" #include "gdbstub.h" #include "kvm.h" +KVMState *kvm_state; +bool kvm_kernel_irqchip; + int kvm_init_vcpu(CPUArchState *env) { return -ENOSYS; @@ -128,3 +132,22 @@ int kvm_on_sigbus(int code, void *addr) { return 1; } + +int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg) +{ + return -ENOSYS; +} + +void kvm_irqchip_release_virq(KVMState *s, int virq) +{ +} + +int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq) +{ + return -ENOSYS; +} + +int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq) +{ + return -ENOSYS; +} diff --git a/kvm.h b/kvm.h index 4ccae8c0c8..9c7b0ea6ae 100644 --- a/kvm.h +++ b/kvm.h @@ -44,6 +44,10 @@ typedef struct KVMCapabilityInfo { #define KVM_CAP_INFO(CAP) { "KVM_CAP_" stringify(CAP), KVM_CAP_##CAP } #define KVM_CAP_LAST_INFO { NULL, 0 } +struct KVMState; +typedef struct KVMState KVMState; +extern KVMState *kvm_state; + /* external API */ int kvm_init(void); @@ -88,10 +92,6 @@ int kvm_on_sigbus(int code, void *addr); /* internal API */ -struct KVMState; -typedef struct KVMState KVMState; -extern KVMState *kvm_state; - int kvm_ioctl(KVMState *s, int type, ...); int kvm_vm_ioctl(KVMState *s, int type, ...); @@ -132,9 +132,9 @@ int kvm_arch_on_sigbus(int code, void *addr); void kvm_arch_init_irq_routing(KVMState *s); int kvm_irqchip_set_irq(KVMState *s, int irq, int level); +int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg); -void kvm_irqchip_add_route(KVMState *s, int gsi, int irqchip, int pin); -int kvm_irqchip_commit_routes(KVMState *s); +void kvm_irqchip_add_irq_route(KVMState *s, int gsi, int irqchip, int pin); void kvm_put_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); @@ -212,4 +212,10 @@ int kvm_set_ioeventfd_mmio(int fd, uint32_t adr, uint32_t val, bool assign, uint32_t size); int kvm_set_ioeventfd_pio_word(int fd, uint16_t adr, uint16_t val, bool assign); + +int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg); +void kvm_irqchip_release_virq(KVMState *s, int virq); + +int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq); +int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq); #endif diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index ee7bd9cc32..c4426ec73d 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -449,6 +449,30 @@ struct kvm_ppc_pvinfo { __u8 pad[108]; }; +/* for KVM_PPC_GET_SMMU_INFO */ +#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 + +struct kvm_ppc_one_page_size { + __u32 page_shift; /* Page shift (or 0) */ + __u32 pte_enc; /* Encoding in the HPTE (>>12) */ +}; + +struct kvm_ppc_one_seg_page_size { + __u32 page_shift; /* Base page shift of segment (or 0) */ + __u32 slb_enc; /* SLB encoding for BookS */ + struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + +#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 +#define KVM_PPC_1T_SEGMENTS 0x00000002 + +struct kvm_ppc_smmu_info { + __u64 flags; + __u32 slb_size; + __u32 pad; + struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + #define KVMIO 0xAE /* machine type bits, to be used as argument to KVM_CREATE_VM */ @@ -590,6 +614,8 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_SYNC_REGS 74 #define KVM_CAP_PCI_2_3 75 #define KVM_CAP_KVMCLOCK_CTRL 76 +#define KVM_CAP_SIGNAL_MSI 77 +#define KVM_CAP_PPC_GET_SMMU_INFO 78 #ifdef KVM_CAP_IRQ_ROUTING @@ -715,6 +741,14 @@ struct kvm_one_reg { __u64 addr; }; +struct kvm_msi { + __u32 address_lo; + __u32 address_hi; + __u32 data; + __u32 flags; + __u8 pad[16]; +}; + /* * ioctls for VM fds */ @@ -789,6 +823,10 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_PCI_2_3 */ #define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \ struct kvm_assigned_pci_dev) +/* Available with KVM_CAP_SIGNAL_MSI */ +#define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi) +/* Available with KVM_CAP_PPC_GET_SMMU_INFO */ +#define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info) /* * ioctls for vcpu fds diff --git a/qemu-common.h b/qemu-common.h index cccfb42dd6..91e056296d 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -251,6 +251,7 @@ typedef struct PCIEAERLog PCIEAERLog; typedef struct PCIEAERErr PCIEAERErr; typedef struct PCIEPort PCIEPort; typedef struct PCIESlot PCIESlot; +typedef struct MSIMessage MSIMessage; typedef struct SerialState SerialState; typedef struct IRQState *qemu_irq; typedef struct PCMCIACardState PCMCIACardState; diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap index a74ce71917..cbe6440ba3 100755 --- a/scripts/kvm/vmxcap +++ b/scripts/kvm/vmxcap @@ -22,6 +22,7 @@ MSR_IA32_VMX_TRUE_PINBASED_CTLS = 0x48D MSR_IA32_VMX_TRUE_PROCBASED_CTLS = 0x48E MSR_IA32_VMX_TRUE_EXIT_CTLS = 0x48F MSR_IA32_VMX_TRUE_ENTRY_CTLS = 0x490 +MSR_IA32_VMX_VMFUNC = 0x491 class msr(object): def __init__(self): @@ -147,6 +148,9 @@ controls = [ 6: 'WBINVD exiting', 7: 'Unrestricted guest', 10: 'PAUSE-loop exiting', + 11: 'RDRAND exiting', + 12: 'Enable INVPCID', + 13: 'Enable VM functions', }, cap_msr = MSR_IA32_VMX_PROCBASED_CTLS2, ), @@ -193,6 +197,7 @@ controls = [ 8: 'Wait-for-SIPI activity state', (16,24): 'Number of CR3-target values', (25,27): 'MSR-load/store count recommenation', + 28: 'IA32_SMM_MONITOR_CTL[2] can be set to 1', (32,62): 'MSEG revision identifier', }, msr = MSR_IA32_VMX_MISC_CTLS, @@ -208,6 +213,7 @@ controls = [ 16: '2MB EPT pages', 17: '1GB EPT pages', 20: 'INVEPT supported', + 21: 'EPT accessed and dirty flags', 25: 'Single-context INVEPT', 26: 'All-context INVEPT', 32: 'INVVPID supported', @@ -218,6 +224,13 @@ controls = [ }, msr = MSR_IA32_VMX_EPT_VPID_CAP, ), + Misc( + name = 'VM Functions', + bits = { + 0: 'EPTP Switching', + }, + msr = MSR_IA32_VMX_VMFUNC, + ), ] for c in controls: