From 4e2e4e6355959a1af011167b0db5ac7ffd3adf94 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 16 May 2012 15:41:08 -0300 Subject: [PATCH 01/20] kvm: Refactor KVMState::max_gsi to gsi_count Instead of the bitmap size, store the maximum number of GSIs the kernel supports. Move the GSI limit assertion to the API function kvm_irqchip_add_route and make it stricter. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- kvm-all.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index 9b73ccfbec..2d82d54701 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -86,7 +86,7 @@ struct KVMState struct kvm_irq_routing *irq_routes; int nr_allocated_irq_routes; uint32_t *used_gsi_bitmap; - unsigned int max_gsi; + unsigned int gsi_count; #endif }; @@ -859,8 +859,6 @@ int kvm_irqchip_set_irq(KVMState *s, int irq, int level) #ifdef KVM_CAP_IRQ_ROUTING static void set_gsi(KVMState *s, unsigned int gsi) { - assert(gsi < s->max_gsi); - s->used_gsi_bitmap[gsi / 32] |= 1U << (gsi % 32); } @@ -875,7 +873,7 @@ static void kvm_init_irq_routing(KVMState *s) /* Round up so we can search ints using ffs */ gsi_bits = ALIGN(gsi_count, 32); s->used_gsi_bitmap = g_malloc0(gsi_bits / 8); - s->max_gsi = gsi_bits; + s->gsi_count = gsi_count; /* Mark any over-allocated bits as already in use */ for (i = gsi_count; i < gsi_bits; i++) { @@ -920,6 +918,8 @@ void kvm_irqchip_add_route(KVMState *s, int irq, int irqchip, int pin) { struct kvm_irq_routing_entry e; + assert(pin < s->gsi_count); + e.gsi = irq; e.type = KVM_IRQ_ROUTING_IRQCHIP; e.flags = 0; From 14de9bab9e4aa47215c26d87de7385afbcb37afa Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 16 May 2012 15:41:09 -0300 Subject: [PATCH 02/20] Introduce MSIMessage structure Will be used for generating and distributing MSI messages, both in emulation mode and under KVM.
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- hw/msi.h | 5 +++++ qemu-common.h | 1 + 2 files changed, 6 insertions(+) diff --git a/hw/msi.h b/hw/msi.h index 3040bb0b43..75747abc25 100644 --- a/hw/msi.h +++ b/hw/msi.h @@ -24,6 +24,11 @@ #include "qemu-common.h" #include "pci.h" +struct MSIMessage { + uint64_t address; + uint32_t data; +}; + extern bool msi_supported; bool msi_enabled(const PCIDevice *dev); diff --git a/qemu-common.h b/qemu-common.h index 50f659af07..57fe28fd9a 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -248,6 +248,7 @@ typedef struct PCIEAERLog PCIEAERLog; typedef struct PCIEAERErr PCIEAERErr; typedef struct PCIEPort PCIEPort; typedef struct PCIESlot PCIESlot; +typedef struct MSIMessage MSIMessage; typedef struct SerialState SerialState; typedef struct IRQState *qemu_irq; typedef struct PCMCIACardState PCMCIACardState; From 04fa27f5ae5f025424bb7b88d3453c46e8900102 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 16 May 2012 15:41:10 -0300 Subject: [PATCH 03/20] kvm: Introduce basic MSI support for in-kernel irqchips This patch basically adds kvm_irqchip_send_msi, a service for sending arbitrary MSI messages to KVM's in-kernel irqchip models. As the original KVM API requires us to establish a static route from a pseudo GSI to the target MSI message and inject the MSI via toggling that virtual IRQ, we need to play some tricks to make this interface transparent. We create those routes on demand and keep them in a hash table. Succeeding messages can then search for an existing route in the table first and reuse it whenever possible. If we should run out of limited GSIs, we simply flush the table and rebuild it as messages are sent. This approach is rather simple and could be optimized further. However, the latest kernels contain a more efficient MSI injection interface that will obsolete the GSI-based dynamic injection.
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- kvm-all.c | 139 +++++++++++++++++++++++++++++++++++++++++++++++++++++- kvm.h | 1 + 2 files changed, 139 insertions(+), 1 deletion(-) diff --git a/kvm-all.c b/kvm-all.c index 2d82d54701..ff0534b105 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -24,6 +24,7 @@ #include "qemu-barrier.h" #include "sysemu.h" #include "hw/hw.h" +#include "hw/msi.h" #include "gdbstub.h" #include "kvm.h" #include "bswap.h" @@ -48,6 +49,8 @@ do { } while (0) #endif +#define KVM_MSI_HASHTAB_SIZE 256 + typedef struct KVMSlot { target_phys_addr_t start_addr; @@ -59,6 +62,11 @@ typedef struct KVMSlot typedef struct kvm_dirty_log KVMDirtyLog; +typedef struct KVMMSIRoute { + struct kvm_irq_routing_entry kroute; + QTAILQ_ENTRY(KVMMSIRoute) entry; +} KVMMSIRoute; + struct KVMState { KVMSlot slots[32]; @@ -87,6 +95,7 @@ struct KVMState int nr_allocated_irq_routes; uint32_t *used_gsi_bitmap; unsigned int gsi_count; + QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE]; #endif }; @@ -862,9 +871,14 @@ static void set_gsi(KVMState *s, unsigned int gsi) s->used_gsi_bitmap[gsi / 32] |= 1U << (gsi % 32); } +static void clear_gsi(KVMState *s, unsigned int gsi) +{ + s->used_gsi_bitmap[gsi / 32] &= ~(1U << (gsi % 32)); +} + static void kvm_init_irq_routing(KVMState *s) { - int gsi_count; + int gsi_count, i; gsi_count = kvm_check_extension(s, KVM_CAP_IRQ_ROUTING); if (gsi_count > 0) { @@ -884,6 +898,10 @@ static void kvm_init_irq_routing(KVMState *s) s->irq_routes = g_malloc0(sizeof(*s->irq_routes)); s->nr_allocated_irq_routes = 0; + for (i = 0; i < KVM_MSI_HASHTAB_SIZE; i++) { + QTAILQ_INIT(&s->msi_hashtab[i]); + } + kvm_arch_init_irq_routing(s); } @@ -934,11 +952,130 @@ int kvm_irqchip_commit_routes(KVMState *s) return kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes); } +static void kvm_irqchip_release_virq(KVMState *s, int virq) +{ + struct kvm_irq_routing_entry *e; + int i; + + for (i = 0; i < s->irq_routes->nr; i++) { + e = 
&s->irq_routes->entries[i]; + if (e->gsi == virq) { + s->irq_routes->nr--; + *e = s->irq_routes->entries[s->irq_routes->nr]; + } + } + clear_gsi(s, virq); +} + +static unsigned int kvm_hash_msi(uint32_t data) +{ + /* This is optimized for IA32 MSI layout. However, no other arch shall + * repeat the mistake of not providing a direct MSI injection API. */ + return data & 0xff; +} + +static void kvm_flush_dynamic_msi_routes(KVMState *s) +{ + KVMMSIRoute *route, *next; + unsigned int hash; + + for (hash = 0; hash < KVM_MSI_HASHTAB_SIZE; hash++) { + QTAILQ_FOREACH_SAFE(route, &s->msi_hashtab[hash], entry, next) { + kvm_irqchip_release_virq(s, route->kroute.gsi); + QTAILQ_REMOVE(&s->msi_hashtab[hash], route, entry); + g_free(route); + } + } +} + +static int kvm_irqchip_get_virq(KVMState *s) +{ + uint32_t *word = s->used_gsi_bitmap; + int max_words = ALIGN(s->gsi_count, 32) / 32; + int i, bit; + bool retry = true; + +again: + /* Return the lowest unused GSI in the bitmap */ + for (i = 0; i < max_words; i++) { + bit = ffs(~word[i]); + if (!bit) { + continue; + } + + return bit - 1 + i * 32; + } + if (retry) { + retry = false; + kvm_flush_dynamic_msi_routes(s); + goto again; + } + return -ENOSPC; + +} + +static KVMMSIRoute *kvm_lookup_msi_route(KVMState *s, MSIMessage msg) +{ + unsigned int hash = kvm_hash_msi(msg.data); + KVMMSIRoute *route; + + QTAILQ_FOREACH(route, &s->msi_hashtab[hash], entry) { + if (route->kroute.u.msi.address_lo == (uint32_t)msg.address && + route->kroute.u.msi.address_hi == (msg.address >> 32) && + route->kroute.u.msi.data == msg.data) { + return route; + } + } + return NULL; +} + +int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) +{ + KVMMSIRoute *route; + + route = kvm_lookup_msi_route(s, msg); + if (!route) { + int virq, ret; + + virq = kvm_irqchip_get_virq(s); + if (virq < 0) { + return virq; + } + + route = g_malloc(sizeof(KVMMSIRoute)); + route->kroute.gsi = virq; + route->kroute.type = KVM_IRQ_ROUTING_MSI; + route->kroute.flags = 0; + 
route->kroute.u.msi.address_lo = (uint32_t)msg.address; + route->kroute.u.msi.address_hi = msg.address >> 32; + route->kroute.u.msi.data = msg.data; + + kvm_add_routing_entry(s, &route->kroute); + + QTAILQ_INSERT_TAIL(&s->msi_hashtab[kvm_hash_msi(msg.data)], route, + entry); + + ret = kvm_irqchip_commit_routes(s); + if (ret < 0) { + return ret; + } + } + + assert(route->kroute.type == KVM_IRQ_ROUTING_MSI); + + return kvm_irqchip_set_irq(s, route->kroute.gsi, 1); +} + #else /* !KVM_CAP_IRQ_ROUTING */ static void kvm_init_irq_routing(KVMState *s) { } + +int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) +{ + abort(); +} #endif /* !KVM_CAP_IRQ_ROUTING */ static int kvm_irqchip_create(KVMState *s) diff --git a/kvm.h b/kvm.h index 4ccae8c0c8..7857dbfb05 100644 --- a/kvm.h +++ b/kvm.h @@ -132,6 +132,7 @@ int kvm_arch_on_sigbus(int code, void *addr); void kvm_arch_init_irq_routing(KVMState *s); int kvm_irqchip_set_irq(KVMState *s, int irq, int level); +int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg); void kvm_irqchip_add_route(KVMState *s, int gsi, int irqchip, int pin); int kvm_irqchip_commit_routes(KVMState *s); From 08a82ac01cb5409480128f8e1f144557d99b74a3 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 16 May 2012 15:41:11 -0300 Subject: [PATCH 04/20] pc: Enable MSI support at APIC level Push msi_supported enabling to the APIC implementations where we can encapsulate the decision more cleanly, hiding the details from the generic code. 
Acked-by: Stefano Stabellini Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- hw/apic.c | 3 +++ hw/pc.c | 9 --------- hw/xen.h | 10 ---------- hw/xen_apic.c | 5 +++++ 4 files changed, 8 insertions(+), 19 deletions(-) diff --git a/hw/apic.c b/hw/apic.c index 4eeaf8801c..5fbf01c278 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -19,6 +19,7 @@ #include "apic_internal.h" #include "apic.h" #include "ioapic.h" +#include "msi.h" #include "host-utils.h" #include "trace.h" #include "pc.h" @@ -862,6 +863,8 @@ static void apic_init(APICCommonState *s) s->timer = qemu_new_timer_ns(vm_clock, apic_timer, s); local_apics[s->idx] = s; + + msi_supported = true; } static void apic_class_init(ObjectClass *klass, void *data) diff --git a/hw/pc.c b/hw/pc.c index 4d34a335ed..6691b18196 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -911,15 +911,6 @@ static DeviceState *apic_init(void *env, uint8_t apic_id) apic_mapped = 1; } - /* KVM does not support MSI yet. */ - if (!kvm_irqchip_in_kernel()) { - msi_supported = true; - } - - if (xen_msi_support()) { - msi_supported = true; - } - return dev; } diff --git a/hw/xen.h b/hw/xen.h index 3ae4cd0f5c..e5926b7b8a 100644 --- a/hw/xen.h +++ b/hw/xen.h @@ -57,14 +57,4 @@ void xen_register_framebuffer(struct MemoryRegion *mr); # define HVM_MAX_VCPUS 32 #endif -static inline int xen_msi_support(void) -{ -#if defined(CONFIG_XEN_CTRL_INTERFACE_VERSION) \ - && CONFIG_XEN_CTRL_INTERFACE_VERSION >= 420 - return xen_enabled(); -#else - return 0; -#endif -} - #endif /* QEMU_HW_XEN_H */ diff --git a/hw/xen_apic.c b/hw/xen_apic.c index 1725ff67dd..a9e101f315 100644 --- a/hw/xen_apic.c +++ b/hw/xen_apic.c @@ -40,6 +40,11 @@ static void xen_apic_init(APICCommonState *s) { memory_region_init_io(&s->io_memory, &xen_apic_io_ops, s, "xen-apic-msi", MSI_SPACE_SIZE); + +#if defined(CONFIG_XEN_CTRL_INTERFACE_VERSION) \ + && CONFIG_XEN_CTRL_INTERFACE_VERSION >= 420 + msi_supported = true; +#endif } static void xen_apic_set_base(APICCommonState *s, uint64_t val) From 
ffb8d4296e01f0ead3ba81b08a34637c5bbff0da Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 16 May 2012 15:41:12 -0300 Subject: [PATCH 05/20] kvm: x86: Wire up MSI support for in-kernel irqchip Catch writes to the MSI MMIO region in the KVM APIC and forward them to the kernel. Provided the kernel supports GSI routing, this allows enabling MSI support for in-kernel irqchip mode as well. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- hw/kvm/apic.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c index ffe7a521b7..8ba4079025 100644 --- a/hw/kvm/apic.c +++ b/hw/kvm/apic.c @@ -10,6 +10,7 @@ * See the COPYING file in the top-level directory. */ #include "hw/apic_internal.h" +#include "hw/msi.h" #include "kvm.h" static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, @@ -145,10 +146,39 @@ static void kvm_apic_external_nmi(APICCommonState *s) run_on_cpu(s->cpu_env, do_inject_external_nmi, s); } +static uint64_t kvm_apic_mem_read(void *opaque, target_phys_addr_t addr, + unsigned size) +{ + return ~(uint64_t)0; +} + +static void kvm_apic_mem_write(void *opaque, target_phys_addr_t addr, + uint64_t data, unsigned size) +{ + MSIMessage msg = { .address = addr, .data = data }; + int ret; + + ret = kvm_irqchip_send_msi(kvm_state, msg); + if (ret < 0) { + fprintf(stderr, "KVM: injection failed, MSI lost (%s)\n", + strerror(-ret)); + } +} + +static const MemoryRegionOps kvm_apic_io_ops = { + .read = kvm_apic_mem_read, + .write = kvm_apic_mem_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + static void kvm_apic_init(APICCommonState *s) { - memory_region_init_reservation(&s->io_memory, "kvm-apic-msi", - MSI_SPACE_SIZE); + memory_region_init_io(&s->io_memory, &kvm_apic_io_ops, s, "kvm-apic-msi", + MSI_SPACE_SIZE); + + if (kvm_has_gsi_routing()) { + msi_supported = true; + } } static void kvm_apic_class_init(ObjectClass *klass, void *data) From
80a3eb10fe8ff61666e3843f8c7711fd496fa89b Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 16 May 2012 15:41:13 -0300 Subject: [PATCH 06/20] kvm: Update kernel headers Corresponding kvm.git hash: f2569053e0 Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- linux-headers/linux/kvm.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index ee7bd9cc32..c4426ec73d 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -449,6 +449,30 @@ struct kvm_ppc_pvinfo { __u8 pad[108]; }; +/* for KVM_PPC_GET_SMMU_INFO */ +#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 + +struct kvm_ppc_one_page_size { + __u32 page_shift; /* Page shift (or 0) */ + __u32 pte_enc; /* Encoding in the HPTE (>>12) */ +}; + +struct kvm_ppc_one_seg_page_size { + __u32 page_shift; /* Base page shift of segment (or 0) */ + __u32 slb_enc; /* SLB encoding for BookS */ + struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + +#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 +#define KVM_PPC_1T_SEGMENTS 0x00000002 + +struct kvm_ppc_smmu_info { + __u64 flags; + __u32 slb_size; + __u32 pad; + struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + #define KVMIO 0xAE /* machine type bits, to be used as argument to KVM_CREATE_VM */ @@ -590,6 +614,8 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_SYNC_REGS 74 #define KVM_CAP_PCI_2_3 75 #define KVM_CAP_KVMCLOCK_CTRL 76 +#define KVM_CAP_SIGNAL_MSI 77 +#define KVM_CAP_PPC_GET_SMMU_INFO 78 #ifdef KVM_CAP_IRQ_ROUTING @@ -715,6 +741,14 @@ struct kvm_one_reg { __u64 addr; }; +struct kvm_msi { + __u32 address_lo; + __u32 address_hi; + __u32 data; + __u32 flags; + __u8 pad[16]; +}; + /* * ioctls for VM fds */ @@ -789,6 +823,10 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_PCI_2_3 */ #define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \ struct kvm_assigned_pci_dev) +/* Available with KVM_CAP_SIGNAL_MSI */ +#define KVM_SIGNAL_MSI 
_IOW(KVMIO, 0xa5, struct kvm_msi) +/* Available with KVM_CAP_PPC_GET_SMMU_INFO */ +#define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info) /* * ioctls for vcpu fds From 4a3adebb1854d48f0c67958e164c6b2f29d44064 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 16 May 2012 15:41:14 -0300 Subject: [PATCH 07/20] kvm: Add support for direct MSI injections If the kernel supports KVM_SIGNAL_MSI, we can avoid the route-based MSI injection mechanism. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- kvm-all.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index ff0534b105..d222012218 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -96,6 +96,7 @@ struct KVMState uint32_t *used_gsi_bitmap; unsigned int gsi_count; QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE]; + bool direct_msi; #endif }; @@ -898,8 +899,10 @@ static void kvm_init_irq_routing(KVMState *s) s->irq_routes = g_malloc0(sizeof(*s->irq_routes)); s->nr_allocated_irq_routes = 0; - for (i = 0; i < KVM_MSI_HASHTAB_SIZE; i++) { - QTAILQ_INIT(&s->msi_hashtab[i]); + if (!s->direct_msi) { + for (i = 0; i < KVM_MSI_HASHTAB_SIZE; i++) { + QTAILQ_INIT(&s->msi_hashtab[i]); + } } kvm_arch_init_irq_routing(s); @@ -1005,7 +1008,7 @@ again: return bit - 1 + i * 32; } - if (retry) { + if (!s->direct_msi && retry) { retry = false; kvm_flush_dynamic_msi_routes(s); goto again; @@ -1031,8 +1034,19 @@ static KVMMSIRoute *kvm_lookup_msi_route(KVMState *s, MSIMessage msg) int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) { + struct kvm_msi msi; KVMMSIRoute *route; + if (s->direct_msi) { + msi.address_lo = (uint32_t)msg.address; + msi.address_hi = msg.address >> 32; + msi.data = msg.data; + msi.flags = 0; + memset(msi.pad, 0, sizeof(msi.pad)); + + return kvm_vm_ioctl(s, KVM_SIGNAL_MSI, &msi); + } + route = kvm_lookup_msi_route(s, msg); if (!route) { int virq, ret; @@ -1209,6 +1223,8 @@ int kvm_init(void) s->pit_state2 = 
kvm_check_extension(s, KVM_CAP_PIT_STATE2); #endif + s->direct_msi = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0); + ret = kvm_arch_init(s); if (ret < 0) { goto err; From a24b9106fa1ba8de0d62d93b64ebbf83ae6a131a Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 16 May 2012 15:41:15 -0300 Subject: [PATCH 08/20] kvm: Enable in-kernel irqchip support by default As MSI is now fully supported by KVM (/wrt available features in upstream), we can finally enable the in-kernel irqchip by default. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- kvm-all.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kvm-all.c b/kvm-all.c index d222012218..1913d6a337 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1099,7 +1099,7 @@ static int kvm_irqchip_create(KVMState *s) if (QTAILQ_EMPTY(&list->head) || !qemu_opt_get_bool(QTAILQ_FIRST(&list->head), - "kernel_irqchip", false) || + "kernel_irqchip", true) || !kvm_check_extension(s, KVM_CAP_IRQCHIP)) { return 0; } From 287d55c6769c3a38e9083b103cb148fb38858b3a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 16 May 2012 14:31:37 +0300 Subject: [PATCH 09/20] kvm: update vmxcap for EPT A/D, INVPCID, RDRAND, VMFUNC Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- scripts/kvm/vmxcap | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap index a74ce71917..cbe6440ba3 100755 --- a/scripts/kvm/vmxcap +++ b/scripts/kvm/vmxcap @@ -22,6 +22,7 @@ MSR_IA32_VMX_TRUE_PINBASED_CTLS = 0x48D MSR_IA32_VMX_TRUE_PROCBASED_CTLS = 0x48E MSR_IA32_VMX_TRUE_EXIT_CTLS = 0x48F MSR_IA32_VMX_TRUE_ENTRY_CTLS = 0x490 +MSR_IA32_VMX_VMFUNC = 0x491 class msr(object): def __init__(self): @@ -147,6 +148,9 @@ controls = [ 6: 'WBINVD exiting', 7: 'Unrestricted guest', 10: 'PAUSE-loop exiting', + 11: 'RDRAND exiting', + 12: 'Enable INVPCID', + 13: 'Enable VM functions', }, cap_msr = MSR_IA32_VMX_PROCBASED_CTLS2, ), @@ -193,6 +197,7 @@ controls = [ 8: 'Wait-for-SIPI activity state', 
(16,24): 'Number of CR3-target values', (25,27): 'MSR-load/store count recommenation', + 28: 'IA32_SMM_MONITOR_CTL[2] can be set to 1', (32,62): 'MSEG revision identifier', }, msr = MSR_IA32_VMX_MISC_CTLS, @@ -208,6 +213,7 @@ controls = [ 16: '2MB EPT pages', 17: '1GB EPT pages', 20: 'INVEPT supported', + 21: 'EPT accessed and dirty flags', 25: 'Single-context INVEPT', 26: 'All-context INVEPT', 32: 'INVVPID supported', @@ -218,6 +224,13 @@ controls = [ }, msr = MSR_IA32_VMX_EPT_VPID_CAP, ), + Misc( + name = 'VM Functions', + bits = { + 0: 'EPTP Switching', + }, + msr = MSR_IA32_VMX_VMFUNC, + ), ] for c in controls: From bc4caf49c7fee6d1e063df32ca7554e5b98bfc89 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 17 May 2012 10:32:29 -0300 Subject: [PATCH 10/20] msix: Factor out msix_get_message This helper will also be used by the upcoming config notifier. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- hw/msix.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/hw/msix.c b/hw/msix.c index 3835eaaf28..31974657d3 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -35,6 +35,15 @@ #define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2) #define MSIX_MAX_ENTRIES 32 +static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) +{ + uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE; + MSIMessage msg; + + msg.address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); + msg.data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); + return msg; +} /* Add MSI-X capability to the config space for the device. 
*/ /* Given a bar and its size, add MSI-X table on top of it @@ -352,9 +361,7 @@ uint32_t msix_bar_size(PCIDevice *dev) /* Send an MSI-X message */ void msix_notify(PCIDevice *dev, unsigned vector) { - uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE; - uint64_t address; - uint32_t data; + MSIMessage msg; if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) return; @@ -363,9 +370,9 @@ void msix_notify(PCIDevice *dev, unsigned vector) return; } - address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); - data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); - stl_le_phys(address, data); + msg = msix_get_message(dev, vector); + + stl_le_phys(msg.address, msg.data); } void msix_reset(PCIDevice *dev) From 5b5f1330da2d7e5b5cbde8c60738774b2bd8692f Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 17 May 2012 10:32:30 -0300 Subject: [PATCH 11/20] msix: Invoke msix_handle_mask_update on msix_mask_all In preparation of firing vector notifiers on mask changes, call msix_handle_mask_update also from msix_mask_all. So far, this will have no real effect. 
Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- hw/msix.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/msix.c b/hw/msix.c index 31974657d3..e1a7d92fea 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -231,10 +231,14 @@ static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar) static void msix_mask_all(struct PCIDevice *dev, unsigned nentries) { int vector; + for (vector = 0; vector < nentries; ++vector) { unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; + bool was_masked = msix_is_masked(dev, vector); + dev->msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT; + msix_handle_mask_update(dev, vector, was_masked); } } From 2cdfe53c5f502415932de552c0c7dad369954d4c Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 17 May 2012 10:32:31 -0300 Subject: [PATCH 12/20] msix: Introduce vector notifiers Vector notifiers shall be triggered by the MSI/MSI-X core whenever a relevant configuration change is programmed by the guest. In case of MSI-X, changes are reported when the effective mask (global && per-vector) alters its state. On unmask, the current vector configuration is included in the event report. This allows users - e.g. virtio-pci layer - to transfer this information to external MSI-X routing subsystems - like vhost + KVM in-kernel irqchip. This implementation only provides MSI-X support, but extension to MSI is feasible and will be provided later on when adding support for KVM PCI device assignment. 
Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- hw/msix.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ hw/msix.h | 4 +++ hw/pci.h | 8 +++++ 3 files changed, 105 insertions(+) diff --git a/hw/msix.c b/hw/msix.c index e1a7d92fea..1622e16188 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -139,13 +139,34 @@ static bool msix_is_masked(PCIDevice *dev, int vector) return msix_vector_masked(dev, vector, dev->msix_function_masked); } +static void msix_fire_vector_notifier(PCIDevice *dev, + unsigned int vector, bool is_masked) +{ + MSIMessage msg; + int ret; + + if (!dev->msix_vector_use_notifier) { + return; + } + if (is_masked) { + dev->msix_vector_release_notifier(dev, vector); + } else { + msg = msix_get_message(dev, vector); + ret = dev->msix_vector_use_notifier(dev, vector, msg); + assert(ret >= 0); + } +} + static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked) { bool is_masked = msix_is_masked(dev, vector); + if (is_masked == was_masked) { return; } + msix_fire_vector_notifier(dev, vector, is_masked); + if (!is_masked && msix_is_pending(dev, vector)) { msix_clr_pending(dev, vector); msix_notify(dev, vector); @@ -330,6 +351,7 @@ void msix_save(PCIDevice *dev, QEMUFile *f) void msix_load(PCIDevice *dev, QEMUFile *f) { unsigned n = dev->msix_entries_nr; + unsigned int vector; if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) { return; @@ -339,6 +361,10 @@ void msix_load(PCIDevice *dev, QEMUFile *f) qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE); qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8); msix_update_function_masked(dev); + + for (vector = 0; vector < n; vector++) { + msix_handle_mask_update(dev, vector, true); + } } /* Does device support MSI-X? 
*/ @@ -425,3 +451,70 @@ void msix_unuse_all_vectors(PCIDevice *dev) return; msix_free_irq_entries(dev); } + +static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector) +{ + MSIMessage msg; + + if (msix_is_masked(dev, vector)) { + return 0; + } + msg = msix_get_message(dev, vector); + return dev->msix_vector_use_notifier(dev, vector, msg); +} + +static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector) +{ + if (msix_is_masked(dev, vector)) { + return; + } + dev->msix_vector_release_notifier(dev, vector); +} + +int msix_set_vector_notifiers(PCIDevice *dev, + MSIVectorUseNotifier use_notifier, + MSIVectorReleaseNotifier release_notifier) +{ + int vector, ret; + + assert(use_notifier && release_notifier); + + dev->msix_vector_use_notifier = use_notifier; + dev->msix_vector_release_notifier = release_notifier; + + if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & + (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) { + for (vector = 0; vector < dev->msix_entries_nr; vector++) { + ret = msix_set_notifier_for_vector(dev, vector); + if (ret < 0) { + goto undo; + } + } + } + return 0; + +undo: + while (--vector >= 0) { + msix_unset_notifier_for_vector(dev, vector); + } + dev->msix_vector_use_notifier = NULL; + dev->msix_vector_release_notifier = NULL; + return ret; +} + +void msix_unset_vector_notifiers(PCIDevice *dev) +{ + int vector; + + assert(dev->msix_vector_use_notifier && + dev->msix_vector_release_notifier); + + if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & + (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) { + for (vector = 0; vector < dev->msix_entries_nr; vector++) { + msix_unset_notifier_for_vector(dev, vector); + } + } + dev->msix_vector_use_notifier = NULL; + dev->msix_vector_release_notifier = NULL; +} diff --git a/hw/msix.h b/hw/msix.h index 5aba22b858..f33f18b484 100644 --- a/hw/msix.h +++ b/hw/msix.h @@ -29,4 +29,8 @@ void msix_notify(PCIDevice *dev, unsigned vector); void 
msix_reset(PCIDevice *dev); +int msix_set_vector_notifiers(PCIDevice *dev, + MSIVectorUseNotifier use_notifier, + MSIVectorReleaseNotifier release_notifier); +void msix_unset_vector_notifiers(PCIDevice *dev); #endif diff --git a/hw/pci.h b/hw/pci.h index 8d0aa498e5..c3cacce046 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -173,6 +173,10 @@ typedef struct PCIDeviceClass { const char *romfile; } PCIDeviceClass; +typedef int (*MSIVectorUseNotifier)(PCIDevice *dev, unsigned int vector, + MSIMessage msg); +typedef void (*MSIVectorReleaseNotifier)(PCIDevice *dev, unsigned int vector); + struct PCIDevice { DeviceState qdev; /* PCI config space */ @@ -243,6 +247,10 @@ struct PCIDevice { bool has_rom; MemoryRegion rom; uint32_t rom_bar; + + /* MSI-X notifiers */ + MSIVectorUseNotifier msix_vector_use_notifier; + MSIVectorReleaseNotifier msix_vector_release_notifier; }; void pci_register_bar(PCIDevice *pci_dev, int region_num, From 1df186df3507f25f7e0503e0c2a761c753a0bf17 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 17 May 2012 10:32:32 -0300 Subject: [PATCH 13/20] kvm: Rename kvm_irqchip_add_route to kvm_irqchip_add_irq_route We will add kvm_irqchip_add_msi_route, so let's make the difference clearer. 
Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- hw/pc_piix.c | 8 ++++---- kvm-all.c | 2 +- kvm.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/pc_piix.c b/hw/pc_piix.c index 6a75718fbb..c17f906f2d 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -63,17 +63,17 @@ static void kvm_piix3_setup_irq_routing(bool pci_enabled) if (i == 2) { continue; } - kvm_irqchip_add_route(s, i, KVM_IRQCHIP_PIC_MASTER, i); + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_MASTER, i); } for (i = 8; i < 16; ++i) { - kvm_irqchip_add_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8); + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8); } if (pci_enabled) { for (i = 0; i < 24; ++i) { if (i == 0) { - kvm_irqchip_add_route(s, i, KVM_IRQCHIP_IOAPIC, 2); + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, 2); } else if (i != 2) { - kvm_irqchip_add_route(s, i, KVM_IRQCHIP_IOAPIC, i); + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, i); } } } diff --git a/kvm-all.c b/kvm-all.c index 1913d6a337..0117837e9a 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -935,7 +935,7 @@ static void kvm_add_routing_entry(KVMState *s, set_gsi(s, entry->gsi); } -void kvm_irqchip_add_route(KVMState *s, int irq, int irqchip, int pin) +void kvm_irqchip_add_irq_route(KVMState *s, int irq, int irqchip, int pin) { struct kvm_irq_routing_entry e; diff --git a/kvm.h b/kvm.h index 7857dbfb05..8b061bd0d0 100644 --- a/kvm.h +++ b/kvm.h @@ -134,7 +134,7 @@ void kvm_arch_init_irq_routing(KVMState *s); int kvm_irqchip_set_irq(KVMState *s, int irq, int level); int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg); -void kvm_irqchip_add_route(KVMState *s, int gsi, int irqchip, int pin); +void kvm_irqchip_add_irq_route(KVMState *s, int gsi, int irqchip, int pin); int kvm_irqchip_commit_routes(KVMState *s); void kvm_put_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); From 92b4e489822d2126440f7645b116ebd5f63c0ef7 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 17 May 2012 10:32:33 
-0300 Subject: [PATCH 14/20] kvm: Introduce kvm_irqchip_add_msi_route Add a service that establishes a static route from a virtual IRQ line to an MSI message. Will be used for IRQFD and device assignment. As we will use this service outside of CONFIG_KVM protected code, stub it properly. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- kvm-all.c | 31 +++++++++++++++++++++++++++++++ kvm-stub.c | 8 ++++++++ kvm.h | 10 ++++++---- 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index 0117837e9a..7f906cafe5 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1080,6 +1080,32 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) return kvm_irqchip_set_irq(s, route->kroute.gsi, 1); } +int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg) +{ + struct kvm_irq_routing_entry kroute; + int virq; + + if (!kvm_irqchip_in_kernel()) { + return -ENOSYS; + } + + virq = kvm_irqchip_get_virq(s); + if (virq < 0) { + return virq; + } + + kroute.gsi = virq; + kroute.type = KVM_IRQ_ROUTING_MSI; + kroute.flags = 0; + kroute.u.msi.address_lo = (uint32_t)msg.address; + kroute.u.msi.address_hi = msg.address >> 32; + kroute.u.msi.data = msg.data; + + kvm_add_routing_entry(s, &kroute); + + return virq; +} + #else /* !KVM_CAP_IRQ_ROUTING */ static void kvm_init_irq_routing(KVMState *s) @@ -1090,6 +1116,11 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) { abort(); } + +int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg) +{ + abort(); +} #endif /* !KVM_CAP_IRQ_ROUTING */ static int kvm_irqchip_create(KVMState *s) diff --git a/kvm-stub.c b/kvm-stub.c index 47c573d6f3..db3a7dc183 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -12,10 +12,13 @@ #include "qemu-common.h" #include "hw/hw.h" +#include "hw/msi.h" #include "cpu.h" #include "gdbstub.h" #include "kvm.h" +KVMState *kvm_state; + int kvm_init_vcpu(CPUArchState *env) { return -ENOSYS; @@ -128,3 +131,8 @@ int kvm_on_sigbus(int code, void *addr) { return 1; } + +int 
kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg) +{ + return -ENOSYS; +} diff --git a/kvm.h b/kvm.h index 8b061bd0d0..67df1f127f 100644 --- a/kvm.h +++ b/kvm.h @@ -44,6 +44,10 @@ typedef struct KVMCapabilityInfo { #define KVM_CAP_INFO(CAP) { "KVM_CAP_" stringify(CAP), KVM_CAP_##CAP } #define KVM_CAP_LAST_INFO { NULL, 0 } +struct KVMState; +typedef struct KVMState KVMState; +extern KVMState *kvm_state; + /* external API */ int kvm_init(void); @@ -88,10 +92,6 @@ int kvm_on_sigbus(int code, void *addr); /* internal API */ -struct KVMState; -typedef struct KVMState KVMState; -extern KVMState *kvm_state; - int kvm_ioctl(KVMState *s, int type, ...); int kvm_vm_ioctl(KVMState *s, int type, ...); @@ -213,4 +213,6 @@ int kvm_set_ioeventfd_mmio(int fd, uint32_t adr, uint32_t val, bool assign, uint32_t size); int kvm_set_ioeventfd_pio_word(int fd, uint16_t adr, uint16_t val, bool assign); + +int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg); #endif From 1e2aa8be09184156f438ba5199eaa12190c39acc Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 17 May 2012 10:32:34 -0300 Subject: [PATCH 15/20] kvm: Publicize kvm_irqchip_release_virq This allows routes created by kvm_irqchip_add_irq_route or kvm_irqchip_add_msi_route to be dropped again.
Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- kvm-all.c | 2 +- kvm-stub.c | 4 ++++ kvm.h | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/kvm-all.c b/kvm-all.c index 7f906cafe5..ca6cec607e 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -955,7 +955,7 @@ int kvm_irqchip_commit_routes(KVMState *s) return kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes); } -static void kvm_irqchip_release_virq(KVMState *s, int virq) +void kvm_irqchip_release_virq(KVMState *s, int virq) { struct kvm_irq_routing_entry *e; int i; diff --git a/kvm-stub.c b/kvm-stub.c index db3a7dc183..ec351d9067 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -136,3 +136,7 @@ int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg) { return -ENOSYS; } + +void kvm_irqchip_release_virq(KVMState *s, int virq) +{ +} diff --git a/kvm.h b/kvm.h index 67df1f127f..1779e73d21 100644 --- a/kvm.h +++ b/kvm.h @@ -215,4 +215,5 @@ int kvm_set_ioeventfd_mmio(int fd, uint32_t adr, uint32_t val, bool assign, int kvm_set_ioeventfd_pio_word(int fd, uint16_t adr, uint16_t val, bool assign); int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg); +void kvm_irqchip_release_virq(KVMState *s, int virq); #endif From e7b2030862f63ee02331940b9f1742cd025c1908 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 17 May 2012 10:32:35 -0300 Subject: [PATCH 16/20] kvm: Make kvm_irqchip_commit_routes an internal service Automatically commit route changes after kvm_add_routing_entry and kvm_irqchip_release_virq. There is no performance relevant use case for which collecting multiple route changes is beneficial. This makes kvm_irqchip_commit_routes an internal service which assert()s that the corresponding IOCTL will always succeed. 
Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- hw/pc_piix.c | 6 +----- kvm-all.c | 26 ++++++++++++++------------ kvm.h | 1 - 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/hw/pc_piix.c b/hw/pc_piix.c index c17f906f2d..044dfcb377 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -56,7 +56,7 @@ static void kvm_piix3_setup_irq_routing(bool pci_enabled) { #ifdef CONFIG_KVM KVMState *s = kvm_state; - int ret, i; + int i; if (kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) { for (i = 0; i < 8; ++i) { @@ -77,10 +77,6 @@ static void kvm_piix3_setup_irq_routing(bool pci_enabled) } } } - ret = kvm_irqchip_commit_routes(s); - if (ret < 0) { - hw_error("KVM IRQ routing setup failed"); - } } #endif /* CONFIG_KVM */ } diff --git a/kvm-all.c b/kvm-all.c index ca6cec607e..e96f092842 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -908,6 +908,15 @@ static void kvm_init_irq_routing(KVMState *s) kvm_arch_init_irq_routing(s); } +static void kvm_irqchip_commit_routes(KVMState *s) +{ + int ret; + + s->irq_routes->flags = 0; + ret = kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes); + assert(ret == 0); +} + static void kvm_add_routing_entry(KVMState *s, struct kvm_irq_routing_entry *entry) { @@ -933,6 +942,8 @@ static void kvm_add_routing_entry(KVMState *s, new->u = entry->u; set_gsi(s, entry->gsi); + + kvm_irqchip_commit_routes(s); } void kvm_irqchip_add_irq_route(KVMState *s, int irq, int irqchip, int pin) @@ -949,12 +960,6 @@ void kvm_irqchip_add_irq_route(KVMState *s, int irq, int irqchip, int pin) kvm_add_routing_entry(s, &e); } -int kvm_irqchip_commit_routes(KVMState *s) -{ - s->irq_routes->flags = 0; - return kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes); -} - void kvm_irqchip_release_virq(KVMState *s, int virq) { struct kvm_irq_routing_entry *e; @@ -968,6 +973,8 @@ void kvm_irqchip_release_virq(KVMState *s, int virq) } } clear_gsi(s, virq); + + kvm_irqchip_commit_routes(s); } static unsigned int kvm_hash_msi(uint32_t data) @@ -1049,7 +1056,7 @@ int 
kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) route = kvm_lookup_msi_route(s, msg); if (!route) { - int virq, ret; + int virq; virq = kvm_irqchip_get_virq(s); if (virq < 0) { @@ -1068,11 +1075,6 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) QTAILQ_INSERT_TAIL(&s->msi_hashtab[kvm_hash_msi(msg.data)], route, entry); - - ret = kvm_irqchip_commit_routes(s); - if (ret < 0) { - return ret; - } } assert(route->kroute.type == KVM_IRQ_ROUTING_MSI); diff --git a/kvm.h b/kvm.h index 1779e73d21..f0d0c539c9 100644 --- a/kvm.h +++ b/kvm.h @@ -135,7 +135,6 @@ int kvm_irqchip_set_irq(KVMState *s, int irq, int level); int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg); void kvm_irqchip_add_irq_route(KVMState *s, int gsi, int irqchip, int pin); -int kvm_irqchip_commit_routes(KVMState *s); void kvm_put_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); From 39853bbc497d4ca19c553a620afdc459e81d67ce Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 17 May 2012 10:32:36 -0300 Subject: [PATCH 17/20] kvm: Introduce kvm_irqchip_add/remove_irqfd Add services to associate an eventfd file descriptor as input with an IRQ line as output. Such a line can be an input pin of an in-kernel irqchip or a virtual line returned by kvm_irqchip_add_msi_route. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- kvm-all.c | 30 ++++++++++++++++++++++++++++++ kvm-stub.c | 10 ++++++++++ kvm.h | 3 +++ 3 files changed, 43 insertions(+) diff --git a/kvm-all.c b/kvm-all.c index e96f092842..489ee53ad2 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1108,6 +1108,21 @@ int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg) return virq; } +static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign) +{ + struct kvm_irqfd irqfd = { + .fd = fd, + .gsi = virq, + .flags = assign ?
0 : KVM_IRQFD_FLAG_DEASSIGN, + }; + + if (!kvm_irqchip_in_kernel()) { + return -ENOSYS; + } + + return kvm_vm_ioctl(s, KVM_IRQFD, &irqfd); +} + #else /* !KVM_CAP_IRQ_ROUTING */ static void kvm_init_irq_routing(KVMState *s) @@ -1123,8 +1138,23 @@ int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg) { abort(); } + +static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign) +{ + abort(); +} #endif /* !KVM_CAP_IRQ_ROUTING */ +int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq) +{ + return kvm_irqchip_assign_irqfd(s, fd, virq, true); +} + +int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq) +{ + return kvm_irqchip_assign_irqfd(s, fd, virq, false); +} + static int kvm_irqchip_create(KVMState *s) { QemuOptsList *list = qemu_find_opts("machine"); diff --git a/kvm-stub.c b/kvm-stub.c index ec351d9067..b4cf03fb7f 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -140,3 +140,13 @@ int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg) void kvm_irqchip_release_virq(KVMState *s, int virq) { } + +int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq) +{ + return -ENOSYS; +} + +int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq) +{ + return -ENOSYS; +} diff --git a/kvm.h b/kvm.h index f0d0c539c9..9c7b0ea6ae 100644 --- a/kvm.h +++ b/kvm.h @@ -215,4 +215,7 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t adr, uint16_t val, bool assign); int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg); void kvm_irqchip_release_virq(KVMState *s, int virq); + +int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq); +int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq); #endif From bbf3b80401d55ac3339160cefa17f0015317f512 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 17 May 2012 10:32:37 -0300 Subject: [PATCH 18/20] kvm: Enable use of kvm_irqchip_in_kernel in hwlib code Provide a dummy kvm_kernel_irqchip so that kvm_irqchip_in_kernel can be used by code that is not under CONFIG_KVM protection. 
Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- kvm-stub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kvm-stub.c b/kvm-stub.c index b4cf03fb7f..ec9a36454d 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -18,6 +18,7 @@ #include "kvm.h" KVMState *kvm_state; +bool kvm_kernel_irqchip; int kvm_init_vcpu(CPUArchState *env) { From cb697aaab9c1d8078721513a11cb1ce2729b9c92 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 17 May 2012 10:32:38 -0300 Subject: [PATCH 19/20] msix: Add msix_nr_vectors_allocated Analogously to msi_nr_vectors_allocated, add a service for MSI-X. Will be used by the virtio-pci layer. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- hw/msix.c | 5 +++++ hw/msix.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/hw/msix.c b/hw/msix.c index 1622e16188..59c7a8388f 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -452,6 +452,11 @@ void msix_unuse_all_vectors(PCIDevice *dev) msix_free_irq_entries(dev); } +unsigned int msix_nr_vectors_allocated(const PCIDevice *dev) +{ + return dev->msix_entries_nr; +} + static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector) { MSIMessage msg; diff --git a/hw/msix.h b/hw/msix.h index f33f18b484..50aee8221a 100644 --- a/hw/msix.h +++ b/hw/msix.h @@ -13,6 +13,8 @@ void msix_write_config(PCIDevice *pci_dev, uint32_t address, int msix_uninit(PCIDevice *d, MemoryRegion *bar); +unsigned int msix_nr_vectors_allocated(const PCIDevice *dev); + void msix_save(PCIDevice *dev, QEMUFile *f); void msix_load(PCIDevice *dev, QEMUFile *f); From 7d37d351dffee60fc7048bbfd8573421f15eb724 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 17 May 2012 10:32:39 -0300 Subject: [PATCH 20/20] virtio/vhost: Add support for KVM in-kernel MSI injection Make use of the new vector notifier to track changes of the MSI-X configuration of virtio PCI devices. On enabling events, we establish the required virtual IRQ to MSI-X message route and link the signaling eventfd file descriptor to this vIRQ line. 
That way, vhost-generated interrupts can be directly delivered to an in-kernel MSI-X consumer like the x86 APIC. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- hw/virtio-pci.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++ hw/virtio-pci.h | 6 +++ 2 files changed, 132 insertions(+) diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index 4a4413d52c..01f5b92b1c 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -24,6 +24,7 @@ #include "virtio-scsi.h" #include "pci.h" #include "qemu-error.h" +#include "msi.h" #include "msix.h" #include "net.h" #include "loader.h" @@ -539,6 +540,107 @@ static void virtio_pci_guest_notifier_read(void *opaque) } } +static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, + unsigned int queue_no, + unsigned int vector, + MSIMessage msg) +{ + VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no); + VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; + int fd, ret; + + fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vq)); + + if (irqfd->users == 0) { + ret = kvm_irqchip_add_msi_route(kvm_state, msg); + if (ret < 0) { + return ret; + } + irqfd->virq = ret; + } + irqfd->users++; + + ret = kvm_irqchip_add_irqfd(kvm_state, fd, irqfd->virq); + if (ret < 0) { + if (--irqfd->users == 0) { + kvm_irqchip_release_virq(kvm_state, irqfd->virq); + } + return ret; + } + + qemu_set_fd_handler(fd, NULL, NULL, NULL); + + return 0; +} + +static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy, + unsigned int queue_no, + unsigned int vector) +{ + VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no); + VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; + int fd, ret; + + fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vq)); + + ret = kvm_irqchip_remove_irqfd(kvm_state, fd, irqfd->virq); + assert(ret == 0); + + if (--irqfd->users == 0) { + kvm_irqchip_release_virq(kvm_state, irqfd->virq); + } + + qemu_set_fd_handler(fd, virtio_pci_guest_notifier_read, NULL, vq); +} + +static int 
kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector, + MSIMessage msg) +{ + VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); + VirtIODevice *vdev = proxy->vdev; + int ret, queue_no; + + for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + break; + } + if (virtio_queue_vector(vdev, queue_no) != vector) { + continue; + } + ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg); + if (ret < 0) { + goto undo; + } + } + return 0; + +undo: + while (--queue_no >= 0) { + if (virtio_queue_vector(vdev, queue_no) != vector) { + continue; + } + kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector); + } + return ret; +} + +static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector) +{ + VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); + VirtIODevice *vdev = proxy->vdev; + int queue_no; + + for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + break; + } + if (virtio_queue_vector(vdev, queue_no) != vector) { + continue; + } + kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector); + } +} + static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign) { VirtIOPCIProxy *proxy = opaque; @@ -555,6 +657,9 @@ static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign) } else { qemu_set_fd_handler(event_notifier_get_fd(notifier), NULL, NULL, NULL); + /* Test and clear notifier before closing it, + * in case poll callback didn't have time to run. 
*/ + virtio_pci_guest_notifier_read(vq); event_notifier_cleanup(notifier); } @@ -573,6 +678,13 @@ static int virtio_pci_set_guest_notifiers(void *opaque, bool assign) VirtIODevice *vdev = proxy->vdev; int r, n; + /* Must unset vector notifier while guest notifier is still assigned */ + if (kvm_irqchip_in_kernel() && !assign) { + msix_unset_vector_notifiers(&proxy->pci_dev); + g_free(proxy->vector_irqfd); + proxy->vector_irqfd = NULL; + } + for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) { if (!virtio_queue_get_num(vdev, n)) { break; @@ -584,10 +696,24 @@ static int virtio_pci_set_guest_notifiers(void *opaque, bool assign) } } + /* Must set vector notifier after guest notifier has been assigned */ + if (kvm_irqchip_in_kernel() && assign) { + proxy->vector_irqfd = + g_malloc0(sizeof(*proxy->vector_irqfd) * + msix_nr_vectors_allocated(&proxy->pci_dev)); + r = msix_set_vector_notifiers(&proxy->pci_dev, + kvm_virtio_pci_vector_use, + kvm_virtio_pci_vector_release); + if (r < 0) { + goto assign_error; + } + } + return 0; assign_error: /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */ + assert(assign); while (--n >= 0) { virtio_pci_set_guest_notifier(opaque, n, !assign); } diff --git a/hw/virtio-pci.h b/hw/virtio-pci.h index e5604282e5..8d28d4b789 100644 --- a/hw/virtio-pci.h +++ b/hw/virtio-pci.h @@ -24,6 +24,11 @@ #define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT 1 #define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT) +typedef struct { + int virq; + unsigned int users; +} VirtIOIRQFD; + typedef struct { PCIDevice pci_dev; VirtIODevice *vdev; @@ -44,6 +49,7 @@ typedef struct { VirtIOSCSIConf scsi; bool ioeventfd_disabled; bool ioeventfd_started; + VirtIOIRQFD *vector_irqfd; } VirtIOPCIProxy; void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev);