From 28f52cc04d341045e810bd487a478fa9ec5f40be Mon Sep 17 00:00:00 2001 From: Vadim Rozenfeld Date: Sun, 18 Dec 2011 22:48:13 +0200 Subject: [PATCH 01/20] hyper-v: introduce Hyper-V support infrastructure. [Jan: fix build with CONFIG_USER_ONLY] Signed-off-by: Marcelo Tosatti --- Makefile.target | 2 ++ target-i386/cpuid.c | 14 ++++++++++ target-i386/hyperv.c | 64 ++++++++++++++++++++++++++++++++++++++++++++ target-i386/hyperv.h | 43 +++++++++++++++++++++++++++++ 4 files changed, 123 insertions(+) create mode 100644 target-i386/hyperv.c create mode 100644 target-i386/hyperv.h diff --git a/Makefile.target b/Makefile.target index 06d79b8028..798dd30fe9 100644 --- a/Makefile.target +++ b/Makefile.target @@ -199,6 +199,8 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o obj-y += memory.o savevm.o LIBS+=-lz +obj-i386-y +=hyperv.o + QEMU_CFLAGS += $(VNC_TLS_CFLAGS) QEMU_CFLAGS += $(VNC_SASL_CFLAGS) QEMU_CFLAGS += $(VNC_JPEG_CFLAGS) diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c index 91a104ba0b..b9bfeaff70 100644 --- a/target-i386/cpuid.c +++ b/target-i386/cpuid.c @@ -27,6 +27,8 @@ #include "qemu-option.h" #include "qemu-config.h" +#include "hyperv.h" + /* feature flags taken from "Intel Processor Identification and the CPUID * Instruction" and AMD's "CPUID Specification". In cases of disagreement * between feature naming conventions, aliases may be added. 
@@ -716,6 +718,14 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model) goto error; } x86_cpu_def->tsc_khz = tsc_freq / 1000; + } else if (!strcmp(featurestr, "hv_spinlocks")) { + char *err; + numvalue = strtoul(val, &err, 0); + if (!*val || *err) { + fprintf(stderr, "bad numerical value %s\n", val); + goto error; + } + hyperv_set_spinlock_retries(numvalue); } else { fprintf(stderr, "unrecognized feature %s\n", featurestr); goto error; @@ -724,6 +734,10 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model) check_cpuid = 1; } else if (!strcmp(featurestr, "enforce")) { check_cpuid = enforce_cpuid = 1; + } else if (!strcmp(featurestr, "hv_relaxed")) { + hyperv_enable_relaxed_timing(true); + } else if (!strcmp(featurestr, "hv_vapic")) { + hyperv_enable_vapic_recommended(true); } else { fprintf(stderr, "feature string `%s' not in format (+feature|-feature|feature=xyz)\n", featurestr); goto error; diff --git a/target-i386/hyperv.c b/target-i386/hyperv.c new file mode 100644 index 0000000000..f284e99772 --- /dev/null +++ b/target-i386/hyperv.c @@ -0,0 +1,64 @@ +/* + * QEMU Hyper-V support + * + * Copyright Red Hat, Inc. 2011 + * + * Author: Vadim Rozenfeld + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include "hyperv.h" + +static bool hyperv_vapic; +static bool hyperv_relaxed_timing; +static int hyperv_spinlock_attempts = HYPERV_SPINLOCK_NEVER_RETRY; + +void hyperv_enable_vapic_recommended(bool val) +{ + hyperv_vapic = val; +} + +void hyperv_enable_relaxed_timing(bool val) +{ + hyperv_relaxed_timing = val; +} + +void hyperv_set_spinlock_retries(int val) +{ + hyperv_spinlock_attempts = val; + if (hyperv_spinlock_attempts < 0xFFF) { + hyperv_spinlock_attempts = 0xFFF; + } +} + +bool hyperv_enabled(void) +{ + return hyperv_hypercall_available() || hyperv_relaxed_timing_enabled(); +} + +bool hyperv_hypercall_available(void) +{ + if (hyperv_vapic || + (hyperv_spinlock_attempts != HYPERV_SPINLOCK_NEVER_RETRY)) { + return true; + } + return false; +} + +bool hyperv_vapic_recommended(void) +{ + return hyperv_vapic; +} + +bool hyperv_relaxed_timing_enabled(void) +{ + return hyperv_relaxed_timing; +} + +int hyperv_get_spinlock_retries(void) +{ + return hyperv_spinlock_attempts; +} diff --git a/target-i386/hyperv.h b/target-i386/hyperv.h new file mode 100644 index 0000000000..15467bf397 --- /dev/null +++ b/target-i386/hyperv.h @@ -0,0 +1,43 @@ +/* + * QEMU Hyper-V support + * + * Copyright Red Hat, Inc. 2011 + * + * Author: Vadim Rozenfeld + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#ifndef QEMU_HW_HYPERV_H +#define QEMU_HW_HYPERV_H 1 + +#include "qemu-common.h" +#include <asm/hyperv.h> + +#ifndef HYPERV_SPINLOCK_NEVER_RETRY +#define HYPERV_SPINLOCK_NEVER_RETRY 0xFFFFFFFF +#endif + +#ifndef KVM_CPUID_SIGNATURE_NEXT +#define KVM_CPUID_SIGNATURE_NEXT 0x40000100 +#endif + +#ifndef CONFIG_USER_ONLY +void hyperv_enable_vapic_recommended(bool val); +void hyperv_enable_relaxed_timing(bool val); +void hyperv_set_spinlock_retries(int val); +#else +static inline void hyperv_enable_vapic_recommended(bool val) { } +static inline void hyperv_enable_relaxed_timing(bool val) { } +static inline void hyperv_set_spinlock_retries(int val) { } +#endif + +bool hyperv_enabled(void); +bool hyperv_hypercall_available(void); +bool hyperv_vapic_recommended(void); +bool hyperv_relaxed_timing_enabled(void); +int hyperv_get_spinlock_retries(void); + +#endif /* QEMU_HW_HYPERV_H */ From eab70139a6772f1735444d4f1daadc7bcfa7dc47 Mon Sep 17 00:00:00 2001 From: Vadim Rozenfeld Date: Sun, 18 Dec 2011 22:48:14 +0200 Subject: [PATCH 02/20] hyper-v: initialize Hyper-V CPUID leaves. 
Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 65 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 04e65c5ea1..1f56492cd2 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -29,6 +29,7 @@ #include "hw/pc.h" #include "hw/apic.h" #include "ioport.h" +#include "hyperv.h" //#define DEBUG_KVM @@ -373,11 +374,16 @@ int kvm_arch_init_vcpu(CPUState *env) cpuid_i = 0; /* Paravirtualization CPUIDs */ - memcpy(signature, "KVMKVMKVM\0\0\0", 12); c = &cpuid_data.entries[cpuid_i++]; memset(c, 0, sizeof(*c)); c->function = KVM_CPUID_SIGNATURE; - c->eax = 0; + if (!hyperv_enabled()) { + memcpy(signature, "KVMKVMKVM\0\0\0", 12); + c->eax = 0; + } else { + memcpy(signature, "Microsoft Hv", 12); + c->eax = HYPERV_CPUID_MIN; + } c->ebx = signature[0]; c->ecx = signature[1]; c->edx = signature[2]; @@ -388,6 +394,54 @@ int kvm_arch_init_vcpu(CPUState *env) c->eax = env->cpuid_kvm_features & kvm_arch_get_supported_cpuid(s, KVM_CPUID_FEATURES, 0, R_EAX); + if (hyperv_enabled()) { + memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12); + c->eax = signature[0]; + + c = &cpuid_data.entries[cpuid_i++]; + memset(c, 0, sizeof(*c)); + c->function = HYPERV_CPUID_VERSION; + c->eax = 0x00001bbc; + c->ebx = 0x00060001; + + c = &cpuid_data.entries[cpuid_i++]; + memset(c, 0, sizeof(*c)); + c->function = HYPERV_CPUID_FEATURES; + if (hyperv_relaxed_timing_enabled()) { + c->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE; + } + if (hyperv_vapic_recommended()) { + c->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE; + c->eax |= HV_X64_MSR_APIC_ACCESS_AVAILABLE; + } + + c = &cpuid_data.entries[cpuid_i++]; + memset(c, 0, sizeof(*c)); + c->function = HYPERV_CPUID_ENLIGHTMENT_INFO; + if (hyperv_relaxed_timing_enabled()) { + c->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED; + } + if (hyperv_vapic_recommended()) { + c->eax |= HV_X64_APIC_ACCESS_RECOMMENDED; + } + c->ebx = hyperv_get_spinlock_retries(); + + c = 
&cpuid_data.entries[cpuid_i++]; + memset(c, 0, sizeof(*c)); + c->function = HYPERV_CPUID_IMPLEMENT_LIMITS; + c->eax = 0x40; + c->ebx = 0x40; + + c = &cpuid_data.entries[cpuid_i++]; + memset(c, 0, sizeof(*c)); + c->function = KVM_CPUID_SIGNATURE_NEXT; + memcpy(signature, "KVMKVMKVM\0\0\0", 12); + c->eax = 0; + c->ebx = signature[0]; + c->ecx = signature[1]; + c->edx = signature[2]; + } + has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF); cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); @@ -933,6 +987,13 @@ static int kvm_put_msrs(CPUState *env, int level) kvm_msr_entry_set(&msrs[n++], MSR_KVM_ASYNC_PF_EN, env->async_pf_en_msr); } + if (hyperv_hypercall_available()) { + kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_GUEST_OS_ID, 0); + kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_HYPERCALL, 0); + } + if (hyperv_vapic_recommended()) { + kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE, 0); + } } if (env->mcg_cap) { int i; From 60ba3cc231d6bc7b802ad4fe6b6fc159ecb112e2 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 15 Oct 2011 14:33:17 +0200 Subject: [PATCH 03/20] msi: Generalize msix_supported to msi_supported Rename msix_supported to msi_supported and control MSI and MSI-X activation this way. That was likely the original intention for this flag, but MSI support came after MSI-X. Signed-off-by: Jan Kiszka --- hw/msi.c | 8 ++++++++ hw/msi.h | 2 ++ hw/msix.c | 11 +++++------ hw/msix.h | 2 -- hw/pc.c | 4 ++-- 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/hw/msi.c b/hw/msi.c index f214fcf579..5d6ceb6df0 100644 --- a/hw/msi.c +++ b/hw/msi.c @@ -36,6 +36,9 @@ #define PCI_MSI_VECTORS_MAX 32 +/* Flag for interrupt controller to declare MSI/MSI-X support */ +bool msi_supported; + /* If we get rid of cap allocator, we won't need this. 
*/ static inline uint8_t msi_cap_sizeof(uint16_t flags) { @@ -116,6 +119,11 @@ int msi_init(struct PCIDevice *dev, uint8_t offset, uint16_t flags; uint8_t cap_size; int config_offset; + + if (!msi_supported) { + return -ENOTSUP; + } + MSI_DEV_PRINTF(dev, "init offset: 0x%"PRIx8" vector: %"PRId8 " 64bit %d mask %d\n", diff --git a/hw/msi.h b/hw/msi.h index 5766018d79..3040bb0b43 100644 --- a/hw/msi.h +++ b/hw/msi.h @@ -24,6 +24,8 @@ #include "qemu-common.h" #include "pci.h" +extern bool msi_supported; + bool msi_enabled(const PCIDevice *dev); int msi_init(struct PCIDevice *dev, uint8_t offset, unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask); diff --git a/hw/msix.c b/hw/msix.c index f47d26bb49..3835eaaf28 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -15,6 +15,7 @@ */ #include "hw.h" +#include "msi.h" #include "msix.h" #include "pci.h" #include "range.h" @@ -35,9 +36,6 @@ #define MSIX_MAX_ENTRIES 32 -/* Flag for interrupt controller to declare MSI-X support */ -int msix_supported; - /* Add MSI-X capability to the config space for the device. */ /* Given a bar and its size, add MSI-X table on top of it * and fill MSI-X capability in the config space. 
@@ -238,10 +236,11 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries, unsigned bar_nr, unsigned bar_size) { int ret; - /* Nothing to do if MSI is not supported by interrupt controller */ - if (!msix_supported) - return -ENOTSUP; + /* Nothing to do if MSI is not supported by interrupt controller */ + if (!msi_supported) { + return -ENOTSUP; + } if (nentries > MSIX_MAX_ENTRIES) return -EINVAL; diff --git a/hw/msix.h b/hw/msix.h index 7e04336618..5aba22b858 100644 --- a/hw/msix.h +++ b/hw/msix.h @@ -29,6 +29,4 @@ void msix_notify(PCIDevice *dev, unsigned vector); void msix_reset(PCIDevice *dev); -extern int msix_supported; - #endif diff --git a/hw/pc.c b/hw/pc.c index 85304cf115..04304e0ca3 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -36,7 +36,7 @@ #include "elf.h" #include "multiboot.h" #include "mc146818rtc.h" -#include "msix.h" +#include "msi.h" #include "sysbus.h" #include "sysemu.h" #include "blockdev.h" @@ -896,7 +896,7 @@ static DeviceState *apic_init(void *env, uint8_t apic_id) apic_mapped = 1; } - msix_supported = 1; + msi_supported = true; return dev; } From 3b9a6ee50e88c47f64486b6b143b1363fa5c327c Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 15 Oct 2011 10:01:27 +0200 Subject: [PATCH 04/20] kvm: Move kvmclock into hw/kvm folder More KVM-specific devices will come, so let's start with moving the kvmclock into a dedicated folder. 
Signed-off-by: Jan Kiszka --- Makefile.target | 4 ++-- configure | 1 + hw/{kvmclock.c => kvm/clock.c} | 4 ++-- hw/{kvmclock.h => kvm/clock.h} | 0 hw/pc_piix.c | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) rename hw/{kvmclock.c => kvm/clock.c} (98%) rename hw/{kvmclock.h => kvm/clock.h} (100%) diff --git a/Makefile.target b/Makefile.target index 798dd30fe9..0451b637d6 100644 --- a/Makefile.target +++ b/Makefile.target @@ -233,7 +233,7 @@ obj-i386-y += vmport.o obj-i386-y += pci-hotplug.o smbios.o wdt_ib700.o obj-i386-y += debugcon.o multiboot.o obj-i386-y += pc_piix.o -obj-i386-$(CONFIG_KVM) += kvmclock.o +obj-i386-$(CONFIG_KVM) += kvm/clock.o obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o # shared objects @@ -424,7 +424,7 @@ qmp-commands-old.h: $(SRC_PATH)/qmp-commands.hx clean: rm -f *.o *.a *~ $(PROGS) nwfpe/*.o fpu/*.o - rm -f *.d */*.d tcg/*.o ide/*.o 9pfs/*.o + rm -f *.d */*.d tcg/*.o ide/*.o 9pfs/*.o kvm/*.o rm -f hmp-commands.h qmp-commands-old.h gdbstub-xml.c ifdef CONFIG_TRACE_SYSTEMTAP rm -f *.stp diff --git a/configure b/configure index 467e87bf09..9d5175bb0f 100755 --- a/configure +++ b/configure @@ -3363,6 +3363,7 @@ mkdir -p $target_dir/fpu mkdir -p $target_dir/tcg mkdir -p $target_dir/ide mkdir -p $target_dir/9pfs +mkdir -p $target_dir/kvm if test "$target" = "arm-linux-user" -o "$target" = "armeb-linux-user" -o "$target" = "arm-bsd-user" -o "$target" = "armeb-bsd-user" ; then mkdir -p $target_dir/nwfpe fi diff --git a/hw/kvmclock.c b/hw/kvm/clock.c similarity index 98% rename from hw/kvmclock.c rename to hw/kvm/clock.c index 3b9fb20495..bb28c088db 100644 --- a/hw/kvmclock.c +++ b/hw/kvm/clock.c @@ -15,9 +15,9 @@ #include "qemu-common.h" #include "sysemu.h" -#include "sysbus.h" #include "kvm.h" -#include "kvmclock.h" +#include "hw/sysbus.h" +#include "hw/kvm/clock.h" #include <linux/kvm.h> #include <linux/kvm_para.h> diff --git a/hw/kvmclock.h b/hw/kvm/clock.h similarity index 100% rename from hw/kvmclock.h rename to hw/kvm/clock.h diff --git a/hw/pc_piix.c 
b/hw/pc_piix.c index 3aea3cc9de..cde810db27 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -34,7 +34,7 @@ #include "boards.h" #include "ide.h" #include "kvm.h" -#include "kvmclock.h" +#include "kvm/clock.h" #include "sysemu.h" #include "sysbus.h" #include "arch_init.h" From ab388a98148e5b44ba0b6bc0269fdb983b4a6838 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 16 Oct 2011 11:59:30 +0200 Subject: [PATCH 05/20] apic: Stop timer on reset All LVTs are masked on reset, so the timer becomes ineffective. Letting it tick nevertheless is harmless, but will at least create a spurious trace event. Signed-off-by: Jan Kiszka --- hw/apic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/apic.c b/hw/apic.c index 9d0f460b58..4b97b17dbe 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -528,6 +528,8 @@ void apic_init_reset(DeviceState *d) s->initial_count_load_time = 0; s->next_time = 0; s->wait_for_sipi = 1; + + qemu_del_timer(s->timer); } static void apic_startup(APICState *s, int vector_num) From 02c091953cd8c24db46649ad2862b9648c50f865 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 18 Oct 2011 00:00:06 +0800 Subject: [PATCH 06/20] apic: Inject external NMI events via LINT1 On real hardware, NMI button events are injected via the LINT1 line of the APICs. E.g. kdump expects this wiring and gets upset if the per-APIC LINT1 mask is not respected, i.e. if NMIs are injected to VCPUs that should not receive them. Change the APIC emulation code to reflect this. Based on qemu-kvm patch by Lai Jiangshan. 
CC: Lai Jiangshan Reported-by: Kenji Kaneshige Signed-off-by: Jan Kiszka --- cpus.c | 6 +++++- hw/apic.c | 7 +++++++ hw/apic.h | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/cpus.c b/cpus.c index 2dae549225..f45a438b29 100644 --- a/cpus.c +++ b/cpus.c @@ -1217,7 +1217,11 @@ void qmp_inject_nmi(Error **errp) CPUState *env; for (env = first_cpu; env != NULL; env = env->next_cpu) { - cpu_interrupt(env, CPU_INTERRUPT_NMI); + if (!env->apic_state) { + cpu_interrupt(env, CPU_INTERRUPT_NMI); + } else { + apic_deliver_nmi(env->apic_state); + } } #else error_set(errp, QERR_UNSUPPORTED); diff --git a/hw/apic.c b/hw/apic.c index 4b97b17dbe..b9d733c31a 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -205,6 +205,13 @@ void apic_deliver_pic_intr(DeviceState *d, int level) } } +void apic_deliver_nmi(DeviceState *d) +{ + APICState *s = DO_UPCAST(APICState, busdev.qdev, d); + + apic_local_deliver(s, APIC_LVT_LINT1); +} + #define foreach_apic(apic, deliver_bitmask, code) \ {\ int __i, __j, __mask;\ diff --git a/hw/apic.h b/hw/apic.h index a5c910fe0a..a62d83ba9f 100644 --- a/hw/apic.h +++ b/hw/apic.h @@ -8,6 +8,7 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, uint8_t vector_num, uint8_t trigger_mode); int apic_accept_pic_intr(DeviceState *s); void apic_deliver_pic_intr(DeviceState *s, int level); +void apic_deliver_nmi(DeviceState *d); int apic_get_interrupt(DeviceState *s); void apic_reset_irq_delivered(void); int apic_get_irq_delivered(void); From 343270ea8777fa95ce2c287fc00c2eaa53255265 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 13 Dec 2011 15:39:04 +0100 Subject: [PATCH 07/20] apic: Introduce apic_report_irq_delivered The in-kernel i8259 and IOAPIC backends for KVM will need this, so encapsulate the shared bits. 
Signed-off-by: Jan Kiszka --- hw/apic.c | 11 ++++++++--- hw/apic.h | 1 + trace-events | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/hw/apic.c b/hw/apic.c index b9d733c31a..bec493bf63 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -413,6 +413,13 @@ static void apic_update_irq(APICState *s) } } +void apic_report_irq_delivered(int delivered) +{ + apic_irq_delivered += delivered; + + trace_apic_report_irq_delivered(apic_irq_delivered); +} + void apic_reset_irq_delivered(void) { trace_apic_reset_irq_delivered(apic_irq_delivered); @@ -429,9 +436,7 @@ int apic_get_irq_delivered(void) static void apic_set_irq(APICState *s, int vector_num, int trigger_mode) { - apic_irq_delivered += !get_bit(s->irr, vector_num); - - trace_apic_set_irq(apic_irq_delivered); + apic_report_irq_delivered(!get_bit(s->irr, vector_num)); set_bit(s->irr, vector_num); if (trigger_mode) diff --git a/hw/apic.h b/hw/apic.h index a62d83ba9f..8173d8a790 100644 --- a/hw/apic.h +++ b/hw/apic.h @@ -10,6 +10,7 @@ int apic_accept_pic_intr(DeviceState *s); void apic_deliver_pic_intr(DeviceState *s, int level); void apic_deliver_nmi(DeviceState *d); int apic_get_interrupt(DeviceState *s); +void apic_report_irq_delivered(int delivered); void apic_reset_irq_delivered(void); int apic_get_irq_delivered(void); void cpu_set_apic_base(DeviceState *s, uint64_t val); diff --git a/trace-events b/trace-events index c18435bbe1..5a260d6a09 100644 --- a/trace-events +++ b/trace-events @@ -95,9 +95,9 @@ cpu_get_apic_base(uint64_t val) "%016"PRIx64 apic_mem_readl(uint64_t addr, uint32_t val) "%"PRIx64" = %08x" apic_mem_writel(uint64_t addr, uint32_t val) "%"PRIx64" = %08x" # coalescing +apic_report_irq_delivered(int apic_irq_delivered) "coalescing %d" apic_reset_irq_delivered(int apic_irq_delivered) "old coalescing %d" apic_get_irq_delivered(int apic_irq_delivered) "returning coalescing %d" -apic_set_irq(int apic_irq_delivered) "coalescing %d" # hw/cs4231.c cs4231_mem_readl_dreg(uint32_t reg, uint32_t ret) 
"read dreg %d: 0x%02x" From dae01685280cef9b70ade9167340b5373eada9e8 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 16 Oct 2011 11:16:36 +0200 Subject: [PATCH 08/20] apic: Factor out base class for KVM reuse The KVM in-kernel APIC model will reuse parts of the user space model while providing the same frontend view to guest and most management interfaces. Factor out an APIC base class to encapsulate those parts that will be shared by user space and KVM model. This class offers callback hooks for init, base/tpr setting, and the external NMI delivery that will be set via APICCommonInfo structure and implemented specifically in the subclasses. Signed-off-by: Jan Kiszka --- Makefile.target | 2 +- hw/apic.c | 338 ++++++--------------------------------------- hw/apic.h | 1 - hw/apic_common.c | 252 +++++++++++++++++++++++++++++++++ hw/apic_internal.h | 112 +++++++++++++++ 5 files changed, 406 insertions(+), 299 deletions(-) create mode 100644 hw/apic_common.c create mode 100644 hw/apic_internal.h diff --git a/Makefile.target b/Makefile.target index 0451b637d6..4446273cd9 100644 --- a/Makefile.target +++ b/Makefile.target @@ -228,7 +228,7 @@ obj-y += device-hotplug.o # Hardware support obj-i386-y += vga.o obj-i386-y += mc146818rtc.o pc.o -obj-i386-y += cirrus_vga.o sga.o apic.o ioapic.o piix_pci.o +obj-i386-y += cirrus_vga.o sga.o apic_common.o apic.o ioapic.o piix_pci.o obj-i386-y += vmport.o obj-i386-y += pci-hotplug.o smbios.o wdt_ib700.o obj-i386-y += debugcon.o multiboot.o diff --git a/hw/apic.c b/hw/apic.c index bec493bf63..387a46940a 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -16,53 +16,13 @@ * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see */ -#include "hw.h" +#include "apic_internal.h" #include "apic.h" #include "ioapic.h" -#include "qemu-timer.h" #include "host-utils.h" -#include "sysbus.h" #include "trace.h" #include "pc.h" -/* APIC Local Vector Table */ -#define APIC_LVT_TIMER 0 -#define 
APIC_LVT_THERMAL 1 -#define APIC_LVT_PERFORM 2 -#define APIC_LVT_LINT0 3 -#define APIC_LVT_LINT1 4 -#define APIC_LVT_ERROR 5 -#define APIC_LVT_NB 6 - -/* APIC delivery modes */ -#define APIC_DM_FIXED 0 -#define APIC_DM_LOWPRI 1 -#define APIC_DM_SMI 2 -#define APIC_DM_NMI 4 -#define APIC_DM_INIT 5 -#define APIC_DM_SIPI 6 -#define APIC_DM_EXTINT 7 - -/* APIC destination mode */ -#define APIC_DESTMODE_FLAT 0xf -#define APIC_DESTMODE_CLUSTER 1 - -#define APIC_TRIGGER_EDGE 0 -#define APIC_TRIGGER_LEVEL 1 - -#define APIC_LVT_TIMER_PERIODIC (1<<17) -#define APIC_LVT_MASKED (1<<16) -#define APIC_LVT_LEVEL_TRIGGER (1<<15) -#define APIC_LVT_REMOTE_IRR (1<<14) -#define APIC_INPUT_POLARITY (1<<13) -#define APIC_SEND_PENDING (1<<12) - -#define ESR_ILLEGAL_ADDRESS (1 << 7) - -#define APIC_SV_DIRECTED_IO (1<<12) -#define APIC_SV_ENABLE (1<<8) - -#define MAX_APICS 255 #define MAX_APIC_WORDS 8 /* Intel APIC constants: from include/asm/msidef.h */ @@ -75,43 +35,10 @@ #define MSI_ADDR_DEST_ID_SHIFT 12 #define MSI_ADDR_DEST_ID_MASK 0x00ffff0 -#define MSI_ADDR_SIZE 0x100000 +static APICCommonState *local_apics[MAX_APICS + 1]; -typedef struct APICState APICState; - -struct APICState { - SysBusDevice busdev; - MemoryRegion io_memory; - void *cpu_env; - uint32_t apicbase; - uint8_t id; - uint8_t arb_id; - uint8_t tpr; - uint32_t spurious_vec; - uint8_t log_dest; - uint8_t dest_mode; - uint32_t isr[8]; /* in service register */ - uint32_t tmr[8]; /* trigger mode register */ - uint32_t irr[8]; /* interrupt request register */ - uint32_t lvt[APIC_LVT_NB]; - uint32_t esr; /* error register */ - uint32_t icr[2]; - - uint32_t divide_conf; - int count_shift; - uint32_t initial_count; - int64_t initial_count_load_time, next_time; - uint32_t idx; - QEMUTimer *timer; - int sipi_vector; - int wait_for_sipi; -}; - -static APICState *local_apics[MAX_APICS + 1]; -static int apic_irq_delivered; - -static void apic_set_irq(APICState *s, int vector_num, int trigger_mode); -static void 
apic_update_irq(APICState *s); +static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode); +static void apic_update_irq(APICCommonState *s); static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask, uint8_t dest, uint8_t dest_mode); @@ -151,7 +78,7 @@ static inline int get_bit(uint32_t *tab, int index) return !!(tab[i] & mask); } -static void apic_local_deliver(APICState *s, int vector) +static void apic_local_deliver(APICCommonState *s, int vector) { uint32_t lvt = s->lvt[vector]; int trigger_mode; @@ -185,7 +112,7 @@ static void apic_local_deliver(APICState *s, int vector) void apic_deliver_pic_intr(DeviceState *d, int level) { - APICState *s = DO_UPCAST(APICState, busdev.qdev, d); + APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d); if (level) { apic_local_deliver(s, APIC_LVT_LINT0); @@ -205,10 +132,8 @@ void apic_deliver_pic_intr(DeviceState *d, int level) } } -void apic_deliver_nmi(DeviceState *d) +static void apic_external_nmi(APICCommonState *s) { - APICState *s = DO_UPCAST(APICState, busdev.qdev, d); - apic_local_deliver(s, APIC_LVT_LINT1); } @@ -234,7 +159,7 @@ static void apic_bus_deliver(const uint32_t *deliver_bitmask, uint8_t delivery_mode, uint8_t vector_num, uint8_t trigger_mode) { - APICState *apic_iter; + APICCommonState *apic_iter; switch (delivery_mode) { case APIC_DM_LOWPRI: @@ -300,14 +225,8 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode); } -void cpu_set_apic_base(DeviceState *d, uint64_t val) +static void apic_set_base(APICCommonState *s, uint64_t val) { - APICState *s = DO_UPCAST(APICState, busdev.qdev, d); - - trace_cpu_set_apic_base(val); - - if (!s) - return; s->apicbase = (val & 0xfffff000) | (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE)); /* if disabled, cannot be enabled again */ @@ -318,32 +237,12 @@ void cpu_set_apic_base(DeviceState *d, uint64_t val) } } -uint64_t 
cpu_get_apic_base(DeviceState *d) +static void apic_set_tpr(APICCommonState *s, uint8_t val) { - APICState *s = DO_UPCAST(APICState, busdev.qdev, d); - - trace_cpu_get_apic_base(s ? (uint64_t)s->apicbase: 0); - - return s ? s->apicbase : 0; -} - -void cpu_set_apic_tpr(DeviceState *d, uint8_t val) -{ - APICState *s = DO_UPCAST(APICState, busdev.qdev, d); - - if (!s) - return; s->tpr = (val & 0x0f) << 4; apic_update_irq(s); } -uint8_t cpu_get_apic_tpr(DeviceState *d) -{ - APICState *s = DO_UPCAST(APICState, busdev.qdev, d); - - return s ? s->tpr >> 4 : 0; -} - /* return -1 if no bit is set */ static int get_highest_priority_int(uint32_t *tab) { @@ -356,7 +255,7 @@ static int get_highest_priority_int(uint32_t *tab) return -1; } -static int apic_get_ppr(APICState *s) +static int apic_get_ppr(APICCommonState *s) { int tpr, isrv, ppr; @@ -372,7 +271,7 @@ static int apic_get_ppr(APICState *s) return ppr; } -static int apic_get_arb_pri(APICState *s) +static int apic_get_arb_pri(APICCommonState *s) { /* XXX: arbitration */ return 0; @@ -384,7 +283,7 @@ static int apic_get_arb_pri(APICState *s) * 0 - no interrupt, * >0 - interrupt number */ -static int apic_irq_pending(APICState *s) +static int apic_irq_pending(APICCommonState *s) { int irrv, ppr; irrv = get_highest_priority_int(s->irr); @@ -400,7 +299,7 @@ static int apic_irq_pending(APICState *s) } /* signal the CPU if an irq is pending */ -static void apic_update_irq(APICState *s) +static void apic_update_irq(APICCommonState *s) { if (!(s->spurious_vec & APIC_SV_ENABLE)) { return; @@ -413,28 +312,7 @@ static void apic_update_irq(APICState *s) } } -void apic_report_irq_delivered(int delivered) -{ - apic_irq_delivered += delivered; - - trace_apic_report_irq_delivered(apic_irq_delivered); -} - -void apic_reset_irq_delivered(void) -{ - trace_apic_reset_irq_delivered(apic_irq_delivered); - - apic_irq_delivered = 0; -} - -int apic_get_irq_delivered(void) -{ - trace_apic_get_irq_delivered(apic_irq_delivered); - - return 
apic_irq_delivered; -} - -static void apic_set_irq(APICState *s, int vector_num, int trigger_mode) +static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode) { apic_report_irq_delivered(!get_bit(s->irr, vector_num)); @@ -446,7 +324,7 @@ static void apic_set_irq(APICState *s, int vector_num, int trigger_mode) apic_update_irq(s); } -static void apic_eoi(APICState *s) +static void apic_eoi(APICCommonState *s) { int isrv; isrv = get_highest_priority_int(s->isr); @@ -461,7 +339,7 @@ static void apic_eoi(APICState *s) static int apic_find_dest(uint8_t dest) { - APICState *apic = local_apics[dest]; + APICCommonState *apic = local_apics[dest]; int i; if (apic && apic->id == dest) @@ -481,7 +359,7 @@ static int apic_find_dest(uint8_t dest) static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask, uint8_t dest, uint8_t dest_mode) { - APICState *apic_iter; + APICCommonState *apic_iter; int i; if (dest_mode == 0) { @@ -515,36 +393,7 @@ static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask, } } -void apic_init_reset(DeviceState *d) -{ - APICState *s = DO_UPCAST(APICState, busdev.qdev, d); - int i; - - if (!s) - return; - - s->tpr = 0; - s->spurious_vec = 0xff; - s->log_dest = 0; - s->dest_mode = 0xf; - memset(s->isr, 0, sizeof(s->isr)); - memset(s->tmr, 0, sizeof(s->tmr)); - memset(s->irr, 0, sizeof(s->irr)); - for(i = 0; i < APIC_LVT_NB; i++) - s->lvt[i] = 1 << 16; /* mask LVT */ - s->esr = 0; - memset(s->icr, 0, sizeof(s->icr)); - s->divide_conf = 0; - s->count_shift = 0; - s->initial_count = 0; - s->initial_count_load_time = 0; - s->next_time = 0; - s->wait_for_sipi = 1; - - qemu_del_timer(s->timer); -} - -static void apic_startup(APICState *s, int vector_num) +static void apic_startup(APICCommonState *s, int vector_num) { s->sipi_vector = vector_num; cpu_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI); @@ -552,7 +401,7 @@ static void apic_startup(APICState *s, int vector_num) void apic_sipi(DeviceState *d) { - APICState *s = 
DO_UPCAST(APICState, busdev.qdev, d); + APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d); cpu_reset_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI); @@ -566,10 +415,10 @@ static void apic_deliver(DeviceState *d, uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, uint8_t vector_num, uint8_t trigger_mode) { - APICState *s = DO_UPCAST(APICState, busdev.qdev, d); + APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d); uint32_t deliver_bitmask[MAX_APIC_WORDS]; int dest_shorthand = (s->icr[0] >> 18) & 3; - APICState *apic_iter; + APICCommonState *apic_iter; switch (dest_shorthand) { case 0: @@ -612,7 +461,7 @@ static void apic_deliver(DeviceState *d, uint8_t dest, uint8_t dest_mode, int apic_get_interrupt(DeviceState *d) { - APICState *s = DO_UPCAST(APICState, busdev.qdev, d); + APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d); int intno; /* if the APIC is installed or enabled, we let the 8259 handle the @@ -637,7 +486,7 @@ int apic_get_interrupt(DeviceState *d) int apic_accept_pic_intr(DeviceState *d) { - APICState *s = DO_UPCAST(APICState, busdev.qdev, d); + APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d); uint32_t lvt0; if (!s) @@ -652,7 +501,7 @@ int apic_accept_pic_intr(DeviceState *d) return 0; } -static uint32_t apic_get_current_count(APICState *s) +static uint32_t apic_get_current_count(APICCommonState *s) { int64_t d; uint32_t val; @@ -670,7 +519,7 @@ static uint32_t apic_get_current_count(APICState *s) return val; } -static void apic_timer_update(APICState *s, int64_t current_time) +static void apic_timer_update(APICCommonState *s, int64_t current_time) { int64_t next_time, d; @@ -697,7 +546,7 @@ static void apic_timer_update(APICState *s, int64_t current_time) static void apic_timer(void *opaque) { - APICState *s = opaque; + APICCommonState *s = opaque; apic_local_deliver(s, APIC_LVT_TIMER); apic_timer_update(s, s->next_time); @@ -724,7 +573,7 @@ static void apic_mem_writew(void *opaque, 
target_phys_addr_t addr, uint32_t val) static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr) { DeviceState *d; - APICState *s; + APICCommonState *s; uint32_t val; int index; @@ -732,7 +581,7 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr) if (!d) { return 0; } - s = DO_UPCAST(APICState, busdev.qdev, d); + s = DO_UPCAST(APICCommonState, busdev.qdev, d); index = (addr >> 4) & 0xff; switch(index) { @@ -815,7 +664,7 @@ static void apic_send_msi(target_phys_addr_t addr, uint32_t data) static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val) { DeviceState *d; - APICState *s; + APICCommonState *s; int index = (addr >> 4) & 0xff; if (addr > 0xfff || !index) { /* MSI and MMIO APIC are at the same memory location, @@ -831,7 +680,7 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val) if (!d) { return; } - s = DO_UPCAST(APICState, busdev.qdev, d); + s = DO_UPCAST(APICCommonState, busdev.qdev, d); trace_apic_mem_writel(addr, val); @@ -904,96 +753,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val) } } -/* This function is only used for old state version 1 and 2 */ -static int apic_load_old(QEMUFile *f, void *opaque, int version_id) -{ - APICState *s = opaque; - int i; - - if (version_id > 2) - return -EINVAL; - - /* XXX: what if the base changes? 
(registered memory regions) */ - qemu_get_be32s(f, &s->apicbase); - qemu_get_8s(f, &s->id); - qemu_get_8s(f, &s->arb_id); - qemu_get_8s(f, &s->tpr); - qemu_get_be32s(f, &s->spurious_vec); - qemu_get_8s(f, &s->log_dest); - qemu_get_8s(f, &s->dest_mode); - for (i = 0; i < 8; i++) { - qemu_get_be32s(f, &s->isr[i]); - qemu_get_be32s(f, &s->tmr[i]); - qemu_get_be32s(f, &s->irr[i]); - } - for (i = 0; i < APIC_LVT_NB; i++) { - qemu_get_be32s(f, &s->lvt[i]); - } - qemu_get_be32s(f, &s->esr); - qemu_get_be32s(f, &s->icr[0]); - qemu_get_be32s(f, &s->icr[1]); - qemu_get_be32s(f, &s->divide_conf); - s->count_shift=qemu_get_be32(f); - qemu_get_be32s(f, &s->initial_count); - s->initial_count_load_time=qemu_get_be64(f); - s->next_time=qemu_get_be64(f); - - if (version_id >= 2) - qemu_get_timer(f, s->timer); - return 0; -} - -static const VMStateDescription vmstate_apic = { - .name = "apic", - .version_id = 3, - .minimum_version_id = 3, - .minimum_version_id_old = 1, - .load_state_old = apic_load_old, - .fields = (VMStateField []) { - VMSTATE_UINT32(apicbase, APICState), - VMSTATE_UINT8(id, APICState), - VMSTATE_UINT8(arb_id, APICState), - VMSTATE_UINT8(tpr, APICState), - VMSTATE_UINT32(spurious_vec, APICState), - VMSTATE_UINT8(log_dest, APICState), - VMSTATE_UINT8(dest_mode, APICState), - VMSTATE_UINT32_ARRAY(isr, APICState, 8), - VMSTATE_UINT32_ARRAY(tmr, APICState, 8), - VMSTATE_UINT32_ARRAY(irr, APICState, 8), - VMSTATE_UINT32_ARRAY(lvt, APICState, APIC_LVT_NB), - VMSTATE_UINT32(esr, APICState), - VMSTATE_UINT32_ARRAY(icr, APICState, 2), - VMSTATE_UINT32(divide_conf, APICState), - VMSTATE_INT32(count_shift, APICState), - VMSTATE_UINT32(initial_count, APICState), - VMSTATE_INT64(initial_count_load_time, APICState), - VMSTATE_INT64(next_time, APICState), - VMSTATE_TIMER(timer, APICState), - VMSTATE_END_OF_LIST() - } -}; - -static void apic_reset(DeviceState *d) -{ - APICState *s = DO_UPCAST(APICState, busdev.qdev, d); - int bsp; - - bsp = cpu_is_bsp(s->cpu_env); - s->apicbase = 
0xfee00000 | - (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; - - apic_init_reset(d); - - if (bsp) { - /* - * LINT0 delivery mode on CPU #0 is set to ExtInt at initialization - * time typically by BIOS, so PIC interrupt can be delivered to the - * processor when local APIC is enabled. - */ - s->lvt[APIC_LVT_LINT0] = 0x700; - } -} - static const MemoryRegionOps apic_io_ops = { .old_mmio = { .read = { apic_mem_readb, apic_mem_readw, apic_mem_readl, }, @@ -1002,41 +761,26 @@ static const MemoryRegionOps apic_io_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -static int apic_init1(SysBusDevice *dev) +static void apic_init(APICCommonState *s) { - APICState *s = FROM_SYSBUS(APICState, dev); - static int last_apic_idx; - - if (last_apic_idx >= MAX_APICS) { - return -1; - } - memory_region_init_io(&s->io_memory, &apic_io_ops, s, "apic", - MSI_ADDR_SIZE); - sysbus_init_mmio(dev, &s->io_memory); + memory_region_init_io(&s->io_memory, &apic_io_ops, s, "apic-msi", + MSI_SPACE_SIZE); s->timer = qemu_new_timer_ns(vm_clock, apic_timer, s); - s->idx = last_apic_idx++; local_apics[s->idx] = s; - return 0; } -static SysBusDeviceInfo apic_info = { - .init = apic_init1, - .qdev.name = "apic", - .qdev.size = sizeof(APICState), - .qdev.vmsd = &vmstate_apic, - .qdev.reset = apic_reset, - .qdev.no_user = 1, - .qdev.props = (Property[]) { - DEFINE_PROP_UINT8("id", APICState, id, -1), - DEFINE_PROP_PTR("cpu_env", APICState, cpu_env), - DEFINE_PROP_END_OF_LIST(), - } +static APICCommonInfo apic_info = { + .busdev.qdev.name = "apic", + .init = apic_init, + .set_base = apic_set_base, + .set_tpr = apic_set_tpr, + .external_nmi = apic_external_nmi, }; static void apic_register_devices(void) { - sysbus_register_withprop(&apic_info); + apic_qdev_register(&apic_info); } device_init(apic_register_devices) diff --git a/hw/apic.h b/hw/apic.h index 8173d8a790..a62d83ba9f 100644 --- a/hw/apic.h +++ b/hw/apic.h @@ -10,7 +10,6 @@ int apic_accept_pic_intr(DeviceState *s); void 
apic_deliver_pic_intr(DeviceState *s, int level); void apic_deliver_nmi(DeviceState *d); int apic_get_interrupt(DeviceState *s); -void apic_report_irq_delivered(int delivered); void apic_reset_irq_delivered(void); int apic_get_irq_delivered(void); void cpu_set_apic_base(DeviceState *s, uint64_t val); diff --git a/hw/apic_common.c b/hw/apic_common.c new file mode 100644 index 0000000000..eef977ff1c --- /dev/null +++ b/hw/apic_common.c @@ -0,0 +1,252 @@ +/* + * APIC support - common bits of emulated and KVM kernel model + * + * Copyright (c) 2004-2005 Fabrice Bellard + * Copyright (c) 2011 Jan Kiszka, Siemens AG + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + */ +#include "apic.h" +#include "apic_internal.h" +#include "trace.h" + +static int apic_irq_delivered; + +void cpu_set_apic_base(DeviceState *d, uint64_t val) +{ + APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d); + APICCommonInfo *info; + + trace_cpu_set_apic_base(val); + + if (s) { + info = DO_UPCAST(APICCommonInfo, busdev.qdev, s->busdev.qdev.info); + info->set_base(s, val); + } +} + +uint64_t cpu_get_apic_base(DeviceState *d) +{ + APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d); + + trace_cpu_get_apic_base(s ? (uint64_t)s->apicbase : 0); + + return s ? 
s->apicbase : 0; +} + +void cpu_set_apic_tpr(DeviceState *d, uint8_t val) +{ + APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d); + APICCommonInfo *info; + + if (s) { + info = DO_UPCAST(APICCommonInfo, busdev.qdev, s->busdev.qdev.info); + info->set_tpr(s, val); + } +} + +uint8_t cpu_get_apic_tpr(DeviceState *d) +{ + APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d); + + return s ? s->tpr >> 4 : 0; +} + +void apic_report_irq_delivered(int delivered) +{ + apic_irq_delivered += delivered; + + trace_apic_report_irq_delivered(apic_irq_delivered); +} + +void apic_reset_irq_delivered(void) +{ + trace_apic_reset_irq_delivered(apic_irq_delivered); + + apic_irq_delivered = 0; +} + +int apic_get_irq_delivered(void) +{ + trace_apic_get_irq_delivered(apic_irq_delivered); + + return apic_irq_delivered; +} + +void apic_deliver_nmi(DeviceState *d) +{ + APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d); + APICCommonInfo *info; + + info = DO_UPCAST(APICCommonInfo, busdev.qdev, s->busdev.qdev.info); + info->external_nmi(s); +} + +void apic_init_reset(DeviceState *d) +{ + APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d); + int i; + + if (!s) { + return; + } + s->tpr = 0; + s->spurious_vec = 0xff; + s->log_dest = 0; + s->dest_mode = 0xf; + memset(s->isr, 0, sizeof(s->isr)); + memset(s->tmr, 0, sizeof(s->tmr)); + memset(s->irr, 0, sizeof(s->irr)); + for (i = 0; i < APIC_LVT_NB; i++) { + s->lvt[i] = APIC_LVT_MASKED; + } + s->esr = 0; + memset(s->icr, 0, sizeof(s->icr)); + s->divide_conf = 0; + s->count_shift = 0; + s->initial_count = 0; + s->initial_count_load_time = 0; + s->next_time = 0; + s->wait_for_sipi = 1; + + qemu_del_timer(s->timer); +} + +static void apic_reset_common(DeviceState *d) +{ + APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d); + bool bsp; + + bsp = cpu_is_bsp(s->cpu_env); + s->apicbase = 0xfee00000 | + (bsp ? 
MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; + + apic_init_reset(d); + + if (bsp) { + /* + * LINT0 delivery mode on CPU #0 is set to ExtInt at initialization + * time typically by BIOS, so PIC interrupt can be delivered to the + * processor when local APIC is enabled. + */ + s->lvt[APIC_LVT_LINT0] = 0x700; + } +} + +/* This function is only used for old state version 1 and 2 */ +static int apic_load_old(QEMUFile *f, void *opaque, int version_id) +{ + APICCommonState *s = opaque; + int i; + + if (version_id > 2) { + return -EINVAL; + } + + /* XXX: what if the base changes? (registered memory regions) */ + qemu_get_be32s(f, &s->apicbase); + qemu_get_8s(f, &s->id); + qemu_get_8s(f, &s->arb_id); + qemu_get_8s(f, &s->tpr); + qemu_get_be32s(f, &s->spurious_vec); + qemu_get_8s(f, &s->log_dest); + qemu_get_8s(f, &s->dest_mode); + for (i = 0; i < 8; i++) { + qemu_get_be32s(f, &s->isr[i]); + qemu_get_be32s(f, &s->tmr[i]); + qemu_get_be32s(f, &s->irr[i]); + } + for (i = 0; i < APIC_LVT_NB; i++) { + qemu_get_be32s(f, &s->lvt[i]); + } + qemu_get_be32s(f, &s->esr); + qemu_get_be32s(f, &s->icr[0]); + qemu_get_be32s(f, &s->icr[1]); + qemu_get_be32s(f, &s->divide_conf); + s->count_shift = qemu_get_be32(f); + qemu_get_be32s(f, &s->initial_count); + s->initial_count_load_time = qemu_get_be64(f); + s->next_time = qemu_get_be64(f); + + if (version_id >= 2) { + qemu_get_timer(f, s->timer); + } + return 0; +} + +static int apic_init_common(SysBusDevice *dev) +{ + APICCommonState *s = FROM_SYSBUS(APICCommonState, dev); + APICCommonInfo *info; + static int apic_no; + + if (apic_no >= MAX_APICS) { + return -1; + } + s->idx = apic_no++; + + info = DO_UPCAST(APICCommonInfo, busdev.qdev, s->busdev.qdev.info); + info->init(s); + + sysbus_init_mmio(&s->busdev, &s->io_memory); + return 0; +} + +static const VMStateDescription vmstate_apic_common = { + .name = "apic", + .version_id = 3, + .minimum_version_id = 3, + .minimum_version_id_old = 1, + .load_state_old = apic_load_old, + .fields 
= (VMStateField[]) { + VMSTATE_UINT32(apicbase, APICCommonState), + VMSTATE_UINT8(id, APICCommonState), + VMSTATE_UINT8(arb_id, APICCommonState), + VMSTATE_UINT8(tpr, APICCommonState), + VMSTATE_UINT32(spurious_vec, APICCommonState), + VMSTATE_UINT8(log_dest, APICCommonState), + VMSTATE_UINT8(dest_mode, APICCommonState), + VMSTATE_UINT32_ARRAY(isr, APICCommonState, 8), + VMSTATE_UINT32_ARRAY(tmr, APICCommonState, 8), + VMSTATE_UINT32_ARRAY(irr, APICCommonState, 8), + VMSTATE_UINT32_ARRAY(lvt, APICCommonState, APIC_LVT_NB), + VMSTATE_UINT32(esr, APICCommonState), + VMSTATE_UINT32_ARRAY(icr, APICCommonState, 2), + VMSTATE_UINT32(divide_conf, APICCommonState), + VMSTATE_INT32(count_shift, APICCommonState), + VMSTATE_UINT32(initial_count, APICCommonState), + VMSTATE_INT64(initial_count_load_time, APICCommonState), + VMSTATE_INT64(next_time, APICCommonState), + VMSTATE_TIMER(timer, APICCommonState), + VMSTATE_END_OF_LIST() + } +}; + +static Property apic_properties_common[] = { + DEFINE_PROP_UINT8("id", APICCommonState, id, -1), + DEFINE_PROP_PTR("cpu_env", APICCommonState, cpu_env), + DEFINE_PROP_END_OF_LIST(), +}; + + +void apic_qdev_register(APICCommonInfo *info) +{ + info->busdev.init = apic_init_common; + info->busdev.qdev.size = sizeof(APICCommonState), + info->busdev.qdev.vmsd = &vmstate_apic_common; + info->busdev.qdev.reset = apic_reset_common; + info->busdev.qdev.no_user = 1; + info->busdev.qdev.props = apic_properties_common; + sysbus_register_withprop(&info->busdev); +} diff --git a/hw/apic_internal.h b/hw/apic_internal.h new file mode 100644 index 0000000000..a7433fb0e6 --- /dev/null +++ b/hw/apic_internal.h @@ -0,0 +1,112 @@ +/* + * APIC support - internal interfaces + * + * Copyright (c) 2004-2005 Fabrice Bellard + * Copyright (c) 2011 Jan Kiszka, Siemens AG + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + 
* version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + */ +#ifndef QEMU_APIC_INTERNAL_H +#define QEMU_APIC_INTERNAL_H + +#include "memory.h" +#include "sysbus.h" +#include "qemu-timer.h" + +/* APIC Local Vector Table */ +#define APIC_LVT_TIMER 0 +#define APIC_LVT_THERMAL 1 +#define APIC_LVT_PERFORM 2 +#define APIC_LVT_LINT0 3 +#define APIC_LVT_LINT1 4 +#define APIC_LVT_ERROR 5 +#define APIC_LVT_NB 6 + +/* APIC delivery modes */ +#define APIC_DM_FIXED 0 +#define APIC_DM_LOWPRI 1 +#define APIC_DM_SMI 2 +#define APIC_DM_NMI 4 +#define APIC_DM_INIT 5 +#define APIC_DM_SIPI 6 +#define APIC_DM_EXTINT 7 + +/* APIC destination mode */ +#define APIC_DESTMODE_FLAT 0xf +#define APIC_DESTMODE_CLUSTER 1 + +#define APIC_TRIGGER_EDGE 0 +#define APIC_TRIGGER_LEVEL 1 + +#define APIC_LVT_TIMER_PERIODIC (1<<17) +#define APIC_LVT_MASKED (1<<16) +#define APIC_LVT_LEVEL_TRIGGER (1<<15) +#define APIC_LVT_REMOTE_IRR (1<<14) +#define APIC_INPUT_POLARITY (1<<13) +#define APIC_SEND_PENDING (1<<12) + +#define ESR_ILLEGAL_ADDRESS (1 << 7) + +#define APIC_SV_DIRECTED_IO (1<<12) +#define APIC_SV_ENABLE (1<<8) + +#define MAX_APICS 255 + +#define MSI_SPACE_SIZE 0x100000 + +typedef struct APICCommonState APICCommonState; + +struct APICCommonState { + SysBusDevice busdev; + MemoryRegion io_memory; + void *cpu_env; + uint32_t apicbase; + uint8_t id; + uint8_t arb_id; + uint8_t tpr; + uint32_t spurious_vec; + uint8_t log_dest; + uint8_t dest_mode; + uint32_t isr[8]; /* in service register */ + uint32_t tmr[8]; /* trigger mode register */ + uint32_t irr[8]; /* interrupt request register */ + uint32_t 
lvt[APIC_LVT_NB]; + uint32_t esr; /* error register */ + uint32_t icr[2]; + + uint32_t divide_conf; + int count_shift; + uint32_t initial_count; + int64_t initial_count_load_time; + int64_t next_time; + int idx; + QEMUTimer *timer; + int sipi_vector; + int wait_for_sipi; +}; + +typedef struct APICCommonInfo APICCommonInfo; + +struct APICCommonInfo { + SysBusDeviceInfo busdev; + void (*init)(APICCommonState *s); + void (*set_base)(APICCommonState *s, uint64_t val); + void (*set_tpr)(APICCommonState *s, uint8_t val); + void (*external_nmi)(APICCommonState *s); +}; + +void apic_report_irq_delivered(int delivered); +void apic_qdev_register(APICCommonInfo *info); + +#endif /* !QEMU_APIC_INTERNAL_H */ From 7a380ca350f84b5b99391da20a2b4ea505b0524d Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 16 Oct 2011 12:19:12 +0200 Subject: [PATCH 09/20] apic: Open-code timer save/restore To enable migration between accelerated and non-accelerated APIC models, we will need to handle the timer saving and restoring specially and can no longer rely on the automatics of VMSTATE_TIMER. Specifically, the accelerated model will not start any QEMUTimer. This patch therefore factors out the generic bits into apic_next_timer and uses a post-load callback to implement model-specific logic.
Signed-off-by: Jan Kiszka --- hw/apic.c | 30 +++++++++++--------------- hw/apic_common.c | 54 ++++++++++++++++++++++++++++++++++++++++++++-- hw/apic_internal.h | 3 +++ 3 files changed, 67 insertions(+), 20 deletions(-) diff --git a/hw/apic.c b/hw/apic.c index 387a46940a..e59c964083 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -521,25 +521,9 @@ static uint32_t apic_get_current_count(APICCommonState *s) static void apic_timer_update(APICCommonState *s, int64_t current_time) { - int64_t next_time, d; - - if (!(s->lvt[APIC_LVT_TIMER] & APIC_LVT_MASKED)) { - d = (current_time - s->initial_count_load_time) >> - s->count_shift; - if (s->lvt[APIC_LVT_TIMER] & APIC_LVT_TIMER_PERIODIC) { - if (!s->initial_count) - goto no_timer; - d = ((d / ((uint64_t)s->initial_count + 1)) + 1) * ((uint64_t)s->initial_count + 1); - } else { - if (d >= s->initial_count) - goto no_timer; - d = (uint64_t)s->initial_count + 1; - } - next_time = s->initial_count_load_time + (d << s->count_shift); - qemu_mod_timer(s->timer, next_time); - s->next_time = next_time; + if (apic_next_timer(s, current_time)) { + qemu_mod_timer(s->timer, s->next_time); } else { - no_timer: qemu_del_timer(s->timer); } } @@ -753,6 +737,15 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val) } } +static void apic_post_load(APICCommonState *s) +{ + if (s->timer_expiry != -1) { + qemu_mod_timer(s->timer, s->timer_expiry); + } else { + qemu_del_timer(s->timer); + } +} + static const MemoryRegionOps apic_io_ops = { .old_mmio = { .read = { apic_mem_readb, apic_mem_readw, apic_mem_readl, }, @@ -776,6 +769,7 @@ static APICCommonInfo apic_info = { .set_base = apic_set_base, .set_tpr = apic_set_tpr, .external_nmi = apic_external_nmi, + .post_load = apic_post_load, }; static void apic_register_devices(void) diff --git a/hw/apic_common.c b/hw/apic_common.c index eef977ff1c..e05369caab 100644 --- a/hw/apic_common.c +++ b/hw/apic_common.c @@ -93,6 +93,39 @@ void apic_deliver_nmi(DeviceState *d) 
info->external_nmi(s); } +bool apic_next_timer(APICCommonState *s, int64_t current_time) +{ + int64_t d; + + /* We need to store the timer state separately to support APIC + * implementations that maintain a non-QEMU timer, e.g. inside the + * host kernel. This open-coded state allows us to migrate between + * both models. */ + s->timer_expiry = -1; + + if (s->lvt[APIC_LVT_TIMER] & APIC_LVT_MASKED) { + return false; + } + + d = (current_time - s->initial_count_load_time) >> s->count_shift; + + if (s->lvt[APIC_LVT_TIMER] & APIC_LVT_TIMER_PERIODIC) { + if (!s->initial_count) { + return false; + } + d = ((d / ((uint64_t)s->initial_count + 1)) + 1) * + ((uint64_t)s->initial_count + 1); + } else { + if (d >= s->initial_count) { + return false; + } + d = (uint64_t)s->initial_count + 1; + } + s->next_time = s->initial_count_load_time + (d << s->count_shift); + s->timer_expiry = s->next_time; + return true; +} + void apic_init_reset(DeviceState *d) { APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d); @@ -120,7 +153,10 @@ void apic_init_reset(DeviceState *d) s->next_time = 0; s->wait_for_sipi = 1; - qemu_del_timer(s->timer); + if (s->timer) { + qemu_del_timer(s->timer); + } + s->timer_expiry = -1; } static void apic_reset_common(DeviceState *d) @@ -203,12 +239,25 @@ static int apic_init_common(SysBusDevice *dev) return 0; } +static int apic_dispatch_post_load(void *opaque, int version_id) +{ + APICCommonState *s = opaque; + APICCommonInfo *info = + DO_UPCAST(APICCommonInfo, busdev.qdev, s->busdev.qdev.info); + + if (info->post_load) { + info->post_load(s); + } + return 0; +} + static const VMStateDescription vmstate_apic_common = { .name = "apic", .version_id = 3, .minimum_version_id = 3, .minimum_version_id_old = 1, .load_state_old = apic_load_old, + .post_load = apic_dispatch_post_load, .fields = (VMStateField[]) { VMSTATE_UINT32(apicbase, APICCommonState), VMSTATE_UINT8(id, APICCommonState), @@ -228,7 +277,8 @@ static const VMStateDescription 
vmstate_apic_common = { VMSTATE_UINT32(initial_count, APICCommonState), VMSTATE_INT64(initial_count_load_time, APICCommonState), VMSTATE_INT64(next_time, APICCommonState), - VMSTATE_TIMER(timer, APICCommonState), + VMSTATE_INT64(timer_expiry, + APICCommonState), /* open-coded timer state */ VMSTATE_END_OF_LIST() } }; diff --git a/hw/apic_internal.h b/hw/apic_internal.h index a7433fb0e6..1db4f061b5 100644 --- a/hw/apic_internal.h +++ b/hw/apic_internal.h @@ -92,6 +92,7 @@ struct APICCommonState { int64_t next_time; int idx; QEMUTimer *timer; + int64_t timer_expiry; int sipi_vector; int wait_for_sipi; }; @@ -104,9 +105,11 @@ struct APICCommonInfo { void (*set_base)(APICCommonState *s, uint64_t val); void (*set_tpr)(APICCommonState *s, uint8_t val); void (*external_nmi)(APICCommonState *s); + void (*post_load)(APICCommonState *s); }; void apic_report_irq_delivered(int delivered); void apic_qdev_register(APICCommonInfo *info); +bool apic_next_timer(APICCommonState *s, int64_t current_time); #endif /* !QEMU_APIC_INTERNAL_H */ From 9aa78c425f6cd6a57ec53dd1a76233a080dc83b6 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 10 Jan 2012 16:31:16 +0100 Subject: [PATCH 10/20] i8259: Completely privatize PicState Use DeviceState instead of PicState in the public i8259 API. This is cleaner and allows reorganizing the PIC data structures for KVM reuse.
Signed-off-by: Jan Kiszka --- hw/i8259.c | 17 +++++++++++------ hw/pc.h | 7 +++---- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/hw/i8259.c b/hw/i8259.c index 7331e0e61c..cfaa35c092 100644 --- a/hw/i8259.c +++ b/hw/i8259.c @@ -40,6 +40,8 @@ //#define DEBUG_IRQ_LATENCY //#define DEBUG_IRQ_COUNT +typedef struct PicState PicState; + struct PicState { ISADevice dev; uint8_t last_irr; /* edge detection */ @@ -76,7 +78,7 @@ static uint64_t irq_count[16]; #ifdef DEBUG_IRQ_LATENCY static int64_t irq_time[16]; #endif -PicState *isa_pic; +DeviceState *isa_pic; static PicState *slave_pic; /* return the highest priority found in mask (highest = smallest @@ -206,8 +208,9 @@ static void pic_intack(PicState *s, int irq) pic_update_irq(s); } -int pic_read_irq(PicState *s) +int pic_read_irq(DeviceState *d) { + PicState *s = DO_UPCAST(PicState, dev.qdev, d); int irq, irq2, intno; irq = pic_get_irq(s); @@ -269,7 +272,7 @@ static void pic_init_reset(PicState *s) static void pic_reset(DeviceState *dev) { - PicState *s = container_of(dev, PicState, dev.qdev); + PicState *s = DO_UPCAST(PicState, dev.qdev, dev); pic_init_reset(s); s->elcr = 0; @@ -399,8 +402,10 @@ static uint64_t pic_ioport_read(void *opaque, target_phys_addr_t addr, return ret; } -int pic_get_output(PicState *s) +int pic_get_output(DeviceState *d) { + PicState *s = DO_UPCAST(PicState, dev.qdev, d); + return (pic_get_irq(s) >= 0); } @@ -491,7 +496,7 @@ void pic_info(Monitor *mon) return; } for (i = 0; i < 2; i++) { - s = i == 0 ? isa_pic : slave_pic; + s = i == 0 ? 
DO_UPCAST(PicState, dev.qdev, isa_pic) : slave_pic; monitor_printf(mon, "pic%d: irr=%02x imr=%02x isr=%02x hprio=%d " "irq_base=%02x rr_sel=%d elcr=%02x fnm=%d\n", i, s->irr, s->imr, s->isr, s->priority_add, @@ -538,7 +543,7 @@ qemu_irq *i8259_init(ISABus *bus, qemu_irq parent_irq) irq_set[i] = qdev_get_gpio_in(&dev->qdev, i); } - isa_pic = DO_UPCAST(PicState, dev, dev); + isa_pic = &dev->qdev; dev = isa_create(bus, "isa-i8259"); qdev_prop_set_uint32(&dev->qdev, "iobase", 0xa0); diff --git a/hw/pc.h b/hw/pc.h index 13e41f101e..ece069ad1f 100644 --- a/hw/pc.h +++ b/hw/pc.h @@ -62,11 +62,10 @@ bool parallel_mm_init(MemoryRegion *address_space, /* i8259.c */ -typedef struct PicState PicState; -extern PicState *isa_pic; +extern DeviceState *isa_pic; qemu_irq *i8259_init(ISABus *bus, qemu_irq parent_irq); -int pic_read_irq(PicState *s); -int pic_get_output(PicState *s); +int pic_read_irq(DeviceState *d); +int pic_get_output(DeviceState *d); void pic_info(Monitor *mon); void irq_info(Monitor *mon); From 512709f559dce1690fa89fe2a67a8e1984cd3895 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 16 Oct 2011 14:38:45 +0200 Subject: [PATCH 11/20] i8259: Factor out base class for KVM reuse Analogously to the APIC, we will reuse some parts of the user space i8259 model for KVM. The base class provides a common device state, the vmstate, the property list, a reset core and some shared init bits. This also introduces a common helper to instantiate a single i8259 chip from the cascade-creating i8259_init function. 
Signed-off-by: Jan Kiszka --- Makefile.objs | 2 +- hw/i8259.c | 158 +++++++++----------------------------------- hw/i8259_common.c | 147 +++++++++++++++++++++++++++++++++++++++++ hw/i8259_internal.h | 76 +++++++++++++++++++++ 4 files changed, 254 insertions(+), 129 deletions(-) create mode 100644 hw/i8259_common.c create mode 100644 hw/i8259_internal.h diff --git a/Makefile.objs b/Makefile.objs index 4f6d26c917..6270a95747 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -220,7 +220,7 @@ hw-obj-$(CONFIG_APPLESMC) += applesmc.o hw-obj-$(CONFIG_SMARTCARD) += usb-ccid.o ccid-card-passthru.o hw-obj-$(CONFIG_SMARTCARD_NSS) += ccid-card-emulated.o hw-obj-$(CONFIG_USB_REDIR) += usb-redir.o -hw-obj-$(CONFIG_I8259) += i8259.o +hw-obj-$(CONFIG_I8259) += i8259_common.o i8259.o # PPC devices hw-obj-$(CONFIG_PREP_PCI) += prep_pci.o diff --git a/hw/i8259.c b/hw/i8259.c index cfaa35c092..3005ce244d 100644 --- a/hw/i8259.c +++ b/hw/i8259.c @@ -26,6 +26,7 @@ #include "isa.h" #include "monitor.h" #include "qemu-timer.h" +#include "i8259_internal.h" /* debug PIC */ //#define DEBUG_PIC @@ -40,35 +41,6 @@ //#define DEBUG_IRQ_LATENCY //#define DEBUG_IRQ_COUNT -typedef struct PicState PicState; - -struct PicState { - ISADevice dev; - uint8_t last_irr; /* edge detection */ - uint8_t irr; /* interrupt request register */ - uint8_t imr; /* interrupt mask register */ - uint8_t isr; /* interrupt service register */ - uint8_t priority_add; /* highest irq priority */ - uint8_t irq_base; - uint8_t read_reg_select; - uint8_t poll; - uint8_t special_mask; - uint8_t init_state; - uint8_t auto_eoi; - uint8_t rotate_on_auto_eoi; - uint8_t special_fully_nested_mode; - uint8_t init4; /* true if 4 byte init */ - uint8_t single_mode; /* true if slave pic is not initialized */ - uint8_t elcr; /* PIIX edge/trigger selection*/ - uint8_t elcr_mask; - qemu_irq int_out[1]; - uint32_t master; /* reflects /SP input pin */ - uint32_t iobase; - uint32_t elcr_addr; - MemoryRegion base_io; - MemoryRegion elcr_io; 
-}; - #if defined(DEBUG_PIC) || defined(DEBUG_IRQ_COUNT) static int irq_level[16]; #endif @@ -79,11 +51,11 @@ static uint64_t irq_count[16]; static int64_t irq_time[16]; #endif DeviceState *isa_pic; -static PicState *slave_pic; +static PICCommonState *slave_pic; /* return the highest priority found in mask (highest = smallest number). Return 8 if no irq */ -static int get_priority(PicState *s, int mask) +static int get_priority(PICCommonState *s, int mask) { int priority; @@ -98,7 +70,7 @@ static int get_priority(PicState *s, int mask) } /* return the pic wanted interrupt. return -1 if none */ -static int pic_get_irq(PicState *s) +static int pic_get_irq(PICCommonState *s) { int mask, cur_priority, priority; @@ -127,7 +99,7 @@ static int pic_get_irq(PicState *s) } /* Update INT output. Must be called every time the output may have changed. */ -static void pic_update_irq(PicState *s) +static void pic_update_irq(PICCommonState *s) { int irq; @@ -144,7 +116,7 @@ static void pic_update_irq(PicState *s) /* set irq level. 
If an edge is detected, then the IRR is set to 1 */ static void pic_set_irq(void *opaque, int irq, int level) { - PicState *s = opaque; + PICCommonState *s = opaque; int mask = 1 << irq; #if defined(DEBUG_PIC) || defined(DEBUG_IRQ_COUNT) || \ @@ -192,7 +164,7 @@ static void pic_set_irq(void *opaque, int irq, int level) } /* acknowledge interrupt 'irq' */ -static void pic_intack(PicState *s, int irq) +static void pic_intack(PICCommonState *s, int irq) { if (s->auto_eoi) { if (s->rotate_on_auto_eoi) { @@ -210,7 +182,7 @@ static void pic_intack(PicState *s, int irq) int pic_read_irq(DeviceState *d) { - PicState *s = DO_UPCAST(PicState, dev.qdev, d); + PICCommonState *s = DO_UPCAST(PICCommonState, dev.qdev, d); int irq, irq2, intno; irq = pic_get_irq(s); @@ -249,30 +221,15 @@ int pic_read_irq(DeviceState *d) return intno; } -static void pic_init_reset(PicState *s) +static void pic_init_reset(PICCommonState *s) { - s->last_irr = 0; - s->irr = 0; - s->imr = 0; - s->isr = 0; - s->priority_add = 0; - s->irq_base = 0; - s->read_reg_select = 0; - s->poll = 0; - s->special_mask = 0; - s->init_state = 0; - s->auto_eoi = 0; - s->rotate_on_auto_eoi = 0; - s->special_fully_nested_mode = 0; - s->init4 = 0; - s->single_mode = 0; - /* Note: ELCR is not reset */ + pic_reset_common(s); pic_update_irq(s); } static void pic_reset(DeviceState *dev) { - PicState *s = DO_UPCAST(PicState, dev.qdev, dev); + PICCommonState *s = DO_UPCAST(PICCommonState, dev.qdev, dev); pic_init_reset(s); s->elcr = 0; @@ -281,7 +238,7 @@ static void pic_reset(DeviceState *dev) static void pic_ioport_write(void *opaque, target_phys_addr_t addr64, uint64_t val64, unsigned size) { - PicState *s = opaque; + PICCommonState *s = opaque; uint32_t addr = addr64; uint32_t val = val64; int priority, cmd, irq; @@ -375,7 +332,7 @@ static void pic_ioport_write(void *opaque, target_phys_addr_t addr64, static uint64_t pic_ioport_read(void *opaque, target_phys_addr_t addr, unsigned size) { - PicState *s = opaque; + 
PICCommonState *s = opaque; int ret; if (s->poll) { @@ -404,7 +361,7 @@ static uint64_t pic_ioport_read(void *opaque, target_phys_addr_t addr, int pic_get_output(DeviceState *d) { - PicState *s = DO_UPCAST(PicState, dev.qdev, d); + PICCommonState *s = DO_UPCAST(PICCommonState, dev.qdev, d); return (pic_get_irq(s) >= 0); } @@ -412,43 +369,17 @@ int pic_get_output(DeviceState *d) static void elcr_ioport_write(void *opaque, target_phys_addr_t addr, uint64_t val, unsigned size) { - PicState *s = opaque; + PICCommonState *s = opaque; s->elcr = val & s->elcr_mask; } static uint64_t elcr_ioport_read(void *opaque, target_phys_addr_t addr, unsigned size) { - PicState *s = opaque; + PICCommonState *s = opaque; return s->elcr; } -static const VMStateDescription vmstate_pic = { - .name = "i8259", - .version_id = 1, - .minimum_version_id = 1, - .minimum_version_id_old = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT8(last_irr, PicState), - VMSTATE_UINT8(irr, PicState), - VMSTATE_UINT8(imr, PicState), - VMSTATE_UINT8(isr, PicState), - VMSTATE_UINT8(priority_add, PicState), - VMSTATE_UINT8(irq_base, PicState), - VMSTATE_UINT8(read_reg_select, PicState), - VMSTATE_UINT8(poll, PicState), - VMSTATE_UINT8(special_mask, PicState), - VMSTATE_UINT8(init_state, PicState), - VMSTATE_UINT8(auto_eoi, PicState), - VMSTATE_UINT8(rotate_on_auto_eoi, PicState), - VMSTATE_UINT8(special_fully_nested_mode, PicState), - VMSTATE_UINT8(init4, PicState), - VMSTATE_UINT8(single_mode, PicState), - VMSTATE_UINT8(elcr, PicState), - VMSTATE_END_OF_LIST() - } -}; - static const MemoryRegionOps pic_base_ioport_ops = { .read = pic_ioport_read, .write = pic_ioport_write, @@ -467,36 +398,25 @@ static const MemoryRegionOps pic_elcr_ioport_ops = { }, }; -static int pic_initfn(ISADevice *dev) +static void pic_init(PICCommonState *s) { - PicState *s = DO_UPCAST(PicState, dev, dev); - memory_region_init_io(&s->base_io, &pic_base_ioport_ops, s, "pic", 2); memory_region_init_io(&s->elcr_io, &pic_elcr_ioport_ops, s, 
"elcr", 1); - isa_register_ioport(dev, &s->base_io, s->iobase); - if (s->elcr_addr != -1) { - isa_register_ioport(dev, &s->elcr_io, s->elcr_addr); - } - - qdev_init_gpio_out(&dev->qdev, s->int_out, ARRAY_SIZE(s->int_out)); - qdev_init_gpio_in(&dev->qdev, pic_set_irq, 8); - - qdev_set_legacy_instance_id(&dev->qdev, s->iobase, 1); - - return 0; + qdev_init_gpio_out(&s->dev.qdev, s->int_out, ARRAY_SIZE(s->int_out)); + qdev_init_gpio_in(&s->dev.qdev, pic_set_irq, 8); } void pic_info(Monitor *mon) { int i; - PicState *s; + PICCommonState *s; if (!isa_pic) { return; } for (i = 0; i < 2; i++) { - s = i == 0 ? DO_UPCAST(PicState, dev.qdev, isa_pic) : slave_pic; + s = i == 0 ? DO_UPCAST(PICCommonState, dev.qdev, isa_pic) : slave_pic; monitor_printf(mon, "pic%d: irr=%02x imr=%02x isr=%02x hprio=%d " "irq_base=%02x rr_sel=%d elcr=%02x fnm=%d\n", i, s->irr, s->imr, s->isr, s->priority_add, @@ -531,12 +451,7 @@ qemu_irq *i8259_init(ISABus *bus, qemu_irq parent_irq) irq_set = g_malloc(ISA_NUM_IRQS * sizeof(qemu_irq)); - dev = isa_create(bus, "isa-i8259"); - qdev_prop_set_uint32(&dev->qdev, "iobase", 0x20); - qdev_prop_set_uint32(&dev->qdev, "elcr_addr", 0x4d0); - qdev_prop_set_uint8(&dev->qdev, "elcr_mask", 0xf8); - qdev_prop_set_bit(&dev->qdev, "master", true); - qdev_init_nofail(&dev->qdev); + dev = i8259_init_chip("isa-i8259", bus, true); qdev_connect_gpio_out(&dev->qdev, 0, parent_irq); for (i = 0 ; i < 8; i++) { @@ -545,40 +460,27 @@ qemu_irq *i8259_init(ISABus *bus, qemu_irq parent_irq) isa_pic = &dev->qdev; - dev = isa_create(bus, "isa-i8259"); - qdev_prop_set_uint32(&dev->qdev, "iobase", 0xa0); - qdev_prop_set_uint32(&dev->qdev, "elcr_addr", 0x4d1); - qdev_prop_set_uint8(&dev->qdev, "elcr_mask", 0xde); - qdev_init_nofail(&dev->qdev); + dev = i8259_init_chip("isa-i8259", bus, false); qdev_connect_gpio_out(&dev->qdev, 0, irq_set[2]); for (i = 0 ; i < 8; i++) { irq_set[i + 8] = qdev_get_gpio_in(&dev->qdev, i); } - slave_pic = DO_UPCAST(PicState, dev, dev); + slave_pic = 
DO_UPCAST(PICCommonState, dev, dev); return irq_set; } -static ISADeviceInfo i8259_info = { - .qdev.name = "isa-i8259", - .qdev.size = sizeof(PicState), - .qdev.vmsd = &vmstate_pic, - .qdev.reset = pic_reset, - .qdev.no_user = 1, - .init = pic_initfn, - .qdev.props = (Property[]) { - DEFINE_PROP_HEX32("iobase", PicState, iobase, -1), - DEFINE_PROP_HEX32("elcr_addr", PicState, elcr_addr, -1), - DEFINE_PROP_HEX8("elcr_mask", PicState, elcr_mask, -1), - DEFINE_PROP_BIT("master", PicState, master, 0, false), - DEFINE_PROP_END_OF_LIST(), - }, +static PICCommonInfo i8259_info = { + .isadev.qdev.name = "isa-i8259", + .isadev.qdev.reset = pic_reset, + .init = pic_init, }; static void pic_register(void) { - isa_qdev_register(&i8259_info); + pic_qdev_register(&i8259_info); } + device_init(pic_register) diff --git a/hw/i8259_common.c b/hw/i8259_common.c new file mode 100644 index 0000000000..e515876c48 --- /dev/null +++ b/hw/i8259_common.c @@ -0,0 +1,147 @@ +/* + * QEMU 8259 - common bits of emulated and KVM kernel model + * + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2011 Jan Kiszka, Siemens AG + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "pc.h" +#include "i8259_internal.h" + +void pic_reset_common(PICCommonState *s) +{ + s->last_irr = 0; + s->irr = 0; + s->imr = 0; + s->isr = 0; + s->priority_add = 0; + s->irq_base = 0; + s->read_reg_select = 0; + s->poll = 0; + s->special_mask = 0; + s->init_state = 0; + s->auto_eoi = 0; + s->rotate_on_auto_eoi = 0; + s->special_fully_nested_mode = 0; + s->init4 = 0; + s->single_mode = 0; + /* Note: ELCR is not reset */ +} + +static void pic_dispatch_pre_save(void *opaque) +{ + PICCommonState *s = opaque; + PICCommonInfo *info = + DO_UPCAST(PICCommonInfo, isadev.qdev, s->dev.qdev.info); + + if (info->pre_save) { + info->pre_save(s); + } +} + +static int pic_dispatch_post_load(void *opaque, int version_id) +{ + PICCommonState *s = opaque; + PICCommonInfo *info = + DO_UPCAST(PICCommonInfo, isadev.qdev, s->dev.qdev.info); + + if (info->post_load) { + info->post_load(s); + } + return 0; +} + +static int pic_init_common(ISADevice *dev) +{ + PICCommonState *s = DO_UPCAST(PICCommonState, dev, dev); + PICCommonInfo *info = + DO_UPCAST(PICCommonInfo, isadev.qdev, dev->qdev.info); + + info->init(s); + + isa_register_ioport(NULL, &s->base_io, s->iobase); + if (s->elcr_addr != -1) { + isa_register_ioport(NULL, &s->elcr_io, s->elcr_addr); + } + + qdev_set_legacy_instance_id(&s->dev.qdev, s->iobase, 1); + + return 0; +} + +ISADevice *i8259_init_chip(const char *name, ISABus *bus, bool master) +{ + ISADevice *dev; + + dev = isa_create(bus, name); + qdev_prop_set_uint32(&dev->qdev, "iobase", master ? 0x20 : 0xa0); + qdev_prop_set_uint32(&dev->qdev, "elcr_addr", master ? 0x4d0 : 0x4d1); + qdev_prop_set_uint8(&dev->qdev, "elcr_mask", master ? 
0xf8 : 0xde); + qdev_prop_set_bit(&dev->qdev, "master", master); + qdev_init_nofail(&dev->qdev); + + return dev; +} + +static const VMStateDescription vmstate_pic_common = { + .name = "i8259", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .pre_save = pic_dispatch_pre_save, + .post_load = pic_dispatch_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT8(last_irr, PICCommonState), + VMSTATE_UINT8(irr, PICCommonState), + VMSTATE_UINT8(imr, PICCommonState), + VMSTATE_UINT8(isr, PICCommonState), + VMSTATE_UINT8(priority_add, PICCommonState), + VMSTATE_UINT8(irq_base, PICCommonState), + VMSTATE_UINT8(read_reg_select, PICCommonState), + VMSTATE_UINT8(poll, PICCommonState), + VMSTATE_UINT8(special_mask, PICCommonState), + VMSTATE_UINT8(init_state, PICCommonState), + VMSTATE_UINT8(auto_eoi, PICCommonState), + VMSTATE_UINT8(rotate_on_auto_eoi, PICCommonState), + VMSTATE_UINT8(special_fully_nested_mode, PICCommonState), + VMSTATE_UINT8(init4, PICCommonState), + VMSTATE_UINT8(single_mode, PICCommonState), + VMSTATE_UINT8(elcr, PICCommonState), + VMSTATE_END_OF_LIST() + } +}; + +static Property pic_properties_common[] = { + DEFINE_PROP_HEX32("iobase", PICCommonState, iobase, -1), + DEFINE_PROP_HEX32("elcr_addr", PICCommonState, elcr_addr, -1), + DEFINE_PROP_HEX8("elcr_mask", PICCommonState, elcr_mask, -1), + DEFINE_PROP_BIT("master", PICCommonState, master, 0, false), + DEFINE_PROP_END_OF_LIST(), +}; + +void pic_qdev_register(PICCommonInfo *info) +{ + info->isadev.init = pic_init_common; + info->isadev.qdev.size = sizeof(PICCommonState); + info->isadev.qdev.vmsd = &vmstate_pic_common; + info->isadev.qdev.no_user = 1; + info->isadev.qdev.props = pic_properties_common; + isa_qdev_register(&info->isadev); +} diff --git a/hw/i8259_internal.h b/hw/i8259_internal.h new file mode 100644 index 0000000000..13deb14b63 --- /dev/null +++ b/hw/i8259_internal.h @@ -0,0 +1,76 @@ +/* + * QEMU 8259 - internal interfaces + * + * Copyright (c) 2011 Jan 
Kiszka, Siemens AG + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef QEMU_I8259_INTERNAL_H +#define QEMU_I8259_INTERNAL_H + +#include "hw.h" +#include "pc.h" +#include "isa.h" + +typedef struct PICCommonState PICCommonState; + +struct PICCommonState { + ISADevice dev; + uint8_t last_irr; /* edge detection */ + uint8_t irr; /* interrupt request register */ + uint8_t imr; /* interrupt mask register */ + uint8_t isr; /* interrupt service register */ + uint8_t priority_add; /* highest irq priority */ + uint8_t irq_base; + uint8_t read_reg_select; + uint8_t poll; + uint8_t special_mask; + uint8_t init_state; + uint8_t auto_eoi; + uint8_t rotate_on_auto_eoi; + uint8_t special_fully_nested_mode; + uint8_t init4; /* true if 4 byte init */ + uint8_t single_mode; /* true if slave pic is not initialized */ + uint8_t elcr; /* PIIX edge/trigger selection*/ + uint8_t elcr_mask; + qemu_irq int_out[1]; + uint32_t master; /* reflects /SP input pin */ + uint32_t iobase; + uint32_t elcr_addr; + MemoryRegion base_io; + MemoryRegion elcr_io; +}; + +typedef struct PICCommonInfo PICCommonInfo; + +struct PICCommonInfo { + ISADeviceInfo isadev; + void (*init)(PICCommonState *s); + void (*pre_save)(PICCommonState *s); + void (*post_load)(PICCommonState *s); +}; + +void pic_reset_common(PICCommonState *s); + +ISADevice *i8259_init_chip(const char *name, ISABus *bus, bool master); + +void pic_qdev_register(PICCommonInfo *info); + +#endif /* !QEMU_I8259_INTERNAL_H */ From ac791b881442e8f929ad0f7423f0e6f253dbef70 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 10 Jan 2012 12:24:10 +0100 Subject: [PATCH 12/20] ioapic: Drop post-load irr initialization As all devices undergo a reset prior to vmload, and the reset value of irr is 0, we do not need to do this clearing for older vmstates explicitly. Dropping this redundant code will also make KVM integration a bit simpler.
Signed-off-by: Jan Kiszka --- hw/ioapic.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/hw/ioapic.c b/hw/ioapic.c index 27b07c6317..0743af6ed6 100644 --- a/hw/ioapic.c +++ b/hw/ioapic.c @@ -278,21 +278,9 @@ ioapic_mem_write(void *opaque, target_phys_addr_t addr, uint64_t val, } } -static int ioapic_post_load(void *opaque, int version_id) -{ - IOAPICState *s = opaque; - - if (version_id == 1) { - /* set sane value */ - s->irr = 0; - } - return 0; -} - static const VMStateDescription vmstate_ioapic = { .name = "ioapic", .version_id = 3, - .post_load = ioapic_post_load, .minimum_version_id = 1, .minimum_version_id_old = 1, .fields = (VMStateField[]) { From 244ac3af234fd636141182d60a007fcffd0970dc Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 16 Oct 2011 19:38:22 +0200 Subject: [PATCH 13/20] ioapic: Factor out base class for KVM reuse Split up the IOAPIC analogously to APIC and i8259. KVM will share the IOAPICCommonState, the vmstate, reset logic and certain init parts with the user space model. 
Signed-off-by: Jan Kiszka --- Makefile.target | 2 +- hw/ioapic.c | 130 ++++++------------------------------------- hw/ioapic_common.c | 104 ++++++++++++++++++++++++++++++++++ hw/ioapic_internal.h | 97 ++++++++++++++++++++++++++++++++ 4 files changed, 218 insertions(+), 115 deletions(-) create mode 100644 hw/ioapic_common.c create mode 100644 hw/ioapic_internal.h diff --git a/Makefile.target b/Makefile.target index 4446273cd9..556942d579 100644 --- a/Makefile.target +++ b/Makefile.target @@ -228,7 +228,7 @@ obj-y += device-hotplug.o # Hardware support obj-i386-y += vga.o obj-i386-y += mc146818rtc.o pc.o -obj-i386-y += cirrus_vga.o sga.o apic_common.o apic.o ioapic.o piix_pci.o +obj-i386-y += cirrus_vga.o sga.o apic_common.o apic.o ioapic_common.o ioapic.o piix_pci.o obj-i386-y += vmport.o obj-i386-y += pci-hotplug.o smbios.o wdt_ib700.o obj-i386-y += debugcon.o multiboot.o diff --git a/hw/ioapic.c b/hw/ioapic.c index 0743af6ed6..0c8be5006a 100644 --- a/hw/ioapic.c +++ b/hw/ioapic.c @@ -24,9 +24,7 @@ #include "pc.h" #include "apic.h" #include "ioapic.h" -#include "qemu-timer.h" -#include "host-utils.h" -#include "sysbus.h" +#include "ioapic_internal.h" //#define DEBUG_IOAPIC @@ -37,65 +35,9 @@ #define DPRINTF(fmt, ...) 
#endif -#define MAX_IOAPICS 1 +static IOAPICCommonState *ioapics[MAX_IOAPICS]; -#define IOAPIC_VERSION 0x11 - -#define IOAPIC_LVT_DEST_SHIFT 56 -#define IOAPIC_LVT_MASKED_SHIFT 16 -#define IOAPIC_LVT_TRIGGER_MODE_SHIFT 15 -#define IOAPIC_LVT_REMOTE_IRR_SHIFT 14 -#define IOAPIC_LVT_POLARITY_SHIFT 13 -#define IOAPIC_LVT_DELIV_STATUS_SHIFT 12 -#define IOAPIC_LVT_DEST_MODE_SHIFT 11 -#define IOAPIC_LVT_DELIV_MODE_SHIFT 8 - -#define IOAPIC_LVT_MASKED (1 << IOAPIC_LVT_MASKED_SHIFT) -#define IOAPIC_LVT_REMOTE_IRR (1 << IOAPIC_LVT_REMOTE_IRR_SHIFT) - -#define IOAPIC_TRIGGER_EDGE 0 -#define IOAPIC_TRIGGER_LEVEL 1 - -/*io{apic,sapic} delivery mode*/ -#define IOAPIC_DM_FIXED 0x0 -#define IOAPIC_DM_LOWEST_PRIORITY 0x1 -#define IOAPIC_DM_PMI 0x2 -#define IOAPIC_DM_NMI 0x4 -#define IOAPIC_DM_INIT 0x5 -#define IOAPIC_DM_SIPI 0x6 -#define IOAPIC_DM_EXTINT 0x7 -#define IOAPIC_DM_MASK 0x7 - -#define IOAPIC_VECTOR_MASK 0xff - -#define IOAPIC_IOREGSEL 0x00 -#define IOAPIC_IOWIN 0x10 - -#define IOAPIC_REG_ID 0x00 -#define IOAPIC_REG_VER 0x01 -#define IOAPIC_REG_ARB 0x02 -#define IOAPIC_REG_REDTBL_BASE 0x10 -#define IOAPIC_ID 0x00 - -#define IOAPIC_ID_SHIFT 24 -#define IOAPIC_ID_MASK 0xf - -#define IOAPIC_VER_ENTRIES_SHIFT 16 - -typedef struct IOAPICState IOAPICState; - -struct IOAPICState { - SysBusDevice busdev; - MemoryRegion io_memory; - uint8_t id; - uint8_t ioregsel; - uint32_t irr; - uint64_t ioredtbl[IOAPIC_NUM_PINS]; -}; - -static IOAPICState *ioapics[MAX_IOAPICS]; - -static void ioapic_service(IOAPICState *s) +static void ioapic_service(IOAPICCommonState *s) { uint8_t i; uint8_t trig_mode; @@ -135,7 +77,7 @@ static void ioapic_service(IOAPICState *s) static void ioapic_set_irq(void *opaque, int vector, int level) { - IOAPICState *s = opaque; + IOAPICCommonState *s = opaque; /* ISA IRQs map to GSI 1-1 except for IRQ0 which maps * to GSI 2. GSI maps to ioapic 1-1. 
This is not @@ -174,7 +116,7 @@ static void ioapic_set_irq(void *opaque, int vector, int level) void ioapic_eoi_broadcast(int vector) { - IOAPICState *s; + IOAPICCommonState *s; uint64_t entry; int i, n; @@ -199,7 +141,7 @@ void ioapic_eoi_broadcast(int vector) static uint64_t ioapic_mem_read(void *opaque, target_phys_addr_t addr, unsigned int size) { - IOAPICState *s = opaque; + IOAPICCommonState *s = opaque; int index; uint32_t val = 0; @@ -242,7 +184,7 @@ static void ioapic_mem_write(void *opaque, target_phys_addr_t addr, uint64_t val, unsigned int size) { - IOAPICState *s = opaque; + IOAPICCommonState *s = opaque; int index; switch (addr & 0xff) { @@ -278,71 +220,31 @@ ioapic_mem_write(void *opaque, target_phys_addr_t addr, uint64_t val, } } -static const VMStateDescription vmstate_ioapic = { - .name = "ioapic", - .version_id = 3, - .minimum_version_id = 1, - .minimum_version_id_old = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT8(id, IOAPICState), - VMSTATE_UINT8(ioregsel, IOAPICState), - VMSTATE_UNUSED_V(2, 8), /* to account for qemu-kvm's v2 format */ - VMSTATE_UINT32_V(irr, IOAPICState, 2), - VMSTATE_UINT64_ARRAY(ioredtbl, IOAPICState, IOAPIC_NUM_PINS), - VMSTATE_END_OF_LIST() - } -}; - -static void ioapic_reset(DeviceState *d) -{ - IOAPICState *s = DO_UPCAST(IOAPICState, busdev.qdev, d); - int i; - - s->id = 0; - s->ioregsel = 0; - s->irr = 0; - for (i = 0; i < IOAPIC_NUM_PINS; i++) { - s->ioredtbl[i] = 1 << IOAPIC_LVT_MASKED_SHIFT; - } -} - static const MemoryRegionOps ioapic_io_ops = { .read = ioapic_mem_read, .write = ioapic_mem_write, .endianness = DEVICE_NATIVE_ENDIAN, }; -static int ioapic_init1(SysBusDevice *dev) +static void ioapic_init(IOAPICCommonState *s, int instance_no) { - IOAPICState *s = FROM_SYSBUS(IOAPICState, dev); - static int ioapic_no; - - if (ioapic_no >= MAX_IOAPICS) { - return -1; - } - memory_region_init_io(&s->io_memory, &ioapic_io_ops, s, "ioapic", 0x1000); - sysbus_init_mmio(dev, &s->io_memory); - 
qdev_init_gpio_in(&dev->qdev, ioapic_set_irq, IOAPIC_NUM_PINS); + qdev_init_gpio_in(&s->busdev.qdev, ioapic_set_irq, IOAPIC_NUM_PINS); - ioapics[ioapic_no++] = s; - - return 0; + ioapics[instance_no] = s; } -static SysBusDeviceInfo ioapic_info = { - .init = ioapic_init1, - .qdev.name = "ioapic", - .qdev.size = sizeof(IOAPICState), - .qdev.vmsd = &vmstate_ioapic, - .qdev.reset = ioapic_reset, - .qdev.no_user = 1, +static IOAPICCommonInfo ioapic_info = { + .busdev.qdev.name = "ioapic", + .busdev.qdev.size = sizeof(IOAPICCommonState), + .busdev.qdev.reset = ioapic_reset_common, + .init = ioapic_init, }; static void ioapic_register_devices(void) { - sysbus_register_withprop(&ioapic_info); + ioapic_qdev_register(&ioapic_info); } device_init(ioapic_register_devices) diff --git a/hw/ioapic_common.c b/hw/ioapic_common.c new file mode 100644 index 0000000000..3aa9a1cf57 --- /dev/null +++ b/hw/ioapic_common.c @@ -0,0 +1,104 @@ +/* + * IOAPIC emulation logic - common bits of emulated and KVM kernel model + * + * Copyright (c) 2004-2005 Fabrice Bellard + * Copyright (c) 2009 Xiantao Zhang, Intel + * Copyright (c) 2011 Jan Kiszka, Siemens AG + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */ + +#include "ioapic.h" +#include "ioapic_internal.h" +#include "sysbus.h" + +void ioapic_reset_common(DeviceState *dev) +{ + IOAPICCommonState *s = DO_UPCAST(IOAPICCommonState, busdev.qdev, dev); + int i; + + s->id = 0; + s->ioregsel = 0; + s->irr = 0; + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + s->ioredtbl[i] = 1 << IOAPIC_LVT_MASKED_SHIFT; + } +} + +static void ioapic_dispatch_pre_save(void *opaque) +{ + IOAPICCommonState *s = opaque; + IOAPICCommonInfo *info = + DO_UPCAST(IOAPICCommonInfo, busdev.qdev, s->busdev.qdev.info); + + if (info->pre_save) { + info->pre_save(s); + } +} + +static int ioapic_dispatch_post_load(void *opaque, int version_id) +{ + IOAPICCommonState *s = opaque; + IOAPICCommonInfo *info = + DO_UPCAST(IOAPICCommonInfo, busdev.qdev, s->busdev.qdev.info); + + if (info->post_load) { + info->post_load(s); + } + return 0; +} + +static int ioapic_init_common(SysBusDevice *dev) +{ + IOAPICCommonState *s = FROM_SYSBUS(IOAPICCommonState, dev); + IOAPICCommonInfo *info; + static int ioapic_no; + + if (ioapic_no >= MAX_IOAPICS) { + return -1; + } + + info = DO_UPCAST(IOAPICCommonInfo, busdev.qdev, s->busdev.qdev.info); + info->init(s, ioapic_no); + + sysbus_init_mmio(&s->busdev, &s->io_memory); + ioapic_no++; + + return 0; +} + +static const VMStateDescription vmstate_ioapic_common = { + .name = "ioapic", + .version_id = 3, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .pre_save = ioapic_dispatch_pre_save, + .post_load = ioapic_dispatch_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT8(id, IOAPICCommonState), + VMSTATE_UINT8(ioregsel, IOAPICCommonState), + VMSTATE_UNUSED_V(2, 8), /* to account for qemu-kvm's v2 format */ + VMSTATE_UINT32_V(irr, IOAPICCommonState, 2), + VMSTATE_UINT64_ARRAY(ioredtbl, IOAPICCommonState, IOAPIC_NUM_PINS), + VMSTATE_END_OF_LIST() + } +}; + +void ioapic_qdev_register(IOAPICCommonInfo *info) +{ + info->busdev.init = ioapic_init_common; + info->busdev.qdev.vmsd = &vmstate_ioapic_common; + 
info->busdev.qdev.no_user = 1; + sysbus_register_withprop(&info->busdev); +} diff --git a/hw/ioapic_internal.h b/hw/ioapic_internal.h new file mode 100644 index 0000000000..f8d90c0fda --- /dev/null +++ b/hw/ioapic_internal.h @@ -0,0 +1,97 @@ +/* + * IOAPIC emulation logic - internal interfaces + * + * Copyright (c) 2004-2005 Fabrice Bellard + * Copyright (c) 2009 Xiantao Zhang, Intel + * Copyright (c) 2011 Jan Kiszka, Siemens AG + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef QEMU_IOAPIC_INTERNAL_H +#define QEMU_IOAPIC_INTERNAL_H + +#include "hw.h" +#include "memory.h" +#include "sysbus.h" + +#define MAX_IOAPICS 1 + +#define IOAPIC_VERSION 0x11 + +#define IOAPIC_LVT_DEST_SHIFT 56 +#define IOAPIC_LVT_MASKED_SHIFT 16 +#define IOAPIC_LVT_TRIGGER_MODE_SHIFT 15 +#define IOAPIC_LVT_REMOTE_IRR_SHIFT 14 +#define IOAPIC_LVT_POLARITY_SHIFT 13 +#define IOAPIC_LVT_DELIV_STATUS_SHIFT 12 +#define IOAPIC_LVT_DEST_MODE_SHIFT 11 +#define IOAPIC_LVT_DELIV_MODE_SHIFT 8 + +#define IOAPIC_LVT_MASKED (1 << IOAPIC_LVT_MASKED_SHIFT) +#define IOAPIC_LVT_REMOTE_IRR (1 << IOAPIC_LVT_REMOTE_IRR_SHIFT) + +#define IOAPIC_TRIGGER_EDGE 0 +#define IOAPIC_TRIGGER_LEVEL 1 + +/*io{apic,sapic} delivery mode*/ +#define IOAPIC_DM_FIXED 0x0 +#define IOAPIC_DM_LOWEST_PRIORITY 0x1 +#define IOAPIC_DM_PMI 0x2 +#define IOAPIC_DM_NMI 0x4 +#define IOAPIC_DM_INIT 0x5 +#define IOAPIC_DM_SIPI 0x6 +#define IOAPIC_DM_EXTINT 0x7 +#define IOAPIC_DM_MASK 0x7 + +#define IOAPIC_VECTOR_MASK 0xff + +#define IOAPIC_IOREGSEL 0x00 +#define IOAPIC_IOWIN 0x10 + +#define IOAPIC_REG_ID 0x00 +#define IOAPIC_REG_VER 0x01 +#define IOAPIC_REG_ARB 0x02 +#define IOAPIC_REG_REDTBL_BASE 0x10 +#define IOAPIC_ID 0x00 + +#define IOAPIC_ID_SHIFT 24 +#define IOAPIC_ID_MASK 0xf + +#define IOAPIC_VER_ENTRIES_SHIFT 16 + +typedef struct IOAPICCommonState IOAPICCommonState; + +struct IOAPICCommonState { + SysBusDevice busdev; + MemoryRegion io_memory; + uint8_t id; + uint8_t ioregsel; + uint32_t irr; + uint64_t ioredtbl[IOAPIC_NUM_PINS]; +}; + +typedef struct IOAPICCommonInfo IOAPICCommonInfo; + +struct IOAPICCommonInfo { + SysBusDeviceInfo busdev; + void (*init)(IOAPICCommonState *s, int instance_no); + void (*pre_save)(IOAPICCommonState *s); + void (*post_load)(IOAPICCommonState *s); +}; + +void ioapic_qdev_register(IOAPICCommonInfo *info); +void ioapic_reset_common(DeviceState *dev); + +#endif /* !QEMU_IOAPIC_INTERNAL_H */ From 1660e72d4fbbd87e34eb4017d7e7c0ff4e29ca84 Mon Sep 17 00:00:00 2001 From: 
Jan Kiszka Date: Sun, 23 Oct 2011 16:01:19 +0200 Subject: [PATCH 14/20] memory: Introduce memory_region_init_reservation Introduce a memory region type that can reserve I/O space. Such regions are useful for modeling I/O that is only handled outside of QEMU, i.e. in the context of an accelerator like KVM. Any access to such a region from QEMU is a bug, but could theoretically be triggered by guest code (DMA to reserved region). So only warn about such events once, then ignore them. Signed-off-by: Jan Kiszka --- memory.c | 36 ++++++++++++++++++++++++++++++++++++ memory.h | 16 ++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/memory.c b/memory.c index 6201a3749a..68e5cdfb45 100644 --- a/memory.c +++ b/memory.c @@ -1049,6 +1049,42 @@ void memory_region_init_rom_device(MemoryRegion *mr, mr->ram_addr |= cpu_register_io_memory(mr); } +static uint64_t invalid_read(void *opaque, target_phys_addr_t addr, + unsigned size) +{ + MemoryRegion *mr = opaque; + + if (!mr->warning_printed) { + fprintf(stderr, "Invalid read from memory region %s\n", mr->name); + mr->warning_printed = true; + } + return -1U; +} + +static void invalid_write(void *opaque, target_phys_addr_t addr, uint64_t data, + unsigned size) +{ + MemoryRegion *mr = opaque; + + if (!mr->warning_printed) { + fprintf(stderr, "Invalid write to memory region %s\n", mr->name); + mr->warning_printed = true; + } +} + +static const MemoryRegionOps reservation_ops = { + .read = invalid_read, + .write = invalid_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +void memory_region_init_reservation(MemoryRegion *mr, + const char *name, + uint64_t size) +{ + memory_region_init_io(mr, &reservation_ops, mr, name, size); +} + void memory_region_destroy(MemoryRegion *mr) { assert(QTAILQ_EMPTY(&mr->subregions)); diff --git a/memory.h b/memory.h index d48b08bf94..34c69cfc7a 100644 --- a/memory.h +++ b/memory.h @@ -126,6 +126,7 @@ struct MemoryRegion { bool readonly; /* For RAM regions */ bool enabled; bool rom_device;
+ bool warning_printed; /* For reservations */ MemoryRegion *alias; target_phys_addr_t alias_offset; unsigned priority; @@ -279,6 +280,21 @@ void memory_region_init_rom_device(MemoryRegion *mr, const char *name, uint64_t size); +/** + * memory_region_init_reservation: Initialize a memory region that reserves + * I/O space. + * + * A reservation region primarily serves debugging purposes. It claims I/O + * space that is not supposed to be handled by QEMU itself. Any access via + * the memory API prints a warning once per region and is otherwise ignored. + * + * @mr: the #MemoryRegion to be initialized + * @name: used for debugging; not visible to the user or ABI + * @size: size of the region. + */ +void memory_region_init_reservation(MemoryRegion *mr, + const char *name, + uint64_t size); /** * memory_region_destroy: Destroy a memory region and reclaim all resources. * From 84b058d7df1e75543ef7422d97b039cd413f68f1 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 15 Oct 2011 11:49:47 +0200 Subject: [PATCH 15/20] kvm: Introduce core services for in-kernel irqchip support Add the basic infrastructure to activate in-kernel irqchip support, inject interrupts into these models, and maintain IRQ routes. Routing is optional and depends on the host arch supporting KVM_CAP_IRQ_ROUTING. When it's not available on x86, we lose the HPET as we can't route GSI0 to IOAPIC pin 2. In-kernel irqchip support will eventually be controlled by the machine property 'kernel_irqchip', but this is not yet wired up.
Signed-off-by: Jan Kiszka --- kvm-all.c | 149 ++++++++++++++++++++++++++++++++++++++++++++++ kvm.h | 8 +++ target-i386/kvm.c | 11 ++++ 3 files changed, 168 insertions(+) diff --git a/kvm-all.c b/kvm-all.c index 2cc4562921..fa9d92d005 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -78,6 +78,13 @@ struct KVMState int pit_in_kernel; int xsave, xcrs; int many_ioeventfds; + int irqchip_inject_ioctl; +#ifdef KVM_CAP_IRQ_ROUTING + struct kvm_irq_routing *irq_routes; + int nr_allocated_irq_routes; + uint32_t *used_gsi_bitmap; + unsigned int max_gsi; +#endif }; KVMState *kvm_state; @@ -728,6 +735,138 @@ static void kvm_handle_interrupt(CPUState *env, int mask) } } +int kvm_irqchip_set_irq(KVMState *s, int irq, int level) +{ + struct kvm_irq_level event; + int ret; + + assert(s->irqchip_in_kernel); + + event.level = level; + event.irq = irq; + ret = kvm_vm_ioctl(s, s->irqchip_inject_ioctl, &event); + if (ret < 0) { + perror("kvm_set_irqchip_line"); + abort(); + } + + return (s->irqchip_inject_ioctl == KVM_IRQ_LINE) ? 
1 : event.status; +} + +#ifdef KVM_CAP_IRQ_ROUTING +static void set_gsi(KVMState *s, unsigned int gsi) +{ + assert(gsi < s->max_gsi); + + s->used_gsi_bitmap[gsi / 32] |= 1U << (gsi % 32); +} + +static void kvm_init_irq_routing(KVMState *s) +{ + int gsi_count; + + gsi_count = kvm_check_extension(s, KVM_CAP_IRQ_ROUTING); + if (gsi_count > 0) { + unsigned int gsi_bits, i; + + /* Round up so we can search ints using ffs */ + gsi_bits = (gsi_count + 31) / 32; + s->used_gsi_bitmap = g_malloc0(gsi_bits / 8); + s->max_gsi = gsi_bits; + + /* Mark any over-allocated bits as already in use */ + for (i = gsi_count; i < gsi_bits; i++) { + set_gsi(s, i); + } + } + + s->irq_routes = g_malloc0(sizeof(*s->irq_routes)); + s->nr_allocated_irq_routes = 0; + + kvm_arch_init_irq_routing(s); +} + +static void kvm_add_routing_entry(KVMState *s, + struct kvm_irq_routing_entry *entry) +{ + struct kvm_irq_routing_entry *new; + int n, size; + + if (s->irq_routes->nr == s->nr_allocated_irq_routes) { + n = s->nr_allocated_irq_routes * 2; + if (n < 64) { + n = 64; + } + size = sizeof(struct kvm_irq_routing); + size += n * sizeof(*new); + s->irq_routes = g_realloc(s->irq_routes, size); + s->nr_allocated_irq_routes = n; + } + n = s->irq_routes->nr++; + new = &s->irq_routes->entries[n]; + memset(new, 0, sizeof(*new)); + new->gsi = entry->gsi; + new->type = entry->type; + new->flags = entry->flags; + new->u = entry->u; + + set_gsi(s, entry->gsi); +} + +void kvm_irqchip_add_route(KVMState *s, int irq, int irqchip, int pin) +{ + struct kvm_irq_routing_entry e; + + e.gsi = irq; + e.type = KVM_IRQ_ROUTING_IRQCHIP; + e.flags = 0; + e.u.irqchip.irqchip = irqchip; + e.u.irqchip.pin = pin; + kvm_add_routing_entry(s, &e); +} + +int kvm_irqchip_commit_routes(KVMState *s) +{ + s->irq_routes->flags = 0; + return kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes); +} + +#else /* !KVM_CAP_IRQ_ROUTING */ + +static void kvm_init_irq_routing(KVMState *s) +{ +} +#endif /* !KVM_CAP_IRQ_ROUTING */ + +static int 
kvm_irqchip_create(KVMState *s) +{ + QemuOptsList *list = qemu_find_opts("machine"); + int ret; + + if (QTAILQ_EMPTY(&list->head) || + !qemu_opt_get_bool(QTAILQ_FIRST(&list->head), + "kernel_irqchip", false) || + !kvm_check_extension(s, KVM_CAP_IRQCHIP)) { + return 0; + } + + ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP); + if (ret < 0) { + fprintf(stderr, "Create kernel irqchip failed\n"); + return ret; + } + + s->irqchip_inject_ioctl = KVM_IRQ_LINE; + if (kvm_check_extension(s, KVM_CAP_IRQ_INJECT_STATUS)) { + s->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS; + } + s->irqchip_in_kernel = 1; + + kvm_init_irq_routing(s); + + return 0; +} + int kvm_init(void) { static const char upgrade_note[] = @@ -823,6 +962,11 @@ int kvm_init(void) goto err; } + ret = kvm_irqchip_create(s); + if (ret < 0) { + goto err; + } + kvm_state = s; memory_listener_register(&kvm_memory_listener); @@ -1158,6 +1302,11 @@ int kvm_has_many_ioeventfds(void) return kvm_state->many_ioeventfds; } +int kvm_has_gsi_routing(void) +{ + return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING); +} + void kvm_setup_guest_memory(void *start, size_t size) { if (!kvm_has_sync_mmu()) { diff --git a/kvm.h b/kvm.h index c1de81a11c..dd2d4f0a94 100644 --- a/kvm.h +++ b/kvm.h @@ -51,6 +51,7 @@ int kvm_has_debugregs(void); int kvm_has_xsave(void); int kvm_has_xcrs(void); int kvm_has_many_ioeventfds(void); +int kvm_has_gsi_routing(void); #ifdef NEED_CPU_H int kvm_init_vcpu(CPUState *env); @@ -124,6 +125,13 @@ void kvm_arch_reset_vcpu(CPUState *env); int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr); int kvm_arch_on_sigbus(int code, void *addr); +void kvm_arch_init_irq_routing(KVMState *s); + +int kvm_irqchip_set_irq(KVMState *s, int irq, int level); + +void kvm_irqchip_add_route(KVMState *s, int gsi, int irqchip, int pin); +int kvm_irqchip_commit_routes(KVMState *s); + struct kvm_guest_debug; struct kvm_debug_exit_arch; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 1f56492cd2..f6f4189f4d 100644 
--- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1939,3 +1939,14 @@ bool kvm_arch_stop_on_emulation_error(CPUState *env) return !(env->cr[0] & CR0_PE_MASK) || ((env->segs[R_CS].selector & 3) != 3); } + +void kvm_arch_init_irq_routing(KVMState *s) +{ + if (!kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) { + /* If kernel can't do irq routing, interrupt source + * override 0->2 cannot be set up as required by HPET. + * So we have to disable it. + */ + no_hpet = 1; + } +} From 9b5b76d44930dc9266bb6d30862704cb3c86d2ca Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 15 Oct 2011 14:08:26 +0200 Subject: [PATCH 16/20] kvm: x86: Establish IRQ0 override control KVM is forced to disable the IRQ0 override when we run with in-kernel irqchip but without IRQ routing support of the kernel. Set the fwcfg value correspondingly. This aligns us with qemu-kvm. Signed-off-by: Jan Kiszka --- hw/pc.c | 3 ++- kvm-all.c | 5 +++++ kvm-stub.c | 5 +++++ kvm.h | 2 ++ sysemu.h | 1 - vl.c | 1 - 6 files changed, 14 insertions(+), 3 deletions(-) diff --git a/hw/pc.c b/hw/pc.c index 04304e0ca3..38d787a74a 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -39,6 +39,7 @@ #include "msi.h" #include "sysbus.h" #include "sysemu.h" +#include "kvm.h" #include "blockdev.h" #include "ui/qemu-spice.h" #include "memory.h" @@ -609,7 +610,7 @@ static void *bochs_bios_init(void) fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES, (uint8_t *)acpi_tables, acpi_tables_len); - fw_cfg_add_bytes(fw_cfg, FW_CFG_IRQ0_OVERRIDE, &irq0override, 1); + fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, kvm_allows_irq0_override()); smbios_table = smbios_get_table(&smbios_len); if (smbios_table) diff --git a/kvm-all.c b/kvm-all.c index fa9d92d005..88f11567f7 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1307,6 +1307,11 @@ int kvm_has_gsi_routing(void) return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING); } +int kvm_allows_irq0_override(void) +{ + return !kvm_enabled() || 
!kvm_irqchip_in_kernel() || kvm_has_gsi_routing(); +} + void kvm_setup_guest_memory(void *start, size_t size) { if (!kvm_has_sync_mmu()) { diff --git a/kvm-stub.c b/kvm-stub.c index 06064b9a86..6c2b06ba32 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -78,6 +78,11 @@ int kvm_has_many_ioeventfds(void) return 0; } +int kvm_allows_irq0_override(void) +{ + return 1; +} + void kvm_setup_guest_memory(void *start, size_t size) { } diff --git a/kvm.h b/kvm.h index dd2d4f0a94..ad430fd09a 100644 --- a/kvm.h +++ b/kvm.h @@ -53,6 +53,8 @@ int kvm_has_xcrs(void); int kvm_has_many_ioeventfds(void); int kvm_has_gsi_routing(void); +int kvm_allows_irq0_override(void); + #ifdef NEED_CPU_H int kvm_init_vcpu(CPUState *env); diff --git a/sysemu.h b/sysemu.h index ddef2bb0c1..caff268bd2 100644 --- a/sysemu.h +++ b/sysemu.h @@ -102,7 +102,6 @@ extern int vga_interface_type; extern int graphic_width; extern int graphic_height; extern int graphic_depth; -extern uint8_t irq0override; extern DisplayType display_type; extern const char *keyboard_layout; extern int win2k_install_hack; diff --git a/vl.c b/vl.c index ba55b356cf..132c387c2f 100644 --- a/vl.c +++ b/vl.c @@ -218,7 +218,6 @@ int no_reboot = 0; int no_shutdown = 0; int cursor_hide = 1; int graphic_rotate = 0; -uint8_t irq0override = 1; const char *watchdog; QEMUOptionRom option_rom[MAX_OPTION_ROMS]; int nb_option_roms; From 680c1c6fd73c0cb3971938944936f18bbb7bad1b Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 16 Oct 2011 13:23:26 +0200 Subject: [PATCH 17/20] kvm: x86: Add user space part for in-kernel APIC This introduces the alternative APIC device which makes use of KVM's in-kernel device model. External NMI injection via LINT1 is emulated by checking the current state of the in-kernel APIC, only injecting a NMI into the VCPU if LINT1 is unmasked and configured to DM_NMI. MSI is not yet supported, so we disable this when the in-kernel model is in use. 
CC: Lai Jiangshan Signed-off-by: Jan Kiszka --- Makefile.target | 2 +- hw/kvm/apic.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++ hw/pc.c | 15 +++-- kvm.h | 4 ++ target-i386/kvm.c | 38 +++++++++++++ 5 files changed, 191 insertions(+), 6 deletions(-) create mode 100644 hw/kvm/apic.c diff --git a/Makefile.target b/Makefile.target index 556942d579..1a63a1c3e7 100644 --- a/Makefile.target +++ b/Makefile.target @@ -233,7 +233,7 @@ obj-i386-y += vmport.o obj-i386-y += pci-hotplug.o smbios.o wdt_ib700.o obj-i386-y += debugcon.o multiboot.o obj-i386-y += pc_piix.o -obj-i386-$(CONFIG_KVM) += kvm/clock.o +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o # shared objects diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c new file mode 100644 index 0000000000..6300695e86 --- /dev/null +++ b/hw/kvm/apic.c @@ -0,0 +1,138 @@ +/* + * KVM in-kernel APIC support + * + * Copyright (c) 2011 Siemens AG + * + * Authors: + * Jan Kiszka + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory. 
+ */ +#include "hw/apic_internal.h" +#include "kvm.h" + +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, + int reg_id, uint32_t val) +{ + *((uint32_t *)(kapic->regs + (reg_id << 4))) = val; +} + +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic, + int reg_id) +{ + return *((uint32_t *)(kapic->regs + (reg_id << 4))); +} + +void kvm_put_apic_state(DeviceState *d, struct kvm_lapic_state *kapic) +{ + APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d); + int i; + + memset(kapic, 0, sizeof(*kapic)); /* whole struct, not pointer size */ + kvm_apic_set_reg(kapic, 0x2, s->id << 24); + kvm_apic_set_reg(kapic, 0x8, s->tpr); + kvm_apic_set_reg(kapic, 0xd, s->log_dest << 24); + kvm_apic_set_reg(kapic, 0xe, s->dest_mode << 28 | 0x0fffffff); + kvm_apic_set_reg(kapic, 0xf, s->spurious_vec); + for (i = 0; i < 8; i++) { + kvm_apic_set_reg(kapic, 0x10 + i, s->isr[i]); + kvm_apic_set_reg(kapic, 0x18 + i, s->tmr[i]); + kvm_apic_set_reg(kapic, 0x20 + i, s->irr[i]); + } + kvm_apic_set_reg(kapic, 0x28, s->esr); + kvm_apic_set_reg(kapic, 0x30, s->icr[0]); + kvm_apic_set_reg(kapic, 0x31, s->icr[1]); + for (i = 0; i < APIC_LVT_NB; i++) { + kvm_apic_set_reg(kapic, 0x32 + i, s->lvt[i]); + } + kvm_apic_set_reg(kapic, 0x38, s->initial_count); + kvm_apic_set_reg(kapic, 0x3e, s->divide_conf); +} + +void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic) +{ + APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d); + int i, v; + + s->id = kvm_apic_get_reg(kapic, 0x2) >> 24; + s->tpr = kvm_apic_get_reg(kapic, 0x8); + s->arb_id = kvm_apic_get_reg(kapic, 0x9); + s->log_dest = kvm_apic_get_reg(kapic, 0xd) >> 24; + s->dest_mode = kvm_apic_get_reg(kapic, 0xe) >> 28; + s->spurious_vec = kvm_apic_get_reg(kapic, 0xf); + for (i = 0; i < 8; i++) { + s->isr[i] = kvm_apic_get_reg(kapic, 0x10 + i); + s->tmr[i] = kvm_apic_get_reg(kapic, 0x18 + i); + s->irr[i] = kvm_apic_get_reg(kapic, 0x20 + i); + } + s->esr = kvm_apic_get_reg(kapic, 0x28); + s->icr[0] = 
kvm_apic_get_reg(kapic, 0x30); + s->icr[1] = kvm_apic_get_reg(kapic, 0x31); + for (i = 0; i < APIC_LVT_NB; i++) { + s->lvt[i] = kvm_apic_get_reg(kapic, 0x32 + i); + } + s->initial_count = kvm_apic_get_reg(kapic, 0x38); + s->divide_conf = kvm_apic_get_reg(kapic, 0x3e); + + v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4); + s->count_shift = (v + 1) & 7; + + s->initial_count_load_time = qemu_get_clock_ns(vm_clock); + apic_next_timer(s, s->initial_count_load_time); +} + +static void kvm_apic_set_base(APICCommonState *s, uint64_t val) +{ + s->apicbase = val; +} + +static void kvm_apic_set_tpr(APICCommonState *s, uint8_t val) +{ + s->tpr = (val & 0x0f) << 4; +} + +static void do_inject_external_nmi(void *data) +{ + APICCommonState *s = data; + CPUState *env = s->cpu_env; + uint32_t lvt; + int ret; + + cpu_synchronize_state(env); + + lvt = s->lvt[APIC_LVT_LINT1]; + if (!(lvt & APIC_LVT_MASKED) && ((lvt >> 8) & 7) == APIC_DM_NMI) { + ret = kvm_vcpu_ioctl(env, KVM_NMI); + if (ret < 0) { + fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n", + strerror(-ret)); + } + } +} + +static void kvm_apic_external_nmi(APICCommonState *s) +{ + run_on_cpu(s->cpu_env, do_inject_external_nmi, s); +} + +static void kvm_apic_init(APICCommonState *s) +{ + memory_region_init_reservation(&s->io_memory, "kvm-apic-msi", + MSI_SPACE_SIZE); +} + +static APICCommonInfo kvm_apic_info = { + .busdev.qdev.name = "kvm-apic", + .init = kvm_apic_init, + .set_base = kvm_apic_set_base, + .set_tpr = kvm_apic_set_tpr, + .external_nmi = kvm_apic_external_nmi, +}; + +static void kvm_apic_register_device(void) +{ + apic_qdev_register(&kvm_apic_info); +} + +device_init(kvm_apic_register_device) diff --git a/hw/pc.c b/hw/pc.c index 38d787a74a..6a8a8719ce 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -879,25 +879,30 @@ DeviceState *cpu_get_current_apic(void) static DeviceState *apic_init(void *env, uint8_t apic_id) { DeviceState *dev; - SysBusDevice *d; static int apic_mapped; - dev = qdev_create(NULL, "apic"); 
+ if (kvm_enabled() && kvm_irqchip_in_kernel()) { + dev = qdev_create(NULL, "kvm-apic"); + } else { + dev = qdev_create(NULL, "apic"); + } qdev_prop_set_uint8(dev, "id", apic_id); qdev_prop_set_ptr(dev, "cpu_env", env); qdev_init_nofail(dev); - d = sysbus_from_qdev(dev); /* XXX: mapping more APICs at the same memory location */ if (apic_mapped == 0) { /* NOTE: the APIC is directly connected to the CPU - it is not on the global memory bus. */ /* XXX: what if the base changes? */ - sysbus_mmio_map(d, 0, MSI_ADDR_BASE); + sysbus_mmio_map(sysbus_from_qdev(dev), 0, MSI_ADDR_BASE); apic_mapped = 1; } - msi_supported = true; + /* KVM does not support MSI yet. */ + if (!kvm_enabled() || !kvm_irqchip_in_kernel()) { + msi_supported = true; + } return dev; } diff --git a/kvm.h b/kvm.h index ad430fd09a..40b5ffcfa3 100644 --- a/kvm.h +++ b/kvm.h @@ -31,6 +31,7 @@ extern int kvm_allowed; #endif struct kvm_run; +struct kvm_lapic_state; typedef struct KVMCapabilityInfo { const char *name; @@ -134,6 +135,9 @@ int kvm_irqchip_set_irq(KVMState *s, int irq, int level); void kvm_irqchip_add_route(KVMState *s, int gsi, int irqchip, int pin); int kvm_irqchip_commit_routes(KVMState *s); +void kvm_put_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); +void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); + struct kvm_guest_debug; struct kvm_debug_exit_arch; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index f6f4189f4d..e41de394d2 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1337,6 +1337,36 @@ static int kvm_get_mp_state(CPUState *env) return 0; } +static int kvm_get_apic(CPUState *env) +{ + DeviceState *apic = env->apic_state; + struct kvm_lapic_state kapic; + int ret; + + if (apic && kvm_enabled() && kvm_irqchip_in_kernel()) { + ret = kvm_vcpu_ioctl(env, KVM_GET_LAPIC, &kapic); + if (ret < 0) { + return ret; + } + + kvm_get_apic_state(apic, &kapic); + } + return 0; +} + +static int kvm_put_apic(CPUState *env) +{ + DeviceState *apic = 
env->apic_state; + struct kvm_lapic_state kapic; + + if (apic && kvm_enabled() && kvm_irqchip_in_kernel()) { + kvm_put_apic_state(apic, &kapic); + + return kvm_vcpu_ioctl(env, KVM_SET_LAPIC, &kapic); + } + return 0; +} + static int kvm_put_vcpu_events(CPUState *env, int level) { struct kvm_vcpu_events events; @@ -1510,6 +1540,10 @@ int kvm_arch_put_registers(CPUState *env, int level) if (ret < 0) { return ret; } + ret = kvm_put_apic(env); + if (ret < 0) { + return ret; + } } ret = kvm_put_vcpu_events(env, level); if (ret < 0) { @@ -1557,6 +1591,10 @@ int kvm_arch_get_registers(CPUState *env) if (ret < 0) { return ret; } + ret = kvm_get_apic(env); + if (ret < 0) { + return ret; + } ret = kvm_get_vcpu_events(env); if (ret < 0) { return ret; From 10b618827507fbdbe7cf1a9b1f2c81d254dcd8b8 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 16 Oct 2011 15:30:27 +0200 Subject: [PATCH 18/20] kvm: x86: Add user space part for in-kernel i8259 Introduce the alternative 'kvm-i8259' device model that exploits KVM in-kernel acceleration. The PIIX3 initialization code is furthermore extended by KVM specific IRQ route setup. GSI injection differs in KVM mode from the user space model. As we can dispatch ISA-range IRQs to both IOAPIC and PIC inside the kernel, we do not need to inject them separately. This is reflected by a KVM-specific GSI handler. 
Signed-off-by: Jan Kiszka --- Makefile.target | 2 +- hw/kvm/i8259.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++++ hw/pc.h | 1 + hw/pc_piix.c | 52 ++++++++++++++++++-- 4 files changed, 178 insertions(+), 5 deletions(-) create mode 100644 hw/kvm/i8259.c diff --git a/Makefile.target b/Makefile.target index 1a63a1c3e7..701073d155 100644 --- a/Makefile.target +++ b/Makefile.target @@ -233,7 +233,7 @@ obj-i386-y += vmport.o obj-i386-y += pci-hotplug.o smbios.o wdt_ib700.o obj-i386-y += debugcon.o multiboot.o obj-i386-y += pc_piix.o -obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o kvm/i8259.o obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o # shared objects diff --git a/hw/kvm/i8259.c b/hw/kvm/i8259.c new file mode 100644 index 0000000000..64bb5c26e2 --- /dev/null +++ b/hw/kvm/i8259.c @@ -0,0 +1,128 @@ +/* + * KVM in-kernel PIC (i8259) support + * + * Copyright (c) 2011 Siemens AG + * + * Authors: + * Jan Kiszka + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory. + */ +#include "hw/i8259_internal.h" +#include "hw/apic_internal.h" +#include "kvm.h" + +static void kvm_pic_get(PICCommonState *s) +{ + struct kvm_irqchip chip; + struct kvm_pic_state *kpic; + int ret; + + chip.chip_id = s->master ? 
KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE; + ret = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, &chip); + if (ret < 0) { + fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(-ret)); + abort(); + } + + kpic = &chip.chip.pic; + + s->last_irr = kpic->last_irr; + s->irr = kpic->irr; + s->imr = kpic->imr; + s->isr = kpic->isr; + s->priority_add = kpic->priority_add; + s->irq_base = kpic->irq_base; + s->read_reg_select = kpic->read_reg_select; + s->poll = kpic->poll; + s->special_mask = kpic->special_mask; + s->init_state = kpic->init_state; + s->auto_eoi = kpic->auto_eoi; + s->rotate_on_auto_eoi = kpic->rotate_on_auto_eoi; + s->special_fully_nested_mode = kpic->special_fully_nested_mode; + s->init4 = kpic->init4; + s->elcr = kpic->elcr; + s->elcr_mask = kpic->elcr_mask; +} + +static void kvm_pic_put(PICCommonState *s) +{ + struct kvm_irqchip chip; + struct kvm_pic_state *kpic; + int ret; + + chip.chip_id = s->master ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE; + + kpic = &chip.chip.pic; + + kpic->last_irr = s->last_irr; + kpic->irr = s->irr; + kpic->imr = s->imr; + kpic->isr = s->isr; + kpic->priority_add = s->priority_add; + kpic->irq_base = s->irq_base; + kpic->read_reg_select = s->read_reg_select; + kpic->poll = s->poll; + kpic->special_mask = s->special_mask; + kpic->init_state = s->init_state; + kpic->auto_eoi = s->auto_eoi; + kpic->rotate_on_auto_eoi = s->rotate_on_auto_eoi; + kpic->special_fully_nested_mode = s->special_fully_nested_mode; + kpic->init4 = s->init4; + kpic->elcr = s->elcr; + kpic->elcr_mask = s->elcr_mask; + + ret = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, &chip); + if (ret < 0) { + fprintf(stderr, "KVM_SET_IRQCHIP failed: %s\n", strerror(-ret)); + abort(); + } +} + +static void kvm_pic_reset(DeviceState *dev) +{ + PICCommonState *s = DO_UPCAST(PICCommonState, dev.qdev, dev); + + pic_reset_common(s); + s->elcr = 0; + + kvm_pic_put(s); +} + +static void kvm_pic_set_irq(void *opaque, int irq, int level) +{ + int delivered; + + delivered = 
kvm_irqchip_set_irq(kvm_state, irq, level); + apic_report_irq_delivered(delivered); +} + +static void kvm_pic_init(PICCommonState *s) +{ + memory_region_init_reservation(&s->base_io, "kvm-pic", 2); + memory_region_init_reservation(&s->elcr_io, "kvm-elcr", 1); +} + +qemu_irq *kvm_i8259_init(ISABus *bus) +{ + i8259_init_chip("kvm-i8259", bus, true); + i8259_init_chip("kvm-i8259", bus, false); + + return qemu_allocate_irqs(kvm_pic_set_irq, NULL, ISA_NUM_IRQS); +} + +static PICCommonInfo kvm_i8259_info = { + .isadev.qdev.name = "kvm-i8259", + .isadev.qdev.reset = kvm_pic_reset, + .init = kvm_pic_init, + .pre_save = kvm_pic_get, + .post_load = kvm_pic_put, +}; + +static void kvm_pic_register(void) +{ + pic_qdev_register(&kvm_i8259_info); +} + +device_init(kvm_pic_register) diff --git a/hw/pc.h b/hw/pc.h index ece069ad1f..5e913dbf85 100644 --- a/hw/pc.h +++ b/hw/pc.h @@ -64,6 +64,7 @@ bool parallel_mm_init(MemoryRegion *address_space, extern DeviceState *isa_pic; qemu_irq *i8259_init(ISABus *bus, qemu_irq parent_irq); +qemu_irq *kvm_i8259_init(ISABus *bus); int pic_read_irq(DeviceState *d); int pic_get_output(DeviceState *d); void pic_info(Monitor *mon); diff --git a/hw/pc_piix.c b/hw/pc_piix.c index cde810db27..297c04a9ab 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -53,6 +53,42 @@ static const int ide_iobase[MAX_IDE_BUS] = { 0x1f0, 0x170 }; static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 }; static const int ide_irq[MAX_IDE_BUS] = { 14, 15 }; +static void kvm_piix3_setup_irq_routing(bool pci_enabled) +{ +#ifdef CONFIG_KVM + KVMState *s = kvm_state; + int ret, i; + + if (kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) { + for (i = 0; i < 8; ++i) { + if (i == 2) { + continue; + } + kvm_irqchip_add_route(s, i, KVM_IRQCHIP_PIC_MASTER, i); + } + for (i = 8; i < 16; ++i) { + kvm_irqchip_add_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8); + } + ret = kvm_irqchip_commit_routes(s); + if (ret < 0) { + hw_error("KVM IRQ routing setup failed"); + } + } +#endif /* CONFIG_KVM 
*/ +} + +static void kvm_piix3_gsi_handler(void *opaque, int n, int level) +{ + GSIState *s = opaque; + + if (n < ISA_NUM_IRQS) { + /* Kernel will forward to both PIC and IOAPIC */ + qemu_set_irq(s->i8259_irq[n], level); + } else { + qemu_set_irq(s->ioapic_irq[n], level); + } +} + static void ioapic_init(GSIState *gsi_state) { DeviceState *dev; @@ -134,7 +170,13 @@ static void pc_init1(MemoryRegion *system_memory, } gsi_state = g_malloc0(sizeof(*gsi_state)); - gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + kvm_piix3_setup_irq_routing(pci_enabled); + gsi = qemu_allocate_irqs(kvm_piix3_gsi_handler, gsi_state, + GSI_NUM_PINS); + } else { + gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); + } if (pci_enabled) { pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi, @@ -154,11 +196,13 @@ static void pc_init1(MemoryRegion *system_memory, } isa_bus_irqs(isa_bus, gsi); - if (!xen_enabled()) { + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + i8259 = kvm_i8259_init(isa_bus); + } else if (xen_enabled()) { + i8259 = xen_interrupt_controller_init(); + } else { cpu_irq = pc_allocate_cpu_irq(); i8259 = i8259_init(isa_bus, cpu_irq[0]); - } else { - i8259 = xen_interrupt_controller_init(); } for (i = 0; i < ISA_NUM_IRQS; i++) { From a39c1d47ac970312333bb93456e249e965315490 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 16 Oct 2011 23:25:49 +0200 Subject: [PATCH 19/20] kvm: x86: Add user space part for in-kernel IOAPIC This introduces the KVM-accelerated IOAPIC model 'kvm-ioapic' and extends the IRQ routing setup by the 0->2 redirection when needed. The kvm-ioapic model has a property that allows to define its GSI base for injecting interrupts into the kernel model. This will allow to disentangle PIC and IOAPIC pins for chipsets that support more sophisticated IRQ routes than the PIIX3. So far the base is kept at 0, i.e. PIC and IOAPIC share pins 0..15. 
Signed-off-by: Jan Kiszka --- Makefile.target | 2 +- hw/kvm/ioapic.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++ hw/pc_piix.c | 15 ++++++- 3 files changed, 129 insertions(+), 2 deletions(-) create mode 100644 hw/kvm/ioapic.c diff --git a/Makefile.target b/Makefile.target index 701073d155..98cb997abf 100644 --- a/Makefile.target +++ b/Makefile.target @@ -233,7 +233,7 @@ obj-i386-y += vmport.o obj-i386-y += pci-hotplug.o smbios.o wdt_ib700.o obj-i386-y += debugcon.o multiboot.o obj-i386-y += pc_piix.o -obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o kvm/i8259.o +obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o kvm/i8259.o kvm/ioapic.o obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o # shared objects diff --git a/hw/kvm/ioapic.c b/hw/kvm/ioapic.c new file mode 100644 index 0000000000..10ffdd4b20 --- /dev/null +++ b/hw/kvm/ioapic.c @@ -0,0 +1,114 @@ +/* + * KVM in-kernel IOAPIC support + * + * Copyright (c) 2011 Siemens AG + * + * Authors: + * Jan Kiszka + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory. 
+ */ + +#include "hw/pc.h" +#include "hw/ioapic_internal.h" +#include "hw/apic_internal.h" +#include "kvm.h" + +typedef struct KVMIOAPICState KVMIOAPICState; + +struct KVMIOAPICState { + IOAPICCommonState ioapic; + uint32_t kvm_gsi_base; +}; + +static void kvm_ioapic_get(IOAPICCommonState *s) +{ + struct kvm_irqchip chip; + struct kvm_ioapic_state *kioapic; + int ret, i; + + chip.chip_id = KVM_IRQCHIP_IOAPIC; + ret = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, &chip); + if (ret < 0) { + fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(-ret)); + abort(); + } + + kioapic = &chip.chip.ioapic; + + s->id = kioapic->id; + s->ioregsel = kioapic->ioregsel; + s->irr = kioapic->irr; + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + s->ioredtbl[i] = kioapic->redirtbl[i].bits; + } +} + +static void kvm_ioapic_put(IOAPICCommonState *s) +{ + struct kvm_irqchip chip; + struct kvm_ioapic_state *kioapic; + int ret, i; + + chip.chip_id = KVM_IRQCHIP_IOAPIC; + kioapic = &chip.chip.ioapic; + + kioapic->id = s->id; + kioapic->ioregsel = s->ioregsel; + kioapic->base_address = s->busdev.mmio[0].addr; + kioapic->irr = s->irr; + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + kioapic->redirtbl[i].bits = s->ioredtbl[i]; + } + + ret = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, &chip); + if (ret < 0) { + fprintf(stderr, "KVM_SET_IRQCHIP failed: %s\n", strerror(-ret)); + abort(); + } +} + +static void kvm_ioapic_reset(DeviceState *dev) +{ + IOAPICCommonState *s = DO_UPCAST(IOAPICCommonState, busdev.qdev, dev); + + ioapic_reset_common(dev); + kvm_ioapic_put(s); +} + +static void kvm_ioapic_set_irq(void *opaque, int irq, int level) +{ + KVMIOAPICState *s = opaque; + int delivered; + + delivered = kvm_irqchip_set_irq(kvm_state, s->kvm_gsi_base + irq, level); + apic_report_irq_delivered(delivered); +} + +static void kvm_ioapic_init(IOAPICCommonState *s, int instance_no) +{ + memory_region_init_reservation(&s->io_memory, "kvm-ioapic", 0x1000); + + qdev_init_gpio_in(&s->busdev.qdev, kvm_ioapic_set_irq, 
IOAPIC_NUM_PINS); +} + +static IOAPICCommonInfo kvm_ioapic_info = { + .busdev.qdev.name = "kvm-ioapic", + .busdev.qdev.size = sizeof(KVMIOAPICState), + .busdev.qdev.reset = kvm_ioapic_reset, + .busdev.qdev.props = (Property[]) { + DEFINE_PROP_UINT32("gsi_base", KVMIOAPICState, kvm_gsi_base, 0), + DEFINE_PROP_END_OF_LIST() + }, + .init = kvm_ioapic_init, + .pre_save = kvm_ioapic_get, + .post_load = kvm_ioapic_put, +}; + +static void kvm_ioapic_register_device(void) +{ + ioapic_qdev_register(&kvm_ioapic_info); +} + +device_init(kvm_ioapic_register_device) diff --git a/hw/pc_piix.c b/hw/pc_piix.c index 297c04a9ab..a285ad25f8 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -69,6 +69,15 @@ static void kvm_piix3_setup_irq_routing(bool pci_enabled) for (i = 8; i < 16; ++i) { kvm_irqchip_add_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8); } + if (pci_enabled) { + for (i = 0; i < 24; ++i) { + if (i == 0) { + kvm_irqchip_add_route(s, i, KVM_IRQCHIP_IOAPIC, 2); + } else if (i != 2) { + kvm_irqchip_add_route(s, i, KVM_IRQCHIP_IOAPIC, i); + } + } + } ret = kvm_irqchip_commit_routes(s); if (ret < 0) { hw_error("KVM IRQ routing setup failed"); @@ -95,7 +104,11 @@ static void ioapic_init(GSIState *gsi_state) SysBusDevice *d; unsigned int i; - dev = qdev_create(NULL, "ioapic"); + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + dev = qdev_create(NULL, "kvm-ioapic"); + } else { + dev = qdev_create(NULL, "ioapic"); + } qdev_init_nofail(dev); d = sysbus_from_qdev(dev); sysbus_mmio_map(d, 0, 0xfec00000); From 6a48ffaaa732b2142c1b5030178f2d4a0fa499fe Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 15 Oct 2011 13:43:48 +0200 Subject: [PATCH 20/20] kvm: Activate in-kernel irqchip support Make the basic in-kernel irqchip support selectable via -machine ...,kernel_irqchip=on. Leave it off by default until it can fully replace user space models. 
Signed-off-by: Jan Kiszka --- qemu-config.c | 4 ++++ qemu-options.hx | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/qemu-config.c b/qemu-config.c index ecc88e8d40..b030205e23 100644 --- a/qemu-config.c +++ b/qemu-config.c @@ -531,6 +531,10 @@ static QemuOptsList qemu_machine_opts = { .name = "accel", .type = QEMU_OPT_STRING, .help = "accelerator list", + }, { + .name = "kernel_irqchip", + .type = QEMU_OPT_BOOL, + .help = "use KVM in-kernel irqchip", }, { /* End of list */ } }, diff --git a/qemu-options.hx b/qemu-options.hx index 6295cde351..3a07ae8231 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -31,7 +31,8 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \ "-machine [type=]name[,prop[=value][,...]]\n" " selects emulated machine (-machine ? for list)\n" " property accel=accel1[:accel2[:...]] selects accelerator\n" - " supported accelerators are kvm, xen, tcg (default: tcg)\n", + " supported accelerators are kvm, xen, tcg (default: tcg)\n" + " kernel_irqchip=on|off controls accelerated irqchip support\n", QEMU_ARCH_ALL) STEXI @item -machine [type=]@var{name}[,prop=@var{value}[,...]] @@ -44,6 +45,8 @@ This is used to enable an accelerator. Depending on the target architecture, kvm, xen, or tcg can be available. By default, tcg is used. If there is more than one accelerator specified, the next one is used if the previous one fails to initialize. +@item kernel_irqchip=on|off +Enables in-kernel irqchip support for the chosen accelerator when available. @end table ETEXI