hw/xen: Support HVM_PARAM_CALLBACK_TYPE_GSI callback

The GSI callback (and later PCI_INTX) is a level triggered interrupt. It
is asserted when an event channel is delivered to vCPU0, and is supposed
to be cleared when the vcpu_info->evtchn_upcall_pending field for vCPU0
is cleared again.

Thankfully, Xen does *not* assert the GSI if the guest sets its own
evtchn_upcall_pending field; we only need to assert the GSI when we
have delivered an event for ourselves. So that's the easy part, kind of.

There's a slight complexity in that we need to hold the BQL before we
can call qemu_set_irq(), and we definitely can't do that while holding
our own port_lock (because we'll need to take that from the qemu-side
functions that the PV backend drivers will call). So if we end up
wanting to set the IRQ in a context where we *don't* already hold the
BQL, defer to a BH.

However, we *do* need to poll for the evtchn_upcall_pending flag being
cleared. In an ideal world we would poll that when the EOI happens on
the PIC/IOAPIC. That's how it works in the kernel with the VFIO eventfd
pairs — one is used to trigger the interrupt, and the other works in the
other direction to 'resample' on EOI, and trigger the first eventfd
again if the line is still active.

However, QEMU doesn't seem to do that. Even VFIO level interrupts seem
to be supported by temporarily unmapping the device's BARs from the
guest when an interrupt happens, then trapping *all* MMIO to the device
and sending the 'resample' event on *every* MMIO access until the IRQ
is cleared! Maybe in future we'll plumb the 'resample' concept through
QEMU's irq framework but for now we'll do what Xen itself does: just
check the flag on every vmexit if the upcall GSI is known to be
asserted.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Paul Durrant <paul@xen.org>
This commit is contained in:
David Woodhouse 2022-12-15 20:35:24 +00:00
parent 507cb64d6e
commit ddf0fd9ae1
8 changed files with 161 additions and 0 deletions

View File

@ -27,6 +27,8 @@
#include "hw/sysbus.h" #include "hw/sysbus.h"
#include "hw/xen/xen.h" #include "hw/xen/xen.h"
#include "hw/i386/x86.h"
#include "hw/irq.h"
#include "xen_evtchn.h" #include "xen_evtchn.h"
#include "xen_overlay.h" #include "xen_overlay.h"
@ -100,9 +102,12 @@ struct XenEvtchnState {
uint64_t callback_param; uint64_t callback_param;
bool evtchn_in_kernel; bool evtchn_in_kernel;
QEMUBH *gsi_bh;
QemuMutex port_lock; QemuMutex port_lock;
uint32_t nr_ports; uint32_t nr_ports;
XenEvtchnPort port_table[EVTCHN_2L_NR_CHANNELS]; XenEvtchnPort port_table[EVTCHN_2L_NR_CHANNELS];
qemu_irq gsis[IOAPIC_NUM_PINS];
}; };
struct XenEvtchnState *xen_evtchn_singleton; struct XenEvtchnState *xen_evtchn_singleton;
@ -167,13 +172,42 @@ static const TypeInfo xen_evtchn_info = {
.class_init = xen_evtchn_class_init, .class_init = xen_evtchn_class_init,
}; };
/*
 * Bottom-half handler: re-evaluate the callback line from vCPU0's
 * vcpu_info.
 *
 * The line level is set to 1 if evtchn_upcall_pending is non-zero and to 0
 * otherwise. If vCPU0 has no vcpu_info mapping, nothing is done.
 *
 * @opaque: unused here.
 */
static void gsi_assert_bh(void *opaque)
{
    struct vcpu_info *vcpu0_info = kvm_xen_get_vcpu_info_hva(0);

    if (!vcpu0_info) {
        return;
    }

    xen_evtchn_set_callback_level(vcpu0_info->evtchn_upcall_pending ? 1 : 0);
}
/*
 * Create the Xen event channel sysbus device and publish it through
 * xen_evtchn_singleton. Also initialises the port lock, allocates the
 * bottom half used to set the callback GSI, and registers
 * IOAPIC_NUM_PINS outbound IRQ lines on the device.
 */
void xen_evtchn_create(void) void xen_evtchn_create(void)
{ {
XenEvtchnState *s = XEN_EVTCHN(sysbus_create_simple(TYPE_XEN_EVTCHN, XenEvtchnState *s = XEN_EVTCHN(sysbus_create_simple(TYPE_XEN_EVTCHN,
-1, NULL)); -1, NULL));
int i;
xen_evtchn_singleton = s; xen_evtchn_singleton = s;
qemu_mutex_init(&s->port_lock); qemu_mutex_init(&s->port_lock);
/*
 * gsi_assert_bh() runs from this BH, on the AioContext returned by
 * qemu_get_aio_context(); xen_evtchn_set_callback_level() schedules it
 * when called without the BQL held.
 */
s->gsi_bh = aio_bh_new(qemu_get_aio_context(), gsi_assert_bh, s);
/*
 * One sysbus IRQ output per IOAPIC pin; these are wired to the system
 * GSIs by xen_evtchn_connect_gsis().
 */
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
sysbus_init_irq(SYS_BUS_DEVICE(s), &s->gsis[i]);
}
}
/*
 * Connect each of the event channel device's IOAPIC_NUM_PINS sysbus IRQ
 * outputs to the entry of @system_gsis with the same index.
 *
 * @system_gsis: array of IRQ lines, indexed from 0 to IOAPIC_NUM_PINS - 1,
 *               so the caller must supply at least that many entries.
 *
 * Does nothing if the event channel device has not been created
 * (xen_evtchn_singleton is NULL).
 */
void xen_evtchn_connect_gsis(qemu_irq *system_gsis)
{
XenEvtchnState *s = xen_evtchn_singleton;
int i;
if (!s) {
return;
}
/* Output i of the device drives system GSI i. */
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
sysbus_connect_irq(SYS_BUS_DEVICE(s), i, system_gsis[i]);
}
} }
static void xen_evtchn_register_types(void) static void xen_evtchn_register_types(void)
@ -183,6 +217,64 @@ static void xen_evtchn_register_types(void)
type_init(xen_evtchn_register_types) type_init(xen_evtchn_register_types)
/*
 * Set the level of the guest's event channel upcall interrupt line.
 *
 * @level: non-zero to assert the line, zero to deassert it.
 *
 * Only HVM_PARAM_CALLBACK_TYPE_GSI delivery is acted upon here, and only
 * when the configured GSI number is below IOAPIC_NUM_PINS; with any other
 * callback configuration the call has no effect. It is also a no-op if
 * the event channel device has not been created.
 */
void xen_evtchn_set_callback_level(int level)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    uint32_t gsi;

    if (!s) {
        return;
    }

    /*
     * This function is reached from several contexts:
     *
     *  - I/O context, when a PV backend driver sends a notification to
     *    the guest.
     *
     *  - Guest vCPU context, via loopback interdomain event channels (or
     *    in theory even IPIs; guests don't use those with GSI delivery
     *    because that's pointless, but a malicious guest must not be able
     *    to trigger a deadlock, so it can't be ruled out).
     *
     *  - Guest vCPU context, while HVM_PARAM_CALLBACK_IRQ is being
     *    configured.
     *
     *  - Guest vCPU context in the KVM exit handler, when the upcall
     *    pending flag has been cleared and the GSI needs to be deasserted.
     *
     *  - Possibly in future, an interrupt ack/EOI notifier once the GSI
     *    has been acked in the irqchip.
     *
     * Whichever context we come from, if we aren't already holding the
     * BQL then we can't take it now, as we may already hold s->port_lock.
     * So trigger the BH to set the IRQ for us instead of doing it
     * immediately.
     *
     * In the HVM_PARAM_CALLBACK_IRQ and KVM exit handler cases the caller
     * deliberately takes the BQL because it wants the change to take
     * effect immediately. That leaves interdomain loopback as the case
     * which uses the BH.
     */
    if (!qemu_mutex_iothread_locked()) {
        qemu_bh_schedule(s->gsi_bh);
        return;
    }

    /* The callback type lives in the bits above CALLBACK_VIA_TYPE_SHIFT. */
    if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) !=
        HVM_PARAM_CALLBACK_TYPE_GSI) {
        return;
    }

    /* The low 32 bits of the parameter are used as the GSI number. */
    gsi = (uint32_t)s->callback_param;
    if (gsi >= IOAPIC_NUM_PINS) {
        return;
    }

    qemu_set_irq(s->gsis[gsi], level);
    if (level) {
        /* Ensure the vCPU polls for deassertion */
        kvm_xen_set_callback_asserted();
    }
}
int xen_evtchn_set_callback_param(uint64_t param) int xen_evtchn_set_callback_param(uint64_t param)
{ {
XenEvtchnState *s = xen_evtchn_singleton; XenEvtchnState *s = xen_evtchn_singleton;
@ -209,6 +301,11 @@ int xen_evtchn_set_callback_param(uint64_t param)
} }
break; break;
} }
case HVM_PARAM_CALLBACK_TYPE_GSI:
ret = 0;
break;
default: default:
/* Xen doesn't return error even if you set something bogus */ /* Xen doesn't return error even if you set something bogus */
ret = 0; ret = 0;

View File

@ -12,9 +12,13 @@
#ifndef QEMU_XEN_EVTCHN_H #ifndef QEMU_XEN_EVTCHN_H
#define QEMU_XEN_EVTCHN_H #define QEMU_XEN_EVTCHN_H
#include "hw/sysbus.h"
void xen_evtchn_create(void); void xen_evtchn_create(void);
int xen_evtchn_soft_reset(void); int xen_evtchn_soft_reset(void);
int xen_evtchn_set_callback_param(uint64_t param); int xen_evtchn_set_callback_param(uint64_t param);
void xen_evtchn_connect_gsis(qemu_irq *system_gsis);
void xen_evtchn_set_callback_level(int level);
struct evtchn_status; struct evtchn_status;
struct evtchn_close; struct evtchn_close;

View File

@ -1310,6 +1310,12 @@ void pc_basic_device_init(struct PCMachineState *pcms,
} }
*rtc_state = ISA_DEVICE(mc146818_rtc_init(isa_bus, 2000, rtc_irq)); *rtc_state = ISA_DEVICE(mc146818_rtc_init(isa_bus, 2000, rtc_irq));
#ifdef CONFIG_XEN_EMU
if (xen_mode == XEN_EMULATE) {
xen_evtchn_connect_gsis(gsi);
}
#endif
qemu_register_boot_set(pc_boot_set, *rtc_state); qemu_register_boot_set(pc_boot_set, *rtc_state);
if (!xen_enabled() && if (!xen_enabled() &&

View File

@ -23,6 +23,7 @@ int kvm_xen_soft_reset(void);
uint32_t kvm_xen_get_caps(void); uint32_t kvm_xen_get_caps(void);
void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id); void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id);
void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type); void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type);
void kvm_xen_set_callback_asserted(void);
int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port); int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port);
#define kvm_xen_has_cap(cap) (!!(kvm_xen_get_caps() & \ #define kvm_xen_has_cap(cap) (!!(kvm_xen_get_caps() & \

View File

@ -1808,6 +1808,7 @@ typedef struct CPUArchState {
uint64_t xen_vcpu_time_info_gpa; uint64_t xen_vcpu_time_info_gpa;
uint64_t xen_vcpu_runstate_gpa; uint64_t xen_vcpu_runstate_gpa;
uint8_t xen_vcpu_callback_vector; uint8_t xen_vcpu_callback_vector;
bool xen_callback_asserted;
uint16_t xen_virq[XEN_NR_VIRQS]; uint16_t xen_virq[XEN_NR_VIRQS];
uint64_t xen_singleshot_timer_ns; uint64_t xen_singleshot_timer_ns;
#endif #endif

View File

@ -4990,6 +4990,17 @@ MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
kvm_rate_limit_on_bus_lock(); kvm_rate_limit_on_bus_lock();
} }
/*
* If the callback is asserted as a GSI (or PCI INTx) then check if
* vcpu_info->evtchn_upcall_pending has been cleared, and deassert
* the callback IRQ if so. Ideally we could hook into the PIC/IOAPIC
* EOI and only resample then, exactly how the VFIO eventfd pairs
* are designed to work for level triggered interrupts.
*/
if (x86_cpu->env.xen_callback_asserted) {
kvm_xen_maybe_deassert_callback(cpu);
}
/* We need to protect the apic state against concurrent accesses from /* We need to protect the apic state against concurrent accesses from
* different threads in case the userspace irqchip is used. */ * different threads in case the userspace irqchip is used. */
if (!kvm_irqchip_in_kernel()) { if (!kvm_irqchip_in_kernel()) {

View File

@ -320,6 +320,39 @@ void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id)
return X86_CPU(cs)->env.xen_vcpu_info_hva; return X86_CPU(cs)->env.xen_vcpu_info_hva;
} }
/*
 * Deassert the callback IRQ once the guest has cleared
 * vcpu_info->evtchn_upcall_pending for this vCPU.
 *
 * @cs: the vCPU whose vcpu_info is inspected.
 *
 * kvm_arch_post_run() calls this while env.xen_callback_asserted is set.
 * If the vCPU has no vcpu_info mapping, or the pending flag is still set,
 * nothing changes.
 */
void kvm_xen_maybe_deassert_callback(CPUState *cs)
{
    CPUX86State *env = &X86_CPU(cs)->env;
    struct vcpu_info *vi = env->xen_vcpu_info_hva;

    /*
     * Unlocked check first: this is a fast path, so avoid taking the
     * lock on every call while the upcall is still pending.
     */
    if (!vi || vi->evtchn_upcall_pending) {
        return;
    }

    qemu_mutex_lock_iothread();

    /*
     * Look again now that the lock is held, because the flag may have
     * been set again in the interim.
     */
    if (!vi->evtchn_upcall_pending) {
        env->xen_callback_asserted = false;
        xen_evtchn_set_callback_level(0);
    }

    qemu_mutex_unlock_iothread();
}
/*
 * Record on vCPU0 that the callback IRQ is currently asserted, so that
 * kvm_arch_post_run() checks for deassertion on that vCPU. Does nothing
 * if vCPU0 cannot be found.
 */
void kvm_xen_set_callback_asserted(void)
{
    CPUState *vcpu0 = qemu_get_cpu(0);

    if (!vcpu0) {
        return;
    }

    X86_CPU(vcpu0)->env.xen_callback_asserted = true;
}
void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type) void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
{ {
CPUState *cs = qemu_get_cpu(vcpu_id); CPUState *cs = qemu_get_cpu(vcpu_id);
@ -352,6 +385,13 @@ void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
*/ */
qemu_cpu_kick(cs); qemu_cpu_kick(cs);
break; break;
case HVM_PARAM_CALLBACK_TYPE_GSI:
case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
if (vcpu_id == 0) {
xen_evtchn_set_callback_level(1);
}
break;
} }
} }

View File

@ -28,5 +28,6 @@ int kvm_xen_init_vcpu(CPUState *cs);
int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit); int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit);
int kvm_put_xen_state(CPUState *cs); int kvm_put_xen_state(CPUState *cs);
int kvm_get_xen_state(CPUState *cs); int kvm_get_xen_state(CPUState *cs);
void kvm_xen_maybe_deassert_callback(CPUState *cs);
#endif /* QEMU_I386_KVM_XEN_EMU_H */ #endif /* QEMU_I386_KVM_XEN_EMU_H */