b31911c616
The sPAPR machine has four different IRQ backends, each implementing the XICS or XIVE interrupt mode or both in the case of the 'dual' backend. If a machine is started in P8 compat mode, QEMU should necessarily support the XICS interrupt mode and in that case, the XIVE-only IRQ backend is invalid. Currently, spapr_irq_check() tests the pointer value to the IRQ backend to check for this condition, instead use the 'xics' flag. It's equivalent and it will ease the introduction of new XIVE-only IRQ backends if needed. Signed-off-by: Cédric Le Goater <clg@kaod.org> Message-Id: <20200820140106.2357228-1-clg@kaod.org> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
600 lines
16 KiB
C
600 lines
16 KiB
C
/*
|
|
* QEMU PowerPC sPAPR IRQ interface
|
|
*
|
|
* Copyright (c) 2018, IBM Corporation.
|
|
*
|
|
* This code is licensed under the GPL version 2 or later. See the
|
|
* COPYING file in the top-level directory.
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "qemu/log.h"
|
|
#include "qemu/error-report.h"
|
|
#include "qapi/error.h"
|
|
#include "hw/irq.h"
|
|
#include "hw/ppc/spapr.h"
|
|
#include "hw/ppc/spapr_cpu_core.h"
|
|
#include "hw/ppc/spapr_xive.h"
|
|
#include "hw/ppc/xics.h"
|
|
#include "hw/ppc/xics_spapr.h"
|
|
#include "hw/qdev-properties.h"
|
|
#include "cpu-models.h"
|
|
#include "sysemu/kvm.h"
|
|
|
|
#include "trace.h"
|
|
|
|
static const TypeInfo spapr_intc_info = {
|
|
.name = TYPE_SPAPR_INTC,
|
|
.parent = TYPE_INTERFACE,
|
|
.class_size = sizeof(SpaprInterruptControllerClass),
|
|
};
|
|
|
|
static void spapr_irq_msi_init(SpaprMachineState *spapr)
|
|
{
|
|
if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
|
|
/* Legacy mode doesn't use this allocator */
|
|
return;
|
|
}
|
|
|
|
spapr->irq_map_nr = spapr_irq_nr_msis(spapr);
|
|
spapr->irq_map = bitmap_new(spapr->irq_map_nr);
|
|
}
|
|
|
|
int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align,
|
|
Error **errp)
|
|
{
|
|
int irq;
|
|
|
|
/*
|
|
* The 'align_mask' parameter of bitmap_find_next_zero_area()
|
|
* should be one less than a power of 2; 0 means no
|
|
* alignment. Adapt the 'align' value of the former allocator
|
|
* to fit the requirements of bitmap_find_next_zero_area()
|
|
*/
|
|
align -= 1;
|
|
|
|
irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num,
|
|
align);
|
|
if (irq == spapr->irq_map_nr) {
|
|
error_setg(errp, "can't find a free %d-IRQ block", num);
|
|
return -1;
|
|
}
|
|
|
|
bitmap_set(spapr->irq_map, irq, num);
|
|
|
|
return irq + SPAPR_IRQ_MSI;
|
|
}
|
|
|
|
void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num)
|
|
{
|
|
bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num);
|
|
}
|
|
|
|
int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn,
|
|
SpaprInterruptController *intc,
|
|
uint32_t nr_servers,
|
|
Error **errp)
|
|
{
|
|
Error *local_err = NULL;
|
|
|
|
if (kvm_enabled() && kvm_kernel_irqchip_allowed()) {
|
|
if (fn(intc, nr_servers, &local_err) < 0) {
|
|
if (kvm_kernel_irqchip_required()) {
|
|
error_prepend(&local_err,
|
|
"kernel_irqchip requested but unavailable: ");
|
|
error_propagate(errp, local_err);
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* We failed to initialize the KVM device, fallback to
|
|
* emulated mode
|
|
*/
|
|
error_prepend(&local_err,
|
|
"kernel_irqchip allowed but unavailable: ");
|
|
error_append_hint(&local_err,
|
|
"Falling back to kernel-irqchip=off\n");
|
|
warn_report_err(local_err);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* XICS IRQ backend.
|
|
*/
|
|
|
|
SpaprIrq spapr_irq_xics = {
|
|
.xics = true,
|
|
.xive = false,
|
|
};
|
|
|
|
/*
|
|
* XIVE IRQ backend.
|
|
*/
|
|
|
|
SpaprIrq spapr_irq_xive = {
|
|
.xics = false,
|
|
.xive = true,
|
|
};
|
|
|
|
/*
|
|
* Dual XIVE and XICS IRQ backend.
|
|
*
|
|
* Both interrupt mode, XIVE and XICS, objects are created but the
|
|
* machine starts in legacy interrupt mode (XICS). It can be changed
|
|
* by the CAS negotiation process and, in that case, the new mode is
|
|
* activated after an extra machine reset.
|
|
*/
|
|
|
|
/*
|
|
* Define values in sync with the XIVE and XICS backend
|
|
*/
|
|
SpaprIrq spapr_irq_dual = {
|
|
.xics = true,
|
|
.xive = true,
|
|
};
|
|
|
|
|
|
static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
|
|
{
|
|
ERRP_GUARD();
|
|
MachineState *machine = MACHINE(spapr);
|
|
|
|
/*
|
|
* Sanity checks on non-P9 machines. On these, XIVE is not
|
|
* advertised, see spapr_dt_ov5_platform_support()
|
|
*/
|
|
if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
|
|
0, spapr->max_compat_pvr)) {
|
|
/*
|
|
* If the 'dual' interrupt mode is selected, force XICS as CAS
|
|
* negotiation is useless.
|
|
*/
|
|
if (spapr->irq == &spapr_irq_dual) {
|
|
spapr->irq = &spapr_irq_xics;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Non-P9 machines using only XIVE is a bogus setup. We have two
|
|
* scenarios to take into account because of the compat mode:
|
|
*
|
|
* 1. POWER7/8 machines should fail to init later on when creating
|
|
* the XIVE interrupt presenters because a POWER9 exception
|
|
* model is required.
|
|
|
|
* 2. POWER9 machines using the POWER8 compat mode won't fail and
|
|
* will let the OS boot with a partial XIVE setup : DT
|
|
* properties but no hcalls.
|
|
*
|
|
* To cover both and not confuse the OS, add an early failure in
|
|
* QEMU.
|
|
*/
|
|
if (!spapr->irq->xics) {
|
|
error_setg(errp, "XIVE-only machines require a POWER9 CPU");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* On a POWER9 host, some older KVM XICS devices cannot be destroyed and
|
|
* re-created. Same happens with KVM nested guests. Detect that early to
|
|
* avoid QEMU to exit later when the guest reboots.
|
|
*/
|
|
if (kvm_enabled() &&
|
|
spapr->irq == &spapr_irq_dual &&
|
|
kvm_kernel_irqchip_required() &&
|
|
xics_kvm_has_broken_disconnect(spapr)) {
|
|
error_setg(errp,
|
|
"KVM is incompatible with ic-mode=dual,kernel-irqchip=on");
|
|
error_append_hint(errp,
|
|
"This can happen with an old KVM or in a KVM nested guest.\n");
|
|
error_append_hint(errp,
|
|
"Try without kernel-irqchip or with kernel-irqchip=off.\n");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* sPAPR IRQ frontend routines for devices
|
|
*/
|
|
#define ALL_INTCS(spapr_) \
|
|
{ SPAPR_INTC((spapr_)->ics), SPAPR_INTC((spapr_)->xive), }
|
|
|
|
int spapr_irq_cpu_intc_create(SpaprMachineState *spapr,
|
|
PowerPCCPU *cpu, Error **errp)
|
|
{
|
|
SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
|
|
int i;
|
|
int rc;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(intcs); i++) {
|
|
SpaprInterruptController *intc = intcs[i];
|
|
if (intc) {
|
|
SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
|
|
rc = sicc->cpu_intc_create(intc, cpu, errp);
|
|
if (rc < 0) {
|
|
return rc;
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void spapr_irq_cpu_intc_reset(SpaprMachineState *spapr, PowerPCCPU *cpu)
|
|
{
|
|
SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
|
|
int i;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(intcs); i++) {
|
|
SpaprInterruptController *intc = intcs[i];
|
|
if (intc) {
|
|
SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
|
|
sicc->cpu_intc_reset(intc, cpu);
|
|
}
|
|
}
|
|
}
|
|
|
|
void spapr_irq_cpu_intc_destroy(SpaprMachineState *spapr, PowerPCCPU *cpu)
|
|
{
|
|
SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
|
|
int i;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(intcs); i++) {
|
|
SpaprInterruptController *intc = intcs[i];
|
|
if (intc) {
|
|
SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
|
|
sicc->cpu_intc_destroy(intc, cpu);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void spapr_set_irq(void *opaque, int irq, int level)
|
|
{
|
|
SpaprMachineState *spapr = SPAPR_MACHINE(opaque);
|
|
SpaprInterruptControllerClass *sicc
|
|
= SPAPR_INTC_GET_CLASS(spapr->active_intc);
|
|
|
|
sicc->set_irq(spapr->active_intc, irq, level);
|
|
}
|
|
|
|
void spapr_irq_print_info(SpaprMachineState *spapr, Monitor *mon)
|
|
{
|
|
SpaprInterruptControllerClass *sicc
|
|
= SPAPR_INTC_GET_CLASS(spapr->active_intc);
|
|
|
|
sicc->print_info(spapr->active_intc, mon);
|
|
}
|
|
|
|
void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers,
|
|
void *fdt, uint32_t phandle)
|
|
{
|
|
SpaprInterruptControllerClass *sicc
|
|
= SPAPR_INTC_GET_CLASS(spapr->active_intc);
|
|
|
|
sicc->dt(spapr->active_intc, nr_servers, fdt, phandle);
|
|
}
|
|
|
|
uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr)
|
|
{
|
|
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
|
|
|
|
if (smc->legacy_irq_allocation) {
|
|
return smc->nr_xirqs;
|
|
} else {
|
|
return SPAPR_XIRQ_BASE + smc->nr_xirqs - SPAPR_IRQ_MSI;
|
|
}
|
|
}
|
|
|
|
void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
|
|
{
|
|
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
|
|
|
|
if (kvm_enabled() && kvm_kernel_irqchip_split()) {
|
|
error_setg(errp, "kernel_irqchip split mode not supported on pseries");
|
|
return;
|
|
}
|
|
|
|
if (spapr_irq_check(spapr, errp) < 0) {
|
|
return;
|
|
}
|
|
|
|
/* Initialize the MSI IRQ allocator. */
|
|
spapr_irq_msi_init(spapr);
|
|
|
|
if (spapr->irq->xics) {
|
|
Object *obj;
|
|
|
|
obj = object_new(TYPE_ICS_SPAPR);
|
|
|
|
object_property_add_child(OBJECT(spapr), "ics", obj);
|
|
object_property_set_link(obj, ICS_PROP_XICS, OBJECT(spapr),
|
|
&error_abort);
|
|
object_property_set_int(obj, "nr-irqs", smc->nr_xirqs, &error_abort);
|
|
if (!qdev_realize(DEVICE(obj), NULL, errp)) {
|
|
return;
|
|
}
|
|
|
|
spapr->ics = ICS_SPAPR(obj);
|
|
}
|
|
|
|
if (spapr->irq->xive) {
|
|
uint32_t nr_servers = spapr_max_server_number(spapr);
|
|
DeviceState *dev;
|
|
int i;
|
|
|
|
dev = qdev_new(TYPE_SPAPR_XIVE);
|
|
qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_XIRQ_BASE);
|
|
/*
|
|
* 8 XIVE END structures per CPU. One for each available
|
|
* priority
|
|
*/
|
|
qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3);
|
|
object_property_set_link(OBJECT(dev), "xive-fabric", OBJECT(spapr),
|
|
&error_abort);
|
|
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
|
|
|
|
spapr->xive = SPAPR_XIVE(dev);
|
|
|
|
/* Enable the CPU IPIs */
|
|
for (i = 0; i < nr_servers; ++i) {
|
|
SpaprInterruptControllerClass *sicc
|
|
= SPAPR_INTC_GET_CLASS(spapr->xive);
|
|
|
|
if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i,
|
|
false, errp) < 0) {
|
|
return;
|
|
}
|
|
}
|
|
|
|
spapr_xive_hcall_init(spapr);
|
|
}
|
|
|
|
spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr,
|
|
smc->nr_xirqs + SPAPR_XIRQ_BASE);
|
|
|
|
/*
|
|
* Mostly we don't actually need this until reset, except that not
|
|
* having this set up can cause VFIO devices to issue a
|
|
* false-positive warning during realize(), because they don't yet
|
|
* have an in-kernel irq chip.
|
|
*/
|
|
spapr_irq_update_active_intc(spapr);
|
|
}
|
|
|
|
int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp)
|
|
{
|
|
SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
|
|
int i;
|
|
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
|
|
int rc;
|
|
|
|
assert(irq >= SPAPR_XIRQ_BASE);
|
|
assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE));
|
|
|
|
for (i = 0; i < ARRAY_SIZE(intcs); i++) {
|
|
SpaprInterruptController *intc = intcs[i];
|
|
if (intc) {
|
|
SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
|
|
rc = sicc->claim_irq(intc, irq, lsi, errp);
|
|
if (rc < 0) {
|
|
return rc;
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void spapr_irq_free(SpaprMachineState *spapr, int irq, int num)
|
|
{
|
|
SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
|
|
int i, j;
|
|
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
|
|
|
|
assert(irq >= SPAPR_XIRQ_BASE);
|
|
assert((irq + num) <= (smc->nr_xirqs + SPAPR_XIRQ_BASE));
|
|
|
|
for (i = irq; i < (irq + num); i++) {
|
|
for (j = 0; j < ARRAY_SIZE(intcs); j++) {
|
|
SpaprInterruptController *intc = intcs[j];
|
|
|
|
if (intc) {
|
|
SpaprInterruptControllerClass *sicc
|
|
= SPAPR_INTC_GET_CLASS(intc);
|
|
sicc->free_irq(intc, i);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq)
|
|
{
|
|
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
|
|
|
|
/*
|
|
* This interface is basically for VIO and PHB devices to find the
|
|
* right qemu_irq to manipulate, so we only allow access to the
|
|
* external irqs for now. Currently anything which needs to
|
|
* access the IPIs most naturally gets there via the guest side
|
|
* interfaces, we can change this if we need to in future.
|
|
*/
|
|
assert(irq >= SPAPR_XIRQ_BASE);
|
|
assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE));
|
|
|
|
if (spapr->ics) {
|
|
assert(ics_valid_irq(spapr->ics, irq));
|
|
}
|
|
if (spapr->xive) {
|
|
assert(irq < spapr->xive->nr_irqs);
|
|
assert(xive_eas_is_valid(&spapr->xive->eat[irq]));
|
|
}
|
|
|
|
return spapr->qirqs[irq];
|
|
}
|
|
|
|
int spapr_irq_post_load(SpaprMachineState *spapr, int version_id)
|
|
{
|
|
SpaprInterruptControllerClass *sicc;
|
|
|
|
spapr_irq_update_active_intc(spapr);
|
|
sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc);
|
|
return sicc->post_load(spapr->active_intc, version_id);
|
|
}
|
|
|
|
void spapr_irq_reset(SpaprMachineState *spapr, Error **errp)
|
|
{
|
|
assert(!spapr->irq_map || bitmap_empty(spapr->irq_map, spapr->irq_map_nr));
|
|
|
|
spapr_irq_update_active_intc(spapr);
|
|
}
|
|
|
|
int spapr_irq_get_phandle(SpaprMachineState *spapr, void *fdt, Error **errp)
|
|
{
|
|
const char *nodename = "interrupt-controller";
|
|
int offset, phandle;
|
|
|
|
offset = fdt_subnode_offset(fdt, 0, nodename);
|
|
if (offset < 0) {
|
|
error_setg(errp, "Can't find node \"%s\": %s",
|
|
nodename, fdt_strerror(offset));
|
|
return -1;
|
|
}
|
|
|
|
phandle = fdt_get_phandle(fdt, offset);
|
|
if (!phandle) {
|
|
error_setg(errp, "Can't get phandle of node \"%s\"", nodename);
|
|
return -1;
|
|
}
|
|
|
|
return phandle;
|
|
}
|
|
|
|
static void set_active_intc(SpaprMachineState *spapr,
|
|
SpaprInterruptController *new_intc)
|
|
{
|
|
SpaprInterruptControllerClass *sicc;
|
|
uint32_t nr_servers = spapr_max_server_number(spapr);
|
|
|
|
assert(new_intc);
|
|
|
|
if (new_intc == spapr->active_intc) {
|
|
/* Nothing to do */
|
|
return;
|
|
}
|
|
|
|
if (spapr->active_intc) {
|
|
sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc);
|
|
if (sicc->deactivate) {
|
|
sicc->deactivate(spapr->active_intc);
|
|
}
|
|
}
|
|
|
|
sicc = SPAPR_INTC_GET_CLASS(new_intc);
|
|
if (sicc->activate) {
|
|
sicc->activate(new_intc, nr_servers, &error_fatal);
|
|
}
|
|
|
|
spapr->active_intc = new_intc;
|
|
|
|
/*
|
|
* We've changed the kernel irqchip, let VFIO devices know they
|
|
* need to readjust.
|
|
*/
|
|
kvm_irqchip_change_notify();
|
|
}
|
|
|
|
void spapr_irq_update_active_intc(SpaprMachineState *spapr)
|
|
{
|
|
SpaprInterruptController *new_intc;
|
|
|
|
if (!spapr->ics) {
|
|
/*
|
|
* XXX before we run CAS, ov5_cas is initialized empty, which
|
|
* indicates XICS, even if we have ic-mode=xive. TODO: clean
|
|
* up the CAS path so that we have a clearer way of handling
|
|
* this.
|
|
*/
|
|
new_intc = SPAPR_INTC(spapr->xive);
|
|
} else if (spapr->ov5_cas
|
|
&& spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
|
|
new_intc = SPAPR_INTC(spapr->xive);
|
|
} else {
|
|
new_intc = SPAPR_INTC(spapr->ics);
|
|
}
|
|
|
|
set_active_intc(spapr, new_intc);
|
|
}
|
|
|
|
/*
|
|
* XICS legacy routines - to deprecate one day
|
|
*/
|
|
|
|
static int ics_find_free_block(ICSState *ics, int num, int alignnum)
|
|
{
|
|
int first, i;
|
|
|
|
for (first = 0; first < ics->nr_irqs; first += alignnum) {
|
|
if (num > (ics->nr_irqs - first)) {
|
|
return -1;
|
|
}
|
|
for (i = first; i < first + num; ++i) {
|
|
if (!ics_irq_free(ics, i)) {
|
|
break;
|
|
}
|
|
}
|
|
if (i == (first + num)) {
|
|
return first;
|
|
}
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp)
|
|
{
|
|
ICSState *ics = spapr->ics;
|
|
int first = -1;
|
|
|
|
assert(ics);
|
|
|
|
/*
|
|
* MSIMesage::data is used for storing VIRQ so
|
|
* it has to be aligned to num to support multiple
|
|
* MSI vectors. MSI-X is not affected by this.
|
|
* The hint is used for the first IRQ, the rest should
|
|
* be allocated continuously.
|
|
*/
|
|
if (align) {
|
|
assert((num == 1) || (num == 2) || (num == 4) ||
|
|
(num == 8) || (num == 16) || (num == 32));
|
|
first = ics_find_free_block(ics, num, num);
|
|
} else {
|
|
first = ics_find_free_block(ics, num, 1);
|
|
}
|
|
|
|
if (first < 0) {
|
|
error_setg(errp, "can't find a free %d-IRQ block", num);
|
|
return -1;
|
|
}
|
|
|
|
return first + ics->offset;
|
|
}
|
|
|
|
SpaprIrq spapr_irq_xics_legacy = {
|
|
.xics = true,
|
|
.xive = false,
|
|
};
|
|
|
|
static void spapr_irq_register_types(void)
|
|
{
|
|
type_register_static(&spapr_intc_info);
|
|
}
|
|
|
|
type_init(spapr_irq_register_types)
|