qemu/hw/s390x/s390-pci-bus.c
Markus Armbruster b69c3c21a5 qdev: Unrealize must not fail
Devices may have component devices and buses.

Device realization may fail.  Realization is recursive: a device's
realize() method realizes its components, and device_set_realized()
realizes its buses (which should in turn realize the devices on that
bus, except bus_set_realized() doesn't implement that, yet).

When realization of a component or bus fails, we need to roll back:
unrealize everything we realized so far.  If any of these unrealizes
failed, the device would be left in an inconsistent state.  Must not
happen.

device_set_realized() lets it happen: it ignores errors in the roll
back code starting at label child_realize_fail.

Since realization is recursive, unrealization must be recursive, too.
But how could a partly failed unrealize be rolled back?  We'd have to
re-realize, which can fail.  This design is fundamentally broken.

device_set_realized() does not roll back at all.  Instead, it keeps
unrealizing, ignoring further errors.

It can screw up even for a device with no buses: if the lone
dc->unrealize() fails, it still unregisters vmstate, and calls
listeners' unrealize() callback.

bus_set_realized() does not roll back either.  Instead, it stops
unrealizing.

Fortunately, no unrealize method can fail, as we'll see below.

To fix the design error, drop parameter @errp from all the unrealize
methods.

Any unrealize method that uses @errp now needs an update.  This leads
us to unrealize() methods that can fail.  Merely passing it to another
unrealize method cannot cause failure, though.  Here are the ones that
do other things with @errp:

* virtio_serial_device_unrealize()

  Fails when qbus_set_hotplug_handler() fails, but still does all the
  other work.  On failure, the device would stay realized with its
  resources completely gone.  Oops.  Can't happen, because
  qbus_set_hotplug_handler() can't actually fail here.  Pass
  &error_abort to qbus_set_hotplug_handler() instead.

* hw/ppc/spapr_drc.c's unrealize()

  Fails when object_property_del() fails, but all the other work is
  already done.  On failure, the device would stay realized with its
  vmstate registration gone.  Oops.  Can't happen, because
  object_property_del() can't actually fail here.  Pass &error_abort
  to object_property_del() instead.

* spapr_phb_unrealize()

  Fails and bails out when remove_drcs() fails, but other work is
  already done.  On failure, the device would stay realized with some
  of its resources gone.  Oops.  remove_drcs() fails only when
  chassis_from_bus()'s object_property_get_uint() fails, and it can't
  here.  Pass &error_abort to remove_drcs() instead.

Therefore, no unrealize method can fail before this patch.

device_set_realized()'s recursive unrealization via bus uses
object_property_set_bool().  Can't drop @errp there, so pass
&error_abort.

We similarly unrealize with object_property_set_bool() elsewhere,
always ignoring errors.  Pass &error_abort instead.

Several unrealize methods no longer handle errors from other unrealize
methods: virtio_9p_device_unrealize(),
virtio_input_device_unrealize(), scsi_qdev_unrealize(), ...
Much of the deleted error handling looks wrong anyway.

One unrealize methods no longer ignore such errors:
usb_ehci_pci_exit().

Several realize methods no longer ignore errors when rolling back:
v9fs_device_realize_common(), pci_qdev_unrealize(),
spapr_phb_realize(), usb_qdev_realize(), vfio_ccw_realize(),
virtio_device_realize().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20200505152926.18877-17-armbru@redhat.com>
2020-05-15 07:08:14 +02:00

1358 lines
38 KiB
C

/*
* s390 PCI BUS
*
* Copyright 2014 IBM Corp.
* Author(s): Frank Blaschka <frank.blaschka@de.ibm.com>
* Hong Bo Li <lihbbj@cn.ibm.com>
* Yi Min Zhao <zyimin@cn.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or (at
* your option) any later version. See the COPYING file in the top-level
* directory.
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "cpu.h"
#include "s390-pci-bus.h"
#include "s390-pci-inst.h"
#include "hw/pci/pci_bus.h"
#include "hw/qdev-properties.h"
#include "hw/pci/pci_bridge.h"
#include "hw/pci/msi.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
#ifndef DEBUG_S390PCI_BUS
#define DEBUG_S390PCI_BUS 0
#endif
#define DPRINTF(fmt, ...) \
do { \
if (DEBUG_S390PCI_BUS) { \
fprintf(stderr, "S390pci-bus: " fmt, ## __VA_ARGS__); \
} \
} while (0)
S390pciState *s390_get_phb(void)
{
static S390pciState *phb;
if (!phb) {
phb = S390_PCI_HOST_BRIDGE(
object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
assert(phb != NULL);
}
return phb;
}
int pci_chsc_sei_nt2_get_event(void *res)
{
ChscSeiNt2Res *nt2_res = (ChscSeiNt2Res *)res;
PciCcdfAvail *accdf;
PciCcdfErr *eccdf;
int rc = 1;
SeiContainer *sei_cont;
S390pciState *s = s390_get_phb();
sei_cont = QTAILQ_FIRST(&s->pending_sei);
if (sei_cont) {
QTAILQ_REMOVE(&s->pending_sei, sei_cont, link);
nt2_res->nt = 2;
nt2_res->cc = sei_cont->cc;
nt2_res->length = cpu_to_be16(sizeof(ChscSeiNt2Res));
switch (sei_cont->cc) {
case 1: /* error event */
eccdf = (PciCcdfErr *)nt2_res->ccdf;
eccdf->fid = cpu_to_be32(sei_cont->fid);
eccdf->fh = cpu_to_be32(sei_cont->fh);
eccdf->e = cpu_to_be32(sei_cont->e);
eccdf->faddr = cpu_to_be64(sei_cont->faddr);
eccdf->pec = cpu_to_be16(sei_cont->pec);
break;
case 2: /* availability event */
accdf = (PciCcdfAvail *)nt2_res->ccdf;
accdf->fid = cpu_to_be32(sei_cont->fid);
accdf->fh = cpu_to_be32(sei_cont->fh);
accdf->pec = cpu_to_be16(sei_cont->pec);
break;
default:
abort();
}
g_free(sei_cont);
rc = 0;
}
return rc;
}
int pci_chsc_sei_nt2_have_event(void)
{
S390pciState *s = s390_get_phb();
return !QTAILQ_EMPTY(&s->pending_sei);
}
S390PCIBusDevice *s390_pci_find_next_avail_dev(S390pciState *s,
S390PCIBusDevice *pbdev)
{
S390PCIBusDevice *ret = pbdev ? QTAILQ_NEXT(pbdev, link) :
QTAILQ_FIRST(&s->zpci_devs);
while (ret && ret->state == ZPCI_FS_RESERVED) {
ret = QTAILQ_NEXT(ret, link);
}
return ret;
}
S390PCIBusDevice *s390_pci_find_dev_by_fid(S390pciState *s, uint32_t fid)
{
S390PCIBusDevice *pbdev;
QTAILQ_FOREACH(pbdev, &s->zpci_devs, link) {
if (pbdev->fid == fid) {
return pbdev;
}
}
return NULL;
}
void s390_pci_sclp_configure(SCCB *sccb)
{
IoaCfgSccb *psccb = (IoaCfgSccb *)sccb;
S390PCIBusDevice *pbdev = s390_pci_find_dev_by_fid(s390_get_phb(),
be32_to_cpu(psccb->aid));
uint16_t rc;
if (!pbdev) {
DPRINTF("sclp config no dev found\n");
rc = SCLP_RC_ADAPTER_ID_NOT_RECOGNIZED;
goto out;
}
switch (pbdev->state) {
case ZPCI_FS_RESERVED:
rc = SCLP_RC_ADAPTER_IN_RESERVED_STATE;
break;
case ZPCI_FS_STANDBY:
pbdev->state = ZPCI_FS_DISABLED;
rc = SCLP_RC_NORMAL_COMPLETION;
break;
default:
rc = SCLP_RC_NO_ACTION_REQUIRED;
}
out:
psccb->header.response_code = cpu_to_be16(rc);
}
static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev)
{
HotplugHandler *hotplug_ctrl;
/* Unplug the PCI device */
if (pbdev->pdev) {
DeviceState *pdev = DEVICE(pbdev->pdev);
hotplug_ctrl = qdev_get_hotplug_handler(pdev);
hotplug_handler_unplug(hotplug_ctrl, pdev, &error_abort);
object_unparent(OBJECT(pdev));
}
/* Unplug the zPCI device */
hotplug_ctrl = qdev_get_hotplug_handler(DEVICE(pbdev));
hotplug_handler_unplug(hotplug_ctrl, DEVICE(pbdev), &error_abort);
object_unparent(OBJECT(pbdev));
}
void s390_pci_sclp_deconfigure(SCCB *sccb)
{
IoaCfgSccb *psccb = (IoaCfgSccb *)sccb;
S390PCIBusDevice *pbdev = s390_pci_find_dev_by_fid(s390_get_phb(),
be32_to_cpu(psccb->aid));
uint16_t rc;
if (!pbdev) {
DPRINTF("sclp deconfig no dev found\n");
rc = SCLP_RC_ADAPTER_ID_NOT_RECOGNIZED;
goto out;
}
switch (pbdev->state) {
case ZPCI_FS_RESERVED:
rc = SCLP_RC_ADAPTER_IN_RESERVED_STATE;
break;
case ZPCI_FS_STANDBY:
rc = SCLP_RC_NO_ACTION_REQUIRED;
break;
default:
if (pbdev->summary_ind) {
pci_dereg_irqs(pbdev);
}
if (pbdev->iommu->enabled) {
pci_dereg_ioat(pbdev->iommu);
}
pbdev->state = ZPCI_FS_STANDBY;
rc = SCLP_RC_NORMAL_COMPLETION;
if (pbdev->unplug_requested) {
s390_pci_perform_unplug(pbdev);
}
}
out:
psccb->header.response_code = cpu_to_be16(rc);
}
static S390PCIBusDevice *s390_pci_find_dev_by_uid(S390pciState *s, uint16_t uid)
{
S390PCIBusDevice *pbdev;
QTAILQ_FOREACH(pbdev, &s->zpci_devs, link) {
if (pbdev->uid == uid) {
return pbdev;
}
}
return NULL;
}
S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
const char *target)
{
S390PCIBusDevice *pbdev;
if (!target) {
return NULL;
}
QTAILQ_FOREACH(pbdev, &s->zpci_devs, link) {
if (!strcmp(pbdev->target, target)) {
return pbdev;
}
}
return NULL;
}
static S390PCIBusDevice *s390_pci_find_dev_by_pci(S390pciState *s,
PCIDevice *pci_dev)
{
S390PCIBusDevice *pbdev;
if (!pci_dev) {
return NULL;
}
QTAILQ_FOREACH(pbdev, &s->zpci_devs, link) {
if (pbdev->pdev == pci_dev) {
return pbdev;
}
}
return NULL;
}
S390PCIBusDevice *s390_pci_find_dev_by_idx(S390pciState *s, uint32_t idx)
{
return g_hash_table_lookup(s->zpci_table, &idx);
}
S390PCIBusDevice *s390_pci_find_dev_by_fh(S390pciState *s, uint32_t fh)
{
uint32_t idx = FH_MASK_INDEX & fh;
S390PCIBusDevice *pbdev = s390_pci_find_dev_by_idx(s, idx);
if (pbdev && pbdev->fh == fh) {
return pbdev;
}
return NULL;
}
static void s390_pci_generate_event(uint8_t cc, uint16_t pec, uint32_t fh,
uint32_t fid, uint64_t faddr, uint32_t e)
{
SeiContainer *sei_cont;
S390pciState *s = s390_get_phb();
sei_cont = g_new0(SeiContainer, 1);
sei_cont->fh = fh;
sei_cont->fid = fid;
sei_cont->cc = cc;
sei_cont->pec = pec;
sei_cont->faddr = faddr;
sei_cont->e = e;
QTAILQ_INSERT_TAIL(&s->pending_sei, sei_cont, link);
css_generate_css_crws(0);
}
static void s390_pci_generate_plug_event(uint16_t pec, uint32_t fh,
uint32_t fid)
{
s390_pci_generate_event(2, pec, fh, fid, 0, 0);
}
void s390_pci_generate_error_event(uint16_t pec, uint32_t fh, uint32_t fid,
uint64_t faddr, uint32_t e)
{
s390_pci_generate_event(1, pec, fh, fid, faddr, e);
}
static void s390_pci_set_irq(void *opaque, int irq, int level)
{
/* nothing to do */
}
static int s390_pci_map_irq(PCIDevice *pci_dev, int irq_num)
{
/* nothing to do */
return 0;
}
static uint64_t s390_pci_get_table_origin(uint64_t iota)
{
return iota & ~ZPCI_IOTA_RTTO_FLAG;
}
static unsigned int calc_rtx(dma_addr_t ptr)
{
return ((unsigned long) ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
}
static unsigned int calc_sx(dma_addr_t ptr)
{
return ((unsigned long) ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
}
static unsigned int calc_px(dma_addr_t ptr)
{
return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
}
static uint64_t get_rt_sto(uint64_t entry)
{
return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
? (entry & ZPCI_RTE_ADDR_MASK)
: 0;
}
static uint64_t get_st_pto(uint64_t entry)
{
return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
? (entry & ZPCI_STE_ADDR_MASK)
: 0;
}
static bool rt_entry_isvalid(uint64_t entry)
{
return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
}
static bool pt_entry_isvalid(uint64_t entry)
{
return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
}
static bool entry_isprotected(uint64_t entry)
{
return (entry & ZPCI_TABLE_PROT_MASK) == ZPCI_TABLE_PROTECTED;
}
/* ett is expected table type, -1 page table, 0 segment table, 1 region table */
static uint64_t get_table_index(uint64_t iova, int8_t ett)
{
switch (ett) {
case ZPCI_ETT_PT:
return calc_px(iova);
case ZPCI_ETT_ST:
return calc_sx(iova);
case ZPCI_ETT_RT:
return calc_rtx(iova);
}
return -1;
}
static bool entry_isvalid(uint64_t entry, int8_t ett)
{
switch (ett) {
case ZPCI_ETT_PT:
return pt_entry_isvalid(entry);
case ZPCI_ETT_ST:
case ZPCI_ETT_RT:
return rt_entry_isvalid(entry);
}
return false;
}
/* Return true if address translation is done */
static bool translate_iscomplete(uint64_t entry, int8_t ett)
{
switch (ett) {
case 0:
return (entry & ZPCI_TABLE_FC) ? true : false;
case 1:
return false;
}
return true;
}
static uint64_t get_frame_size(int8_t ett)
{
switch (ett) {
case ZPCI_ETT_PT:
return 1ULL << 12;
case ZPCI_ETT_ST:
return 1ULL << 20;
case ZPCI_ETT_RT:
return 1ULL << 31;
}
return 0;
}
static uint64_t get_next_table_origin(uint64_t entry, int8_t ett)
{
switch (ett) {
case ZPCI_ETT_PT:
return entry & ZPCI_PTE_ADDR_MASK;
case ZPCI_ETT_ST:
return get_st_pto(entry);
case ZPCI_ETT_RT:
return get_rt_sto(entry);
}
return 0;
}
/**
* table_translate: do translation within one table and return the following
* table origin
*
* @entry: the entry being translated, the result is stored in this.
* @to: the address of table origin.
* @ett: expected table type, 1 region table, 0 segment table and -1 page table.
* @error: error code
*/
static uint64_t table_translate(S390IOTLBEntry *entry, uint64_t to, int8_t ett,
uint16_t *error)
{
uint64_t tx, te, nto = 0;
uint16_t err = 0;
tx = get_table_index(entry->iova, ett);
te = address_space_ldq(&address_space_memory, to + tx * sizeof(uint64_t),
MEMTXATTRS_UNSPECIFIED, NULL);
if (!te) {
err = ERR_EVENT_INVALTE;
goto out;
}
if (!entry_isvalid(te, ett)) {
entry->perm &= IOMMU_NONE;
goto out;
}
if (ett == ZPCI_ETT_RT && ((te & ZPCI_TABLE_LEN_RTX) != ZPCI_TABLE_LEN_RTX
|| te & ZPCI_TABLE_OFFSET_MASK)) {
err = ERR_EVENT_INVALTL;
goto out;
}
nto = get_next_table_origin(te, ett);
if (!nto) {
err = ERR_EVENT_TT;
goto out;
}
if (entry_isprotected(te)) {
entry->perm &= IOMMU_RO;
} else {
entry->perm &= IOMMU_RW;
}
if (translate_iscomplete(te, ett)) {
switch (ett) {
case ZPCI_ETT_PT:
entry->translated_addr = te & ZPCI_PTE_ADDR_MASK;
break;
case ZPCI_ETT_ST:
entry->translated_addr = (te & ZPCI_SFAA_MASK) |
(entry->iova & ~ZPCI_SFAA_MASK);
break;
}
nto = 0;
}
out:
if (err) {
entry->perm = IOMMU_NONE;
*error = err;
}
entry->len = get_frame_size(ett);
return nto;
}
uint16_t s390_guest_io_table_walk(uint64_t g_iota, hwaddr addr,
S390IOTLBEntry *entry)
{
uint64_t to = s390_pci_get_table_origin(g_iota);
int8_t ett = 1;
uint16_t error = 0;
entry->iova = addr & PAGE_MASK;
entry->translated_addr = 0;
entry->perm = IOMMU_RW;
if (entry_isprotected(g_iota)) {
entry->perm &= IOMMU_RO;
}
while (to) {
to = table_translate(entry, to, ett--, &error);
}
return error;
}
static IOMMUTLBEntry s390_translate_iommu(IOMMUMemoryRegion *mr, hwaddr addr,
IOMMUAccessFlags flag, int iommu_idx)
{
S390PCIIOMMU *iommu = container_of(mr, S390PCIIOMMU, iommu_mr);
S390IOTLBEntry *entry;
uint64_t iova = addr & PAGE_MASK;
uint16_t error = 0;
IOMMUTLBEntry ret = {
.target_as = &address_space_memory,
.iova = 0,
.translated_addr = 0,
.addr_mask = ~(hwaddr)0,
.perm = IOMMU_NONE,
};
switch (iommu->pbdev->state) {
case ZPCI_FS_ENABLED:
case ZPCI_FS_BLOCKED:
if (!iommu->enabled) {
return ret;
}
break;
default:
return ret;
}
DPRINTF("iommu trans addr 0x%" PRIx64 "\n", addr);
if (addr < iommu->pba || addr > iommu->pal) {
error = ERR_EVENT_OORANGE;
goto err;
}
entry = g_hash_table_lookup(iommu->iotlb, &iova);
if (entry) {
ret.iova = entry->iova;
ret.translated_addr = entry->translated_addr;
ret.addr_mask = entry->len - 1;
ret.perm = entry->perm;
} else {
ret.iova = iova;
ret.addr_mask = ~PAGE_MASK;
ret.perm = IOMMU_NONE;
}
if (flag != IOMMU_NONE && !(flag & ret.perm)) {
error = ERR_EVENT_TPROTE;
}
err:
if (error) {
iommu->pbdev->state = ZPCI_FS_ERROR;
s390_pci_generate_error_event(error, iommu->pbdev->fh,
iommu->pbdev->fid, addr, 0);
}
return ret;
}
static void s390_pci_iommu_replay(IOMMUMemoryRegion *iommu,
IOMMUNotifier *notifier)
{
/* It's impossible to plug a pci device on s390x that already has iommu
* mappings which need to be replayed, that is due to the "one iommu per
* zpci device" construct. But when we support migration of vfio-pci
* devices in future, we need to revisit this.
*/
return;
}
static S390PCIIOMMU *s390_pci_get_iommu(S390pciState *s, PCIBus *bus,
int devfn)
{
uint64_t key = (uintptr_t)bus;
S390PCIIOMMUTable *table = g_hash_table_lookup(s->iommu_table, &key);
S390PCIIOMMU *iommu;
if (!table) {
table = g_new0(S390PCIIOMMUTable, 1);
table->key = key;
g_hash_table_insert(s->iommu_table, &table->key, table);
}
iommu = table->iommu[PCI_SLOT(devfn)];
if (!iommu) {
iommu = S390_PCI_IOMMU(object_new(TYPE_S390_PCI_IOMMU));
char *mr_name = g_strdup_printf("iommu-root-%02x:%02x.%01x",
pci_bus_num(bus),
PCI_SLOT(devfn),
PCI_FUNC(devfn));
char *as_name = g_strdup_printf("iommu-pci-%02x:%02x.%01x",
pci_bus_num(bus),
PCI_SLOT(devfn),
PCI_FUNC(devfn));
memory_region_init(&iommu->mr, OBJECT(iommu), mr_name, UINT64_MAX);
address_space_init(&iommu->as, &iommu->mr, as_name);
iommu->iotlb = g_hash_table_new_full(g_int64_hash, g_int64_equal,
NULL, g_free);
table->iommu[PCI_SLOT(devfn)] = iommu;
g_free(mr_name);
g_free(as_name);
}
return iommu;
}
static AddressSpace *s390_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
{
S390pciState *s = opaque;
S390PCIIOMMU *iommu = s390_pci_get_iommu(s, bus, devfn);
return &iommu->as;
}
static uint8_t set_ind_atomic(uint64_t ind_loc, uint8_t to_be_set)
{
uint8_t ind_old, ind_new;
hwaddr len = 1;
uint8_t *ind_addr;
ind_addr = cpu_physical_memory_map(ind_loc, &len, true);
if (!ind_addr) {
s390_pci_generate_error_event(ERR_EVENT_AIRERR, 0, 0, 0, 0);
return -1;
}
do {
ind_old = *ind_addr;
ind_new = ind_old | to_be_set;
} while (atomic_cmpxchg(ind_addr, ind_old, ind_new) != ind_old);
cpu_physical_memory_unmap(ind_addr, len, 1, len);
return ind_old;
}
static void s390_msi_ctrl_write(void *opaque, hwaddr addr, uint64_t data,
unsigned int size)
{
S390PCIBusDevice *pbdev = opaque;
uint32_t vec = data & ZPCI_MSI_VEC_MASK;
uint64_t ind_bit;
uint32_t sum_bit;
assert(pbdev);
DPRINTF("write_msix data 0x%" PRIx64 " idx %d vec 0x%x\n", data,
pbdev->idx, vec);
if (pbdev->state != ZPCI_FS_ENABLED) {
return;
}
ind_bit = pbdev->routes.adapter.ind_offset;
sum_bit = pbdev->routes.adapter.summary_offset;
set_ind_atomic(pbdev->routes.adapter.ind_addr + (ind_bit + vec) / 8,
0x80 >> ((ind_bit + vec) % 8));
if (!set_ind_atomic(pbdev->routes.adapter.summary_addr + sum_bit / 8,
0x80 >> (sum_bit % 8))) {
css_adapter_interrupt(CSS_IO_ADAPTER_PCI, pbdev->isc);
}
}
static uint64_t s390_msi_ctrl_read(void *opaque, hwaddr addr, unsigned size)
{
return 0xffffffff;
}
static const MemoryRegionOps s390_msi_ctrl_ops = {
.write = s390_msi_ctrl_write,
.read = s390_msi_ctrl_read,
.endianness = DEVICE_LITTLE_ENDIAN,
};
void s390_pci_iommu_enable(S390PCIIOMMU *iommu)
{
/*
* The iommu region is initialized against a 0-mapped address space,
* so the smallest IOMMU region we can define runs from 0 to the end
* of the PCI address space.
*/
char *name = g_strdup_printf("iommu-s390-%04x", iommu->pbdev->uid);
memory_region_init_iommu(&iommu->iommu_mr, sizeof(iommu->iommu_mr),
TYPE_S390_IOMMU_MEMORY_REGION, OBJECT(&iommu->mr),
name, iommu->pal + 1);
iommu->enabled = true;
memory_region_add_subregion(&iommu->mr, 0, MEMORY_REGION(&iommu->iommu_mr));
g_free(name);
}
void s390_pci_iommu_disable(S390PCIIOMMU *iommu)
{
iommu->enabled = false;
g_hash_table_remove_all(iommu->iotlb);
memory_region_del_subregion(&iommu->mr, MEMORY_REGION(&iommu->iommu_mr));
object_unparent(OBJECT(&iommu->iommu_mr));
}
static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn)
{
uint64_t key = (uintptr_t)bus;
S390PCIIOMMUTable *table = g_hash_table_lookup(s->iommu_table, &key);
S390PCIIOMMU *iommu = table ? table->iommu[PCI_SLOT(devfn)] : NULL;
if (!table || !iommu) {
return;
}
table->iommu[PCI_SLOT(devfn)] = NULL;
g_hash_table_destroy(iommu->iotlb);
address_space_destroy(&iommu->as);
object_unparent(OBJECT(&iommu->mr));
object_unparent(OBJECT(iommu));
object_unref(OBJECT(iommu));
}
static void s390_pcihost_realize(DeviceState *dev, Error **errp)
{
PCIBus *b;
BusState *bus;
PCIHostState *phb = PCI_HOST_BRIDGE(dev);
S390pciState *s = S390_PCI_HOST_BRIDGE(dev);
Error *local_err = NULL;
DPRINTF("host_init\n");
b = pci_register_root_bus(dev, NULL, s390_pci_set_irq, s390_pci_map_irq,
NULL, get_system_memory(), get_system_io(), 0,
64, TYPE_PCI_BUS);
pci_setup_iommu(b, s390_pci_dma_iommu, s);
bus = BUS(b);
qbus_set_hotplug_handler(bus, OBJECT(dev), &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
phb->bus = b;
s->bus = S390_PCI_BUS(qbus_create(TYPE_S390_PCI_BUS, dev, NULL));
qbus_set_hotplug_handler(BUS(s->bus), OBJECT(dev), &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
s->iommu_table = g_hash_table_new_full(g_int64_hash, g_int64_equal,
NULL, g_free);
s->zpci_table = g_hash_table_new_full(g_int_hash, g_int_equal, NULL, NULL);
s->bus_no = 0;
QTAILQ_INIT(&s->pending_sei);
QTAILQ_INIT(&s->zpci_devs);
css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false,
S390_ADAPTER_SUPPRESSIBLE, &local_err);
error_propagate(errp, local_err);
}
static int s390_pci_msix_init(S390PCIBusDevice *pbdev)
{
char *name;
uint8_t pos;
uint16_t ctrl;
uint32_t table, pba;
pos = pci_find_capability(pbdev->pdev, PCI_CAP_ID_MSIX);
if (!pos) {
return -1;
}
ctrl = pci_host_config_read_common(pbdev->pdev, pos + PCI_MSIX_FLAGS,
pci_config_size(pbdev->pdev), sizeof(ctrl));
table = pci_host_config_read_common(pbdev->pdev, pos + PCI_MSIX_TABLE,
pci_config_size(pbdev->pdev), sizeof(table));
pba = pci_host_config_read_common(pbdev->pdev, pos + PCI_MSIX_PBA,
pci_config_size(pbdev->pdev), sizeof(pba));
pbdev->msix.table_bar = table & PCI_MSIX_FLAGS_BIRMASK;
pbdev->msix.table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK;
pbdev->msix.pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK;
pbdev->msix.pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK;
pbdev->msix.entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1;
name = g_strdup_printf("msix-s390-%04x", pbdev->uid);
memory_region_init_io(&pbdev->msix_notify_mr, OBJECT(pbdev),
&s390_msi_ctrl_ops, pbdev, name, PAGE_SIZE);
memory_region_add_subregion(&pbdev->iommu->mr, ZPCI_MSI_ADDR,
&pbdev->msix_notify_mr);
g_free(name);
return 0;
}
static void s390_pci_msix_free(S390PCIBusDevice *pbdev)
{
memory_region_del_subregion(&pbdev->iommu->mr, &pbdev->msix_notify_mr);
object_unparent(OBJECT(&pbdev->msix_notify_mr));
}
static S390PCIBusDevice *s390_pci_device_new(S390pciState *s,
const char *target, Error **errp)
{
Error *local_err = NULL;
DeviceState *dev;
dev = qdev_try_create(BUS(s->bus), TYPE_S390_PCI_DEVICE);
if (!dev) {
error_setg(errp, "zPCI device could not be created");
return NULL;
}
object_property_set_str(OBJECT(dev), target, "target", &local_err);
if (local_err) {
object_unparent(OBJECT(dev));
error_propagate_prepend(errp, local_err,
"zPCI device could not be created: ");
return NULL;
}
object_property_set_bool(OBJECT(dev), true, "realized", &local_err);
if (local_err) {
object_unparent(OBJECT(dev));
error_propagate_prepend(errp, local_err,
"zPCI device could not be created: ");
return NULL;
}
return S390_PCI_DEVICE(dev);
}
static bool s390_pci_alloc_idx(S390pciState *s, S390PCIBusDevice *pbdev)
{
uint32_t idx;
idx = s->next_idx;
while (s390_pci_find_dev_by_idx(s, idx)) {
idx = (idx + 1) & FH_MASK_INDEX;
if (idx == s->next_idx) {
return false;
}
}
pbdev->idx = idx;
return true;
}
static void s390_pcihost_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev);
if (!s390_has_feat(S390_FEAT_ZPCI)) {
warn_report("Plugging a PCI/zPCI device without the 'zpci' CPU "
"feature enabled; the guest will not be able to see/use "
"this device");
}
if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
PCIDevice *pdev = PCI_DEVICE(dev);
if (pdev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
error_setg(errp, "multifunction not supported in s390");
return;
}
} else if (object_dynamic_cast(OBJECT(dev), TYPE_S390_PCI_DEVICE)) {
S390PCIBusDevice *pbdev = S390_PCI_DEVICE(dev);
if (!s390_pci_alloc_idx(s, pbdev)) {
error_setg(errp, "no slot for plugging zpci device");
return;
}
}
}
static void s390_pci_update_subordinate(PCIDevice *dev, uint32_t nr)
{
uint32_t old_nr;
pci_default_write_config(dev, PCI_SUBORDINATE_BUS, nr, 1);
while (!pci_bus_is_root(pci_get_bus(dev))) {
dev = pci_get_bus(dev)->parent_dev;
old_nr = pci_default_read_config(dev, PCI_SUBORDINATE_BUS, 1);
if (old_nr < nr) {
pci_default_write_config(dev, PCI_SUBORDINATE_BUS, nr, 1);
}
}
}
static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev);
PCIDevice *pdev = NULL;
S390PCIBusDevice *pbdev = NULL;
if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {
PCIBridge *pb = PCI_BRIDGE(dev);
pdev = PCI_DEVICE(dev);
pci_bridge_map_irq(pb, dev->id, s390_pci_map_irq);
pci_setup_iommu(&pb->sec_bus, s390_pci_dma_iommu, s);
qbus_set_hotplug_handler(BUS(&pb->sec_bus), OBJECT(s), errp);
if (dev->hotplugged) {
pci_default_write_config(pdev, PCI_PRIMARY_BUS,
pci_dev_bus_num(pdev), 1);
s->bus_no += 1;
pci_default_write_config(pdev, PCI_SECONDARY_BUS, s->bus_no, 1);
s390_pci_update_subordinate(pdev, s->bus_no);
}
} else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
pdev = PCI_DEVICE(dev);
if (!dev->id) {
/* In the case the PCI device does not define an id */
/* we generate one based on the PCI address */
dev->id = g_strdup_printf("auto_%02x:%02x.%01x",
pci_dev_bus_num(pdev),
PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn));
}
pbdev = s390_pci_find_dev_by_target(s, dev->id);
if (!pbdev) {
pbdev = s390_pci_device_new(s, dev->id, errp);
if (!pbdev) {
return;
}
}
if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
pbdev->fh |= FH_SHM_VFIO;
} else {
pbdev->fh |= FH_SHM_EMUL;
}
pbdev->pdev = pdev;
pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn);
pbdev->iommu->pbdev = pbdev;
pbdev->state = ZPCI_FS_DISABLED;
if (s390_pci_msix_init(pbdev)) {
error_setg(errp, "MSI-X support is mandatory "
"in the S390 architecture");
return;
}
if (dev->hotplugged) {
s390_pci_generate_plug_event(HP_EVENT_TO_CONFIGURED ,
pbdev->fh, pbdev->fid);
}
} else if (object_dynamic_cast(OBJECT(dev), TYPE_S390_PCI_DEVICE)) {
pbdev = S390_PCI_DEVICE(dev);
/* the allocated idx is actually getting used */
s->next_idx = (pbdev->idx + 1) & FH_MASK_INDEX;
pbdev->fh = pbdev->idx;
QTAILQ_INSERT_TAIL(&s->zpci_devs, pbdev, link);
g_hash_table_insert(s->zpci_table, &pbdev->idx, pbdev);
} else {
g_assert_not_reached();
}
}
static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev);
S390PCIBusDevice *pbdev = NULL;
if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
PCIDevice *pci_dev = PCI_DEVICE(dev);
PCIBus *bus;
int32_t devfn;
pbdev = s390_pci_find_dev_by_pci(s, PCI_DEVICE(dev));
g_assert(pbdev);
s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
pbdev->fh, pbdev->fid);
bus = pci_get_bus(pci_dev);
devfn = pci_dev->devfn;
object_property_set_bool(OBJECT(dev), false, "realized", &error_abort);
s390_pci_msix_free(pbdev);
s390_pci_iommu_free(s, bus, devfn);
pbdev->pdev = NULL;
pbdev->state = ZPCI_FS_RESERVED;
} else if (object_dynamic_cast(OBJECT(dev), TYPE_S390_PCI_DEVICE)) {
pbdev = S390_PCI_DEVICE(dev);
pbdev->fid = 0;
QTAILQ_REMOVE(&s->zpci_devs, pbdev, link);
g_hash_table_remove(s->zpci_table, &pbdev->idx);
object_property_set_bool(OBJECT(dev), false, "realized", &error_abort);
}
}
static void s390_pcihost_unplug_request(HotplugHandler *hotplug_dev,
DeviceState *dev,
Error **errp)
{
S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev);
S390PCIBusDevice *pbdev;
if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {
error_setg(errp, "PCI bridge hot unplug currently not supported");
} else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
/*
* Redirect the unplug request to the zPCI device and remember that
* we've checked the PCI device already (to prevent endless recursion).
*/
pbdev = s390_pci_find_dev_by_pci(s, PCI_DEVICE(dev));
g_assert(pbdev);
pbdev->pci_unplug_request_processed = true;
qdev_unplug(DEVICE(pbdev), errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_S390_PCI_DEVICE)) {
pbdev = S390_PCI_DEVICE(dev);
/*
* If unplug was initially requested for the zPCI device, we
* first have to redirect to the PCI device, which will in return
* redirect back to us after performing its checks (if the request
* is not blocked, e.g. because it's a PCI bridge).
*/
if (pbdev->pdev && !pbdev->pci_unplug_request_processed) {
qdev_unplug(DEVICE(pbdev->pdev), errp);
return;
}
pbdev->pci_unplug_request_processed = false;
switch (pbdev->state) {
case ZPCI_FS_STANDBY:
case ZPCI_FS_RESERVED:
s390_pci_perform_unplug(pbdev);
break;
default:
/*
* Allow to send multiple requests, e.g. if the guest crashed
* before releasing the device, we would not be able to send
* another request to the same VM (e.g. fresh OS).
*/
pbdev->unplug_requested = true;
s390_pci_generate_plug_event(HP_EVENT_DECONFIGURE_REQUEST,
pbdev->fh, pbdev->fid);
}
} else {
g_assert_not_reached();
}
}
static void s390_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
void *opaque)
{
S390pciState *s = opaque;
PCIBus *sec_bus = NULL;
if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
PCI_HEADER_TYPE_BRIDGE)) {
return;
}
(s->bus_no)++;
pci_default_write_config(pdev, PCI_PRIMARY_BUS, pci_dev_bus_num(pdev), 1);
pci_default_write_config(pdev, PCI_SECONDARY_BUS, s->bus_no, 1);
pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, s->bus_no, 1);
sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
if (!sec_bus) {
return;
}
/* Assign numbers to all child bridges. The last is the highest number. */
pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
s390_pci_enumerate_bridge, s);
pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, s->bus_no, 1);
}
static void s390_pcihost_reset(DeviceState *dev)
{
S390pciState *s = S390_PCI_HOST_BRIDGE(dev);
PCIBus *bus = s->parent_obj.bus;
S390PCIBusDevice *pbdev, *next;
/* Process all pending unplug requests */
QTAILQ_FOREACH_SAFE(pbdev, &s->zpci_devs, link, next) {
if (pbdev->unplug_requested) {
if (pbdev->summary_ind) {
pci_dereg_irqs(pbdev);
}
if (pbdev->iommu->enabled) {
pci_dereg_ioat(pbdev->iommu);
}
pbdev->state = ZPCI_FS_STANDBY;
s390_pci_perform_unplug(pbdev);
}
}
/*
* When resetting a PCI bridge, the assigned numbers are set to 0. So
* on every system reset, we also have to reassign numbers.
*/
s->bus_no = 0;
pci_for_each_device(bus, pci_bus_num(bus), s390_pci_enumerate_bridge, s);
}
static void s390_pcihost_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
dc->reset = s390_pcihost_reset;
dc->realize = s390_pcihost_realize;
hc->pre_plug = s390_pcihost_pre_plug;
hc->plug = s390_pcihost_plug;
hc->unplug_request = s390_pcihost_unplug_request;
hc->unplug = s390_pcihost_unplug;
msi_nonbroken = true;
}
static const TypeInfo s390_pcihost_info = {
.name = TYPE_S390_PCI_HOST_BRIDGE,
.parent = TYPE_PCI_HOST_BRIDGE,
.instance_size = sizeof(S390pciState),
.class_init = s390_pcihost_class_init,
.interfaces = (InterfaceInfo[]) {
{ TYPE_HOTPLUG_HANDLER },
{ }
}
};
static const TypeInfo s390_pcibus_info = {
.name = TYPE_S390_PCI_BUS,
.parent = TYPE_BUS,
.instance_size = sizeof(S390PCIBus),
};
static uint16_t s390_pci_generate_uid(S390pciState *s)
{
uint16_t uid = 0;
do {
uid++;
if (!s390_pci_find_dev_by_uid(s, uid)) {
return uid;
}
} while (uid < ZPCI_MAX_UID);
return UID_UNDEFINED;
}
static uint32_t s390_pci_generate_fid(S390pciState *s, Error **errp)
{
uint32_t fid = 0;
do {
if (!s390_pci_find_dev_by_fid(s, fid)) {
return fid;
}
} while (fid++ != ZPCI_MAX_FID);
error_setg(errp, "no free fid could be found");
return 0;
}
static void s390_pci_device_realize(DeviceState *dev, Error **errp)
{
S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
S390pciState *s = s390_get_phb();
if (!zpci->target) {
error_setg(errp, "target must be defined");
return;
}
if (s390_pci_find_dev_by_target(s, zpci->target)) {
error_setg(errp, "target %s already has an associated zpci device",
zpci->target);
return;
}
if (zpci->uid == UID_UNDEFINED) {
zpci->uid = s390_pci_generate_uid(s);
if (!zpci->uid) {
error_setg(errp, "no free uid could be found");
return;
}
} else if (s390_pci_find_dev_by_uid(s, zpci->uid)) {
error_setg(errp, "uid %u already in use", zpci->uid);
return;
}
if (!zpci->fid_defined) {
Error *local_error = NULL;
zpci->fid = s390_pci_generate_fid(s, &local_error);
if (local_error) {
error_propagate(errp, local_error);
return;
}
} else if (s390_pci_find_dev_by_fid(s, zpci->fid)) {
error_setg(errp, "fid %u already in use", zpci->fid);
return;
}
zpci->state = ZPCI_FS_RESERVED;
zpci->fmb.format = ZPCI_FMB_FORMAT;
}
static void s390_pci_device_reset(DeviceState *dev)
{
S390PCIBusDevice *pbdev = S390_PCI_DEVICE(dev);
switch (pbdev->state) {
case ZPCI_FS_RESERVED:
return;
case ZPCI_FS_STANDBY:
break;
default:
pbdev->fh &= ~FH_MASK_ENABLE;
pbdev->state = ZPCI_FS_DISABLED;
break;
}
if (pbdev->summary_ind) {
pci_dereg_irqs(pbdev);
}
if (pbdev->iommu->enabled) {
pci_dereg_ioat(pbdev->iommu);
}
fmb_timer_free(pbdev);
}
static void s390_pci_get_fid(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
Property *prop = opaque;
uint32_t *ptr = qdev_get_prop_ptr(DEVICE(obj), prop);
visit_type_uint32(v, name, ptr, errp);
}
static void s390_pci_set_fid(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
DeviceState *dev = DEVICE(obj);
S390PCIBusDevice *zpci = S390_PCI_DEVICE(obj);
Property *prop = opaque;
uint32_t *ptr = qdev_get_prop_ptr(dev, prop);
if (dev->realized) {
qdev_prop_set_after_realize(dev, name, errp);
return;
}
visit_type_uint32(v, name, ptr, errp);
zpci->fid_defined = true;
}
static const PropertyInfo s390_pci_fid_propinfo = {
.name = "zpci_fid",
.get = s390_pci_get_fid,
.set = s390_pci_set_fid,
};
#define DEFINE_PROP_S390_PCI_FID(_n, _s, _f) \
DEFINE_PROP(_n, _s, _f, s390_pci_fid_propinfo, uint32_t)
static Property s390_pci_device_properties[] = {
DEFINE_PROP_UINT16("uid", S390PCIBusDevice, uid, UID_UNDEFINED),
DEFINE_PROP_S390_PCI_FID("fid", S390PCIBusDevice, fid),
DEFINE_PROP_STRING("target", S390PCIBusDevice, target),
DEFINE_PROP_END_OF_LIST(),
};
static const VMStateDescription s390_pci_device_vmstate = {
.name = TYPE_S390_PCI_DEVICE,
/*
* TODO: add state handling here, so migration works at least with
* emulated pci devices on s390x
*/
.unmigratable = 1,
};
static void s390_pci_device_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->desc = "zpci device";
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
dc->reset = s390_pci_device_reset;
dc->bus_type = TYPE_S390_PCI_BUS;
dc->realize = s390_pci_device_realize;
device_class_set_props(dc, s390_pci_device_properties);
dc->vmsd = &s390_pci_device_vmstate;
}
static const TypeInfo s390_pci_device_info = {
.name = TYPE_S390_PCI_DEVICE,
.parent = TYPE_DEVICE,
.instance_size = sizeof(S390PCIBusDevice),
.class_init = s390_pci_device_class_init,
};
static TypeInfo s390_pci_iommu_info = {
.name = TYPE_S390_PCI_IOMMU,
.parent = TYPE_OBJECT,
.instance_size = sizeof(S390PCIIOMMU),
};
static void s390_iommu_memory_region_class_init(ObjectClass *klass, void *data)
{
IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
imrc->translate = s390_translate_iommu;
imrc->replay = s390_pci_iommu_replay;
}
static const TypeInfo s390_iommu_memory_region_info = {
.parent = TYPE_IOMMU_MEMORY_REGION,
.name = TYPE_S390_IOMMU_MEMORY_REGION,
.class_init = s390_iommu_memory_region_class_init,
};
static void s390_pci_register_types(void)
{
type_register_static(&s390_pcihost_info);
type_register_static(&s390_pcibus_info);
type_register_static(&s390_pci_device_info);
type_register_static(&s390_pci_iommu_info);
type_register_static(&s390_iommu_memory_region_info);
}
type_init(s390_pci_register_types)