diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index c17e5f2c12..d2e4161998 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -33,9 +33,11 @@ #include #include "trace.h" #include "qemu/error-report.h" +#include "qapi/qmp/qerror.h" #include "hw/pci/pci_bus.h" #include "hw/ppc/spapr_drc.h" +#include "sysemu/device_tree.h" /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */ #define RTAS_QUERY_FN 0 @@ -48,6 +50,14 @@ #define RTAS_TYPE_MSI 1 #define RTAS_TYPE_MSIX 2 +#define _FDT(exp) \ + do { \ + int ret = (exp); \ + if (ret < 0) { \ + return ret; \ + } \ + } while (0) + sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid) { sPAPRPHBState *sphb; @@ -732,6 +742,368 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &phb->iommu_as; } +/* Macros to operate with address in OF binding to PCI */ +#define b_x(x, p, l) (((x) & ((1<<(l))-1)) << (p)) +#define b_n(x) b_x((x), 31, 1) /* 0 if relocatable */ +#define b_p(x) b_x((x), 30, 1) /* 1 if prefetchable */ +#define b_t(x) b_x((x), 29, 1) /* 1 if the address is aliased */ +#define b_ss(x) b_x((x), 24, 2) /* the space code */ +#define b_bbbbbbbb(x) b_x((x), 16, 8) /* bus number */ +#define b_ddddd(x) b_x((x), 11, 5) /* device number */ +#define b_fff(x) b_x((x), 8, 3) /* function number */ +#define b_rrrrrrrr(x) b_x((x), 0, 8) /* register number */ + +/* for 'reg'/'assigned-addresses' OF properties */ +#define RESOURCE_CELLS_SIZE 2 +#define RESOURCE_CELLS_ADDRESS 3 + +typedef struct ResourceFields { + uint32_t phys_hi; + uint32_t phys_mid; + uint32_t phys_lo; + uint32_t size_hi; + uint32_t size_lo; +} QEMU_PACKED ResourceFields; + +typedef struct ResourceProps { + ResourceFields reg[8]; + ResourceFields assigned[7]; + uint32_t reg_len; + uint32_t assigned_len; +} ResourceProps; + +/* fill in the 'reg'/'assigned-resources' OF properties for + * a PCI device. 'reg' describes resource requirements for a + * device's IO/MEM regions, 'assigned-addresses' describes the + * actual resource assignments. + * + * the properties are arrays of ('phys-addr', 'size') pairs describing + * the addressable regions of the PCI device, where 'phys-addr' is a + * RESOURCE_CELLS_ADDRESS-tuple of 32-bit integers corresponding to + * (phys.hi, phys.mid, phys.lo), and 'size' is a + * RESOURCE_CELLS_SIZE-tuple corresponding to (size.hi, size.lo). + * + * phys.hi = 0xYYXXXXZZ, where: + * 0xYY = npt000ss + * ||| | + * ||| +-- space code: 1 if IO region, 2 if MEM region + * ||+------ for non-relocatable IO: 1 if aliased + * || for relocatable IO: 1 if below 64KB + * || for MEM: 1 if below 1MB + * |+------- 1 if region is prefetchable + * +-------- 1 if region is non-relocatable + * 0xXXXX = bbbbbbbb dddddfff, encoding bus, slot, and function + * bits respectively + * 0xZZ = rrrrrrrr, the register number of the BAR corresponding + * to the region + * + * phys.mid and phys.lo correspond respectively to the hi/lo portions + * of the actual address of the region. + * + * how the phys-addr/size values are used differ slightly between + * 'reg' and 'assigned-addresses' properties. namely, 'reg' has + * an additional description for the config space region of the + * device, and in the case of QEMU has n=0 and phys.mid=phys.lo=0 + * to describe the region as relocatable, with an address-mapping + * that corresponds directly to the PHB's address space for the + * resource. 'assigned-addresses' always has n=1 set with an absolute + * address assigned for the resource. in general, 'assigned-addresses' + * won't be populated, since addresses for PCI devices are generally + * unmapped initially and left to the guest to assign. + * + * note also that addresses defined in these properties are, at least + * for PAPR guests, relative to the PHBs IO/MEM windows, and + * correspond directly to the addresses in the BARs. + * + * in accordance with PCI Bus Binding to Open Firmware, + * IEEE Std 1275-1994, section 4.1.1, as implemented by PAPR+ v2.7, + * Appendix C. + */ +static void populate_resource_props(PCIDevice *d, ResourceProps *rp) +{ + int bus_num = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(d)))); + uint32_t dev_id = (b_bbbbbbbb(bus_num) | + b_ddddd(PCI_SLOT(d->devfn)) | + b_fff(PCI_FUNC(d->devfn))); + ResourceFields *reg, *assigned; + int i, reg_idx = 0, assigned_idx = 0; + + /* config space region */ + reg = &rp->reg[reg_idx++]; + reg->phys_hi = cpu_to_be32(dev_id); + reg->phys_mid = 0; + reg->phys_lo = 0; + reg->size_hi = 0; + reg->size_lo = 0; + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + if (!d->io_regions[i].size) { + continue; + } + + reg = &rp->reg[reg_idx++]; + + reg->phys_hi = cpu_to_be32(dev_id | b_rrrrrrrr(pci_bar(d, i))); + if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) { + reg->phys_hi |= cpu_to_be32(b_ss(1)); + } else { + reg->phys_hi |= cpu_to_be32(b_ss(2)); + } + reg->phys_mid = 0; + reg->phys_lo = 0; + reg->size_hi = cpu_to_be32(d->io_regions[i].size >> 32); + reg->size_lo = cpu_to_be32(d->io_regions[i].size); + + if (d->io_regions[i].addr == PCI_BAR_UNMAPPED) { + continue; + } + + assigned = &rp->assigned[assigned_idx++]; + assigned->phys_hi = cpu_to_be32(reg->phys_hi | b_n(1)); + assigned->phys_mid = cpu_to_be32(d->io_regions[i].addr >> 32); + assigned->phys_lo = cpu_to_be32(d->io_regions[i].addr); + assigned->size_hi = reg->size_hi; + assigned->size_lo = reg->size_lo; + } + + rp->reg_len = reg_idx * sizeof(ResourceFields); + rp->assigned_len = assigned_idx * sizeof(ResourceFields); +} + +static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, + int phb_index, int drc_index, + const char *drc_name) +{ + ResourceProps rp; + bool is_bridge = false; + int pci_status; + + if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) == + PCI_HEADER_TYPE_BRIDGE) { + is_bridge = true; + } + + /* in accordance with PAPR+ v2.7 13.6.3, Table 181 */ + _FDT(fdt_setprop_cell(fdt, offset, "vendor-id", + pci_default_read_config(dev, PCI_VENDOR_ID, 2))); + _FDT(fdt_setprop_cell(fdt, offset, "device-id", + pci_default_read_config(dev, PCI_DEVICE_ID, 2))); + _FDT(fdt_setprop_cell(fdt, offset, "revision-id", + pci_default_read_config(dev, PCI_REVISION_ID, 1))); + _FDT(fdt_setprop_cell(fdt, offset, "class-code", + pci_default_read_config(dev, PCI_CLASS_DEVICE, 2) + << 8)); + if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) { + _FDT(fdt_setprop_cell(fdt, offset, "interrupts", + pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1))); + } + + if (!is_bridge) { + _FDT(fdt_setprop_cell(fdt, offset, "min-grant", + pci_default_read_config(dev, PCI_MIN_GNT, 1))); + _FDT(fdt_setprop_cell(fdt, offset, "max-latency", + pci_default_read_config(dev, PCI_MAX_LAT, 1))); + } + + if (pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2)) { + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-id", + pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2))); + } + + if (pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2)) { + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-vendor-id", + pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2))); + } + + _FDT(fdt_setprop_cell(fdt, offset, "cache-line-size", + pci_default_read_config(dev, PCI_CACHE_LINE_SIZE, 1))); + + /* the following fdt cells are masked off the pci status register */ + pci_status = pci_default_read_config(dev, PCI_STATUS, 2); + _FDT(fdt_setprop_cell(fdt, offset, "devsel-speed", + PCI_STATUS_DEVSEL_MASK & pci_status)); + + if (pci_status & PCI_STATUS_FAST_BACK) { + _FDT(fdt_setprop(fdt, offset, "fast-back-to-back", NULL, 0)); + } + if (pci_status & PCI_STATUS_66MHZ) { + _FDT(fdt_setprop(fdt, offset, "66mhz-capable", NULL, 0)); + } + if (pci_status & PCI_STATUS_UDF) { + _FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0)); + } + + /* NOTE: this is normally generated by firmware via path/unit name, + * but in our case we must set it manually since it does not get + * processed by OF beforehand + */ + _FDT(fdt_setprop_string(fdt, offset, "name", "pci")); + _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name, strlen(drc_name))); + _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)); + + _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", + RESOURCE_CELLS_ADDRESS)); + _FDT(fdt_setprop_cell(fdt, offset, "#size-cells", + RESOURCE_CELLS_SIZE)); + _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x", + RESOURCE_CELLS_SIZE)); + + populate_resource_props(dev, &rp); + _FDT(fdt_setprop(fdt, offset, "reg", (uint8_t *)rp.reg, rp.reg_len)); + _FDT(fdt_setprop(fdt, offset, "assigned-addresses", + (uint8_t *)rp.assigned, rp.assigned_len)); + + return 0; +} + +/* create OF node for pci device and required OF DT properties */ +static void *spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev, + int drc_index, const char *drc_name, + int *dt_offset) +{ + void *fdt; + int offset, ret, fdt_size; + int slot = PCI_SLOT(dev->devfn); + int func = PCI_FUNC(dev->devfn); + char nodename[512]; + + fdt = create_device_tree(&fdt_size); + if (func != 0) { + sprintf(nodename, "pci@%d,%d", slot, func); + } else { + sprintf(nodename, "pci@%d", slot); + } + offset = fdt_add_subnode(fdt, 0, nodename); + ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb->index, drc_index, + drc_name); + g_assert(!ret); + + *dt_offset = offset; + return fdt; +} + +static void spapr_phb_add_pci_device(sPAPRDRConnector *drc, + sPAPRPHBState *phb, + PCIDevice *pdev, + Error **errp) +{ + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + DeviceState *dev = DEVICE(pdev); + int drc_index = drck->get_index(drc); + const char *drc_name = drck->get_name(drc); + void *fdt = NULL; + int fdt_start_offset = 0; + + /* boot-time devices get their device tree node created by SLOF, but for + * hotplugged devices we need QEMU to generate it so the guest can fetch + * it via RTAS + */ + if (dev->hotplugged) { + fdt = spapr_create_pci_child_dt(phb, pdev, drc_index, drc_name, + &fdt_start_offset); + } + + drck->attach(drc, DEVICE(pdev), + fdt, fdt_start_offset, !dev->hotplugged, errp); + if (*errp) { + g_free(fdt); + } +} + +static void spapr_phb_remove_pci_device_cb(DeviceState *dev, void *opaque) +{ + /* some version guests do not wait for completion of a device + * cleanup (generally done asynchronously by the kernel) before + * signaling to QEMU that the device is safe, but instead sleep + * for some 'safe' period of time. unfortunately on a busy host + * this sleep isn't guaranteed to be long enough, resulting in + * bad things like IRQ lines being left asserted during final + * device removal. to deal with this we call reset just prior + * to finalizing the device, which will put the device back into + * an 'idle' state, as the device cleanup code expects. + */ + pci_device_reset(PCI_DEVICE(dev)); + object_unparent(OBJECT(dev)); +} + +static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc, + sPAPRPHBState *phb, + PCIDevice *pdev, + Error **errp) +{ + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + drck->detach(drc, DEVICE(pdev), spapr_phb_remove_pci_device_cb, phb, errp); +} + +static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb, + PCIDevice *pdev) +{ + uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)))); + return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI, + (phb->index << 16) | + (busnr << 8) | + pdev->devfn); +} + +static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler, + DeviceState *plugged_dev, Error **errp) +{ + sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler)); + PCIDevice *pdev = PCI_DEVICE(plugged_dev); + sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev); + Error *local_err = NULL; + + /* if DR is disabled we don't need to do anything in the case of + * hotplug or coldplug callbacks + */ + if (!phb->dr_enabled) { + /* if this is a hotplug operation initiated by the user + * we need to let them know it's not enabled + */ + if (plugged_dev->hotplugged) { + error_set(errp, QERR_BUS_NO_HOTPLUG, + object_get_typename(OBJECT(phb))); + } + return; + } + + g_assert(drc); + + spapr_phb_add_pci_device(drc, phb, pdev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } +} + +static void spapr_phb_hot_unplug_child(HotplugHandler *plug_handler, + DeviceState *plugged_dev, Error **errp) +{ + sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler)); + PCIDevice *pdev = PCI_DEVICE(plugged_dev); + sPAPRDRConnectorClass *drck; + sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev); + Error *local_err = NULL; + + if (!phb->dr_enabled) { + error_set(errp, QERR_BUS_NO_HOTPLUG, + object_get_typename(OBJECT(phb))); + return; + } + + g_assert(drc); + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + if (!drck->release_pending(drc)) { + spapr_phb_remove_pci_device(drc, phb, pdev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } +} + static void spapr_phb_realize(DeviceState *dev, Error **errp) { SysBusDevice *s = SYS_BUS_DEVICE(dev); @@ -825,6 +1197,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) &sphb->memspace, &sphb->iospace, PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS); phb->bus = bus; + qbus_set_hotplug_handler(BUS(phb->bus), DEVICE(sphb), NULL); /* * Initialize PHB address space. @@ -1061,6 +1434,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass); + HotplugHandlerClass *hp = HOTPLUG_HANDLER_CLASS(klass); hc->root_bus_path = spapr_phb_root_bus_path; dc->realize = spapr_phb_realize; @@ -1070,6 +1444,8 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->cannot_instantiate_with_device_add_yet = false; spc->finish_realize = spapr_phb_finish_realize; + hp->plug = spapr_phb_hot_plug_child; + hp->unplug = spapr_phb_hot_unplug_child; } static const TypeInfo spapr_phb_info = { @@ -1078,6 +1454,10 @@ static const TypeInfo spapr_phb_info = { .instance_size = sizeof(sPAPRPHBState), .class_init = spapr_phb_class_init, .class_size = sizeof(sPAPRPHBClass), + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } + } }; PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index) @@ -1091,17 +1471,6 @@ PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index) return PCI_HOST_BRIDGE(dev); } -/* Macros to operate with address in OF binding to PCI */ -#define b_x(x, p, l) (((x) & ((1<<(l))-1)) << (p)) -#define b_n(x) b_x((x), 31, 1) /* 0 if relocatable */ -#define b_p(x) b_x((x), 30, 1) /* 1 if prefetchable */ -#define b_t(x) b_x((x), 29, 1) /* 1 if the address is aliased */ -#define b_ss(x) b_x((x), 24, 2) /* the space code */ -#define b_bbbbbbbb(x) b_x((x), 16, 8) /* bus number */ -#define b_ddddd(x) b_x((x), 11, 5) /* device number */ -#define b_fff(x) b_x((x), 8, 3) /* function number */ -#define b_rrrrrrrr(x) b_x((x), 0, 8) /* register number */ - int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t xics_phandle, void *fdt) @@ -1149,14 +1518,6 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, return bus_off; } -#define _FDT(exp) \ - do { \ - int ret = (exp); \ - if (ret < 0) { \ - return ret; \ - } \ - } while (0) - /* Write PHB properties */ _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci")); _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));