From 62e9cd771cc368a8fd0f152832b78c43557897a9 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 17 Mar 2015 08:46:15 +0100 Subject: [PATCH 01/40] macio: Convert to realize() Convert device models "macio-oldworld" and "macio-newworld". Signed-off-by: Markus Armbruster Signed-off-by: Alexander Graf --- hw/misc/macio/macio.c | 71 +++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c index 063ad80412..e9037b0c39 100644 --- a/hw/misc/macio/macio.c +++ b/hw/misc/macio/macio.c @@ -126,17 +126,18 @@ static void macio_bar_setup(MacIOState *macio_state) } } -static int macio_common_initfn(PCIDevice *d) +static void macio_common_realize(PCIDevice *d, Error **errp) { MacIOState *s = MACIO(d); SysBusDevice *sysbus_dev; - int ret; + Error *err = NULL; d->config[0x3d] = 0x01; // interrupt on pin 1 - ret = qdev_init(DEVICE(&s->cuda)); - if (ret < 0) { - return ret; + object_property_set_bool(OBJECT(&s->cuda), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; } sysbus_dev = SYS_BUS_DEVICE(&s->cuda); memory_region_add_subregion(&s->bar, 0x16000, @@ -144,12 +145,11 @@ static int macio_common_initfn(PCIDevice *d) macio_bar_setup(s); pci_register_bar(d, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar); - - return 0; } -static int macio_initfn_ide(MacIOState *s, MACIOIDEState *ide, qemu_irq irq0, - qemu_irq irq1, int dmaid) +static void macio_realize_ide(MacIOState *s, MACIOIDEState *ide, + qemu_irq irq0, qemu_irq irq1, int dmaid, + Error **errp) { SysBusDevice *sysbus_dev; @@ -157,27 +157,31 @@ static int macio_initfn_ide(MacIOState *s, MACIOIDEState *ide, qemu_irq irq0, sysbus_connect_irq(sysbus_dev, 0, irq0); sysbus_connect_irq(sysbus_dev, 1, irq1); macio_ide_register_dma(ide, s->dbdma, dmaid); - return qdev_init(DEVICE(ide)); + object_property_set_bool(OBJECT(ide), true, "realized", errp); } -static int macio_oldworld_initfn(PCIDevice *d) +static void macio_oldworld_realize(PCIDevice *d, Error **errp) { MacIOState *s = MACIO(d); OldWorldMacIOState *os = OLDWORLD_MACIO(d); + Error *err = NULL; SysBusDevice *sysbus_dev; int i; int cur_irq = 0; - int ret = macio_common_initfn(d); - if (ret < 0) { - return ret; + + macio_common_realize(d, &err); + if (err) { + error_propagate(errp, err); + return; } sysbus_dev = SYS_BUS_DEVICE(&s->cuda); sysbus_connect_irq(sysbus_dev, 0, os->irqs[cur_irq++]); - ret = qdev_init(DEVICE(&os->nvram)); - if (ret < 0) { - return ret; + object_property_set_bool(OBJECT(&os->nvram), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; } sysbus_dev = SYS_BUS_DEVICE(&os->nvram); memory_region_add_subregion(&s->bar, 0x60000, @@ -194,13 +198,12 @@ static int macio_oldworld_initfn(PCIDevice *d) qemu_irq irq0 = os->irqs[cur_irq++]; qemu_irq irq1 = os->irqs[cur_irq++]; - ret = macio_initfn_ide(s, &os->ide[i], irq0, irq1, 0x16 + (i * 4)); - if (ret < 0) { - return ret; + macio_realize_ide(s, &os->ide[i], irq0, irq1, 0x16 + (i * 4), &err); + if (err) { + error_propagate(errp, err); + return; } } - - return 0; } static void macio_init_ide(MacIOState *s, MACIOIDEState *ide, size_t ide_size, @@ -268,17 +271,20 @@ static const MemoryRegionOps timer_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; -static int macio_newworld_initfn(PCIDevice *d) +static void macio_newworld_realize(PCIDevice *d, Error **errp) { MacIOState *s = MACIO(d); NewWorldMacIOState *ns = NEWWORLD_MACIO(d); + Error *err = NULL; SysBusDevice *sysbus_dev; MemoryRegion *timer_memory = NULL; int i; int cur_irq = 0; - int ret = macio_common_initfn(d); - if (ret < 0) { - return ret; + + macio_common_realize(d, &err); + if (err) { + error_propagate(errp, err); + return; } sysbus_dev = SYS_BUS_DEVICE(&s->cuda); @@ -294,9 +300,10 @@ static int macio_newworld_initfn(PCIDevice *d) qemu_irq irq0 = ns->irqs[cur_irq++]; qemu_irq irq1 = ns->irqs[cur_irq++]; - ret = macio_initfn_ide(s, &ns->ide[i], irq0, irq1, 0x16 + (i * 4)); - if (ret < 0) { - return ret; + macio_realize_ide(s, &ns->ide[i], irq0, irq1, 0x16 + (i * 4), &err); + if (err) { + error_propagate(errp, err); + return; } } @@ -305,8 +312,6 @@ static int macio_newworld_initfn(PCIDevice *d) memory_region_init_io(timer_memory, OBJECT(s), &timer_ops, NULL, "timer", 0x1000); memory_region_add_subregion(&s->bar, 0x15000, timer_memory); - - return 0; } static void macio_newworld_init(Object *obj) @@ -352,7 +357,7 @@ static void macio_oldworld_class_init(ObjectClass *oc, void *data) PCIDeviceClass *pdc = PCI_DEVICE_CLASS(oc); DeviceClass *dc = DEVICE_CLASS(oc); - pdc->init = macio_oldworld_initfn; + pdc->realize = macio_oldworld_realize; pdc->device_id = PCI_DEVICE_ID_APPLE_343S1201; dc->vmsd = &vmstate_macio_oldworld; } @@ -372,7 +377,7 @@ static void macio_newworld_class_init(ObjectClass *oc, void *data) PCIDeviceClass *pdc = PCI_DEVICE_CLASS(oc); DeviceClass *dc = DEVICE_CLASS(oc); - pdc->init = macio_newworld_initfn; + pdc->realize = macio_newworld_realize; pdc->device_id = PCI_DEVICE_ID_APPLE_UNI_N_KEYL; dc->vmsd = &vmstate_macio_newworld; } From 28f490b24af83a007937daacb9ea8bf7537f9084 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 18 May 2015 12:59:48 +0200 Subject: [PATCH 02/40] dtc: Update dtc / libfdt submodule to version 1.4.0 Since some recent patches require libfdt version 1.4.0, let's update the dtc submodule to this version. Signed-off-by: Thomas Huth Signed-off-by: Alexander Graf --- dtc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dtc b/dtc index bc895d6d09..65cc4d2748 160000 --- a/dtc +++ b/dtc @@ -1 +1 @@ -Subproject commit bc895d6d09695d05ceb8b52486ffe861d6cfbdde +Subproject commit 65cc4d2748a2c2e6f27f1cf39e07a5dbabd80ebf From 31ce0adb79655003465070fa90d7d20a5b8c2ff5 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 18 May 2015 12:59:49 +0200 Subject: [PATCH 03/40] configure: Check for libfdt version 1.4.0 Some recent patches require a function from libfdt version 1.4.0, so we should check for this version during the configure step already. Unfortunately, there does not seem to be a proper #define for the version number in the libfdt headers. So alternatively, we check for the availability of the required function fdt_get_property_by_offset() instead instead. Signed-off-by: Thomas Huth Signed-off-by: Alexander Graf --- configure | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/configure b/configure index 4e2f78a173..35111b22b0 100755 --- a/configure +++ b/configure @@ -3115,9 +3115,11 @@ fi if test "$fdt" != "no" ; then fdt_libs="-lfdt" # explicitly check for libfdt_env.h as it is missing in some stable installs + # and test for required functions to make sure we are on a version >= 1.4.0 cat > $TMPC << EOF +#include #include -int main(void) { return 0; } +int main(void) { fdt_get_property_by_offset(0, 0, 0); return 0; } EOF if compile_prog "" "$fdt_libs" ; then # system DTC is good - use it @@ -3135,7 +3137,7 @@ EOF fdt_libs="-L\$(BUILD_DIR)/dtc/libfdt $fdt_libs" elif test "$fdt" = "yes" ; then # have neither and want - prompt for system/submodule install - error_exit "DTC (libfdt) not present. Your options:" \ + error_exit "DTC (libfdt) version >= 1.4.0 not present. Your options:" \ " (1) Preferred: Install the DTC (libfdt) devel package" \ " (2) Fetch the DTC submodule, using:" \ " git submodule update --init dtc" From 421b1b27f6e9135ac8f01db219e0d8c0cefd7e71 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 19 Mar 2015 15:14:18 +1100 Subject: [PATCH 04/40] spapr_pci: Fix unsafe signed/unsigned comparisons spapr_pci.c contains a number of expressions of the form (uval == -1) or (uval != -1), where 'uval' is an unsigned value. This mostly works in practice, because as long as the width of uval is greater or equal than that of (int), the -1 will be promoted to the unsigned type, which is the expected outcome. However, at least for the cases where uval is uint32_t, this would break on platforms where sizeof(int) > 4 (and a few such do exist), because then the uint32_t value would be promoted to the larger int type, and never be equal to -1. This patch fixes these errors. The fixes for the (uint32_t) cases are necessary as described above. I've made similar fixes to (uint64_t) and (hwaddr) cases. Those are strictly theoretical, since I don't know of any platforms where sizeof(int) > 8, but hey, it's not that hard so we might as well be strictly C standard compliant. Reported-by: Markus Armbruster Signed-off-by: David Gibson Reviewed-by: Markus Armbruster Signed-off-by: Alexander Graf --- hw/ppc/spapr_pci.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 05f4faca6e..03f6d961d2 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -742,12 +742,12 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) PCIBus *bus; uint64_t msi_window_size = 4096; - if (sphb->index != -1) { + if (sphb->index != (uint32_t)-1) { hwaddr windows_base; - if ((sphb->buid != -1) || (sphb->dma_liobn != -1) - || (sphb->mem_win_addr != -1) - || (sphb->io_win_addr != -1)) { + if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn != (uint32_t)-1) + || (sphb->mem_win_addr != (hwaddr)-1) + || (sphb->io_win_addr != (hwaddr)-1)) { error_setg(errp, "Either \"index\" or other parameters must" " be specified for PAPR PHB, not both"); return; @@ -768,22 +768,22 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) sphb->io_win_addr = windows_base + SPAPR_PCI_IO_WIN_OFF; } - if (sphb->buid == -1) { + if (sphb->buid == (uint64_t)-1) { error_setg(errp, "BUID not specified for PHB"); return; } - if (sphb->dma_liobn == -1) { + if (sphb->dma_liobn == (uint32_t)-1) { error_setg(errp, "LIOBN not specified for PHB"); return; } - if (sphb->mem_win_addr == -1) { + if (sphb->mem_win_addr == (hwaddr)-1) { error_setg(errp, "Memory window address not specified for PHB"); return; } - if (sphb->io_win_addr == -1) { + if (sphb->io_win_addr == (hwaddr)-1) { error_setg(errp, "IO window address not specified for PHB"); return; } From 12fd28535891572be7aaf862a03019257dafa425 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 7 May 2015 15:33:28 +1000 Subject: [PATCH 05/40] spapr_iommu: Disable in-kernel IOMMU tables for >4GB windows The existing KVM_CREATE_SPAPR_TCE ioctl only support 4G windows max as the window size parameter to the kernel ioctl() is 32-bit so there's no way of expressing a TCE window > 4GB. We are going to add huge DMA windows support so this will create small window and unexpectedly fail later. This disables KVM_CREATE_SPAPR_TCE for windows bigger that 4GB. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr_iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index f3990fdc32..e19bf89929 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -126,11 +126,11 @@ static MemoryRegionIOMMUOps spapr_iommu_ops = { static int spapr_tce_table_realize(DeviceState *dev) { sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev); + uint64_t window_size = (uint64_t)tcet->nb_table << tcet->page_shift; - if (kvm_enabled()) { + if (kvm_enabled() && !(window_size >> 32)) { tcet->table = kvmppc_create_spapr_tce(tcet->liobn, - tcet->nb_table << - tcet->page_shift, + window_size, &tcet->fd, tcet->vfio_accel); } From f1215ea702e6e6cb3876221cf1f7f60133e08c30 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 7 May 2015 15:33:29 +1000 Subject: [PATCH 06/40] spapr_iommu: Make H_PUT_TCE_INDIRECT endian-safe PAPR is defined as big endian so TCEs need an adjustment so does this patch. This changes code to have ldq_be_phys() in one place. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr_iommu.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index e19bf89929..65ca4691a4 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -247,7 +247,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu, target_ulong ioba1 = ioba; target_ulong tce_list = args[2]; target_ulong npages = args[3]; - target_ulong ret = H_PARAMETER; + target_ulong ret = H_PARAMETER, tce = 0; sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn); CPUState *cs = CPU(cpu); hwaddr page_mask, page_size; @@ -267,7 +267,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu, for (i = 0; i < npages; ++i, ioba += page_size) { target_ulong off = (tce_list & ~SPAPR_TCE_RW) + i * sizeof(target_ulong); - target_ulong tce = ldq_phys(cs->as, off); + tce = ldq_be_phys(cs->as, off); ret = put_tce_emu(tcet, ioba, tce); if (ret) { @@ -278,8 +278,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu, /* Trace last successful or the first problematic entry */ i = i ? (i - 1) : 0; trace_spapr_iommu_indirect(liobn, ioba1, tce_list, i, - ldq_phys(cs->as, - tce_list + i * sizeof(target_ulong)), + tce, ret); return ret; From c8545818b331e9a32e5dd47f0aefbcf2b93e41da Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 7 May 2015 15:33:30 +1000 Subject: [PATCH 07/40] spapr_pci: Introduce a liobn number generating macros We are going to have multiple DMA windows per PHB and we want them to migrate so we need a predictable way of assigning LIOBNs. This introduces a macro which makes up a LIOBN from fixed prefix, PHB index (unique PHB id) and window number. This introduces a SPAPR_PCI_DMA_WINDOW_NUM() to know the window number from LIOBN. It is used to distinguish the default 32bit windows from dynamic windows and avoid picking default DMA window properties from a wrong TCE table. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr_pci.c | 4 ++-- include/hw/ppc/spapr.h | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 03f6d961d2..a69d908b95 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -760,7 +760,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) } sphb->buid = SPAPR_PCI_BASE_BUID + sphb->index; - sphb->dma_liobn = SPAPR_PCI_BASE_LIOBN + sphb->index; + sphb->dma_liobn = SPAPR_PCI_LIOBN(sphb->index, 0); windows_base = SPAPR_PCI_WINDOW_BASE + sphb->index * SPAPR_PCI_WINDOW_SPACING; @@ -1101,7 +1101,7 @@ static int spapr_phb_children_dt(Object *child, void *opaque) sPAPRTCETable *tcet; tcet = (sPAPRTCETable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE); - if (!tcet) { + if (!tcet || SPAPR_PCI_DMA_WINDOW_NUM(tcet->liobn)) { return 0; } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index af71e8b0d5..9a9bb90323 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -482,7 +482,9 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, #define SPAPR_TCE_PAGE_MASK (SPAPR_TCE_PAGE_SIZE - 1) #define SPAPR_VIO_BASE_LIOBN 0x00000000 -#define SPAPR_PCI_BASE_LIOBN 0x80000000 +#define SPAPR_PCI_LIOBN(phb_index, window_num) \ + (0x80000000 | ((phb_index) << 8) | (window_num)) +#define SPAPR_PCI_DMA_WINDOW_NUM(liobn) ((liobn) & 0xff) #define RTAS_ERROR_LOG_MAX 2048 From 4290ca49eed5e239695ce85c925a770e4a7317a6 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 7 May 2015 15:33:31 +1000 Subject: [PATCH 08/40] spapr_vio: Introduce a liobn number generating macros This introduces a macro which makes up a LIOBN from fixed prefix and VIO device address (@reg property). This is to keep LIOBN macros rendering consistent - the same macro for PCI has been added by the previous patch. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr_vio.c | 2 +- include/hw/ppc/spapr.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index 1360b97ab0..174033dd41 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -469,7 +469,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) } if (pc->rtce_window_size) { - uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg; + uint32_t liobn = SPAPR_VIO_LIOBN(dev->reg); memory_region_init(&dev->mrroot, OBJECT(dev), "iommu-spapr-root", ram_size); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 9a9bb90323..92ee72b24c 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -482,6 +482,7 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, #define SPAPR_TCE_PAGE_MASK (SPAPR_TCE_PAGE_SIZE - 1) #define SPAPR_VIO_BASE_LIOBN 0x00000000 +#define SPAPR_VIO_LIOBN(reg) (0x00000000 | (reg)) #define SPAPR_PCI_LIOBN(phb_index, window_num) \ (0x80000000 | ((phb_index) << 8) | (window_num)) #define SPAPR_PCI_DMA_WINDOW_NUM(liobn) ((liobn) & 0xff) From 3e1a01cb554412e8a9c25573126356596dc0c50f Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 7 May 2015 15:33:32 +1000 Subject: [PATCH 09/40] spapr_pci: Define default DMA window size as a macro This gets rid of a magic constant describing the default DMA window size for an emulated PHB. Signed-off-by: Alexey Kardashevskiy Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr_pci.c | 6 +++--- include/hw/pci-host/spapr.h | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index a69d908b95..312f0d9b16 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -893,11 +893,11 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp) { sPAPRTCETable *tcet; + uint32_t nb_table; + nb_table = SPAPR_PCI_DMA32_SIZE >> SPAPR_TCE_PAGE_SHIFT; tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn, - 0, - SPAPR_TCE_PAGE_SHIFT, - 0x40000000 >> SPAPR_TCE_PAGE_SHIFT, false); + 0, SPAPR_TCE_PAGE_SHIFT, nb_table, false); if (!tcet) { error_setg(errp, "Unable to create TCE table for %s", sphb->dtbusname); diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 895d273fee..d7b521dc12 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -114,6 +114,8 @@ struct sPAPRPHBVFIOState { #define SPAPR_PCI_MSI_WINDOW 0x40000000000ULL +#define SPAPR_PCI_DMA32_SIZE 0x40000000 + static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin) { return xics_get_qirq(spapr->icp, phb->lsi_table[pin].irq); From d9d96a3cc7267880fbccb6bc4018fc31909fc930 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 7 May 2015 15:33:33 +1000 Subject: [PATCH 10/40] spapr_iommu: Add separate trace points for PCI DMA operations This is to reduce VIO noise while debugging PCI DMA. Signed-off-by: Alexey Kardashevskiy Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr_iommu.c | 27 ++++++++++++++++++++------- include/hw/ppc/spapr.h | 1 + trace-events | 4 ++++ 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 65ca4691a4..3a773f703e 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -277,10 +277,11 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu, /* Trace last successful or the first problematic entry */ i = i ? (i - 1) : 0; - trace_spapr_iommu_indirect(liobn, ioba1, tce_list, i, - tce, - ret); - + if (SPAPR_IS_PCI_LIOBN(liobn)) { + trace_spapr_iommu_pci_indirect(liobn, ioba1, tce_list, i, tce, ret); + } else { + trace_spapr_iommu_indirect(liobn, ioba1, tce_list, i, tce, ret); + } return ret; } @@ -314,7 +315,11 @@ static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, break; } } - trace_spapr_iommu_stuff(liobn, ioba, tce_value, npages, ret); + if (SPAPR_IS_PCI_LIOBN(liobn)) { + trace_spapr_iommu_pci_stuff(liobn, ioba, tce_value, npages, ret); + } else { + trace_spapr_iommu_stuff(liobn, ioba, tce_value, npages, ret); + } return ret; } @@ -335,7 +340,11 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, ret = put_tce_emu(tcet, ioba, tce); } - trace_spapr_iommu_put(liobn, ioba, tce, ret); + if (SPAPR_IS_PCI_LIOBN(liobn)) { + trace_spapr_iommu_pci_put(liobn, ioba, tce, ret); + } else { + trace_spapr_iommu_put(liobn, ioba, tce, ret); + } return ret; } @@ -375,7 +384,11 @@ static target_ulong h_get_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, args[0] = tce; } } - trace_spapr_iommu_get(liobn, ioba, ret, tce); + if (SPAPR_IS_PCI_LIOBN(liobn)) { + trace_spapr_iommu_pci_get(liobn, ioba, ret, tce); + } else { + trace_spapr_iommu_get(liobn, ioba, ret, tce); + } return ret; } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 92ee72b24c..1dab3e1419 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -485,6 +485,7 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, #define SPAPR_VIO_LIOBN(reg) (0x00000000 | (reg)) #define SPAPR_PCI_LIOBN(phb_index, window_num) \ (0x80000000 | ((phb_index) << 8) | (window_num)) +#define SPAPR_IS_PCI_LIOBN(liobn) (!!((liobn) & 0x80000000)) #define SPAPR_PCI_DMA_WINDOW_NUM(liobn) ((liobn) & 0xff) #define RTAS_ERROR_LOG_MAX 2048 diff --git a/trace-events b/trace-events index 3bb1f042c9..a589650597 100644 --- a/trace-events +++ b/trace-events @@ -1338,6 +1338,10 @@ spapr_iommu_put(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t ret) "liob spapr_iommu_get(uint64_t liobn, uint64_t ioba, uint64_t ret, uint64_t tce) "liobn=%"PRIx64" ioba=0x%"PRIx64" ret=%"PRId64" tce=0x%"PRIx64 spapr_iommu_indirect(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t iobaN, uint64_t tceN, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tcelist=0x%"PRIx64" iobaN=0x%"PRIx64" tceN=0x%"PRIx64" ret=%"PRId64 spapr_iommu_stuff(uint64_t liobn, uint64_t ioba, uint64_t tce_value, uint64_t npages, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tcevalue=0x%"PRIx64" npages=%"PRId64" ret=%"PRId64 +spapr_iommu_pci_put(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tce=0x%"PRIx64" ret=%"PRId64 +spapr_iommu_pci_get(uint64_t liobn, uint64_t ioba, uint64_t ret, uint64_t tce) "liobn=%"PRIx64" ioba=0x%"PRIx64" ret=%"PRId64" tce=0x%"PRIx64 +spapr_iommu_pci_indirect(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t iobaN, uint64_t tceN, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tcelist=0x%"PRIx64" iobaN=0x%"PRIx64" tceN=0x%"PRIx64" ret=%"PRId64 +spapr_iommu_pci_stuff(uint64_t liobn, uint64_t ioba, uint64_t tce_value, uint64_t npages, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tcevalue=0x%"PRIx64" npages=%"PRId64" ret=%"PRId64 spapr_iommu_xlate(uint64_t liobn, uint64_t ioba, uint64_t tce, unsigned perm, unsigned pgsize) "liobn=%"PRIx64" 0x%"PRIx64" -> 0x%"PRIx64" perm=%u mask=%x" spapr_iommu_new_table(uint64_t liobn, void *tcet, void *table, int fd) "liobn=%"PRIx64" tcet=%p table=%p fd=%d" From 46c5874e9cd752ed8ded31af03472edd8fc3efc1 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 7 May 2015 15:33:34 +1000 Subject: [PATCH 11/40] spapr_pci: Make find_phb()/find_dev() public This makes find_phb()/find_dev() public and changed its names to spapr_pci_find_phb()/spapr_pci_find_dev() as they are going to be used from other parts of QEMU such as VFIO DDW (dynamic DMA window) or VFIO PCI error injection or VFIO EEH handling - in all these cases there are RTAS calls which are addressed to BUID+config_addr in IEEE1275 format. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr_pci.c | 36 ++++++++++++++++++------------------ include/hw/pci-host/spapr.h | 4 ++++ 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 312f0d9b16..79b6d3da09 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -47,7 +47,7 @@ #define RTAS_TYPE_MSI 1 #define RTAS_TYPE_MSIX 2 -static sPAPRPHBState *find_phb(sPAPREnvironment *spapr, uint64_t buid) +sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid) { sPAPRPHBState *sphb; @@ -61,10 +61,10 @@ static sPAPRPHBState *find_phb(sPAPREnvironment *spapr, uint64_t buid) return NULL; } -static PCIDevice *find_dev(sPAPREnvironment *spapr, uint64_t buid, - uint32_t config_addr) +PCIDevice *spapr_pci_find_dev(sPAPREnvironment *spapr, uint64_t buid, + uint32_t config_addr) { - sPAPRPHBState *sphb = find_phb(spapr, buid); + sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid); PCIHostState *phb = PCI_HOST_BRIDGE(sphb); int bus_num = (config_addr >> 16) & 0xFF; int devfn = (config_addr >> 8) & 0xFF; @@ -95,7 +95,7 @@ static void finish_read_pci_config(sPAPREnvironment *spapr, uint64_t buid, return; } - pci_dev = find_dev(spapr, buid, addr); + pci_dev = spapr_pci_find_dev(spapr, buid, addr); addr = rtas_pci_cfgaddr(addr); if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) { @@ -162,7 +162,7 @@ static void finish_write_pci_config(sPAPREnvironment *spapr, uint64_t buid, return; } - pci_dev = find_dev(spapr, buid, addr); + pci_dev = spapr_pci_find_dev(spapr, buid, addr); addr = rtas_pci_cfgaddr(addr); if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) { @@ -280,9 +280,9 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr, } /* Fins sPAPRPHBState */ - phb = find_phb(spapr, buid); + phb = spapr_pci_find_phb(spapr, buid); if (phb) { - pdev = find_dev(spapr, buid, config_addr); + pdev = spapr_pci_find_dev(spapr, buid, config_addr); } if (!phb || !pdev) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); @@ -381,9 +381,9 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, spapr_pci_msi *msi; /* Find sPAPRPHBState */ - phb = find_phb(spapr, buid); + phb = spapr_pci_find_phb(spapr, buid); if (phb) { - pdev = find_dev(spapr, buid, config_addr); + pdev = spapr_pci_find_dev(spapr, buid, config_addr); } if (!phb || !pdev) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); @@ -426,7 +426,7 @@ static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu, addr = rtas_ld(args, 0); option = rtas_ld(args, 3); - sphb = find_phb(spapr, buid); + sphb = spapr_pci_find_phb(spapr, buid); if (!sphb) { goto param_error_exit; } @@ -461,7 +461,7 @@ static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu, } buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); - sphb = find_phb(spapr, buid); + sphb = spapr_pci_find_phb(spapr, buid); if (!sphb) { goto param_error_exit; } @@ -479,7 +479,7 @@ static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu, switch (option) { case RTAS_GET_PE_ADDR: addr = rtas_ld(args, 0); - pdev = find_dev(spapr, buid, addr); + pdev = spapr_pci_find_dev(spapr, buid, addr); if (!pdev) { goto param_error_exit; } @@ -516,7 +516,7 @@ static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu, } buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); - sphb = find_phb(spapr, buid); + sphb = spapr_pci_find_phb(spapr, buid); if (!sphb) { goto param_error_exit; } @@ -562,7 +562,7 @@ static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu, buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); option = rtas_ld(args, 3); - sphb = find_phb(spapr, buid); + sphb = spapr_pci_find_phb(spapr, buid); if (!sphb) { goto param_error_exit; } @@ -596,7 +596,7 @@ static void rtas_ibm_configure_pe(PowerPCCPU *cpu, } buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); - sphb = find_phb(spapr, buid); + sphb = spapr_pci_find_phb(spapr, buid); if (!sphb) { goto param_error_exit; } @@ -631,7 +631,7 @@ static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu, } buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); - sphb = find_phb(spapr, buid); + sphb = spapr_pci_find_phb(spapr, buid); if (!sphb) { goto param_error_exit; } @@ -788,7 +788,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) return; } - if (find_phb(spapr, sphb->buid)) { + if (spapr_pci_find_phb(spapr, sphb->buid)) { error_setg(errp, "PCI host bridges must have unique BUIDs"); return; } diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index d7b521dc12..5b497ce5f6 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -131,4 +131,8 @@ void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr); void spapr_pci_rtas_init(void); +sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid); +PCIDevice *spapr_pci_find_dev(sPAPREnvironment *spapr, uint64_t buid, + uint32_t config_addr); + #endif /* __HW_SPAPR_PCI_H__ */ From fae807a2b182a613798fe619f9069bd0bbe3dc6a Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 7 May 2015 15:33:35 +1000 Subject: [PATCH 12/40] spapr_iommu: Make spapr_tce_find_by_liobn() public At the moment spapr_tce_find_by_liobn() is used by H_PUT_TCE/... handlers to find an IOMMU by LIOBN. We are going to implement Dynamic DMA windows (DDW), new code will go to a new file and we will use spapr_tce_find_by_liobn() there too so let's make it public. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr_iommu.c | 2 +- include/hw/ppc/spapr.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 3a773f703e..c17e831c43 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -41,7 +41,7 @@ enum sPAPRTCEAccess { static QLIST_HEAD(spapr_tce_tables, sPAPRTCETable) spapr_tce_tables; -static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn) +sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn) { sPAPRTCETable *tcet; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 1dab3e1419..7d9ab9d16c 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -511,6 +511,7 @@ struct sPAPRTCETable { QLIST_ENTRY(sPAPRTCETable) list; }; +sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn); void spapr_events_init(sPAPREnvironment *spapr); void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq); int spapr_h_cas_compose_response(target_ulong addr, target_ulong size); From ccf9ff8527a87ee485fbb6a0a73d28641cab5f60 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 7 May 2015 15:33:36 +1000 Subject: [PATCH 13/40] spapr_pci: Rework device-tree rendering This replaces object_child_foreach() and callback with existing SPAPR_PCI_LIOBN() and spapr_tce_find_by_liobn() to make the code easier to read. This is a mechanical patch so no behaviour change is expected. Signed-off-by: Alexey Kardashevskiy Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr_pci.c | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 79b6d3da09..52c5c73c5a 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1090,29 +1090,6 @@ PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index) #define b_fff(x) b_x((x), 8, 3) /* function number */ #define b_rrrrrrrr(x) b_x((x), 0, 8) /* register number */ -typedef struct sPAPRTCEDT { - void *fdt; - int node_off; -} sPAPRTCEDT; - -static int spapr_phb_children_dt(Object *child, void *opaque) -{ - sPAPRTCEDT *p = opaque; - sPAPRTCETable *tcet; - - tcet = (sPAPRTCETable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE); - if (!tcet || SPAPR_PCI_DMA_WINDOW_NUM(tcet->liobn)) { - return 0; - } - - spapr_dma_dt(p->fdt, p->node_off, "ibm,dma-window", - tcet->liobn, tcet->bus_offset, - tcet->nb_table << tcet->page_shift); - /* Stop after the first window */ - - return 1; -} - int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t xics_phandle, void *fdt) @@ -1151,6 +1128,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t interrupt_map_mask[] = { cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)}; uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7]; + sPAPRTCETable *tcet; /* Start populating the FDT */ sprintf(nodename, "pci@%" PRIx64, phb->buid); @@ -1203,8 +1181,10 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map, sizeof(interrupt_map))); - object_child_foreach(OBJECT(phb), spapr_phb_children_dt, - &((sPAPRTCEDT){ .fdt = fdt, .node_off = bus_off })); + tcet = spapr_tce_find_by_liobn(SPAPR_PCI_LIOBN(phb->index, 0)); + spapr_dma_dt(fdt, bus_off, "ibm,dma-window", + tcet->liobn, tcet->bus_offset, + tcet->nb_table << tcet->page_shift); return 0; } From dea1b3ce756d7242d4212c22b7d6e6a896495154 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 7 May 2015 15:33:37 +1000 Subject: [PATCH 14/40] spapr_iommu: Give unique QOM name to TCE table Useful for debugging. Signed-off-by: Alexey Kardashevskiy Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr_iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index c17e831c43..a14cdc434f 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -161,6 +161,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, bool vfio_accel) { sPAPRTCETable *tcet; + char tmp[64]; if (spapr_tce_find_by_liobn(liobn)) { fprintf(stderr, "Attempted to create TCE table with duplicate" @@ -179,7 +180,8 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, tcet->nb_table = nb_table; tcet->vfio_accel = vfio_accel; - object_property_add_child(OBJECT(owner), "tce-table", OBJECT(tcet), NULL); + snprintf(tmp, sizeof(tmp), "tce-table-%x", liobn); + object_property_add_child(OBJECT(owner), tmp, OBJECT(tcet), NULL); object_property_set_bool(OBJECT(tcet), true, "realized", NULL); From f9ce8e0aa3fb55ae7a8ea34d3169e73e87feb337 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 7 May 2015 15:33:38 +1000 Subject: [PATCH 15/40] hw/ppc/spapr_iommu: Fix the check for invalid upper bits in liobn The check "liobn & 0xFFFFFFFF00000000ULL" in spapr_tce_find_by_liobn() is completely useless since liobn is only declared as an uint32_t parameter. Fix this by using target_ulong instead (this is what most of the callers of this function are using, too). Signed-off-by: Thomas Huth Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr_iommu.c | 4 ++-- include/hw/ppc/spapr.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index a14cdc434f..8cd9dba9ac 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -41,7 +41,7 @@ enum sPAPRTCEAccess { static QLIST_HEAD(spapr_tce_tables, sPAPRTCETable) spapr_tce_tables; -sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn) +sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn) { sPAPRTCETable *tcet; @@ -52,7 +52,7 @@ sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn) } QLIST_FOREACH(tcet, &spapr_tce_tables, list) { - if (tcet->liobn == liobn) { + if (tcet->liobn == (uint32_t)liobn) { return tcet; } } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 7d9ab9d16c..317feb68a0 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -511,7 +511,7 @@ struct sPAPRTCETable { QLIST_ENTRY(sPAPRTCETable) list; }; -sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn); +sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn); void spapr_events_init(sPAPREnvironment *spapr); void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq); int spapr_h_cas_compose_response(target_ulong addr, target_ulong size); From a1a45612433edb0eb65c468f7ed579cd92358818 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 7 May 2015 15:33:39 +1000 Subject: [PATCH 16/40] pseries: Add pseries-2.4 machine type Now that 2.4 development has opened, create a new pseries machine type variant. For now it is identical to the pseries-2.3 machine type, but a number of new features are coming that will need to set backwards compatibility options. Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index a15fa3c965..971cb5f85c 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1905,10 +1905,15 @@ static const TypeInfo spapr_machine_2_2_info = { static void spapr_machine_2_3_class_init(ObjectClass *oc, void *data) { + static GlobalProperty compat_props[] = { + /* SPAPR_COMPAT_2_3, */ + { /* end of list */ } + }; MachineClass *mc = MACHINE_CLASS(oc); mc->name = "pseries-2.3"; mc->desc = "pSeries Logical Partition (PAPR compliant) v2.3"; + mc->compat_props = compat_props; } static const TypeInfo spapr_machine_2_3_info = { From 68fea5a0d7bac17fd74f0608ceed1d914eb0718e Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 7 May 2015 15:33:40 +1000 Subject: [PATCH 17/40] hw/ppc/spapr: Fix error message when firmware could not be loaded When specifying a non-existing file with the "-bios" parameter, QEMU complained that it "could not find LPAR rtas". That's obviously a copy-n-paste bug from the code which loads the spapr-rtas.bin, it should complain about a missing firmware file instead. Additionally the error message was printed with hw_error() - which also dumps the whole CPU state. However, this does not make much sense here since the CPU is not running yet and thus the registers only contain zeroes. So let's use error_report() here instead. And while we're at it, let's also bail out if the firmware file had zero length. Signed-off-by: Thomas Huth Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 971cb5f85c..9c05787942 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1641,12 +1641,12 @@ static void ppc_spapr_init(MachineState *machine) } filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); if (!filename) { - hw_error("Could not find LPAR rtas '%s'\n", bios_name); + error_report("Could not find LPAR firmware '%s'", bios_name); exit(1); } fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE); - if (fw_size < 0) { - hw_error("qemu: could not load LPAR rtas '%s'\n", filename); + if (fw_size <= 0) { + error_report("Could not load LPAR firmware '%s'", filename); exit(1); } g_free(filename); From 730fce593bbaa9240a0be860616ac4366113194d Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 7 May 2015 15:33:41 +1000 Subject: [PATCH 18/40] hw/ppc/spapr: Use error_report() instead of hw_error() hw_error() is designed for printing CPU-related error messages (e.g. it also prints a full CPU register dump). For error messages that are not directly related to CPU problems, a function like error_report() should be used instead. Signed-off-by: Thomas Huth Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 9c05787942..8cf1f2a547 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -794,8 +794,8 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr, _FDT((fdt_pack(fdt))); if (fdt_totalsize(fdt) > FDT_MAX_SIZE) { - hw_error("FDT too big ! 0x%x bytes (max is 0x%x)\n", - fdt_totalsize(fdt), FDT_MAX_SIZE); + error_report("FDT too big ! 0x%x bytes (max is 0x%x)", + fdt_totalsize(fdt), FDT_MAX_SIZE); exit(1); } @@ -899,7 +899,7 @@ static int spapr_check_htab_fd(sPAPREnvironment *spapr) spapr->htab_fd = kvmppc_get_htab_fd(false); if (spapr->htab_fd < 0) { error_report("Unable to open fd for reading hash table from KVM: " - "%s", strerror(errno)); + "%s", strerror(errno)); rc = -1; } spapr->htab_fd_stale = false; @@ -1419,7 +1419,7 @@ static void ppc_spapr_init(MachineState *machine) rma_alloc_size = kvmppc_alloc_rma(&rma); if (rma_alloc_size == -1) { - hw_error("qemu: Unable to create RMA\n"); + error_report("Unable to create RMA"); exit(1); } @@ -1520,18 +1520,18 @@ static void ppc_spapr_init(MachineState *machine) filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin"); if (!filename) { - hw_error("Could not find LPAR rtas '%s'\n", "spapr-rtas.bin"); + error_report("Could not find LPAR rtas '%s'", "spapr-rtas.bin"); exit(1); } spapr->rtas_size = get_image_size(filename); spapr->rtas_blob = g_malloc(spapr->rtas_size); if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) { - hw_error("qemu: could not load LPAR rtas '%s'\n", filename); + error_report("Could not load LPAR rtas '%s'", filename); exit(1); } if (spapr->rtas_size > RTAS_MAX_SIZE) { - hw_error("RTAS too big ! 0x%zx bytes (max is 0x%x)\n", - (size_t)spapr->rtas_size, RTAS_MAX_SIZE); + error_report("RTAS too big ! 0x%zx bytes (max is 0x%x)", + (size_t)spapr->rtas_size, RTAS_MAX_SIZE); exit(1); } g_free(filename); From 11eec063f29733395846ba756ecd544876ef6839 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 7 May 2015 15:33:42 +1000 Subject: [PATCH 19/40] docs: add sPAPR hotplug/dynamic-reconfiguration documentation This adds a general overview of hotplug/dynamic-reconfiguration for sPAPR/pSeries guest. As specified in PAPR+ v2.7. Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- docs/specs/ppc-spapr-hotplug.txt | 287 +++++++++++++++++++++++++++++++ 1 file changed, 287 insertions(+) create mode 100644 docs/specs/ppc-spapr-hotplug.txt diff --git a/docs/specs/ppc-spapr-hotplug.txt b/docs/specs/ppc-spapr-hotplug.txt new file mode 100644 index 0000000000..d35771cc2b --- /dev/null +++ b/docs/specs/ppc-spapr-hotplug.txt @@ -0,0 +1,287 @@ += sPAPR Dynamic Reconfiguration = + +sPAPR/"pseries" guests make use of a facility called dynamic-reconfiguration +to handle hotplugging of dynamic "physical" resources like PCI cards, or +"logical"/paravirtual resources like memory, CPUs, and "physical" +host-bridges, which are generally managed by the host/hypervisor and provided +to guests as virtualized resources. The specifics of dynamic-reconfiguration +are documented extensively in PAPR+ v2.7, Section 13.1. This document +provides a summary of that information as it applies to the implementation +within QEMU. + +== Dynamic-reconfiguration Connectors == + +To manage hotplug/unplug of these resources, a firmware abstraction known as +a Dynamic Resource Connector (DRC) is used to assign a particular dynamic +resource to the guest, and provide an interface for the guest to manage +configuration/removal of the resource associated with it. + +== Device-tree description of DRCs == + +A set of 4 Open Firmware device tree array properties are used to describe +the name/index/power-domain/type of each DRC allocated to a guest at +boot-time. There may be multiple sets of these arrays, rooted at different +paths in the device tree depending on the type of resource the DRCs manage. + +In some cases, the DRCs themselves may be provided by a dynamic resource, +such as the DRCs managing PCI slots on a hotplugged PHB. In this case the +arrays would be fetched as part of the device tree retrieval interfaces +for hotplugged resources described under "Guest->Host interface". + +The array properties are described below. Each entry/element in an array +describes the DRC identified by the element in the corresponding position +of ibm,drc-indexes: + +ibm,drc-names: + first 4-bytes: BE-encoded integer denoting the number of entries + each entry: a NULL-terminated string encoded as a byte array + + values for logical/virtual resources are defined in PAPR+ v2.7, + Section 13.5.2.4, and basically consist of the type of the resource + followed by a space and a numerical value that's unique across resources + of that type. + + values for "physical" resources such as PCI or VIO devices are + defined as being "location codes", which are the "location labels" of + each encapsulating device, starting from the chassis down to the + individual slot for the device, concatenated by a hyphen. This provides + a mapping of resources to a physical location in a chassis for debugging + purposes. For QEMU, this mapping is less important, so we assign a + location code that conforms to naming specifications, but is simply a + location label for the slot by itself to simplify the implementation. + The naming convention for location labels is documented in detail in + PAPR+ v2.7, Section 12.3.1.5, and in our case amounts to using "C" + for PCI/VIO device slots, where is unique across all PCI/VIO + device slots. + +ibm,drc-indexes: + first 4-bytes: BE-encoded integer denoting the number of entries + each 4-byte entry: BE-encoded integer that is unique across all DRCs + in the machine + + is arbitrary, but in the case of QEMU we try to maintain the + convention used to assign them to pSeries guests on pHyp: + + bit[31:28]: integer encoding of , where is: + 1 for CPU resource + 2 for PHB resource + 3 for VIO resource + 4 for PCI resource + 8 for Memory resource + bit[27:0]: integer encoding of , where is unique across + all resources of specified type + +ibm,drc-power-domains: + first 4-bytes: BE-encoded integer denoting the number of entries + each 4-byte entry: 32-bit, BE-encoded integer that specifies the + power domain the resource will be assigned to. In the case of QEMU + we associated all resources with a "live insertion" domain, where the + power is assumed to be managed automatically. The integer value for + this domain is a special value of -1. + + +ibm,drc-types: + first 4-bytes: BE-encoded integer denoting the number of entries + each entry: a NULL-terminated string encoded as a byte array + + is assigned as follows: + "CPU" for a CPU + "PHB" for a physical host-bridge + "SLOT" for a VIO slot + "28" for a PCI slot + "MEM" for memory resource + +== Guest->Host interface to manage dynamic resources == + +Each DRC is given a globally unique DRC Index, and resources associated with +a particular DRC are configured/managed by the guest via a number of RTAS +calls which reference individual DRCs based on the DRC index. This can be +considered the guest->host interface. + +rtas-set-power-level: + arg[0]: integer identifying power domain + arg[1]: new power level for the domain, 0-100 + output[0]: status, 0 on success + output[1]: power level after command + + Set the power level for a specified power domain + +rtas-get-power-level: + arg[0]: integer identifying power domain + output[0]: status, 0 on success + output[1]: current power level + + Get the power level for a specified power domain + +rtas-set-indicator: + arg[0]: integer identifying sensor/indicator type + arg[1]: index of sensor, for DR-related sensors this is generally the + DRC index + arg[2]: desired sensor value + output[0]: status, 0 on success + + Set the state of an indicator or sensor. For the purpose of this document we + focus on the indicator/sensor types associated with a DRC. The types are: + + 9001: isolation-state, controls/indicates whether a device has been made + accessible to a guest + + supported sensor values: + 0: isolate, device is made unaccessible by guest OS + 1: unisolate, device is made available to guest OS + + 9002: dr-indicator, controls "visual" indicator associated with device + + supported sensor values: + 0: inactive, resource may be safely removed + 1: active, resource is in use and cannot be safely removed + 2: identify, used to visually identify slot for interactive hotplug + 3: action, in most cases, used in the same manner as identify + + 9003: allocation-state, generally only used for "logical" DR resources to + request the allocation/deallocation of a resource prior to acquiring + it via isolation-state->unisolate, or after releasing it via + isolation-state->isolate, respectively. for "physical" DR (like PCI + hotplug/unplug) the pre-allocation of the resource is implied and + this sensor is unused. + + supported sensor values: + 0: unusable, tell firmware/system the resource can be + unallocated/reclaimed and added back to the system resource pool + 1: usable, request the resource be allocated/reserved for use by + guest OS + 2: exchange, used to allocate a spare resource to use for fail-over + in certain situations. unused in QEMU + 3: recover, used to reclaim a previously allocated resource that's + not currently allocated to the guest OS. unused in QEMU + +rtas-get-sensor-state: + arg[0]: integer identifying sensor/indicator type + arg[1]: index of sensor, for DR-related sensors this is generally the + DRC index + output[0]: status, 0 on success + + Used to read an indicator or sensor value. + + For DR-related operations, the only noteworthy sensor is dr-entity-sense, + which has a type value of 9003, as allocation-state does in the case of + rtas-set-indicator. The semantics/encodings of the sensor values are distinct + however: + + supported sensor values for dr-entity-sense (9003) sensor: + 0: empty, + for physical resources: DRC/slot is empty + for logical resources: unused + 1: present, + for physical resources: DRC/slot is populated with a device/resource + for logical resources: resource has been allocated to the DRC + 2: unusable, + for physical resources: unused + for logical resources: DRC has no resource allocated to it + 3: exchange, + for physical resources: unused + for logical resources: resource available for exchange (see + allocation-state sensor semantics above) + 4: recovery, + for physical resources: unused + for logical resources: resource available for recovery (see + allocation-state sensor semantics above) + +rtas-ibm-configure-connector: + arg[0]: guest physical address of 4096-byte work area buffer + arg[1]: 0, or address of additional 4096-byte work area buffer. only non-zero + if a prior RTAS response indicated a need for additional memory + output[0]: status: + 0: completed transmittal of device-tree node + 1: instruct guest to prepare for next DT sibling node + 2: instruct guest to prepare for next DT child node + 3: instruct guest to prepare for next DT property + 4: instruct guest to ascend to parent DT node + 5: instruct guest to provide additional work-area buffer + via arg[1] + 990x: instruct guest that operation took too long and to try + again later + + Used to fetch an OF device-tree description of the resource associated with + a particular DRC. The DRC index is encoded in the first 4-bytes of the first + work area buffer. + + Work area layout, using 4-byte offsets: + wa[0]: DRC index of the DRC to fetch device-tree nodes from + wa[1]: 0 (hard-coded) + wa[2]: for next-sibling/next-child response: + wa offset of null-terminated string denoting the new node's name + for next-property response: + wa offset of null-terminated string denoting new property's name + wa[3]: for next-property response (unused otherwise): + byte-length of new property's value + wa[4]: for next-property response (unused otherwise): + new property's value, encoded as an OFDT-compatible byte array + +== hotplug/unplug events == + +For most DR operations, the hypervisor will issue host->guest add/remove events +using the EPOW/check-exception notification framework, where the host issues a +check-exception interrupt, then provides an RTAS event log via an +rtas-check-exception call issued by the guest in response. This framework is +documented by PAPR+ v2.7, and already use in by QEMU for generating powerdown +requests via EPOW events. + +For DR, this framework has been extended to include hotplug events, which were +previously unneeded due to direct manipulation of DR-related guest userspace +tools by host-level management such as an HMC. This level of management is not +applicable to PowerKVM, hence the reason for extending the notification +framework to support hotplug events. + +Note that these events are not yet formally part of the PAPR+ specification, +but support for this format has already been implemented in DR-related +guest tools such as powerpc-utils/librtas, as well as kernel patches that have +been submitted to handle in-kernel processing of memory/cpu-related hotplug +events[1], and is planned for formal inclusion is PAPR+ specification. The +hotplug-specific payload is QEMU implemented as follows (with all values +encoded in big-endian format): + +struct rtas_event_log_v6_hp { +#define SECTION_ID_HOTPLUG 0x4850 /* HP */ + struct section_header { + uint16_t section_id; /* set to SECTION_ID_HOTPLUG */ + uint16_t section_length; /* sizeof(rtas_event_log_v6_hp), + * plus the length of the DRC name + * if a DRC name identifier is + * specified for hotplug_identifier + */ + uint8_t section_version; /* version 1 */ + uint8_t section_subtype; /* unused */ + uint16_t creator_component_id; /* unused */ + } hdr; +#define RTAS_LOG_V6_HP_TYPE_CPU 1 +#define RTAS_LOG_V6_HP_TYPE_MEMORY 2 +#define RTAS_LOG_V6_HP_TYPE_SLOT 3 +#define RTAS_LOG_V6_HP_TYPE_PHB 4 +#define RTAS_LOG_V6_HP_TYPE_PCI 5 + uint8_t hotplug_type; /* type of resource/device */ +#define RTAS_LOG_V6_HP_ACTION_ADD 1 +#define RTAS_LOG_V6_HP_ACTION_REMOVE 2 + uint8_t hotplug_action; /* action (add/remove) */ +#define RTAS_LOG_V6_HP_ID_DRC_NAME 1 +#define RTAS_LOG_V6_HP_ID_DRC_INDEX 2 +#define RTAS_LOG_V6_HP_ID_DRC_COUNT 3 + uint8_t hotplug_identifier; /* type of the resource identifier, + * which serves as the discriminator + * for the 'drc' union field below + */ + uint8_t reserved; + union { + uint32_t index; /* DRC index of resource to take action + * on + */ + uint32_t count; /* number of DR resources to take + * action on (guest chooses which) + */ + char name[1]; /* string representing the name of the + * DRC to take action on + */ + } drc; +} QEMU_PACKED; + +[1] http://thread.gmane.org/gmane.linux.ports.ppc.embedded/75350/focus=106867 From bbf5c878ab76a74f6277f99082c77bbdb1ad4c5b Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 7 May 2015 15:33:43 +1000 Subject: [PATCH 20/40] spapr_drc: initial implementation of sPAPRDRConnector device This device emulates a firmware abstraction used by pSeries guests to manage hotplug/dynamic-reconfiguration of host-bridges, PCI devices, memory, and CPUs. It is conceptually similar to an SHPC device, complete with LED indicators to identify individual slots to physical physical users and indicate when it is safe to remove a device. In some cases it is also used to manage virtualized resources, such a memory, CPUs, and physical-host bridges, which in the case of pSeries guests are virtualized resources where the physical components are managed by the host. Guests communicate with these DR Connectors using RTAS calls, generally by addressing the unique DRC index associated with a particular connector for a particular resource. For introspection purposes we expose this state initially as QOM properties, and in subsequent patches will introduce the RTAS calls that make use of it. This constitutes to the 'guest' interface. On the QEMU side we provide an attach/detach interface to associate or cleanup a DeviceState with a particular sPAPRDRConnector in response to hotplug/unplug, respectively. This constitutes the 'physical' interface to the DR Connector. Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/Makefile.objs | 2 +- hw/ppc/spapr_drc.c | 588 +++++++++++++++++++++++++++++++++++++ include/hw/ppc/spapr_drc.h | 199 +++++++++++++ 3 files changed, 788 insertions(+), 1 deletion(-) create mode 100644 hw/ppc/spapr_drc.c create mode 100644 include/hw/ppc/spapr_drc.h diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs index 437955d1d5..c8ab06e7f3 100644 --- a/hw/ppc/Makefile.objs +++ b/hw/ppc/Makefile.objs @@ -3,7 +3,7 @@ obj-y += ppc.o ppc_booke.o # IBM pSeries (sPAPR) obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o -obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o +obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy) obj-y += spapr_pci_vfio.o endif diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c new file mode 100644 index 0000000000..047c6c7393 --- /dev/null +++ b/hw/ppc/spapr_drc.c @@ -0,0 +1,588 @@ +/* + * QEMU SPAPR Dynamic Reconfiguration Connector Implementation + * + * Copyright IBM Corp. 2014 + * + * Authors: + * Michael Roth + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "hw/ppc/spapr_drc.h" +#include "qom/object.h" +#include "hw/qdev.h" +#include "qapi/visitor.h" +#include "qemu/error-report.h" + +/* #define DEBUG_SPAPR_DRC */ + +#ifdef DEBUG_SPAPR_DRC +#define DPRINTF(fmt, ...) \ + do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) +#define DPRINTFN(fmt, ...) \ + do { DPRINTF(fmt, ## __VA_ARGS__); fprintf(stderr, "\n"); } while (0) +#else +#define DPRINTF(fmt, ...) \ + do { } while (0) +#define DPRINTFN(fmt, ...) \ + do { } while (0) +#endif + +#define DRC_CONTAINER_PATH "/dr-connector" +#define DRC_INDEX_TYPE_SHIFT 28 +#define DRC_INDEX_ID_MASK (~(~0 << DRC_INDEX_TYPE_SHIFT)) + +static sPAPRDRConnectorTypeShift get_type_shift(sPAPRDRConnectorType type) +{ + uint32_t shift = 0; + + /* make sure this isn't SPAPR_DR_CONNECTOR_TYPE_ANY, or some + * other wonky value. + */ + g_assert(is_power_of_2(type)); + + while (type != (1 << shift)) { + shift++; + } + return shift; +} + +static uint32_t get_index(sPAPRDRConnector *drc) +{ + /* no set format for a drc index: it only needs to be globally + * unique. this is how we encode the DRC type on bare-metal + * however, so might as well do that here + */ + return (get_type_shift(drc->type) << DRC_INDEX_TYPE_SHIFT) | + (drc->id & DRC_INDEX_ID_MASK); +} + +static int set_isolation_state(sPAPRDRConnector *drc, + sPAPRDRIsolationState state) +{ + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + DPRINTFN("drc: %x, set_isolation_state: %x", get_index(drc), state); + + drc->isolation_state = state; + + if (drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) { + /* if we're awaiting release, but still in an unconfigured state, + * it's likely the guest is still in the process of configuring + * the device and is transitioning the devices to an ISOLATED + * state as a part of that process. so we only complete the + * removal when this transition happens for a device in a + * configured state, as suggested by the state diagram from + * PAPR+ 2.7, 13.4 + */ + if (drc->awaiting_release) { + if (drc->configured) { + DPRINTFN("finalizing device removal"); + drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, + drc->detach_cb_opaque, NULL); + } else { + DPRINTFN("deferring device removal on unconfigured device\n"); + } + } + drc->configured = false; + } + + return 0; +} + +static int set_indicator_state(sPAPRDRConnector *drc, + sPAPRDRIndicatorState state) +{ + DPRINTFN("drc: %x, set_indicator_state: %x", get_index(drc), state); + drc->indicator_state = state; + return 0; +} + +static int set_allocation_state(sPAPRDRConnector *drc, + sPAPRDRAllocationState state) +{ + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + DPRINTFN("drc: %x, set_allocation_state: %x", get_index(drc), state); + + if (drc->type != SPAPR_DR_CONNECTOR_TYPE_PCI) { + drc->allocation_state = state; + if (drc->awaiting_release && + drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { + DPRINTFN("finalizing device removal"); + drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, + drc->detach_cb_opaque, NULL); + } + } + return 0; +} + +static uint32_t get_type(sPAPRDRConnector *drc) +{ + return drc->type; +} + +static const char *get_name(sPAPRDRConnector *drc) +{ + return drc->name; +} + +static const void *get_fdt(sPAPRDRConnector *drc, int *fdt_start_offset) +{ + if (fdt_start_offset) { + *fdt_start_offset = drc->fdt_start_offset; + } + return drc->fdt; +} + +static void set_configured(sPAPRDRConnector *drc) +{ + DPRINTFN("drc: %x, set_configured", get_index(drc)); + + if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_UNISOLATED) { + /* guest should be not configuring an isolated device */ + DPRINTFN("drc: %x, set_configured: skipping isolated device", + get_index(drc)); + return; + } + drc->configured = true; +} + +/* + * dr-entity-sense sensor value + * returned via get-sensor-state RTAS calls + * as expected by state diagram in PAPR+ 2.7, 13.4 + * based on the current allocation/indicator/power states + * for the DR connector. + */ +static sPAPRDREntitySense entity_sense(sPAPRDRConnector *drc) +{ + sPAPRDREntitySense state; + + if (drc->dev) { + if (drc->type != SPAPR_DR_CONNECTOR_TYPE_PCI && + drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { + /* for logical DR, we return a state of UNUSABLE + * iff the allocation state UNUSABLE. + * Otherwise, report the state as USABLE/PRESENT, + * as we would for PCI. + */ + state = SPAPR_DR_ENTITY_SENSE_UNUSABLE; + } else { + /* this assumes all PCI devices are assigned to + * a 'live insertion' power domain, where QEMU + * manages power state automatically as opposed + * to the guest. present, non-PCI resources are + * unaffected by power state. + */ + state = SPAPR_DR_ENTITY_SENSE_PRESENT; + } + } else { + if (drc->type == SPAPR_DR_CONNECTOR_TYPE_PCI) { + /* PCI devices, and only PCI devices, use EMPTY + * in cases where we'd otherwise use UNUSABLE + */ + state = SPAPR_DR_ENTITY_SENSE_EMPTY; + } else { + state = SPAPR_DR_ENTITY_SENSE_UNUSABLE; + } + } + + DPRINTFN("drc: %x, entity_sense: %x", get_index(drc), state); + return state; +} + +static void prop_get_index(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj); + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + uint32_t value = (uint32_t)drck->get_index(drc); + visit_type_uint32(v, &value, name, errp); +} + +static void prop_get_type(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj); + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + uint32_t value = (uint32_t)drck->get_type(drc); + visit_type_uint32(v, &value, name, errp); +} + +static char *prop_get_name(Object *obj, Error **errp) +{ + sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj); + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + return g_strdup(drck->get_name(drc)); +} + +static void prop_get_entity_sense(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj); + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + uint32_t value = (uint32_t)drck->entity_sense(drc); + visit_type_uint32(v, &value, name, errp); +} + +static void prop_get_fdt(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj); + int fdt_offset_next, fdt_offset, fdt_depth; + void *fdt; + + if (!drc->fdt) { + return; + } + + fdt = drc->fdt; + fdt_offset = drc->fdt_start_offset; + fdt_depth = 0; + + do { + const char *name = NULL; + const struct fdt_property *prop = NULL; + int prop_len = 0, name_len = 0; + uint32_t tag; + + tag = fdt_next_tag(fdt, fdt_offset, &fdt_offset_next); + switch (tag) { + case FDT_BEGIN_NODE: + fdt_depth++; + name = fdt_get_name(fdt, fdt_offset, &name_len); + visit_start_struct(v, NULL, NULL, name, 0, NULL); + break; + case FDT_END_NODE: + /* shouldn't ever see an FDT_END_NODE before FDT_BEGIN_NODE */ + g_assert(fdt_depth > 0); + visit_end_struct(v, NULL); + fdt_depth--; + break; + case FDT_PROP: { + int i; + prop = fdt_get_property_by_offset(fdt, fdt_offset, &prop_len); + name = fdt_string(fdt, fdt32_to_cpu(prop->nameoff)); + visit_start_list(v, name, NULL); + for (i = 0; i < prop_len; i++) { + visit_type_uint8(v, (uint8_t *)&prop->data[i], NULL, NULL); + + } + visit_end_list(v, NULL); + break; + } + default: + error_setg(&error_abort, "device FDT in unexpected state: %d", tag); + } + fdt_offset = fdt_offset_next; + } while (fdt_depth != 0); +} + +static void attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, + int fdt_start_offset, bool coldplug, Error **errp) +{ + DPRINTFN("drc: %x, attach", get_index(drc)); + + if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) { + error_setg(errp, "an attached device is still awaiting release"); + return; + } + if (drc->type == SPAPR_DR_CONNECTOR_TYPE_PCI) { + g_assert(drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE); + } + g_assert(fdt || coldplug); + + /* NOTE: setting initial isolation state to UNISOLATED means we can't + * detach unless guest has a userspace/kernel that moves this state + * back to ISOLATED in response to an unplug event, or this is done + * manually by the admin prior. if we force things while the guest + * may be accessing the device, we can easily crash the guest, so we + * we defer completion of removal in such cases to the reset() hook. + */ + if (drc->type == SPAPR_DR_CONNECTOR_TYPE_PCI) { + drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED; + } + drc->indicator_state = SPAPR_DR_INDICATOR_STATE_ACTIVE; + + drc->dev = d; + drc->fdt = fdt; + drc->fdt_start_offset = fdt_start_offset; + drc->configured = false; + + object_property_add_link(OBJECT(drc), "device", + object_get_typename(OBJECT(drc->dev)), + (Object **)(&drc->dev), + NULL, 0, NULL); +} + +static void detach(sPAPRDRConnector *drc, DeviceState *d, + spapr_drc_detach_cb *detach_cb, + void *detach_cb_opaque, Error **errp) +{ + DPRINTFN("drc: %x, detach", get_index(drc)); + + drc->detach_cb = detach_cb; + drc->detach_cb_opaque = detach_cb_opaque; + + if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) { + DPRINTFN("awaiting transition to isolated state before removal"); + drc->awaiting_release = true; + return; + } + + if (drc->type != SPAPR_DR_CONNECTOR_TYPE_PCI && + drc->allocation_state != SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { + DPRINTFN("awaiting transition to unusable state before removal"); + drc->awaiting_release = true; + return; + } + + drc->indicator_state = SPAPR_DR_INDICATOR_STATE_INACTIVE; + + if (drc->detach_cb) { + drc->detach_cb(drc->dev, drc->detach_cb_opaque); + } + + drc->awaiting_release = false; + g_free(drc->fdt); + drc->fdt = NULL; + drc->fdt_start_offset = 0; + object_property_del(OBJECT(drc), "device", NULL); + drc->dev = NULL; + drc->detach_cb = NULL; + drc->detach_cb_opaque = NULL; +} + +static bool release_pending(sPAPRDRConnector *drc) +{ + return drc->awaiting_release; +} + +static void reset(DeviceState *d) +{ + sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(d); + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + DPRINTFN("drc reset: %x", drck->get_index(drc)); + /* immediately upon reset we can safely assume DRCs whose devices + * are pending removal can be safely removed, and that they will + * subsequently be left in an ISOLATED state. move the DRC to this + * state in these cases (which will in turn complete any pending + * device removals) + */ + if (drc->awaiting_release) { + drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_ISOLATED); + /* generally this should also finalize the removal, but if the device + * hasn't yet been configured we normally defer removal under the + * assumption that this transition is taking place as part of device + * configuration. so check if we're still waiting after this, and + * force removal if we are + */ + if (drc->awaiting_release) { + drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, + drc->detach_cb_opaque, NULL); + } + + /* non-PCI devices may be awaiting a transition to UNUSABLE */ + if (drc->type != SPAPR_DR_CONNECTOR_TYPE_PCI && + drc->awaiting_release) { + drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_UNUSABLE); + } + } +} + +static void realize(DeviceState *d, Error **errp) +{ + sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(d); + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + Object *root_container; + char link_name[256]; + gchar *child_name; + Error *err = NULL; + + DPRINTFN("drc realize: %x", drck->get_index(drc)); + /* NOTE: we do this as part of realize/unrealize due to the fact + * that the guest will communicate with the DRC via RTAS calls + * referencing the global DRC index. By unlinking the DRC + * from DRC_CONTAINER_PATH/ we effectively make it + * inaccessible by the guest, since lookups rely on this path + * existing in the composition tree + */ + root_container = container_get(object_get_root(), DRC_CONTAINER_PATH); + snprintf(link_name, sizeof(link_name), "%x", drck->get_index(drc)); + child_name = object_get_canonical_path_component(OBJECT(drc)); + DPRINTFN("drc child name: %s", child_name); + object_property_add_alias(root_container, link_name, + drc->owner, child_name, &err); + if (err) { + error_report("%s", error_get_pretty(err)); + error_free(err); + object_unref(OBJECT(drc)); + } + DPRINTFN("drc realize complete"); +} + +static void unrealize(DeviceState *d, Error **errp) +{ + sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(d); + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + Object *root_container; + char name[256]; + Error *err = NULL; + + DPRINTFN("drc unrealize: %x", drck->get_index(drc)); + root_container = container_get(object_get_root(), DRC_CONTAINER_PATH); + snprintf(name, sizeof(name), "%x", drck->get_index(drc)); + object_property_del(root_container, name, &err); + if (err) { + error_report("%s", error_get_pretty(err)); + error_free(err); + object_unref(OBJECT(drc)); + } +} + +sPAPRDRConnector *spapr_dr_connector_new(Object *owner, + sPAPRDRConnectorType type, + uint32_t id) +{ + sPAPRDRConnector *drc = + SPAPR_DR_CONNECTOR(object_new(TYPE_SPAPR_DR_CONNECTOR)); + + g_assert(type); + + drc->type = type; + drc->id = id; + drc->owner = owner; + object_property_add_child(owner, "dr-connector[*]", OBJECT(drc), NULL); + object_property_set_bool(OBJECT(drc), true, "realized", NULL); + + /* human-readable name for a DRC to encode into the DT + * description. this is mainly only used within a guest in place + * of the unique DRC index. + * + * in the case of VIO/PCI devices, it corresponds to a + * "location code" that maps a logical device/function (DRC index) + * to a physical (or virtual in the case of VIO) location in the + * system by chaining together the "location label" for each + * encapsulating component. + * + * since this is more to do with diagnosing physical hardware + * issues than guest compatibility, we choose location codes/DRC + * names that adhere to the documented format, but avoid encoding + * the entire topology information into the label/code, instead + * just using the location codes based on the labels for the + * endpoints (VIO/PCI adaptor connectors), which is basically + * just "C" followed by an integer ID. + * + * DRC names as documented by PAPR+ v2.7, 13.5.2.4 + * location codes as documented by PAPR+ v2.7, 12.3.1.5 + */ + switch (drc->type) { + case SPAPR_DR_CONNECTOR_TYPE_CPU: + drc->name = g_strdup_printf("CPU %d", id); + break; + case SPAPR_DR_CONNECTOR_TYPE_PHB: + drc->name = g_strdup_printf("PHB %d", id); + break; + case SPAPR_DR_CONNECTOR_TYPE_VIO: + case SPAPR_DR_CONNECTOR_TYPE_PCI: + drc->name = g_strdup_printf("C%d", id); + break; + case SPAPR_DR_CONNECTOR_TYPE_LMB: + drc->name = g_strdup_printf("LMB %d", id); + break; + default: + g_assert(false); + } + + /* PCI slot always start in a USABLE state, and stay there */ + if (drc->type == SPAPR_DR_CONNECTOR_TYPE_PCI) { + drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE; + } + + return drc; +} + +static void spapr_dr_connector_instance_init(Object *obj) +{ + sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj); + + object_property_add_uint32_ptr(obj, "isolation-state", + &drc->isolation_state, NULL); + object_property_add_uint32_ptr(obj, "indicator-state", + &drc->indicator_state, NULL); + object_property_add_uint32_ptr(obj, "allocation-state", + &drc->allocation_state, NULL); + object_property_add_uint32_ptr(obj, "id", &drc->id, NULL); + object_property_add(obj, "index", "uint32", prop_get_index, + NULL, NULL, NULL, NULL); + object_property_add(obj, "connector_type", "uint32", prop_get_type, + NULL, NULL, NULL, NULL); + object_property_add_str(obj, "name", prop_get_name, NULL, NULL); + object_property_add(obj, "entity-sense", "uint32", prop_get_entity_sense, + NULL, NULL, NULL, NULL); + object_property_add(obj, "fdt", "struct", prop_get_fdt, + NULL, NULL, NULL, NULL); +} + +static void spapr_dr_connector_class_init(ObjectClass *k, void *data) +{ + DeviceClass *dk = DEVICE_CLASS(k); + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + + dk->reset = reset; + dk->realize = realize; + dk->unrealize = unrealize; + drck->set_isolation_state = set_isolation_state; + drck->set_indicator_state = set_indicator_state; + drck->set_allocation_state = set_allocation_state; + drck->get_index = get_index; + drck->get_type = get_type; + drck->get_name = get_name; + drck->get_fdt = get_fdt; + drck->set_configured = set_configured; + drck->entity_sense = entity_sense; + drck->attach = attach; + drck->detach = detach; + drck->release_pending = release_pending; +} + +static const TypeInfo spapr_dr_connector_info = { + .name = TYPE_SPAPR_DR_CONNECTOR, + .parent = TYPE_DEVICE, + .instance_size = sizeof(sPAPRDRConnector), + .instance_init = spapr_dr_connector_instance_init, + .class_size = sizeof(sPAPRDRConnectorClass), + .class_init = spapr_dr_connector_class_init, +}; + +static void spapr_drc_register_types(void) +{ + type_register_static(&spapr_dr_connector_info); +} + +type_init(spapr_drc_register_types) + +/* helper functions for external users */ + +sPAPRDRConnector *spapr_dr_connector_by_index(uint32_t index) +{ + Object *obj; + char name[256]; + + snprintf(name, sizeof(name), "%s/%x", DRC_CONTAINER_PATH, index); + obj = object_resolve_path(name, NULL); + + return !obj ? NULL : SPAPR_DR_CONNECTOR(obj); +} + +sPAPRDRConnector *spapr_dr_connector_by_id(sPAPRDRConnectorType type, + uint32_t id) +{ + return spapr_dr_connector_by_index( + (get_type_shift(type) << DRC_INDEX_TYPE_SHIFT) | + (id & DRC_INDEX_ID_MASK)); +} diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h new file mode 100644 index 0000000000..34fdef97e5 --- /dev/null +++ b/include/hw/ppc/spapr_drc.h @@ -0,0 +1,199 @@ +/* + * QEMU SPAPR Dynamic Reconfiguration Connector Implementation + * + * Copyright IBM Corp. 2014 + * + * Authors: + * Michael Roth + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#if !defined(__HW_SPAPR_DRC_H__) +#define __HW_SPAPR_DRC_H__ + +#include "qom/object.h" +#include "hw/qdev.h" +#include "libfdt.h" + +#define TYPE_SPAPR_DR_CONNECTOR "spapr-dr-connector" +#define SPAPR_DR_CONNECTOR_GET_CLASS(obj) \ + OBJECT_GET_CLASS(sPAPRDRConnectorClass, obj, TYPE_SPAPR_DR_CONNECTOR) +#define SPAPR_DR_CONNECTOR_CLASS(klass) \ + OBJECT_CLASS_CHECK(sPAPRDRConnectorClass, klass, \ + TYPE_SPAPR_DR_CONNECTOR) +#define SPAPR_DR_CONNECTOR(obj) OBJECT_CHECK(sPAPRDRConnector, (obj), \ + TYPE_SPAPR_DR_CONNECTOR) + +/* + * Various hotplug types managed by sPAPRDRConnector + * + * these are somewhat arbitrary, but to make things easier + * when generating DRC indexes later we've aligned the bit + * positions with the values used to assign DRC indexes on + * pSeries. we use those values as bit shifts to allow for + * the OR'ing of these values in various QEMU routines, but + * for values exposed to the guest (via DRC indexes for + * instance) we will use the shift amounts. + */ +typedef enum { + SPAPR_DR_CONNECTOR_TYPE_SHIFT_CPU = 1, + SPAPR_DR_CONNECTOR_TYPE_SHIFT_PHB = 2, + SPAPR_DR_CONNECTOR_TYPE_SHIFT_VIO = 3, + SPAPR_DR_CONNECTOR_TYPE_SHIFT_PCI = 4, + SPAPR_DR_CONNECTOR_TYPE_SHIFT_LMB = 8, +} sPAPRDRConnectorTypeShift; + +typedef enum { + SPAPR_DR_CONNECTOR_TYPE_ANY = ~0, + SPAPR_DR_CONNECTOR_TYPE_CPU = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_CPU, + SPAPR_DR_CONNECTOR_TYPE_PHB = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_PHB, + SPAPR_DR_CONNECTOR_TYPE_VIO = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_VIO, + SPAPR_DR_CONNECTOR_TYPE_PCI = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_PCI, + SPAPR_DR_CONNECTOR_TYPE_LMB = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_LMB, +} sPAPRDRConnectorType; + +/* + * set via set-indicator RTAS calls + * as documented by PAPR+ 2.7 13.5.3.4, Table 177 + * + * isolated: put device under firmware control + * unisolated: claim OS control of device (may or may not be in use) + */ +typedef enum { + SPAPR_DR_ISOLATION_STATE_ISOLATED = 0, + SPAPR_DR_ISOLATION_STATE_UNISOLATED = 1 +} sPAPRDRIsolationState; + +/* + * set via set-indicator RTAS calls + * as documented by PAPR+ 2.7 13.5.3.4, Table 177 + * + * unusable: mark device as unavailable to OS + * usable: mark device as available to OS + * exchange: (currently unused) + * recover: (currently unused) + */ +typedef enum { + SPAPR_DR_ALLOCATION_STATE_UNUSABLE = 0, + SPAPR_DR_ALLOCATION_STATE_USABLE = 1, + SPAPR_DR_ALLOCATION_STATE_EXCHANGE = 2, + SPAPR_DR_ALLOCATION_STATE_RECOVER = 3 +} sPAPRDRAllocationState; + +/* + * LED/visual indicator state + * + * set via set-indicator RTAS calls + * as documented by PAPR+ 2.7 13.5.3.4, Table 177, + * and PAPR+ 2.7 13.5.4.1, Table 180 + * + * inactive: hotpluggable entity inactive and safely removable + * active: hotpluggable entity in use and not safely removable + * identify: (currently unused) + * action: (currently unused) + */ +typedef enum { + SPAPR_DR_INDICATOR_STATE_INACTIVE = 0, + SPAPR_DR_INDICATOR_STATE_ACTIVE = 1, + SPAPR_DR_INDICATOR_STATE_IDENTIFY = 2, + SPAPR_DR_INDICATOR_STATE_ACTION = 3, +} sPAPRDRIndicatorState; + +/* + * returned via get-sensor-state RTAS calls + * as documented by PAPR+ 2.7 13.5.3.3, Table 175: + * + * empty: connector slot empty (e.g. empty hotpluggable PCI slot) + * present: connector slot populated and device available to OS + * unusable: device not currently available to OS + * exchange: (currently unused) + * recover: (currently unused) + */ +typedef enum { + SPAPR_DR_ENTITY_SENSE_EMPTY = 0, + SPAPR_DR_ENTITY_SENSE_PRESENT = 1, + SPAPR_DR_ENTITY_SENSE_UNUSABLE = 2, + SPAPR_DR_ENTITY_SENSE_EXCHANGE = 3, + SPAPR_DR_ENTITY_SENSE_RECOVER = 4, +} sPAPRDREntitySense; + +typedef enum { + SPAPR_DR_CC_RESPONSE_NEXT_SIB = 1, /* currently unused */ + SPAPR_DR_CC_RESPONSE_NEXT_CHILD = 2, + SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY = 3, + SPAPR_DR_CC_RESPONSE_PREV_PARENT = 4, + SPAPR_DR_CC_RESPONSE_SUCCESS = 0, + SPAPR_DR_CC_RESPONSE_ERROR = -1, + SPAPR_DR_CC_RESPONSE_CONTINUE = -2, +} sPAPRDRCCResponse; + +typedef void (spapr_drc_detach_cb)(DeviceState *d, void *opaque); + +typedef struct sPAPRDRConnector { + /*< private >*/ + DeviceState parent; + + sPAPRDRConnectorType type; + uint32_t id; + Object *owner; + const char *name; + + /* sensor/indicator states */ + uint32_t isolation_state; + uint32_t allocation_state; + uint32_t indicator_state; + + /* configure-connector state */ + void *fdt; + int fdt_start_offset; + bool configured; + + bool awaiting_release; + + /* device pointer, via link property */ + DeviceState *dev; + spapr_drc_detach_cb *detach_cb; + void *detach_cb_opaque; +} sPAPRDRConnector; + +typedef struct sPAPRDRConnectorClass { + /*< private >*/ + DeviceClass parent; + + /*< public >*/ + + /* accessors for guest-visible (generally via RTAS) DR state */ + int (*set_isolation_state)(sPAPRDRConnector *drc, + sPAPRDRIsolationState state); + int (*set_indicator_state)(sPAPRDRConnector *drc, + sPAPRDRIndicatorState state); + int (*set_allocation_state)(sPAPRDRConnector *drc, + sPAPRDRAllocationState state); + uint32_t (*get_index)(sPAPRDRConnector *drc); + uint32_t (*get_type)(sPAPRDRConnector *drc); + const char *(*get_name)(sPAPRDRConnector *drc); + + sPAPRDREntitySense (*entity_sense)(sPAPRDRConnector *drc); + + /* QEMU interfaces for managing FDT/configure-connector */ + const void *(*get_fdt)(sPAPRDRConnector *drc, int *fdt_start_offset); + void (*set_configured)(sPAPRDRConnector *drc); + + /* QEMU interfaces for managing hotplug operations */ + void (*attach)(sPAPRDRConnector *drc, DeviceState *d, void *fdt, + int fdt_start_offset, bool coldplug, Error **errp); + void (*detach)(sPAPRDRConnector *drc, DeviceState *d, + spapr_drc_detach_cb *detach_cb, + void *detach_cb_opaque, Error **errp); + bool (*release_pending)(sPAPRDRConnector *drc); +} sPAPRDRConnectorClass; + +sPAPRDRConnector *spapr_dr_connector_new(Object *owner, + sPAPRDRConnectorType type, + uint32_t id); +sPAPRDRConnector *spapr_dr_connector_by_index(uint32_t index); +sPAPRDRConnector *spapr_dr_connector_by_id(sPAPRDRConnectorType type, + uint32_t id); + +#endif /* __HW_SPAPR_DRC_H__ */ From 094d20585ecdcd31959b1b88a390b4d2c4cfeab7 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Thu, 7 May 2015 15:33:44 +1000 Subject: [PATCH 21/40] spapr_rtas: add get/set-power-level RTAS interfaces These interfaces manage the power domains that guest devices are assigned to and are used to power on/off devices. Currently we only utilize 1 power domain, the 'live-insertion' domain, which automates power management of plugged/unplugged devices, essentially making these calls no-ops, but the RTAS interfaces are still required by guest hotplug code and PAPR+. See docs/specs/ppc-spapr-hotplug.txt for a complete description of these interfaces. Signed-off-by: Nathan Fontenot Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr_rtas.c | 54 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 0f1ae55828..d7694cd0e0 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -245,6 +245,56 @@ static void rtas_ibm_os_term(PowerPCCPU *cpu, rtas_st(rets, 0, ret); } +static void rtas_set_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + int32_t power_domain; + + if (nargs != 2 || nret != 2) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + /* we currently only use a single, "live insert" powerdomain for + * hotplugged/dlpar'd resources, so the power is always live/full (100) + */ + power_domain = rtas_ld(args, 0); + if (power_domain != -1) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, 100); +} + +static void rtas_get_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + int32_t power_domain; + + if (nargs != 1 || nret != 2) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + /* we currently only use a single, "live insert" powerdomain for + * hotplugged/dlpar'd resources, so the power is always live/full (100) + */ + power_domain = rtas_ld(args, 0); + if (power_domain != -1) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, 100); +} + static struct rtas_call { const char *name; spapr_rtas_fn fn; @@ -370,6 +420,10 @@ static void core_rtas_register_types(void) rtas_ibm_set_system_parameter); spapr_rtas_register(RTAS_IBM_OS_TERM, "ibm,os-term", rtas_ibm_os_term); + spapr_rtas_register(RTAS_SET_POWER_LEVEL, "set-power-level", + rtas_set_power_level); + spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level", + rtas_get_power_level); } type_init(core_rtas_register_types) From 8c8639df32f19d5ca9bf6a823ac83e298a188fd1 Mon Sep 17 00:00:00 2001 From: Mike Day Date: Thu, 7 May 2015 15:33:45 +1000 Subject: [PATCH 22/40] spapr_rtas: add set-indicator RTAS interface This interface allows a guest to control various platform/device sensors. Initially, we only implement support necessary to control sensors that are required for hotplug: DR connector indicators/LEDs, resource allocation state, and resource isolation state. See docs/specs/ppc-spapr-hotplug.txt for a complete description of this interface. Signed-off-by: Mike Day Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr_rtas.c | 84 ++++++++++++++++++++++++++++++++++++++++++ include/hw/ppc/spapr.h | 11 ++++++ 2 files changed, 95 insertions(+) diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index d7694cd0e0..6c741facba 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -35,6 +35,18 @@ #include "qapi-event.h" #include +#include "hw/ppc/spapr_drc.h" + +/* #define DEBUG_SPAPR */ + +#ifdef DEBUG_SPAPR +#define DPRINTF(fmt, ...) \ + do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) \ + do { } while (0) +#endif + static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr, uint32_t token, uint32_t nargs, @@ -295,6 +307,76 @@ static void rtas_get_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 1, 100); } +static bool sensor_type_is_dr(uint32_t sensor_type) +{ + switch (sensor_type) { + case RTAS_SENSOR_TYPE_ISOLATION_STATE: + case RTAS_SENSOR_TYPE_DR: + case RTAS_SENSOR_TYPE_ALLOCATION_STATE: + return true; + } + + return false; +} + +static void rtas_set_indicator(PowerPCCPU *cpu, sPAPREnvironment *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + uint32_t sensor_type; + uint32_t sensor_index; + uint32_t sensor_state; + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + + if (nargs != 3 || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + sensor_type = rtas_ld(args, 0); + sensor_index = rtas_ld(args, 1); + sensor_state = rtas_ld(args, 2); + + if (!sensor_type_is_dr(sensor_type)) { + goto out_unimplemented; + } + + /* if this is a DR sensor we can assume sensor_index == drc_index */ + drc = spapr_dr_connector_by_index(sensor_index); + if (!drc) { + DPRINTF("rtas_set_indicator: invalid sensor/DRC index: %xh\n", + sensor_index); + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + switch (sensor_type) { + case RTAS_SENSOR_TYPE_ISOLATION_STATE: + drck->set_isolation_state(drc, sensor_state); + break; + case RTAS_SENSOR_TYPE_DR: + drck->set_indicator_state(drc, sensor_state); + break; + case RTAS_SENSOR_TYPE_ALLOCATION_STATE: + drck->set_allocation_state(drc, sensor_state); + break; + default: + goto out_unimplemented; + } + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + return; + +out_unimplemented: + /* currently only DR-related sensors are implemented */ + DPRINTF("rtas_set_indicator: sensor/indicator not implemented: %d\n", + sensor_type); + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); +} + static struct rtas_call { const char *name; spapr_rtas_fn fn; @@ -424,6 +506,8 @@ static void core_rtas_register_types(void) rtas_set_power_level); spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level", rtas_get_power_level); + spapr_rtas_register(RTAS_SET_INDICATOR, "set-indicator", + rtas_set_indicator); } type_init(core_rtas_register_types) diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 317feb68a0..88109110bb 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -430,6 +430,17 @@ int spapr_allocate_irq_block(int num, bool lsi, bool msi); #define RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE 42 #define RTAS_SYSPARM_UUID 48 +/* RTAS indicator/sensor types + * + * as defined by PAPR+ 2.7 7.3.5.4, Table 41 + * + * NOTE: currently only DR-related sensors are implemented here + */ +#define RTAS_SENSOR_TYPE_ISOLATION_STATE 9001 +#define RTAS_SENSOR_TYPE_DR 9002 +#define RTAS_SENSOR_TYPE_ALLOCATION_STATE 9003 +#define RTAS_SENSOR_TYPE_ENTITY_SENSE RTAS_SENSOR_TYPE_ALLOCATION_STATE + /* Possible values for the platform-processor-diagnostics-run-mode parameter * of the RTAS ibm,get-system-parameter call. */ From 886445a6ee808ee06533f9ecdf0f169c9ea83fbb Mon Sep 17 00:00:00 2001 From: Mike Day Date: Thu, 7 May 2015 15:33:46 +1000 Subject: [PATCH 23/40] spapr_rtas: add get-sensor-state RTAS interface This interface allows a guest to read various platform/device sensors. initially, we only implement support necessary to support hotplug: reading of the dr-entity-sense sensor, which communicates the state of a hotplugged resource/device to the guest (EMPTY/PRESENT/UNUSABLE). See docs/specs/ppc-spapr-hotplug.txt for a complete description of this interface. Signed-off-by: Mike Day Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr_rtas.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 6c741facba..f80beb2d9b 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -377,6 +377,47 @@ out_unimplemented: rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); } +static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPREnvironment *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + uint32_t sensor_type; + uint32_t sensor_index; + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + uint32_t entity_sense; + + if (nargs != 2 || nret != 2) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + sensor_type = rtas_ld(args, 0); + sensor_index = rtas_ld(args, 1); + + if (sensor_type != RTAS_SENSOR_TYPE_ENTITY_SENSE) { + /* currently only DR-related sensors are implemented */ + DPRINTF("rtas_get_sensor_state: sensor/indicator not implemented: %d\n", + sensor_type); + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + drc = spapr_dr_connector_by_index(sensor_index); + if (!drc) { + DPRINTF("rtas_get_sensor_state: invalid sensor/DRC index: %xh\n", + sensor_index); + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + entity_sense = drck->entity_sense(drc); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, entity_sense); +} + static struct rtas_call { const char *name; spapr_rtas_fn fn; @@ -508,6 +549,8 @@ static void core_rtas_register_types(void) rtas_get_power_level); spapr_rtas_register(RTAS_SET_INDICATOR, "set-indicator", rtas_set_indicator); + spapr_rtas_register(RTAS_GET_SENSOR_STATE, "get-sensor-state", + rtas_get_sensor_state); } type_init(core_rtas_register_types) From ab316865db8ee97c53cd70c91b1b160c474102f8 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 7 May 2015 15:33:47 +1000 Subject: [PATCH 24/40] spapr: add rtas_st_buffer_direct() helper This is similar to the existing rtas_st_buffer(), but for cases where the guest is not expecting a length-encoded byte array. Namely, for calls where a "work area" buffer is used to pass around arbitrary fields/data. Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- include/hw/ppc/spapr.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 88109110bb..65ef7dd9f2 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -464,6 +464,13 @@ static inline void rtas_st(target_ulong phys, int n, uint32_t val) stl_be_phys(&address_space_memory, ppc64_phys_to_real(phys + 4*n), val); } +static inline void rtas_st_buffer_direct(target_ulong phys, + target_ulong phys_len, + uint8_t *buffer, uint16_t buffer_len) +{ + cpu_physical_memory_write(ppc64_phys_to_real(phys), buffer, + MIN(buffer_len, phys_len)); +} static inline void rtas_st_buffer(target_ulong phys, target_ulong phys_len, uint8_t *buffer, uint16_t buffer_len) @@ -473,8 +480,7 @@ static inline void rtas_st_buffer(target_ulong phys, target_ulong phys_len, } stw_be_phys(&address_space_memory, ppc64_phys_to_real(phys), buffer_len); - cpu_physical_memory_write(ppc64_phys_to_real(phys + 2), - buffer, MIN(buffer_len, phys_len - 2)); + rtas_st_buffer_direct(phys + 2, phys_len - 2, buffer, buffer_len); } typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, sPAPREnvironment *spapr, From 46503c2bc047bfe8c26440e17298fcbc59d7bbbe Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 7 May 2015 15:33:48 +1000 Subject: [PATCH 25/40] spapr_rtas: add ibm, configure-connector RTAS interface This interface is used to fetch an OF device-tree nodes that describes a newly-attached device to guest. It is called multiple times to walk the device-tree node and fetch individual properties into a 'workarea'/buffer provided by the guest. The device-tree is generated by QEMU and passed to an sPAPRDRConnector during the initial hotplug operation, and the state of these RTAS calls is tracked by the sPAPRDRConnector. When the last of these properties is successfully fetched, we report as special return value to the guest and transition the device to a 'configured' state on the QEMU/DRC side. See docs/specs/ppc-spapr-hotplug.txt for a complete description of this interface. Signed-off-by: Michael Roth Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 4 + hw/ppc/spapr_rtas.c | 180 +++++++++++++++++++++++++++++++++++++++++ include/hw/ppc/spapr.h | 14 ++++ 3 files changed, 198 insertions(+) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 8cf1f2a547..7323efda4c 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1663,6 +1663,10 @@ static void ppc_spapr_init(MachineState *machine) kernel_cmdline, spapr->epow_irq); assert(spapr->fdt_skel != NULL); + /* used by RTAS */ + QTAILQ_INIT(&spapr->ccs_list); + qemu_register_reset(spapr_ccs_reset_hook, spapr); + qemu_register_boot_set(spapr_boot_set, spapr); } diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index f80beb2d9b..fa28d43f81 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -47,6 +47,43 @@ do { } while (0) #endif +static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPREnvironment *spapr, + uint32_t drc_index) +{ + sPAPRConfigureConnectorState *ccs = NULL; + + QTAILQ_FOREACH(ccs, &spapr->ccs_list, next) { + if (ccs->drc_index == drc_index) { + break; + } + } + + return ccs; +} + +static void spapr_ccs_add(sPAPREnvironment *spapr, + sPAPRConfigureConnectorState *ccs) +{ + g_assert(!spapr_ccs_find(spapr, ccs->drc_index)); + QTAILQ_INSERT_HEAD(&spapr->ccs_list, ccs, next); +} + +static void spapr_ccs_remove(sPAPREnvironment *spapr, + sPAPRConfigureConnectorState *ccs) +{ + QTAILQ_REMOVE(&spapr->ccs_list, ccs, next); + g_free(ccs); +} + +void spapr_ccs_reset_hook(void *opaque) +{ + sPAPREnvironment *spapr = opaque; + sPAPRConfigureConnectorState *ccs, *ccs_tmp; + + QTAILQ_FOREACH_SAFE(ccs, &spapr->ccs_list, next, ccs_tmp) { + spapr_ccs_remove(spapr, ccs); + } +} static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr, uint32_t token, uint32_t nargs, @@ -355,6 +392,19 @@ static void rtas_set_indicator(PowerPCCPU *cpu, sPAPREnvironment *spapr, switch (sensor_type) { case RTAS_SENSOR_TYPE_ISOLATION_STATE: + /* if the guest is configuring a device attached to this + * DRC, we should reset the configuration state at this + * point since it may no longer be reliable (guest released + * device and needs to start over, or unplug occurred so + * the FDT is no longer valid) + */ + if (sensor_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) { + sPAPRConfigureConnectorState *ccs = spapr_ccs_find(spapr, + sensor_index); + if (ccs) { + spapr_ccs_remove(spapr, ccs); + } + } drck->set_isolation_state(drc, sensor_state); break; case RTAS_SENSOR_TYPE_DR: @@ -418,6 +468,134 @@ static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 1, entity_sense); } +/* configure-connector work area offsets, int32_t units for field + * indexes, bytes for field offset/len values. + * + * as documented by PAPR+ v2.7, 13.5.3.5 + */ +#define CC_IDX_NODE_NAME_OFFSET 2 +#define CC_IDX_PROP_NAME_OFFSET 2 +#define CC_IDX_PROP_LEN 3 +#define CC_IDX_PROP_DATA_OFFSET 4 +#define CC_VAL_DATA_OFFSET ((CC_IDX_PROP_DATA_OFFSET + 1) * 4) +#define CC_WA_LEN 4096 + +static void rtas_ibm_configure_connector(PowerPCCPU *cpu, + sPAPREnvironment *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + uint64_t wa_addr; + uint64_t wa_offset; + uint32_t drc_index; + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + sPAPRConfigureConnectorState *ccs; + sPAPRDRCCResponse resp = SPAPR_DR_CC_RESPONSE_CONTINUE; + int rc; + const void *fdt; + + if (nargs != 2 || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + wa_addr = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 0); + + drc_index = rtas_ld(wa_addr, 0); + drc = spapr_dr_connector_by_index(drc_index); + if (!drc) { + DPRINTF("rtas_ibm_configure_connector: invalid DRC index: %xh\n", + drc_index); + rc = RTAS_OUT_PARAM_ERROR; + goto out; + } + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + fdt = drck->get_fdt(drc, NULL); + + ccs = spapr_ccs_find(spapr, drc_index); + if (!ccs) { + ccs = g_new0(sPAPRConfigureConnectorState, 1); + (void)drck->get_fdt(drc, &ccs->fdt_offset); + ccs->drc_index = drc_index; + spapr_ccs_add(spapr, ccs); + } + + do { + uint32_t tag; + const char *name; + const struct fdt_property *prop; + int fdt_offset_next, prop_len; + + tag = fdt_next_tag(fdt, ccs->fdt_offset, &fdt_offset_next); + + switch (tag) { + case FDT_BEGIN_NODE: + ccs->fdt_depth++; + name = fdt_get_name(fdt, ccs->fdt_offset, NULL); + + /* provide the name of the next OF node */ + wa_offset = CC_VAL_DATA_OFFSET; + rtas_st(wa_addr, CC_IDX_NODE_NAME_OFFSET, wa_offset); + rtas_st_buffer_direct(wa_addr + wa_offset, CC_WA_LEN - wa_offset, + (uint8_t *)name, strlen(name) + 1); + resp = SPAPR_DR_CC_RESPONSE_NEXT_CHILD; + break; + case FDT_END_NODE: + ccs->fdt_depth--; + if (ccs->fdt_depth == 0) { + /* done sending the device tree, don't need to track + * the state anymore + */ + drck->set_configured(drc); + spapr_ccs_remove(spapr, ccs); + ccs = NULL; + resp = SPAPR_DR_CC_RESPONSE_SUCCESS; + } else { + resp = SPAPR_DR_CC_RESPONSE_PREV_PARENT; + } + break; + case FDT_PROP: + prop = fdt_get_property_by_offset(fdt, ccs->fdt_offset, + &prop_len); + name = fdt_string(fdt, fdt32_to_cpu(prop->nameoff)); + + /* provide the name of the next OF property */ + wa_offset = CC_VAL_DATA_OFFSET; + rtas_st(wa_addr, CC_IDX_PROP_NAME_OFFSET, wa_offset); + rtas_st_buffer_direct(wa_addr + wa_offset, CC_WA_LEN - wa_offset, + (uint8_t *)name, strlen(name) + 1); + + /* provide the length and value of the OF property. data gets + * placed immediately after NULL terminator of the OF property's + * name string + */ + wa_offset += strlen(name) + 1, + rtas_st(wa_addr, CC_IDX_PROP_LEN, prop_len); + rtas_st(wa_addr, CC_IDX_PROP_DATA_OFFSET, wa_offset); + rtas_st_buffer_direct(wa_addr + wa_offset, CC_WA_LEN - wa_offset, + (uint8_t *)((struct fdt_property *)prop)->data, + prop_len); + resp = SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY; + break; + case FDT_END: + resp = SPAPR_DR_CC_RESPONSE_ERROR; + default: + /* keep seeking for an actionable tag */ + break; + } + if (ccs) { + ccs->fdt_offset = fdt_offset_next; + } + } while (resp == SPAPR_DR_CC_RESPONSE_CONTINUE); + + rc = resp; +out: + rtas_st(rets, 0, rc); +} + static struct rtas_call { const char *name; spapr_rtas_fn fn; @@ -551,6 +729,8 @@ static void core_rtas_register_types(void) rtas_set_indicator); spapr_rtas_register(RTAS_GET_SENSOR_STATE, "get-sensor-state", rtas_get_sensor_state); + spapr_rtas_register(RTAS_IBM_CONFIGURE_CONNECTOR, "ibm,configure-connector", + rtas_ibm_configure_connector); } type_init(core_rtas_register_types) diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 65ef7dd9f2..86221934c8 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -7,6 +7,7 @@ struct VIOsPAPRBus; struct sPAPRPHBState; struct sPAPRNVRAM; +typedef struct sPAPRConfigureConnectorState sPAPRConfigureConnectorState; #define HPTE64_V_HPTE_DIRTY 0x0000000000000040ULL @@ -39,6 +40,9 @@ typedef struct sPAPREnvironment { bool htab_first_pass; int htab_fd; bool htab_fd_stale; + + /* RTAS state */ + QTAILQ_HEAD(, sPAPRConfigureConnectorState) ccs_list; } sPAPREnvironment; #define H_SUCCESS 0 @@ -544,6 +548,16 @@ int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname, sPAPRTCETable *tcet); void spapr_pci_switch_vga(bool big_endian); +/* rtas-configure-connector state */ +struct sPAPRConfigureConnectorState { + uint32_t drc_index; + int fdt_offset; + int fdt_depth; + QTAILQ_ENTRY(sPAPRConfigureConnectorState) next; +}; + +void spapr_ccs_reset_hook(void *opaque); + #define TYPE_SPAPR_RTC "spapr-rtc" void spapr_rtc_read(DeviceState *dev, struct tm *tm, uint32_t *ns); From 31fe14d15d08d613ff38abb249911e98c7966b86 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Thu, 7 May 2015 15:33:49 +1000 Subject: [PATCH 26/40] spapr_events: re-use EPOW event infrastructure for hotplug events This extends the data structures currently used to report EPOW events to guests via the check-exception RTAS interfaces to also include event types for hotplug/unplug events. This is currently undocumented and being finalized for inclusion in PAPR specification, but we implement this here as an extension for guest userspace tools to implement (existing guest kernels simply log these events via a sysfs interface that's read by rtas_errd, and current versions of rtas_errd/powerpc-utils already support the use of this mechanism for initiating hotplug operations). We also add support for queues of pending RTAS events, since in the case of hotplug there's chance for multiple events being in-flight at any point in time. Signed-off-by: Nathan Fontenot Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 3 +- hw/ppc/spapr_events.c | 295 +++++++++++++++++++++++++++++++++-------- include/hw/ppc/spapr.h | 14 +- 3 files changed, 256 insertions(+), 56 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 7323efda4c..15eebb4b2d 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1660,7 +1660,8 @@ static void ppc_spapr_init(MachineState *machine) /* Prepare the device tree */ spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size, kernel_size, kernel_le, - kernel_cmdline, spapr->epow_irq); + kernel_cmdline, + spapr->check_exception_irq); assert(spapr->fdt_skel != NULL); /* used by RTAS */ diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 283e96bca1..c634a3b435 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -32,6 +32,9 @@ #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_vio.h" +#include "hw/pci/pci.h" +#include "hw/pci-host/spapr.h" +#include "hw/ppc/spapr_drc.h" #include @@ -77,6 +80,7 @@ struct rtas_error_log { #define RTAS_LOG_TYPE_ECC_UNCORR 0x00000009 #define RTAS_LOG_TYPE_ECC_CORR 0x0000000a #define RTAS_LOG_TYPE_EPOW 0x00000040 +#define RTAS_LOG_TYPE_HOTPLUG 0x000000e5 uint32_t extended_length; } QEMU_PACKED; @@ -166,6 +170,38 @@ struct epow_log_full { struct rtas_event_log_v6_epow epow; } QEMU_PACKED; +struct rtas_event_log_v6_hp { +#define RTAS_LOG_V6_SECTION_ID_HOTPLUG 0x4850 /* HP */ + struct rtas_event_log_v6_section_header hdr; + uint8_t hotplug_type; +#define RTAS_LOG_V6_HP_TYPE_CPU 1 +#define RTAS_LOG_V6_HP_TYPE_MEMORY 2 +#define RTAS_LOG_V6_HP_TYPE_SLOT 3 +#define RTAS_LOG_V6_HP_TYPE_PHB 4 +#define RTAS_LOG_V6_HP_TYPE_PCI 5 + uint8_t hotplug_action; +#define RTAS_LOG_V6_HP_ACTION_ADD 1 +#define RTAS_LOG_V6_HP_ACTION_REMOVE 2 + uint8_t hotplug_identifier; +#define RTAS_LOG_V6_HP_ID_DRC_NAME 1 +#define RTAS_LOG_V6_HP_ID_DRC_INDEX 2 +#define RTAS_LOG_V6_HP_ID_DRC_COUNT 3 + uint8_t reserved; + union { + uint32_t index; + uint32_t count; + char name[1]; + } drc; +} QEMU_PACKED; + +struct hp_log_full { + struct rtas_error_log hdr; + struct rtas_event_log_v6 v6hdr; + struct rtas_event_log_v6_maina maina; + struct rtas_event_log_v6_mainb mainb; + struct rtas_event_log_v6_hp hp; +} QEMU_PACKED; + #define EVENT_MASK_INTERNAL_ERRORS 0x80000000 #define EVENT_MASK_EPOW 0x40000000 #define EVENT_MASK_HOTPLUG 0x10000000 @@ -181,67 +217,95 @@ struct epow_log_full { } \ } while (0) -void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq) +void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq) { - uint32_t epow_irq_ranges[] = {cpu_to_be32(epow_irq), cpu_to_be32(1)}; - uint32_t epow_interrupts[] = {cpu_to_be32(epow_irq), 0}; + uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq), cpu_to_be32(1)}; + uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0}; _FDT((fdt_begin_node(fdt, "event-sources"))); _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0))); _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2))); _FDT((fdt_property(fdt, "interrupt-ranges", - epow_irq_ranges, sizeof(epow_irq_ranges)))); + irq_ranges, sizeof(irq_ranges)))); _FDT((fdt_begin_node(fdt, "epow-events"))); - _FDT((fdt_property(fdt, "interrupts", - epow_interrupts, sizeof(epow_interrupts)))); + _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts)))); _FDT((fdt_end_node(fdt))); _FDT((fdt_end_node(fdt))); } -static struct epow_log_full *pending_epow; +static void rtas_event_log_queue(int log_type, void *data) +{ + sPAPREventLogEntry *entry = g_new(sPAPREventLogEntry, 1); + + g_assert(data); + entry->log_type = log_type; + entry->data = data; + QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next); +} + +static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask) +{ + sPAPREventLogEntry *entry = NULL; + + /* we only queue EPOW events atm. */ + if ((event_mask & EVENT_MASK_EPOW) == 0) { + return NULL; + } + + QTAILQ_FOREACH(entry, &spapr->pending_events, next) { + /* EPOW and hotplug events are surfaced in the same manner */ + if (entry->log_type == RTAS_LOG_TYPE_EPOW || + entry->log_type == RTAS_LOG_TYPE_HOTPLUG) { + break; + } + } + + if (entry) { + QTAILQ_REMOVE(&spapr->pending_events, entry, next); + } + + return entry; +} + +static bool rtas_event_log_contains(uint32_t event_mask) +{ + sPAPREventLogEntry *entry = NULL; + + /* we only queue EPOW events atm. */ + if ((event_mask & EVENT_MASK_EPOW) == 0) { + return false; + } + + QTAILQ_FOREACH(entry, &spapr->pending_events, next) { + /* EPOW and hotplug events are surfaced in the same manner */ + if (entry->log_type == RTAS_LOG_TYPE_EPOW || + entry->log_type == RTAS_LOG_TYPE_HOTPLUG) { + return true; + } + } + + return false; +} + static uint32_t next_plid; -static void spapr_powerdown_req(Notifier *n, void *opaque) +static void spapr_init_v6hdr(struct rtas_event_log_v6 *v6hdr) { - sPAPREnvironment *spapr = container_of(n, sPAPREnvironment, epow_notifier); - struct rtas_error_log *hdr; - struct rtas_event_log_v6 *v6hdr; - struct rtas_event_log_v6_maina *maina; - struct rtas_event_log_v6_mainb *mainb; - struct rtas_event_log_v6_epow *epow; - struct tm tm; - int year; - - if (pending_epow) { - /* For now, we just throw away earlier events if two come - * along before any are consumed. This is sufficient for our - * powerdown messages, but we'll need more if we do more - * general error/event logging */ - g_free(pending_epow); - } - pending_epow = g_malloc0(sizeof(*pending_epow)); - hdr = &pending_epow->hdr; - v6hdr = &pending_epow->v6hdr; - maina = &pending_epow->maina; - mainb = &pending_epow->mainb; - epow = &pending_epow->epow; - - hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6 - | RTAS_LOG_SEVERITY_EVENT - | RTAS_LOG_DISPOSITION_NOT_RECOVERED - | RTAS_LOG_OPTIONAL_PART_PRESENT - | RTAS_LOG_TYPE_EPOW); - hdr->extended_length = cpu_to_be32(sizeof(*pending_epow) - - sizeof(pending_epow->hdr)); - v6hdr->b0 = RTAS_LOG_V6_B0_VALID | RTAS_LOG_V6_B0_NEW_LOG | RTAS_LOG_V6_B0_BIGENDIAN; v6hdr->b2 = RTAS_LOG_V6_B2_POWERPC_FORMAT | RTAS_LOG_V6_B2_LOG_FORMAT_PLATFORM_EVENT; v6hdr->company = cpu_to_be32(RTAS_LOG_V6_COMPANY_IBM); +} + +static void spapr_init_maina(struct rtas_event_log_v6_maina *maina, + int section_count) +{ + struct tm tm; + int year; maina->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINA); maina->hdr.section_length = cpu_to_be16(sizeof(*maina)); @@ -256,8 +320,37 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) | (to_bcd(tm.tm_min) << 16) | (to_bcd(tm.tm_sec) << 8)); maina->creator_id = 'H'; /* Hypervisor */ - maina->section_count = 3; /* Main-A, Main-B and EPOW */ + maina->section_count = section_count; maina->plid = next_plid++; +} + +static void spapr_powerdown_req(Notifier *n, void *opaque) +{ + sPAPREnvironment *spapr = container_of(n, sPAPREnvironment, epow_notifier); + struct rtas_error_log *hdr; + struct rtas_event_log_v6 *v6hdr; + struct rtas_event_log_v6_maina *maina; + struct rtas_event_log_v6_mainb *mainb; + struct rtas_event_log_v6_epow *epow; + struct epow_log_full *new_epow; + + new_epow = g_malloc0(sizeof(*new_epow)); + hdr = &new_epow->hdr; + v6hdr = &new_epow->v6hdr; + maina = &new_epow->maina; + mainb = &new_epow->mainb; + epow = &new_epow->epow; + + hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6 + | RTAS_LOG_SEVERITY_EVENT + | RTAS_LOG_DISPOSITION_NOT_RECOVERED + | RTAS_LOG_OPTIONAL_PART_PRESENT + | RTAS_LOG_TYPE_EPOW); + hdr->extended_length = cpu_to_be32(sizeof(*new_epow) + - sizeof(new_epow->hdr)); + + spapr_init_v6hdr(v6hdr); + spapr_init_maina(maina, 3 /* Main-A, Main-B and EPOW */); mainb->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINB); mainb->hdr.section_length = cpu_to_be16(sizeof(*mainb)); @@ -274,7 +367,80 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL; epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC; - qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->epow_irq)); + rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow); + + qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq)); +} + +static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action) +{ + struct hp_log_full *new_hp; + struct rtas_error_log *hdr; + struct rtas_event_log_v6 *v6hdr; + struct rtas_event_log_v6_maina *maina; + struct rtas_event_log_v6_mainb *mainb; + struct rtas_event_log_v6_hp *hp; + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + sPAPRDRConnectorType drc_type = drck->get_type(drc); + + new_hp = g_malloc0(sizeof(struct hp_log_full)); + hdr = &new_hp->hdr; + v6hdr = &new_hp->v6hdr; + maina = &new_hp->maina; + mainb = &new_hp->mainb; + hp = &new_hp->hp; + + hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6 + | RTAS_LOG_SEVERITY_EVENT + | RTAS_LOG_DISPOSITION_NOT_RECOVERED + | RTAS_LOG_OPTIONAL_PART_PRESENT + | RTAS_LOG_INITIATOR_HOTPLUG + | RTAS_LOG_TYPE_HOTPLUG); + hdr->extended_length = cpu_to_be32(sizeof(*new_hp) + - sizeof(new_hp->hdr)); + + spapr_init_v6hdr(v6hdr); + spapr_init_maina(maina, 3 /* Main-A, Main-B, HP */); + + mainb->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINB); + mainb->hdr.section_length = cpu_to_be16(sizeof(*mainb)); + mainb->subsystem_id = 0x80; /* External environment */ + mainb->event_severity = 0x00; /* Informational / non-error */ + mainb->event_subtype = 0x00; /* Normal shutdown */ + + hp->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_HOTPLUG); + hp->hdr.section_length = cpu_to_be16(sizeof(*hp)); + hp->hdr.section_version = 1; /* includes extended modifier */ + hp->hotplug_action = hp_action; + + + switch (drc_type) { + case SPAPR_DR_CONNECTOR_TYPE_PCI: + hp->drc.index = cpu_to_be32(drck->get_index(drc)); + hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX; + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI; + break; + default: + /* we shouldn't be signaling hotplug events for resources + * that don't support them + */ + g_assert(false); + return; + } + + rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp); + + qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq)); +} + +void spapr_hotplug_req_add_event(sPAPRDRConnector *drc) +{ + spapr_hotplug_req_event(drc, RTAS_LOG_V6_HP_ACTION_ADD); +} + +void spapr_hotplug_req_remove_event(sPAPRDRConnector *drc) +{ + spapr_hotplug_req_event(drc, RTAS_LOG_V6_HP_ACTION_REMOVE); } static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr, @@ -282,8 +448,10 @@ static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong args, uint32_t nret, target_ulong rets) { - uint32_t mask, buf, len; + uint32_t mask, buf, len, event_len; uint64_t xinfo; + sPAPREventLogEntry *event; + struct rtas_error_log *hdr; if ((nargs < 6) || (nargs > 7) || nret != 1) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); @@ -298,23 +466,42 @@ static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr, xinfo |= (uint64_t)rtas_ld(args, 6) << 32; } - if ((mask & EVENT_MASK_EPOW) && pending_epow) { - if (sizeof(*pending_epow) < len) { - len = sizeof(*pending_epow); - } - - cpu_physical_memory_write(buf, pending_epow, len); - g_free(pending_epow); - pending_epow = NULL; - rtas_st(rets, 0, RTAS_OUT_SUCCESS); - } else { - rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); + event = rtas_event_log_dequeue(mask); + if (!event) { + goto out_no_events; } + + hdr = event->data; + event_len = be32_to_cpu(hdr->extended_length) + sizeof(*hdr); + + if (event_len < len) { + len = event_len; + } + + cpu_physical_memory_write(buf, event->data, len); + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + g_free(event->data); + g_free(event); + + /* according to PAPR+, the IRQ must be left asserted, or re-asserted, if + * there are still pending events to be fetched via check-exception. We + * do the latter here, since our code relies on edge-triggered + * interrupts. + */ + if (rtas_event_log_contains(mask)) { + qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq)); + } + + return; + +out_no_events: + rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); } void spapr_events_init(sPAPREnvironment *spapr) { - spapr->epow_irq = xics_alloc(spapr->icp, 0, 0, false); + QTAILQ_INIT(&spapr->pending_events); + spapr->check_exception_irq = xics_alloc(spapr->icp, 0, 0, false); spapr->epow_notifier.notify = spapr_powerdown_req; qemu_register_powerdown_notifier(&spapr->epow_notifier); spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception", diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 86221934c8..71b3e0876b 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -3,11 +3,13 @@ #include "sysemu/dma.h" #include "hw/ppc/xics.h" +#include "hw/ppc/spapr_drc.h" struct VIOsPAPRBus; struct sPAPRPHBState; struct sPAPRNVRAM; typedef struct sPAPRConfigureConnectorState sPAPRConfigureConnectorState; +typedef struct sPAPREventLogEntry sPAPREventLogEntry; #define HPTE64_V_HPTE_DIRTY 0x0000000000000040ULL @@ -32,8 +34,9 @@ typedef struct sPAPREnvironment { struct PPCTimebase tb; bool has_graphics; - uint32_t epow_irq; + uint32_t check_exception_irq; Notifier epow_notifier; + QTAILQ_HEAD(, sPAPREventLogEntry) pending_events; /* Migration state */ int htab_save_index; @@ -533,6 +536,13 @@ struct sPAPRTCETable { }; sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn); + +struct sPAPREventLogEntry { + int log_type; + void *data; + QTAILQ_ENTRY(sPAPREventLogEntry) next; +}; + void spapr_events_init(sPAPREnvironment *spapr); void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq); int spapr_h_cas_compose_response(target_ulong addr, target_ulong size); @@ -547,6 +557,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char *propname, int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname, sPAPRTCETable *tcet); void spapr_pci_switch_vga(bool big_endian); +void spapr_hotplug_req_add_event(sPAPRDRConnector *drc); +void spapr_hotplug_req_remove_event(sPAPRDRConnector *drc); /* rtas-configure-connector state */ struct sPAPRConfigureConnectorState { From 79853e18d904b0a4bcef62701d48559688007c93 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Thu, 7 May 2015 15:33:50 +1000 Subject: [PATCH 27/40] spapr_events: event-scan RTAS interface We don't actually rely on this interface to surface hotplug events, and instead rely on the similar-but-interrupt-driven check-exception RTAS interface used for EPOW events. However, the existence of this interface is needed to ensure guest kernels initialize the event-reporting interfaces which will in turn be used by userspace tools to handle these events, so we implement this interface here. Since events surfaced by this call are mutually exclusive to those surfaced via check-exception, we also update the RTAS event queue code to accept a boolean to mark/filter for events accordingly. Events of this sort are not currently generated by QEMU, but the interface has been tested by surfacing hotplug events via event-scan in place of check-exception. Signed-off-by: Tyrel Datwyler Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 2 ++ hw/ppc/spapr_events.c | 65 +++++++++++++++++++++++++++++++++++++----- include/hw/ppc/spapr.h | 3 ++ 3 files changed, 63 insertions(+), 7 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 15eebb4b2d..b0b9f8116d 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -533,6 +533,8 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, refpoints, sizeof(refpoints)))); _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX))); + _FDT((fdt_property_cell(fdt, "rtas-event-scan-rate", + RTAS_EVENT_SCAN_RATE))); /* * According to PAPR, rtas ibm,os-term does not guarantee a return diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index c634a3b435..fda9e3590a 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -236,17 +236,19 @@ void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq) _FDT((fdt_end_node(fdt))); } -static void rtas_event_log_queue(int log_type, void *data) +static void rtas_event_log_queue(int log_type, void *data, bool exception) { sPAPREventLogEntry *entry = g_new(sPAPREventLogEntry, 1); g_assert(data); entry->log_type = log_type; + entry->exception = exception; entry->data = data; QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next); } -static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask) +static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, + bool exception) { sPAPREventLogEntry *entry = NULL; @@ -256,6 +258,10 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask) } QTAILQ_FOREACH(entry, &spapr->pending_events, next) { + if (entry->exception != exception) { + continue; + } + /* EPOW and hotplug events are surfaced in the same manner */ if (entry->log_type == RTAS_LOG_TYPE_EPOW || entry->log_type == RTAS_LOG_TYPE_HOTPLUG) { @@ -270,7 +276,7 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask) return entry; } -static bool rtas_event_log_contains(uint32_t event_mask) +static bool rtas_event_log_contains(uint32_t event_mask, bool exception) { sPAPREventLogEntry *entry = NULL; @@ -280,6 +286,10 @@ static bool rtas_event_log_contains(uint32_t event_mask) } QTAILQ_FOREACH(entry, &spapr->pending_events, next) { + if (entry->exception != exception) { + continue; + } + /* EPOW and hotplug events are surfaced in the same manner */ if (entry->log_type == RTAS_LOG_TYPE_EPOW || entry->log_type == RTAS_LOG_TYPE_HOTPLUG) { @@ -367,7 +377,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL; epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC; - rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow); + rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true); qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq)); } @@ -428,7 +438,7 @@ static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action) return; } - rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp); + rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq)); } @@ -466,7 +476,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr, xinfo |= (uint64_t)rtas_ld(args, 6) << 32; } - event = rtas_event_log_dequeue(mask); + event = rtas_event_log_dequeue(mask, true); if (!event) { goto out_no_events; } @@ -488,7 +498,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr, * do the latter here, since our code relies on edge-triggered * interrupts. */ - if (rtas_event_log_contains(mask)) { + if (rtas_event_log_contains(mask, true)) { qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq)); } @@ -498,6 +508,46 @@ out_no_events: rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); } +static void event_scan(PowerPCCPU *cpu, sPAPREnvironment *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint32_t mask, buf, len, event_len; + sPAPREventLogEntry *event; + struct rtas_error_log *hdr; + + if (nargs != 4 || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + mask = rtas_ld(args, 0); + buf = rtas_ld(args, 2); + len = rtas_ld(args, 3); + + event = rtas_event_log_dequeue(mask, false); + if (!event) { + goto out_no_events; + } + + hdr = event->data; + event_len = be32_to_cpu(hdr->extended_length) + sizeof(*hdr); + + if (event_len < len) { + len = event_len; + } + + cpu_physical_memory_write(buf, event->data, len); + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + g_free(event->data); + g_free(event); + return; + +out_no_events: + rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); +} + void spapr_events_init(sPAPREnvironment *spapr) { QTAILQ_INIT(&spapr->pending_events); @@ -506,4 +556,5 @@ void spapr_events_init(sPAPREnvironment *spapr) qemu_register_powerdown_notifier(&spapr->epow_notifier); spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception", check_exception); + spapr_rtas_register(RTAS_EVENT_SCAN, "event-scan", event_scan); } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 71b3e0876b..7b4b1bb3d7 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -514,6 +514,8 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, #define RTAS_ERROR_LOG_MAX 2048 +#define RTAS_EVENT_SCAN_RATE 1 + typedef struct sPAPRTCETable sPAPRTCETable; #define TYPE_SPAPR_TCE_TABLE "spapr-tce-table" @@ -539,6 +541,7 @@ sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn); struct sPAPREventLogEntry { int log_type; + bool exception; void *data; QTAILQ_ENTRY(sPAPREventLogEntry) next; }; From e4b798bb53447ba4608fc7e6ed91927bdb1c3d5d Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 7 May 2015 15:33:51 +1000 Subject: [PATCH 28/40] spapr_drc: add spapr_drc_populate_dt() This function handles generation of ibm,drc-* array device tree properties to describe DRC topology to guests. This will by used by the guest to direct RTAS calls to manage any dynamic resources we associate with a particular DR Connector as part of hotplug/unplug. Since general management of boot-time device trees are handled outside of sPAPRDRConnector, we insert these values blindly given an FDT and offset. A mask of sPAPRDRConnector types is given to instruct us on what types of connectors entries should be generated for, since descriptions for different connectors may live in different parts of the device tree. Based on code originally written by Nathan Fontenot. Signed-off-by: Nathan Fontenot Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr_drc.c | 156 +++++++++++++++++++++++++++++++++++++ include/hw/ppc/spapr_drc.h | 2 + 2 files changed, 158 insertions(+) diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 047c6c7393..ef985381cb 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -586,3 +586,159 @@ sPAPRDRConnector *spapr_dr_connector_by_id(sPAPRDRConnectorType type, (get_type_shift(type) << DRC_INDEX_TYPE_SHIFT) | (id & DRC_INDEX_ID_MASK)); } + +/* generate a string the describes the DRC to encode into the + * device tree. + * + * as documented by PAPR+ v2.7, 13.5.2.6 and C.6.1 + */ +static const char *spapr_drc_get_type_str(sPAPRDRConnectorType type) +{ + switch (type) { + case SPAPR_DR_CONNECTOR_TYPE_CPU: + return "CPU"; + case SPAPR_DR_CONNECTOR_TYPE_PHB: + return "PHB"; + case SPAPR_DR_CONNECTOR_TYPE_VIO: + return "SLOT"; + case SPAPR_DR_CONNECTOR_TYPE_PCI: + return "28"; + case SPAPR_DR_CONNECTOR_TYPE_LMB: + return "MEM"; + default: + g_assert(false); + } + + return NULL; +} + +/** + * spapr_drc_populate_dt + * + * @fdt: libfdt device tree + * @path: path in the DT to generate properties + * @owner: parent Object/DeviceState for which to generate DRC + * descriptions for + * @drc_type_mask: mask of sPAPRDRConnectorType values corresponding + * to the types of DRCs to generate entries for + * + * generate OF properties to describe DRC topology/indices to guests + * + * as documented in PAPR+ v2.1, 13.5.2 + */ +int spapr_drc_populate_dt(void *fdt, int fdt_offset, Object *owner, + uint32_t drc_type_mask) +{ + Object *root_container; + ObjectProperty *prop; + uint32_t drc_count = 0; + GArray *drc_indexes, *drc_power_domains; + GString *drc_names, *drc_types; + int ret; + + /* the first entry of each properties is a 32-bit integer encoding + * the number of elements in the array. we won't know this until + * we complete the iteration through all the matching DRCs, but + * reserve the space now and set the offsets accordingly so we + * can fill them in later. + */ + drc_indexes = g_array_new(false, true, sizeof(uint32_t)); + drc_indexes = g_array_set_size(drc_indexes, 1); + drc_power_domains = g_array_new(false, true, sizeof(uint32_t)); + drc_power_domains = g_array_set_size(drc_power_domains, 1); + drc_names = g_string_set_size(g_string_new(NULL), sizeof(uint32_t)); + drc_types = g_string_set_size(g_string_new(NULL), sizeof(uint32_t)); + + /* aliases for all DRConnector objects will be rooted in QOM + * composition tree at DRC_CONTAINER_PATH + */ + root_container = container_get(object_get_root(), DRC_CONTAINER_PATH); + + QTAILQ_FOREACH(prop, &root_container->properties, node) { + Object *obj; + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + uint32_t drc_index, drc_power_domain; + + if (!strstart(prop->type, "link<", NULL)) { + continue; + } + + obj = object_property_get_link(root_container, prop->name, NULL); + drc = SPAPR_DR_CONNECTOR(obj); + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + if (owner && (drc->owner != owner)) { + continue; + } + + if ((drc->type & drc_type_mask) == 0) { + continue; + } + + drc_count++; + + /* ibm,drc-indexes */ + drc_index = cpu_to_be32(drck->get_index(drc)); + g_array_append_val(drc_indexes, drc_index); + + /* ibm,drc-power-domains */ + drc_power_domain = cpu_to_be32(-1); + g_array_append_val(drc_power_domains, drc_power_domain); + + /* ibm,drc-names */ + drc_names = g_string_append(drc_names, drck->get_name(drc)); + drc_names = g_string_insert_len(drc_names, -1, "\0", 1); + + /* ibm,drc-types */ + drc_types = g_string_append(drc_types, + spapr_drc_get_type_str(drc->type)); + drc_types = g_string_insert_len(drc_types, -1, "\0", 1); + } + + /* now write the drc count into the space we reserved at the + * beginning of the arrays previously + */ + *(uint32_t *)drc_indexes->data = cpu_to_be32(drc_count); + *(uint32_t *)drc_power_domains->data = cpu_to_be32(drc_count); + *(uint32_t *)drc_names->str = cpu_to_be32(drc_count); + *(uint32_t *)drc_types->str = cpu_to_be32(drc_count); + + ret = fdt_setprop(fdt, fdt_offset, "ibm,drc-indexes", + drc_indexes->data, + drc_indexes->len * sizeof(uint32_t)); + if (ret) { + fprintf(stderr, "Couldn't create ibm,drc-indexes property\n"); + goto out; + } + + ret = fdt_setprop(fdt, fdt_offset, "ibm,drc-power-domains", + drc_power_domains->data, + drc_power_domains->len * sizeof(uint32_t)); + if (ret) { + fprintf(stderr, "Couldn't finalize ibm,drc-power-domains property\n"); + goto out; + } + + ret = fdt_setprop(fdt, fdt_offset, "ibm,drc-names", + drc_names->str, drc_names->len); + if (ret) { + fprintf(stderr, "Couldn't finalize ibm,drc-names property\n"); + goto out; + } + + ret = fdt_setprop(fdt, fdt_offset, "ibm,drc-types", + drc_types->str, drc_types->len); + if (ret) { + fprintf(stderr, "Couldn't finalize ibm,drc-types property\n"); + goto out; + } + +out: + g_array_free(drc_indexes, true); + g_array_free(drc_power_domains, true); + g_string_free(drc_names, true); + g_string_free(drc_types, true); + + return ret; +} diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h index 34fdef97e5..60cda35ed2 100644 --- a/include/hw/ppc/spapr_drc.h +++ b/include/hw/ppc/spapr_drc.h @@ -195,5 +195,7 @@ sPAPRDRConnector *spapr_dr_connector_new(Object *owner, sPAPRDRConnector *spapr_dr_connector_by_index(uint32_t index); sPAPRDRConnector *spapr_dr_connector_by_id(sPAPRDRConnectorType type, uint32_t id); +int spapr_drc_populate_dt(void *fdt, int fdt_offset, Object *owner, + uint32_t drc_type_mask); #endif /* __HW_SPAPR_DRC_H__ */ From 7619c7b00c90a39243f1229facde8c53a8fba921 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 7 May 2015 15:33:52 +1000 Subject: [PATCH 29/40] spapr_pci: add dynamic-reconfiguration option for spapr-pci-host-bridge This option enables/disables PCI hotplug for a particular PHB. Also add machine compatibility code to disable it by default for machine types prior to pseries-2.4. Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson [agraf: move commas for compat fields] Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 9 +++++++-- hw/ppc/spapr_pci.c | 2 ++ include/hw/pci-host/spapr.h | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index b0b9f8116d..8a21f1d3ef 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1823,7 +1823,12 @@ static const TypeInfo spapr_machine_info = { }; #define SPAPR_COMPAT_2_3 \ - HW_COMPAT_2_3 + HW_COMPAT_2_3 \ + {\ + .driver = "spapr-pci-host-bridge",\ + .property = "dynamic-reconfiguration",\ + .value = "off",\ + }, #define SPAPR_COMPAT_2_2 \ SPAPR_COMPAT_2_3 \ @@ -1913,7 +1918,7 @@ static const TypeInfo spapr_machine_2_2_info = { static void spapr_machine_2_3_class_init(ObjectClass *oc, void *data) { static GlobalProperty compat_props[] = { - /* SPAPR_COMPAT_2_3, */ + SPAPR_COMPAT_2_3 { /* end of list */ } }; MachineClass *mc = MACHINE_CLASS(oc); diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 52c5c73c5a..a2dcc6ad76 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -936,6 +936,8 @@ static Property spapr_phb_properties[] = { DEFINE_PROP_UINT64("io_win_addr", sPAPRPHBState, io_win_addr, -1), DEFINE_PROP_UINT64("io_win_size", sPAPRPHBState, io_win_size, SPAPR_PCI_IO_WIN_SIZE), + DEFINE_PROP_BOOL("dynamic-reconfiguration", sPAPRPHBState, dr_enabled, + true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 5b497ce5f6..9dca38837b 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -71,6 +71,7 @@ struct sPAPRPHBState { uint32_t index; uint64_t buid; char *dtbusname; + bool dr_enabled; MemoryRegion memspace, iospace; hwaddr mem_win_addr, mem_win_size, io_win_addr, io_win_size; From 62083979b0471ac07da6d94944bf12a9b18baa1f Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 7 May 2015 15:33:53 +1000 Subject: [PATCH 30/40] spapr_pci: create DRConnectors for each PCI slot during PHB realize These will be used to support hotplug/unplug of PCI devices to the PCI bus associated with a particular PHB. We also set up device-tree properties in each PHBs initial FDT to describe the DRCs associated with them. This advertises to guests that each PHB is DR-capable device with physical hotpluggable slots, each managed by the corresponding DRC. This is necessary for allowing hotplugging of devices to it later via bus rescan or guest rpaphp hotplug module. Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr_pci.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index a2dcc6ad76..c17e5f2c12 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -35,6 +35,7 @@ #include "qemu/error-report.h" #include "hw/pci/pci_bus.h" +#include "hw/ppc/spapr_drc.h" /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */ #define RTAS_QUERY_FN 0 @@ -880,6 +881,15 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) sphb->lsi_table[i].irq = irq; } + /* allocate connectors for child PCI devices */ + if (sphb->dr_enabled) { + for (i = 0; i < PCI_SLOT_MAX * 8; i++) { + spapr_dr_connector_new(OBJECT(phb), + SPAPR_DR_CONNECTOR_TYPE_PCI, + (sphb->index << 16) | i); + } + } + if (!info->finish_realize) { error_setg(errp, "finish_realize not defined"); return; @@ -1096,7 +1106,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t xics_phandle, void *fdt) { - int bus_off, i, j; + int bus_off, i, j, ret; char nodename[256]; uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) }; const uint64_t mmiosize = memory_region_size(&phb->memwindow); @@ -1188,6 +1198,12 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, tcet->liobn, tcet->bus_offset, tcet->nb_table << tcet->page_shift); + ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb), + SPAPR_DR_CONNECTOR_TYPE_PCI); + if (ret) { + return ret; + } + return 0; } From cf8c704d5a06e7b8327c65d19d0c342dc23fff84 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 7 May 2015 15:33:54 +1000 Subject: [PATCH 31/40] pci: make pci_bar useable outside pci.c We need to work with PCI BARs to generate OF properties during PCI hotplug for sPAPR guests. Signed-off-by: Michael Roth Reviewed-by: David Gibson Acked-by: Michael S. Tsirkin Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/pci/pci.c | 2 +- include/hw/pci/pci.h | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 48f19a306d..3423c3a1eb 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -123,7 +123,7 @@ static uint16_t pci_default_sub_device_id = PCI_SUBDEVICE_ID_QEMU; static QLIST_HEAD(, PCIHostState) pci_host_bridges; -static int pci_bar(PCIDevice *d, int reg) +int pci_bar(PCIDevice *d, int reg) { uint8_t type; diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 5d050c830f..6c2af0d46e 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -334,6 +334,12 @@ int pci_device_load(PCIDevice *s, QEMUFile *f); MemoryRegion *pci_address_space(PCIDevice *dev); MemoryRegion *pci_address_space_io(PCIDevice *dev); +/* + * Should not normally be used by devices. For use by sPAPR target + * where QEMU emulates firmware. + */ +int pci_bar(PCIDevice *d, int reg); + typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level); typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num); typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin); From 7454c7af91bdd60216e2b6eead827c012bb4d0d0 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 7 May 2015 15:33:55 +1000 Subject: [PATCH 32/40] spapr_pci: enable basic hotplug operations This enables hotplug of PCI devices to a PHB. Upon hotplug we generate the OF-nodes required by PAPR specification and IEEE 1275-1994 "PCI Bus Binding to Open Firmware" for the device. We associate the corresponding FDT for these nodes with the DRC corresponding to the slot, which will be fetched via ibm,configure-connector RTAS calls by the guest as described by PAPR specification. Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr_pci.c | 399 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 380 insertions(+), 19 deletions(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index c17e5f2c12..d2e4161998 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -33,9 +33,11 @@ #include #include "trace.h" #include "qemu/error-report.h" +#include "qapi/qmp/qerror.h" #include "hw/pci/pci_bus.h" #include "hw/ppc/spapr_drc.h" +#include "sysemu/device_tree.h" /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */ #define RTAS_QUERY_FN 0 @@ -48,6 +50,14 @@ #define RTAS_TYPE_MSI 1 #define RTAS_TYPE_MSIX 2 +#define _FDT(exp) \ + do { \ + int ret = (exp); \ + if (ret < 0) { \ + return ret; \ + } \ + } while (0) + sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid) { sPAPRPHBState *sphb; @@ -732,6 +742,368 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &phb->iommu_as; } +/* Macros to operate with address in OF binding to PCI */ +#define b_x(x, p, l) (((x) & ((1<<(l))-1)) << (p)) +#define b_n(x) b_x((x), 31, 1) /* 0 if relocatable */ +#define b_p(x) b_x((x), 30, 1) /* 1 if prefetchable */ +#define b_t(x) b_x((x), 29, 1) /* 1 if the address is aliased */ +#define b_ss(x) b_x((x), 24, 2) /* the space code */ +#define b_bbbbbbbb(x) b_x((x), 16, 8) /* bus number */ +#define b_ddddd(x) b_x((x), 11, 5) /* device number */ +#define b_fff(x) b_x((x), 8, 3) /* function number */ +#define b_rrrrrrrr(x) b_x((x), 0, 8) /* register number */ + +/* for 'reg'/'assigned-addresses' OF properties */ +#define RESOURCE_CELLS_SIZE 2 +#define RESOURCE_CELLS_ADDRESS 3 + +typedef struct ResourceFields { + uint32_t phys_hi; + uint32_t phys_mid; + uint32_t phys_lo; + uint32_t size_hi; + uint32_t size_lo; +} QEMU_PACKED ResourceFields; + +typedef struct ResourceProps { + ResourceFields reg[8]; + ResourceFields assigned[7]; + uint32_t reg_len; + uint32_t assigned_len; +} ResourceProps; + +/* fill in the 'reg'/'assigned-resources' OF properties for + * a PCI device. 'reg' describes resource requirements for a + * device's IO/MEM regions, 'assigned-addresses' describes the + * actual resource assignments. + * + * the properties are arrays of ('phys-addr', 'size') pairs describing + * the addressable regions of the PCI device, where 'phys-addr' is a + * RESOURCE_CELLS_ADDRESS-tuple of 32-bit integers corresponding to + * (phys.hi, phys.mid, phys.lo), and 'size' is a + * RESOURCE_CELLS_SIZE-tuple corresponding to (size.hi, size.lo). + * + * phys.hi = 0xYYXXXXZZ, where: + * 0xYY = npt000ss + * ||| | + * ||| +-- space code: 1 if IO region, 2 if MEM region + * ||+------ for non-relocatable IO: 1 if aliased + * || for relocatable IO: 1 if below 64KB + * || for MEM: 1 if below 1MB + * |+------- 1 if region is prefetchable + * +-------- 1 if region is non-relocatable + * 0xXXXX = bbbbbbbb dddddfff, encoding bus, slot, and function + * bits respectively + * 0xZZ = rrrrrrrr, the register number of the BAR corresponding + * to the region + * + * phys.mid and phys.lo correspond respectively to the hi/lo portions + * of the actual address of the region. + * + * how the phys-addr/size values are used differ slightly between + * 'reg' and 'assigned-addresses' properties. namely, 'reg' has + * an additional description for the config space region of the + * device, and in the case of QEMU has n=0 and phys.mid=phys.lo=0 + * to describe the region as relocatable, with an address-mapping + * that corresponds directly to the PHB's address space for the + * resource. 'assigned-addresses' always has n=1 set with an absolute + * address assigned for the resource. in general, 'assigned-addresses' + * won't be populated, since addresses for PCI devices are generally + * unmapped initially and left to the guest to assign. + * + * note also that addresses defined in these properties are, at least + * for PAPR guests, relative to the PHBs IO/MEM windows, and + * correspond directly to the addresses in the BARs. + * + * in accordance with PCI Bus Binding to Open Firmware, + * IEEE Std 1275-1994, section 4.1.1, as implemented by PAPR+ v2.7, + * Appendix C. + */ +static void populate_resource_props(PCIDevice *d, ResourceProps *rp) +{ + int bus_num = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(d)))); + uint32_t dev_id = (b_bbbbbbbb(bus_num) | + b_ddddd(PCI_SLOT(d->devfn)) | + b_fff(PCI_FUNC(d->devfn))); + ResourceFields *reg, *assigned; + int i, reg_idx = 0, assigned_idx = 0; + + /* config space region */ + reg = &rp->reg[reg_idx++]; + reg->phys_hi = cpu_to_be32(dev_id); + reg->phys_mid = 0; + reg->phys_lo = 0; + reg->size_hi = 0; + reg->size_lo = 0; + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + if (!d->io_regions[i].size) { + continue; + } + + reg = &rp->reg[reg_idx++]; + + reg->phys_hi = cpu_to_be32(dev_id | b_rrrrrrrr(pci_bar(d, i))); + if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) { + reg->phys_hi |= cpu_to_be32(b_ss(1)); + } else { + reg->phys_hi |= cpu_to_be32(b_ss(2)); + } + reg->phys_mid = 0; + reg->phys_lo = 0; + reg->size_hi = cpu_to_be32(d->io_regions[i].size >> 32); + reg->size_lo = cpu_to_be32(d->io_regions[i].size); + + if (d->io_regions[i].addr == PCI_BAR_UNMAPPED) { + continue; + } + + assigned = &rp->assigned[assigned_idx++]; + assigned->phys_hi = cpu_to_be32(reg->phys_hi | b_n(1)); + assigned->phys_mid = cpu_to_be32(d->io_regions[i].addr >> 32); + assigned->phys_lo = cpu_to_be32(d->io_regions[i].addr); + assigned->size_hi = reg->size_hi; + assigned->size_lo = reg->size_lo; + } + + rp->reg_len = reg_idx * sizeof(ResourceFields); + rp->assigned_len = assigned_idx * sizeof(ResourceFields); +} + +static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, + int phb_index, int drc_index, + const char *drc_name) +{ + ResourceProps rp; + bool is_bridge = false; + int pci_status; + + if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) == + PCI_HEADER_TYPE_BRIDGE) { + is_bridge = true; + } + + /* in accordance with PAPR+ v2.7 13.6.3, Table 181 */ + _FDT(fdt_setprop_cell(fdt, offset, "vendor-id", + pci_default_read_config(dev, PCI_VENDOR_ID, 2))); + _FDT(fdt_setprop_cell(fdt, offset, "device-id", + pci_default_read_config(dev, PCI_DEVICE_ID, 2))); + _FDT(fdt_setprop_cell(fdt, offset, "revision-id", + pci_default_read_config(dev, PCI_REVISION_ID, 1))); + _FDT(fdt_setprop_cell(fdt, offset, "class-code", + pci_default_read_config(dev, PCI_CLASS_DEVICE, 2) + << 8)); + if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) { + _FDT(fdt_setprop_cell(fdt, offset, "interrupts", + pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1))); + } + + if (!is_bridge) { + _FDT(fdt_setprop_cell(fdt, offset, "min-grant", + pci_default_read_config(dev, PCI_MIN_GNT, 1))); + _FDT(fdt_setprop_cell(fdt, offset, "max-latency", + pci_default_read_config(dev, PCI_MAX_LAT, 1))); + } + + if (pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2)) { + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-id", + pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2))); + } + + if (pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2)) { + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-vendor-id", + pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2))); + } + + _FDT(fdt_setprop_cell(fdt, offset, "cache-line-size", + pci_default_read_config(dev, PCI_CACHE_LINE_SIZE, 1))); + + /* the following fdt cells are masked off the pci status register */ + pci_status = pci_default_read_config(dev, PCI_STATUS, 2); + _FDT(fdt_setprop_cell(fdt, offset, "devsel-speed", + PCI_STATUS_DEVSEL_MASK & pci_status)); + + if (pci_status & PCI_STATUS_FAST_BACK) { + _FDT(fdt_setprop(fdt, offset, "fast-back-to-back", NULL, 0)); + } + if (pci_status & PCI_STATUS_66MHZ) { + _FDT(fdt_setprop(fdt, offset, "66mhz-capable", NULL, 0)); + } + if (pci_status & PCI_STATUS_UDF) { + _FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0)); + } + + /* NOTE: this is normally generated by firmware via path/unit name, + * but in our case we must set it manually since it does not get + * processed by OF beforehand + */ + _FDT(fdt_setprop_string(fdt, offset, "name", "pci")); + _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name, strlen(drc_name))); + _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)); + + _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", + RESOURCE_CELLS_ADDRESS)); + _FDT(fdt_setprop_cell(fdt, offset, "#size-cells", + RESOURCE_CELLS_SIZE)); + _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x", + RESOURCE_CELLS_SIZE)); + + populate_resource_props(dev, &rp); + _FDT(fdt_setprop(fdt, offset, "reg", (uint8_t *)rp.reg, rp.reg_len)); + _FDT(fdt_setprop(fdt, offset, "assigned-addresses", + (uint8_t *)rp.assigned, rp.assigned_len)); + + return 0; +} + +/* create OF node for pci device and required OF DT properties */ +static void *spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev, + int drc_index, const char *drc_name, + int *dt_offset) +{ + void *fdt; + int offset, ret, fdt_size; + int slot = PCI_SLOT(dev->devfn); + int func = PCI_FUNC(dev->devfn); + char nodename[512]; + + fdt = create_device_tree(&fdt_size); + if (func != 0) { + sprintf(nodename, "pci@%d,%d", slot, func); + } else { + sprintf(nodename, "pci@%d", slot); + } + offset = fdt_add_subnode(fdt, 0, nodename); + ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb->index, drc_index, + drc_name); + g_assert(!ret); + + *dt_offset = offset; + return fdt; +} + +static void spapr_phb_add_pci_device(sPAPRDRConnector *drc, + sPAPRPHBState *phb, + PCIDevice *pdev, + Error **errp) +{ + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + DeviceState *dev = DEVICE(pdev); + int drc_index = drck->get_index(drc); + const char *drc_name = drck->get_name(drc); + void *fdt = NULL; + int fdt_start_offset = 0; + + /* boot-time devices get their device tree node created by SLOF, but for + * hotplugged devices we need QEMU to generate it so the guest can fetch + * it via RTAS + */ + if (dev->hotplugged) { + fdt = spapr_create_pci_child_dt(phb, pdev, drc_index, drc_name, + &fdt_start_offset); + } + + drck->attach(drc, DEVICE(pdev), + fdt, fdt_start_offset, !dev->hotplugged, errp); + if (*errp) { + g_free(fdt); + } +} + +static void spapr_phb_remove_pci_device_cb(DeviceState *dev, void *opaque) +{ + /* some version guests do not wait for completion of a device + * cleanup (generally done asynchronously by the kernel) before + * signaling to QEMU that the device is safe, but instead sleep + * for some 'safe' period of time. unfortunately on a busy host + * this sleep isn't guaranteed to be long enough, resulting in + * bad things like IRQ lines being left asserted during final + * device removal. to deal with this we call reset just prior + * to finalizing the device, which will put the device back into + * an 'idle' state, as the device cleanup code expects. + */ + pci_device_reset(PCI_DEVICE(dev)); + object_unparent(OBJECT(dev)); +} + +static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc, + sPAPRPHBState *phb, + PCIDevice *pdev, + Error **errp) +{ + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + drck->detach(drc, DEVICE(pdev), spapr_phb_remove_pci_device_cb, phb, errp); +} + +static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb, + PCIDevice *pdev) +{ + uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)))); + return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI, + (phb->index << 16) | + (busnr << 8) | + pdev->devfn); +} + +static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler, + DeviceState *plugged_dev, Error **errp) +{ + sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler)); + PCIDevice *pdev = PCI_DEVICE(plugged_dev); + sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev); + Error *local_err = NULL; + + /* if DR is disabled we don't need to do anything in the case of + * hotplug or coldplug callbacks + */ + if (!phb->dr_enabled) { + /* if this is a hotplug operation initiated by the user + * we need to let them know it's not enabled + */ + if (plugged_dev->hotplugged) { + error_set(errp, QERR_BUS_NO_HOTPLUG, + object_get_typename(OBJECT(phb))); + } + return; + } + + g_assert(drc); + + spapr_phb_add_pci_device(drc, phb, pdev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } +} + +static void spapr_phb_hot_unplug_child(HotplugHandler *plug_handler, + DeviceState *plugged_dev, Error **errp) +{ + sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler)); + PCIDevice *pdev = PCI_DEVICE(plugged_dev); + sPAPRDRConnectorClass *drck; + sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev); + Error *local_err = NULL; + + if (!phb->dr_enabled) { + error_set(errp, QERR_BUS_NO_HOTPLUG, + object_get_typename(OBJECT(phb))); + return; + } + + g_assert(drc); + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + if (!drck->release_pending(drc)) { + spapr_phb_remove_pci_device(drc, phb, pdev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } +} + static void spapr_phb_realize(DeviceState *dev, Error **errp) { SysBusDevice *s = SYS_BUS_DEVICE(dev); @@ -825,6 +1197,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) &sphb->memspace, &sphb->iospace, PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS); phb->bus = bus; + qbus_set_hotplug_handler(BUS(phb->bus), DEVICE(sphb), NULL); /* * Initialize PHB address space. @@ -1061,6 +1434,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass); + HotplugHandlerClass *hp = HOTPLUG_HANDLER_CLASS(klass); hc->root_bus_path = spapr_phb_root_bus_path; dc->realize = spapr_phb_realize; @@ -1070,6 +1444,8 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->cannot_instantiate_with_device_add_yet = false; spc->finish_realize = spapr_phb_finish_realize; + hp->plug = spapr_phb_hot_plug_child; + hp->unplug = spapr_phb_hot_unplug_child; } static const TypeInfo spapr_phb_info = { @@ -1078,6 +1454,10 @@ static const TypeInfo spapr_phb_info = { .instance_size = sizeof(sPAPRPHBState), .class_init = spapr_phb_class_init, .class_size = sizeof(sPAPRPHBClass), + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } + } }; PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index) @@ -1091,17 +1471,6 @@ PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index) return PCI_HOST_BRIDGE(dev); } -/* Macros to operate with address in OF binding to PCI */ -#define b_x(x, p, l) (((x) & ((1<<(l))-1)) << (p)) -#define b_n(x) b_x((x), 31, 1) /* 0 if relocatable */ -#define b_p(x) b_x((x), 30, 1) /* 1 if prefetchable */ -#define b_t(x) b_x((x), 29, 1) /* 1 if the address is aliased */ -#define b_ss(x) b_x((x), 24, 2) /* the space code */ -#define b_bbbbbbbb(x) b_x((x), 16, 8) /* bus number */ -#define b_ddddd(x) b_x((x), 11, 5) /* device number */ -#define b_fff(x) b_x((x), 8, 3) /* function number */ -#define b_rrrrrrrr(x) b_x((x), 0, 8) /* register number */ - int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t xics_phandle, void *fdt) @@ -1149,14 +1518,6 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, return bus_off; } -#define _FDT(exp) \ - do { \ - int ret = (exp); \ - if (ret < 0) { \ - return ret; \ - } \ - } while (0) - /* Write PHB properties */ _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci")); _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB")); From c5bc152bc399ae7ec8ac5227762e4320d0fd2d1c Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Thu, 7 May 2015 15:33:56 +1000 Subject: [PATCH 33/40] spapr_pci: emit hotplug add/remove events during hotplug This uses extension of existing EPOW interrupt/event mechanism to notify userspace tools like librtas/drmgr to handle in-guest configuration/cleanup operations in response to device_add/device_del. Userspace tools that don't implement this extension will need to be run manually in response/advance of device_add/device_del, respectively. Signed-off-by: Tyrel Datwyler Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr_pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index d2e4161998..4df3a33db4 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1075,6 +1075,9 @@ static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler, error_propagate(errp, local_err); return; } + if (plugged_dev->hotplugged) { + spapr_hotplug_req_add_event(drc); + } } static void spapr_phb_hot_unplug_child(HotplugHandler *plug_handler, @@ -1101,6 +1104,7 @@ static void spapr_phb_hot_unplug_child(HotplugHandler *plug_handler, error_propagate(errp, local_err); return; } + spapr_hotplug_req_remove_event(drc); } } From 076b35b5a56bca57c4aa41044ed304fe9c45d6c5 Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Thu, 7 May 2015 15:33:57 +1000 Subject: [PATCH 34/40] machine: add default_ram_size to machine class Machines types can have different requirement for default ram size. Introduce a member in the machine class and set the current default_ram_size to 128MB. For QEMUMachine types override the value during the registration of the machine and for MachineClass introduce the generic class init setting the default_ram_size. Add helpers [K,M,G,T,P,E]_BYTE for better readability and easy usage Signed-off-by: Nikunj A Dadhania Reviewed-by: Thomas Huth Reviewed-by: David Gibson Acked-by: Paolo Bonzini Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/core/machine.c | 9 +++++++++ include/hw/boards.h | 1 + include/qemu-common.h | 6 ++++++ vl.c | 30 ++++++++++++++++-------------- 4 files changed, 32 insertions(+), 14 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index 25c45e6f9d..ac4654e9dd 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -294,6 +294,14 @@ static void machine_init_notify(Notifier *notifier, void *data) foreach_dynamic_sysbus_device(error_on_sysbus_device, NULL); } +static void machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + /* Default 128 MB as guest ram size */ + mc->default_ram_size = 128 * M_BYTE; +} + static void machine_initfn(Object *obj) { MachineState *ms = MACHINE(obj); @@ -463,6 +471,7 @@ static const TypeInfo machine_info = { .parent = TYPE_OBJECT, .abstract = true, .class_size = sizeof(MachineClass), + .class_init = machine_class_init, .instance_size = sizeof(MachineState), .instance_init = machine_initfn, .instance_finalize = machine_finalize, diff --git a/include/hw/boards.h b/include/hw/boards.h index ff79797ce4..6379901528 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -106,6 +106,7 @@ struct MachineClass { const char *default_display; GlobalProperty *compat_props; const char *hw_version; + ram_addr_t default_ram_size; HotplugHandler *(*get_hotplug_handler)(MachineState *machine, DeviceState *dev); diff --git a/include/qemu-common.h b/include/qemu-common.h index 6b373ff7e3..d52d09cfb8 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -186,6 +186,12 @@ int64_t strtosz(const char *nptr, char **end); int64_t strtosz_suffix(const char *nptr, char **end, const char default_suffix); int64_t strtosz_suffix_unit(const char *nptr, char **end, const char default_suffix, int64_t unit); +#define K_BYTE (1ULL << 10) +#define M_BYTE (1ULL << 20) +#define G_BYTE (1ULL << 30) +#define T_BYTE (1ULL << 40) +#define P_BYTE (1ULL << 50) +#define E_BYTE (1ULL << 60) /* used to print char* safely */ #define STR_OR_NULL(str) ((str) ? (str) : "null") diff --git a/vl.c b/vl.c index 1d4c0890f2..6c7e4e4c7a 100644 --- a/vl.c +++ b/vl.c @@ -120,8 +120,6 @@ int main(int argc, char **argv) #include "qom/object_interfaces.h" #include "qapi-event.h" -#define DEFAULT_RAM_SIZE 128 - #define MAX_VIRTIO_CONSOLES 1 #define MAX_SCLP_CONSOLES 1 @@ -1310,7 +1308,11 @@ void hmp_usb_del(Monitor *mon, const QDict *qdict) MachineState *current_machine; -static void machine_class_init(ObjectClass *oc, void *data) +/* + * Transitional class registration/init used for converting from + * legacy QEMUMachine to MachineClass. + */ +static void qemu_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); QEMUMachine *qm = data; @@ -1333,7 +1335,7 @@ int qemu_register_machine(QEMUMachine *m) TypeInfo ti = { .name = name, .parent = TYPE_MACHINE, - .class_init = machine_class_init, + .class_init = qemu_machine_class_init, .class_data = (void *)m, }; @@ -2647,13 +2649,13 @@ out: return 0; } -static void set_memory_options(uint64_t *ram_slots, ram_addr_t *maxram_size) +static void set_memory_options(uint64_t *ram_slots, ram_addr_t *maxram_size, + MachineClass *mc) { uint64_t sz; const char *mem_str; const char *maxmem_str, *slots_str; - const ram_addr_t default_ram_size = (ram_addr_t)DEFAULT_RAM_SIZE * - 1024 * 1024; + const ram_addr_t default_ram_size = mc->default_ram_size; QemuOpts *opts = qemu_find_opts_singleton("memory"); sz = 0; @@ -3769,7 +3771,13 @@ int main(int argc, char **argv, char **envp) machine_class = machine_parse(optarg); } - set_memory_options(&ram_slots, &maxram_size); + if (machine_class == NULL) { + fprintf(stderr, "No machine specified, and there is no default.\n" + "Use -machine help to list supported machines!\n"); + exit(1); + } + + set_memory_options(&ram_slots, &maxram_size, machine_class); loc_set_none(); @@ -3798,12 +3806,6 @@ int main(int argc, char **argv, char **envp) } #endif - if (machine_class == NULL) { - fprintf(stderr, "No machine specified, and there is no default.\n" - "Use -machine help to list supported machines!\n"); - exit(1); - } - current_machine = MACHINE(object_new(object_class_get_name( OBJECT_CLASS(machine_class)))); if (machine_help_func(qemu_get_machine_opts(), current_machine)) { From a34944fe2e2457309bde74c1ffe3a1c60c6da018 Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Thu, 7 May 2015 15:33:58 +1000 Subject: [PATCH 35/40] spapr: override default ram size to 512MB Signed-off-by: Nikunj A Dadhania Reviewed-by: Igor Mammedov Reviewed-by: Thomas Huth Acked-by: David Gibson Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 8a21f1d3ef..2e9ac87630 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1801,6 +1801,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) mc->max_cpus = MAX_CPUS; mc->no_parallel = 1; mc->default_boot_order = ""; + mc->default_ram_size = 512 * M_BYTE; mc->kvm_type = spapr_kvm_type; mc->has_dynamic_sysbus = true; From 026bfd89cb896c8a3460cc551cc4836219bd7ff9 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 7 May 2015 15:33:59 +1000 Subject: [PATCH 36/40] pseries: Enable in-kernel H_LOGICAL_CI_{LOAD, STORE} implementations qemu currently implements the hypercalls H_LOGICAL_CI_LOAD and H_LOGICAL_CI_STORE as PAPR extensions. These are used by the SLOF firmware for IO, because performing cache inhibited MMIO accesses with the MMU off (real mode) is very awkward on POWER. This approach breaks when SLOF needs to access IO devices implemented within KVM instead of in qemu. The simplest example would be virtio-blk using an iothread, because the iothread / dataplane mechanism relies on an in-kernel implementation of the virtio queue notification MMIO. To fix this, an in-kernel implementation of these hypercalls has been made, (kernel commit 99342cf "kvmppc: Implement H_LOGICAL_CI_{LOAD,STORE} in KVM" however, the hypercalls still need to be enabled from qemu. This performs the necessary calls to do so. It would be nice to provide some warning if we encounter a problematic device with a kernel which doesn't support the new calls. Unfortunately, I can't see a way to detect this case which won't either warn in far too many cases that will probably work, or which is horribly invasive. Signed-off-by: David Gibson Reviewed-by: Thomas Huth Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 5 +++++ target-ppc/kvm.c | 17 +++++++++++++++++ target-ppc/kvm_ppc.h | 5 +++++ 3 files changed, 27 insertions(+) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 2e9ac87630..f174e5a0f3 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1506,6 +1506,11 @@ static void ppc_spapr_init(MachineState *machine) qemu_register_reset(spapr_cpu_reset, cpu); } + if (kvm_enabled()) { + /* Enable H_LOGICAL_CI_* so SLOF can talk to in-kernel devices */ + kvmppc_enable_logical_ci_hcalls(); + } + /* allocate RAM */ spapr->ram_limit = ram_size; memory_region_allocate_system_memory(ram, NULL, "ppc_spapr.ram", diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 1da9ea81e5..97a50b1de3 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -1884,6 +1884,23 @@ int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) return 0; } +static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) +{ + return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1); +} + +void kvmppc_enable_logical_ci_hcalls(void) +{ + /* + * FIXME: it would be nice if we could detect the cases where + * we're using a device which requires the in kernel + * implementation of these hcalls, but the kernel lacks them and + * produce a warning. + */ + kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD); + kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE); +} + void kvmppc_set_papr(PowerPCCPU *cpu) { CPUState *cs = CPU(cpu); diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h index 2e0224c6af..4d30e27951 100644 --- a/target-ppc/kvm_ppc.h +++ b/target-ppc/kvm_ppc.h @@ -24,6 +24,7 @@ bool kvmppc_get_host_serial(char **buf); int kvmppc_get_hasidle(CPUPPCState *env); int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len); int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level); +void kvmppc_enable_logical_ci_hcalls(void); void kvmppc_set_papr(PowerPCCPU *cpu); int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version); void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy); @@ -107,6 +108,10 @@ static inline int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) return -1; } +static inline void kvmppc_enable_logical_ci_hcalls(void) +{ +} + static inline void kvmppc_set_papr(PowerPCCPU *cpu) { } From 085eb217dfb3ee12e7985c11f71f8a038394735a Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 8 May 2015 10:11:00 +1000 Subject: [PATCH 37/40] Add David Gibson for sPAPR in MAINTAINERS file At Alex Graf's request I'm now acting as sub-maintainer for the sPAPR (-machine pseries) code. This updates MAINTAINERS accordingly. While we're at it, change the label to mention pseries since that's the actual name of the machine type, even if most of the C files use the sPAPR name. Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0463696dd3..4ed82154ce 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -486,7 +486,8 @@ F: hw/ppc/prep.c F: hw/pci-host/prep.[hc] F: hw/isa/pc87312.[hc] -sPAPR +sPAPR (pseries) +M: David Gibson M: Alexander Graf L: qemu-ppc@nongnu.org S: Supported From 5a58e884d1d9905a835de2889c8cd73327fe2a94 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 19 May 2015 09:59:34 +0200 Subject: [PATCH 38/40] tci: do not use CPUArchState in tcg-target.h tcg-target.h does not use any QEMU-specific symbols, save for tci's usage of CPUArchState. Pull that up to tcg/tcg.h. This will make it possible to include tcg-target.h in cpu-defs.h. Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson Signed-off-by: Alexander Graf --- tcg/tcg.h | 4 +++- tcg/tci/tcg-target.h | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tcg/tcg.h b/tcg/tcg.h index 8098f824b3..41e486959d 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -927,7 +927,9 @@ static inline unsigned get_mmuidx(TCGMemOpIdx oi) #define TB_EXIT_ICOUNT_EXPIRED 2 #define TB_EXIT_REQUESTED 3 -#if !defined(tcg_qemu_tb_exec) +#ifdef HAVE_TCG_QEMU_TB_EXEC +uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr); +#else # define tcg_qemu_tb_exec(env, tb_ptr) \ ((uintptr_t (*)(void *, void *))tcg_ctx.code_gen_prologue)(env, tb_ptr) #endif diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index bd1e97468c..662d45c22f 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -175,8 +175,7 @@ typedef enum { void tci_disas(uint8_t opc); -uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr); -#define tcg_qemu_tb_exec tcg_qemu_tb_exec +#define HAVE_TCG_QEMU_TB_EXEC static inline void flush_icache_range(uintptr_t start, uintptr_t stop) { From 006f8638c62bca2b0caf609485f47fa5e14d8a3c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 5 May 2015 09:18:22 +0200 Subject: [PATCH 39/40] tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS This will be used to size the TLB when more than 8 MMU modes are used by the target. Limitations come from the limited size of the immediate fields (which sometimes, as in the case of Aarch64, extend to instructions that shift the immediate). Signed-off-by: Paolo Bonzini Message-Id: <1424436345-37924-2-git-send-email-pbonzini@redhat.com> Reviewed-by: Richard Henderson Signed-off-by: Alexander Graf --- tcg/aarch64/tcg-target.h | 1 + tcg/arm/tcg-target.h | 1 + tcg/i386/tcg-target.h | 1 + tcg/ia64/tcg-target.h | 2 ++ tcg/mips/tcg-target.h | 1 + tcg/ppc/tcg-target.h | 1 + tcg/s390/tcg-target.h | 1 + tcg/sparc/tcg-target.h | 1 + tcg/tci/tcg-target.h | 1 + 9 files changed, 10 insertions(+) diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 60c7493ac1..8aec04d2bf 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -14,6 +14,7 @@ #define TCG_TARGET_AARCH64 1 #define TCG_TARGET_INSN_UNIT_SIZE 4 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24 #undef TCG_TARGET_STACK_GROWSUP typedef enum { diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 1c719e2862..6559f80b71 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -27,6 +27,7 @@ #undef TCG_TARGET_STACK_GROWSUP #define TCG_TARGET_INSN_UNIT_SIZE 4 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 typedef enum { TCG_REG_R0 = 0, diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 7a9980e70e..25b513354c 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -25,6 +25,7 @@ #define TCG_TARGET_I386 1 #define TCG_TARGET_INSN_UNIT_SIZE 1 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 31 #ifdef __x86_64__ # define TCG_TARGET_REG_BITS 64 diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index d67558988a..a04ed81262 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -26,6 +26,8 @@ #define TCG_TARGET_IA64 1 #define TCG_TARGET_INSN_UNIT_SIZE 16 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 21 + typedef struct { uint64_t lo __attribute__((aligned(16))); uint64_t hi; diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index c88a1c9272..f5ba52cacf 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -27,6 +27,7 @@ #define TCG_TARGET_MIPS 1 #define TCG_TARGET_INSN_UNIT_SIZE 4 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 #define TCG_TARGET_NB_REGS 32 typedef enum { diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 32ac4424db..7ce7048824 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -32,6 +32,7 @@ #define TCG_TARGET_NB_REGS 32 #define TCG_TARGET_INSN_UNIT_SIZE 4 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 typedef enum { TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3, diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 5acc28ca6b..91576d5949 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -25,6 +25,7 @@ #define TCG_TARGET_S390 1 #define TCG_TARGET_INSN_UNIT_SIZE 2 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19 typedef enum TCGReg { TCG_REG_R0 = 0, diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index 0c4c8af0b2..f584de4766 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -27,6 +27,7 @@ #define TCG_TARGET_REG_BITS 64 #define TCG_TARGET_INSN_UNIT_SIZE 4 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32 #define TCG_TARGET_NB_REGS 32 typedef enum { diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 662d45c22f..cbf3f9b5a6 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -44,6 +44,7 @@ #define TCG_TARGET_INTERPRETER 1 #define TCG_TARGET_INSN_UNIT_SIZE 1 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32 #if UINTPTR_MAX == UINT32_MAX # define TCG_TARGET_REG_BITS 32 From 1de29aef17a7d70dbc04a7fe51e18942e3ebe313 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 5 May 2015 09:18:23 +0200 Subject: [PATCH 40/40] softmmu: support up to 12 MMU modes At 8k per TLB (for 64-bit host or target), 8 or more modes make the TLBs bigger than 64k, and some RISC TCG backends do not like that. On the affected hosts, cut the TLB size in half---there is still a measurable speedup on PPC with the next patch. Signed-off-by: Paolo Bonzini Message-Id: <1424436345-37924-3-git-send-email-pbonzini@redhat.com> Reviewed-by: Richard Henderson Signed-off-by: Alexander Graf --- include/exec/cpu-defs.h | 35 +++++++++++++- include/exec/cpu_ldst.h | 104 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 131 insertions(+), 8 deletions(-) diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h index 3f56546066..d5aecaf49e 100644 --- a/include/exec/cpu-defs.h +++ b/include/exec/cpu-defs.h @@ -27,6 +27,7 @@ #include #include "qemu/osdep.h" #include "qemu/queue.h" +#include "tcg-target.h" #ifndef CONFIG_USER_ONLY #include "exec/hwaddr.h" #endif @@ -70,8 +71,6 @@ typedef uint64_t target_ulong; #define TB_JMP_PAGE_MASK (TB_JMP_CACHE_SIZE - TB_JMP_PAGE_SIZE) #if !defined(CONFIG_USER_ONLY) -#define CPU_TLB_BITS 8 -#define CPU_TLB_SIZE (1 << CPU_TLB_BITS) /* use a fully associative victim tlb of 8 entries */ #define CPU_VTLB_SIZE 8 @@ -81,6 +80,38 @@ typedef uint64_t target_ulong; #define CPU_TLB_ENTRY_BITS 5 #endif +/* TCG_TARGET_TLB_DISPLACEMENT_BITS is used in CPU_TLB_BITS to ensure that + * the TLB is not unnecessarily small, but still small enough for the + * TLB lookup instruction sequence used by the TCG target. + * + * TCG will have to generate an operand as large as the distance between + * env and the tlb_table[NB_MMU_MODES - 1][0].addend. For simplicity, + * the TCG targets just round everything up to the next power of two, and + * count bits. This works because: 1) the size of each TLB is a largish + * power of two, 2) and because the limit of the displacement is really close + * to a power of two, 3) the offset of tlb_table[0][0] inside env is smaller + * than the size of a TLB. + * + * For example, the maximum displacement 0xFFF0 on PPC and MIPS, but TCG + * just says "the displacement is 16 bits". TCG_TARGET_TLB_DISPLACEMENT_BITS + * then ensures that tlb_table at least 0x8000 bytes large ("not unnecessarily + * small": 2^15). The operand then will come up smaller than 0xFFF0 without + * any particular care, because the TLB for a single MMU mode is larger than + * 0x10000-0xFFF0=16 bytes. In the end, the maximum value of the operand + * could be something like 0xC000 (the offset of the last TLB table) plus + * 0x18 (the offset of the addend field in each TLB entry) plus the offset + * of tlb_table inside env (which is non-trivial but not huge). + */ +#define CPU_TLB_BITS \ + MIN(8, \ + TCG_TARGET_TLB_DISPLACEMENT_BITS - CPU_TLB_ENTRY_BITS - \ + (NB_MMU_MODES <= 1 ? 0 : \ + NB_MMU_MODES <= 2 ? 1 : \ + NB_MMU_MODES <= 4 ? 2 : \ + NB_MMU_MODES <= 8 ? 3 : 4)) + +#define CPU_TLB_SIZE (1 << CPU_TLB_BITS) + typedef struct CPUTLBEntry { /* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address bit TARGET_PAGE_BITS-1..4 : Nonzero for accesses that should not diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index 1673287189..0ec398c0f8 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -263,12 +263,104 @@ uint64_t helper_ldq_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx); #undef MEMSUFFIX #endif /* (NB_MMU_MODES >= 7) */ -#if (NB_MMU_MODES > 7) -/* Note that supporting NB_MMU_MODES == 9 would require - * changes to at least the ARM TCG backend. - */ -#error "NB_MMU_MODES > 7 is not supported for now" -#endif /* (NB_MMU_MODES > 7) */ +#if (NB_MMU_MODES >= 8) && defined(MMU_MODE7_SUFFIX) + +#define CPU_MMU_INDEX 7 +#define MEMSUFFIX MMU_MODE7_SUFFIX +#define DATA_SIZE 1 +#include "exec/cpu_ldst_template.h" + +#define DATA_SIZE 2 +#include "exec/cpu_ldst_template.h" + +#define DATA_SIZE 4 +#include "exec/cpu_ldst_template.h" + +#define DATA_SIZE 8 +#include "exec/cpu_ldst_template.h" +#undef CPU_MMU_INDEX +#undef MEMSUFFIX +#endif /* (NB_MMU_MODES >= 8) */ + +#if (NB_MMU_MODES >= 9) && defined(MMU_MODE8_SUFFIX) + +#define CPU_MMU_INDEX 8 +#define MEMSUFFIX MMU_MODE8_SUFFIX +#define DATA_SIZE 1 +#include "exec/cpu_ldst_template.h" + +#define DATA_SIZE 2 +#include "exec/cpu_ldst_template.h" + +#define DATA_SIZE 4 +#include "exec/cpu_ldst_template.h" + +#define DATA_SIZE 8 +#include "exec/cpu_ldst_template.h" +#undef CPU_MMU_INDEX +#undef MEMSUFFIX +#endif /* (NB_MMU_MODES >= 9) */ + +#if (NB_MMU_MODES >= 10) && defined(MMU_MODE9_SUFFIX) + +#define CPU_MMU_INDEX 9 +#define MEMSUFFIX MMU_MODE9_SUFFIX +#define DATA_SIZE 1 +#include "exec/cpu_ldst_template.h" + +#define DATA_SIZE 2 +#include "exec/cpu_ldst_template.h" + +#define DATA_SIZE 4 +#include "exec/cpu_ldst_template.h" + +#define DATA_SIZE 8 +#include "exec/cpu_ldst_template.h" +#undef CPU_MMU_INDEX +#undef MEMSUFFIX +#endif /* (NB_MMU_MODES >= 10) */ + +#if (NB_MMU_MODES >= 11) && defined(MMU_MODE10_SUFFIX) + +#define CPU_MMU_INDEX 10 +#define MEMSUFFIX MMU_MODE10_SUFFIX +#define DATA_SIZE 1 +#include "exec/cpu_ldst_template.h" + +#define DATA_SIZE 2 +#include "exec/cpu_ldst_template.h" + +#define DATA_SIZE 4 +#include "exec/cpu_ldst_template.h" + +#define DATA_SIZE 8 +#include "exec/cpu_ldst_template.h" +#undef CPU_MMU_INDEX +#undef MEMSUFFIX +#endif /* (NB_MMU_MODES >= 11) */ + +#if (NB_MMU_MODES >= 12) && defined(MMU_MODE11_SUFFIX) + +#define CPU_MMU_INDEX 11 +#define MEMSUFFIX MMU_MODE11_SUFFIX +#define DATA_SIZE 1 +#include "exec/cpu_ldst_template.h" + +#define DATA_SIZE 2 +#include "exec/cpu_ldst_template.h" + +#define DATA_SIZE 4 +#include "exec/cpu_ldst_template.h" + +#define DATA_SIZE 8 +#include "exec/cpu_ldst_template.h" +#undef CPU_MMU_INDEX +#undef MEMSUFFIX +#endif /* (NB_MMU_MODES >= 12) */ + +#if (NB_MMU_MODES > 12) +#error "NB_MMU_MODES > 12 is not supported for now" +#endif /* (NB_MMU_MODES > 12) */ /* these access are slower, they must be as rare as possible */ #define CPU_MMU_INDEX (cpu_mmu_index(env))