From 7abea552aba6e85b338015726648974d6d6f19c8 Mon Sep 17 00:00:00 2001 From: linzhecheng Date: Tue, 31 Oct 2017 16:03:03 +0800 Subject: [PATCH 1/9] fix: unrealize virtio device if we fail to hotplug it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we fail to hotplug virtio-blk device and then suspend or shutdown VM, qemu is likely to crash. Re-production steps: 1. Run VM named vm001 2. Create a virtio-blk.xml which contains wrong configurations: 3. Run command : virsh attach-device vm001 virtio-blk.xml error: Failed to attach device from blk-scsi.xml error: internal error: unable to execute QEMU command 'device_add': Please set scsi=off for virtio-blk devices in order to use virtio 1.0 it means hotplug virtio-blk device failed. 4. Suspend or shutdown VM will leads to qemu crash Problem happens in virtio_vmstate_change which is called by vm_state_notify: vdev’s parent_bus is NULL, so qdev_get_parent_bus(DEVICE(vdev)) will crash. virtio_vmstate_change is added to the list vm_change_state_head at virtio_blk_device_realize(virtio_init), but after hotplug virtio-blk failed, virtio_vmstate_change will not be removed from vm_change_state_head. Adding unrealize function of virtio-blk device can solve this problem. Signed-off-by: linzhecheng Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 5884ce3480..ea532dc35f 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2491,6 +2491,7 @@ static void virtio_device_realize(DeviceState *dev, Error **errp) virtio_bus_device_plugged(vdev, &err); if (err != NULL) { error_propagate(errp, err); + vdc->unrealize(dev, NULL); return; } From d06bce95ff8c6e2f290db150695f826c3d57324f Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 19 Oct 2017 13:15:05 +1100 Subject: [PATCH 2/9] pci: Initialize pci_dev->name before use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This moves pci_dev->name initialization earlier so pci_dev->bus_master_as could get a name instead of an empty string. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Peter Xu Signed-off-by: Alexey Kardashevskiy Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 5ed3c8dca4..b2d139bd9a 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1030,6 +1030,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, pci_dev->devfn = devfn; pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev); + pstrcpy(pci_dev->name, sizeof(pci_dev->name), name); memory_region_init(&pci_dev->bus_master_container_region, OBJECT(pci_dev), "bus master container", UINT64_MAX); @@ -1039,7 +1040,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, if (qdev_hotplug) { pci_init_bus_master(pci_dev); } - pstrcpy(pci_dev->name, sizeof(pci_dev->name), name); pci_dev->irq_state = 0; pci_config_alloc(pci_dev); From 9fa99d2519cbf71f871e46871df12cb446dc1c3e Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Sat, 11 Nov 2017 17:25:00 +0200 Subject: [PATCH 3/9] hw/pci-host: Fix x86 Host Bridges 64bit PCI hole Currently there is no MMIO range over 4G reserved for PCI hotplug. Since the 32bit PCI hole depends on the number of cold-plugged PCI devices and other factors, it is very possible is too small to hotplug PCI devices with large BARs. Fix it by reserving 2G for I4400FX chipset in order to comply with older Win32 Guest OSes and 32G for Q35 chipset. Even if the new defaults of pci-hole64-size will appear in "info qtree" also for older machines, the property was not implemented so no changes will be visible to guests. Note this is a regression since prev QEMU versions had some range reserved for 64bit PCI hotplug. Reviewed-by: Laszlo Ersek Reviewed-by: Gerd Hoffmann Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc.c | 22 ++++++++++++++++++++ hw/pci-host/piix.c | 32 +++++++++++++++++++++++++++-- hw/pci-host/q35.c | 42 ++++++++++++++++++++++++++++++++++++--- include/hw/i386/pc.h | 10 +++++++++- include/hw/pci-host/q35.h | 1 + 5 files changed, 101 insertions(+), 6 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index e11a65b545..fafe5ba5cd 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1448,6 +1448,28 @@ void pc_memory_init(PCMachineState *pcms, pcms->ioapic_as = &address_space_memory; } +/* + * The 64bit pci hole starts after "above 4G RAM" and + * potentially the space reserved for memory hotplug. + */ +uint64_t pc_pci_hole64_start(void) +{ + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + uint64_t hole64_start = 0; + + if (pcmc->has_reserved_memory && pcms->hotplug_memory.base) { + hole64_start = pcms->hotplug_memory.base; + if (!pcmc->broken_reserved_end) { + hole64_start += memory_region_size(&pcms->hotplug_memory.mr); + } + } else { + hole64_start = 0x100000000ULL + pcms->above_4g_mem_size; + } + + return ROUND_UP(hole64_start, 1ULL << 30); +} + qemu_irq pc_allocate_cpu_irq(void) { return qemu_allocate_irq(pic_irq_request, NULL, 0); diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index a7e2256870..a684a7cca9 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -50,6 +50,7 @@ typedef struct I440FXState { PCIHostState parent_obj; Range pci_hole; uint64_t pci_hole64_size; + bool pci_hole64_fix; uint32_t short_root_bus; } I440FXState; @@ -112,6 +113,9 @@ struct PCII440FXState { #define I440FX_PAM_SIZE 7 #define I440FX_SMRAM 0x72 +/* Keep it 2G to comply with older win32 guests */ +#define I440FX_PCI_HOST_HOLE64_SIZE_DEFAULT (1ULL << 31) + /* Older coreboot versions (4.0 and older) read a config register that doesn't * exist in real hardware, to get the RAM size from QEMU. */ @@ -238,29 +242,52 @@ static void i440fx_pcihost_get_pci_hole_end(Object *obj, Visitor *v, visit_type_uint32(v, name, &value, errp); } +/* + * The 64bit PCI hole start is set by the Guest firmware + * as the address of the first 64bit PCI MEM resource. + * If no PCI device has resources on the 64bit area, + * the 64bit PCI hole will start after "over 4G RAM" and the + * reserved space for memory hotplug if any. + */ static void i440fx_pcihost_get_pci_hole64_start(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { PCIHostState *h = PCI_HOST_BRIDGE(obj); + I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj); Range w64; uint64_t value; pci_bus_get_w64_range(h->bus, &w64); value = range_is_empty(&w64) ? 0 : range_lob(&w64); + if (!value && s->pci_hole64_fix) { + value = pc_pci_hole64_start(); + } visit_type_uint64(v, name, &value, errp); } +/* + * The 64bit PCI hole end is set by the Guest firmware + * as the address of the last 64bit PCI MEM resource. + * Then it is expanded to the PCI_HOST_PROP_PCI_HOLE64_SIZE + * that can be configured by the user. + */ static void i440fx_pcihost_get_pci_hole64_end(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { PCIHostState *h = PCI_HOST_BRIDGE(obj); + I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj); + uint64_t hole64_start = pc_pci_hole64_start(); Range w64; - uint64_t value; + uint64_t value, hole64_end; pci_bus_get_w64_range(h->bus, &w64); value = range_is_empty(&w64) ? 0 : range_upb(&w64) + 1; + hole64_end = ROUND_UP(hole64_start + s->pci_hole64_size, 1ULL << 30); + if (s->pci_hole64_fix && value < hole64_end) { + value = hole64_end; + } visit_type_uint64(v, name, &value, errp); } @@ -863,8 +890,9 @@ static const char *i440fx_pcihost_root_bus_path(PCIHostState *host_bridge, static Property i440fx_props[] = { DEFINE_PROP_SIZE(PCI_HOST_PROP_PCI_HOLE64_SIZE, I440FXState, - pci_hole64_size, DEFAULT_PCI_HOLE64_SIZE), + pci_hole64_size, I440FX_PCI_HOST_HOLE64_SIZE_DEFAULT), DEFINE_PROP_UINT32("short_root_bus", I440FXState, short_root_bus, 0), + DEFINE_PROP_BOOL("x-pci-hole64-fix", I440FXState, pci_hole64_fix, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index ddaa7d1b44..6cb9a8d121 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -37,6 +37,8 @@ * Q35 host */ +#define Q35_PCI_HOST_HOLE64_SIZE_DEFAULT (1ULL << 35) + static void q35_host_realize(DeviceState *dev, Error **errp) { PCIHostState *pci = PCI_HOST_BRIDGE(dev); @@ -99,29 +101,52 @@ static void q35_host_get_pci_hole_end(Object *obj, Visitor *v, visit_type_uint32(v, name, &value, errp); } +/* + * The 64bit PCI hole start is set by the Guest firmware + * as the address of the first 64bit PCI MEM resource. + * If no PCI device has resources on the 64bit area, + * the 64bit PCI hole will start after "over 4G RAM" and the + * reserved space for memory hotplug if any. + */ static void q35_host_get_pci_hole64_start(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { PCIHostState *h = PCI_HOST_BRIDGE(obj); + Q35PCIHost *s = Q35_HOST_DEVICE(obj); Range w64; uint64_t value; pci_bus_get_w64_range(h->bus, &w64); value = range_is_empty(&w64) ? 0 : range_lob(&w64); + if (!value && s->pci_hole64_fix) { + value = pc_pci_hole64_start(); + } visit_type_uint64(v, name, &value, errp); } +/* + * The 64bit PCI hole end is set by the Guest firmware + * as the address of the last 64bit PCI MEM resource. + * Then it is expanded to the PCI_HOST_PROP_PCI_HOLE64_SIZE + * that can be configured by the user. + */ static void q35_host_get_pci_hole64_end(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { PCIHostState *h = PCI_HOST_BRIDGE(obj); + Q35PCIHost *s = Q35_HOST_DEVICE(obj); + uint64_t hole64_start = pc_pci_hole64_start(); Range w64; - uint64_t value; + uint64_t value, hole64_end; pci_bus_get_w64_range(h->bus, &w64); value = range_is_empty(&w64) ? 0 : range_upb(&w64) + 1; + hole64_end = ROUND_UP(hole64_start + s->mch.pci_hole64_size, 1ULL << 30); + if (s->pci_hole64_fix && value < hole64_end) { + value = hole64_end; + } visit_type_uint64(v, name, &value, errp); } @@ -133,16 +158,25 @@ static void q35_host_get_mmcfg_size(Object *obj, Visitor *v, const char *name, visit_type_uint64(v, name, &e->size, errp); } +/* + * NOTE: setting defaults for the mch.* fields in this table + * doesn't work, because mch is a separate QOM object that is + * zeroed by the object_initialize(&s->mch, ...) call inside + * q35_host_initfn(). The default values for those + * properties need to be initialized manually by + * q35_host_initfn() after the object_initialize() call. + */ static Property q35_host_props[] = { DEFINE_PROP_UINT64(PCIE_HOST_MCFG_BASE, Q35PCIHost, parent_obj.base_addr, MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT), DEFINE_PROP_SIZE(PCI_HOST_PROP_PCI_HOLE64_SIZE, Q35PCIHost, - mch.pci_hole64_size, DEFAULT_PCI_HOLE64_SIZE), + mch.pci_hole64_size, Q35_PCI_HOST_HOLE64_SIZE_DEFAULT), DEFINE_PROP_UINT32("short_root_bus", Q35PCIHost, mch.short_root_bus, 0), DEFINE_PROP_SIZE(PCI_HOST_BELOW_4G_MEM_SIZE, Q35PCIHost, mch.below_4g_mem_size, 0), DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost, mch.above_4g_mem_size, 0), + DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true), DEFINE_PROP_END_OF_LIST(), }; @@ -174,7 +208,9 @@ static void q35_host_initfn(Object *obj) object_property_add_child(OBJECT(s), "mch", OBJECT(&s->mch), NULL); qdev_prop_set_int32(DEVICE(&s->mch), "addr", PCI_DEVFN(0, 0)); qdev_prop_set_bit(DEVICE(&s->mch), "multifunction", false); - + /* mch's object_initialize resets the default value, set it again */ + qdev_prop_set_uint64(DEVICE(s), PCI_HOST_PROP_PCI_HOLE64_SIZE, + Q35_PCI_HOST_HOLE64_SIZE_DEFAULT); object_property_add(obj, PCI_HOST_PROP_PCI_HOLE_START, "uint32", q35_host_get_pci_hole_start, NULL, NULL, NULL, NULL); diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 087d184ef5..ef438bd765 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -238,7 +238,6 @@ void pc_guest_info_init(PCMachineState *pcms); #define PCI_HOST_PROP_PCI_HOLE64_SIZE "pci-hole64-size" #define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size" #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" -#define DEFAULT_PCI_HOLE64_SIZE (~0x0ULL) void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory, @@ -249,6 +248,7 @@ void pc_memory_init(PCMachineState *pcms, MemoryRegion *system_memory, MemoryRegion *rom_memory, MemoryRegion **ram_memory); +uint64_t pc_pci_hole64_start(void); qemu_irq pc_allocate_cpu_irq(void); DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus); void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, @@ -375,6 +375,14 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); .driver = TYPE_X86_CPU,\ .property = "x-hv-max-vps",\ .value = "0x40",\ + },{\ + .driver = "i440FX-pcihost",\ + .property = "x-pci-hole64-fix",\ + .value = "off",\ + },{\ + .driver = "q35-pcihost",\ + .property = "x-pci-hole64-fix",\ + .value = "off",\ }, #define PC_COMPAT_2_9 \ diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index 58983c00b3..8f4ddde393 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -68,6 +68,7 @@ typedef struct Q35PCIHost { PCIExpressHost parent_obj; /*< public >*/ + bool pci_hole64_fix; MCHPCIState mch; } Q35PCIHost; From 2d0f99ed38e291498613633bcab79811c7c4fd07 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Thu, 9 Nov 2017 17:46:45 +0200 Subject: [PATCH 4/9] hw/pcie-pci-bridge: restrict to X86 and ARM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCIE-PCI bridge is specific to "pure" PCIe systems (on QEMU we have X86 and ARM), it does not make sense to have it in other archs. Reported-by: Thomas Huth Signed-off-by: Marcel Apfelbaum Reviewed-by: Thomas Huth Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé Reviewed-by: Cornelia Huck Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Yongbok Kim --- hw/pci-bridge/Makefile.objs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/pci-bridge/Makefile.objs b/hw/pci-bridge/Makefile.objs index 666db37da2..1b05023662 100644 --- a/hw/pci-bridge/Makefile.objs +++ b/hw/pci-bridge/Makefile.objs @@ -1,5 +1,5 @@ -common-obj-y += pci_bridge_dev.o pcie_pci_bridge.o -common-obj-$(CONFIG_PCIE_PORT) += pcie_root_port.o gen_pcie_root_port.o +common-obj-y += pci_bridge_dev.o +common-obj-$(CONFIG_PCIE_PORT) += pcie_root_port.o gen_pcie_root_port.o pcie_pci_bridge.o common-obj-$(CONFIG_PXB) += pci_expander_bridge.o common-obj-$(CONFIG_XIO3130) += xio3130_upstream.o xio3130_downstream.o common-obj-$(CONFIG_IOH3420) += ioh3420.o From 45bd4b1c099843565e1686f09ae307984a08a3d6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 15 Nov 2017 05:00:52 +0200 Subject: [PATCH 5/9] tests/acpi-test-data: update _CRS in DSDT commit dadf988e81b15065ac1d6dbaf4b87b5b80c7b670 hw/pci-host: Fix x86 Host Bridges 64bit PCI hole Added a 64 bit hole to _CRS of PCI0. Update the expected files accordingly. Signed-off-by: Michael S. Tsirkin --- tests/acpi-test-data/pc/DSDT | Bin 5098 -> 5144 bytes tests/acpi-test-data/pc/DSDT.bridge | Bin 6957 -> 7003 bytes tests/acpi-test-data/pc/DSDT.cphp | Bin 5561 -> 5607 bytes tests/acpi-test-data/pc/DSDT.ipmikcs | Bin 5170 -> 5216 bytes tests/acpi-test-data/pc/DSDT.memhp | Bin 6463 -> 6509 bytes tests/acpi-test-data/q35/DSDT | Bin 7782 -> 7828 bytes tests/acpi-test-data/q35/DSDT.bridge | Bin 7799 -> 7845 bytes tests/acpi-test-data/q35/DSDT.cphp | Bin 8245 -> 8291 bytes tests/acpi-test-data/q35/DSDT.ipmibt | Bin 7857 -> 7903 bytes tests/acpi-test-data/q35/DSDT.memhp | Bin 9147 -> 9193 bytes 10 files changed, 0 insertions(+), 0 deletions(-) diff --git a/tests/acpi-test-data/pc/DSDT b/tests/acpi-test-data/pc/DSDT index 15c3135d65f168a91edfdc3471ea1d3f012a824f..99f05a502752d9dbac38fdd93f1ebb79b4564fb4 100644 GIT binary patch delta 98 zcmaE*K0|}cCD5uv2`1v!?+^ymL^npaU1zoXPdvIeJ~% d3=BNX3`l?x$o~KTe?5ps0u3Pc=GWZ+*#RA&6M_H$ delta 51 zcmbQC@k*V`CDxV5j&1XHNr;c;}#CK__;uyvg<4Ih!SU H{<8xBdNvM& diff --git a/tests/acpi-test-data/pc/DSDT.bridge b/tests/acpi-test-data/pc/DSDT.bridge index d38586c95bf31f0212279a2505efd8e2fd321ccc..cf23343e6402421f09da5d09f72811108fbd2661 100644 GIT binary patch delta 98 zcmZ2$cH4~0CD&9Awg_yJmb6N3N% delta 51 zcmca@w$_ZxCDxV5j&1XHNr;c;}#CK__;uyvg<4Ih!SU GocIB3sttqy diff --git a/tests/acpi-test-data/pc/DSDT.cphp b/tests/acpi-test-data/pc/DSDT.cphp index 2dd70bf9520406c36c3684714bb714e536a28d20..c99c49f43705e99d1e0a8ba19d44145dfa63d009 100644 GIT binary patch delta 98 zcmdm~{al;NCD&e7}A eW?)i<76CD5kr=)BV5j&1XHNr;c;}#CK?g3bIg_slxV5j&1XHNr;c;}#CK__;uyvbJtayI7) H{$~dOd}0qS diff --git a/tests/acpi-test-data/q35/DSDT b/tests/acpi-test-data/q35/DSDT index a080e2ace20ce9b88d5a61078d8caa0262617eed..aa402cca667f82ed0a2dc4969508d8f6e38ad910 100644 GIT binary patch delta 97 zcmaE6GsTw6CDJsB}io*2FOV5j&1XHNr;c;}#CK__;uyvhG$awgA^ G{R04T6c3I7 diff --git a/tests/acpi-test-data/q35/DSDT.bridge b/tests/acpi-test-data/q35/DSDT.bridge index 31a76732e563dde32e6d976670baa732a6b91807..fc3e79c583ababf5615e76ba2f7bc3df1483abb4 100644 GIT binary patch delta 98 zcmexvv(%Q$CD0XqQbM(5j b85nq&8IS-Yko_MBIFQ6ZOb~zaR@pxQS6~zP delta 50 zcmca_yU~`*CD Date: Tue, 14 Nov 2017 10:34:01 +0800 Subject: [PATCH 6/9] NUMA: Enable adding NUMA node implicitly Linux and Windows need ACPI SRAT table to make memory hotplug work properly, however currently QEMU doesn't create SRAT table if numa options aren't present on CLI. Which breaks both linux and windows guests in certain conditions: * Windows: won't enable memory hotplug without SRAT table at all * Linux: if QEMU is started with initial memory all below 4Gb and no SRAT table present, guest kernel will use nommu DMA ops, which breaks 32bit hw drivers when memory is hotplugged and guest tries to use it with that drivers. Fix above issues by automatically creating a numa node when QEMU is started with memory hotplug enabled but without '-numa' options on CLI. (PS: auto-create numa node only for new machine types so not to break migration). Which would provide SRAT table to guests without explicit -numa options on CLI and would allow: * Windows: to enable memory hotplug * Linux: switch to SWIOTLB DMA ops, to bounce DMA transfers to 32bit allocated buffers that legacy drivers/hw can handle. [Rewritten by Igor] Reported-by: Thadeu Lima de Souza Cascardo Suggested-by: Igor Mammedov Signed-off-by: Dou Liyang Cc: Paolo Bonzini Cc: Richard Henderson Cc: Eduardo Habkost Cc: "Michael S. Tsirkin" Cc: Marcel Apfelbaum Cc: Igor Mammedov Cc: David Hildenbrand Cc: Thomas Huth Cc: Alistair Francis Cc: Takao Indoh Cc: Izumi Taku Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc.c | 1 + hw/i386/pc_piix.c | 1 + hw/i386/pc_q35.c | 1 + include/hw/boards.h | 1 + numa.c | 21 ++++++++++++++++++++- vl.c | 3 +-- 6 files changed, 25 insertions(+), 3 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index fafe5ba5cd..c3afe5b7f1 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -2347,6 +2347,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->cpu_index_to_instance_props = pc_cpu_index_to_props; mc->get_default_cpu_node_id = pc_get_default_cpu_node_id; mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids; + mc->auto_enable_numa_with_memhp = true; mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; mc->hot_add_cpu = pc_hot_add_cpu; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index f79d5cb694..5e47528993 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -446,6 +446,7 @@ static void pc_i440fx_2_10_machine_options(MachineClass *m) m->is_default = 0; m->alias = NULL; SET_MACHINE_COMPAT(m, PC_COMPAT_2_10); + m->auto_enable_numa_with_memhp = false; } DEFINE_I440FX_MACHINE(v2_10, "pc-i440fx-2.10", NULL, diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index da3ea602e1..d6060043ac 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -318,6 +318,7 @@ static void pc_q35_2_10_machine_options(MachineClass *m) m->alias = NULL; SET_MACHINE_COMPAT(m, PC_COMPAT_2_10); m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; + m->auto_enable_numa_with_memhp = false; } DEFINE_Q35_MACHINE(v2_10, "pc-q35-2.10", NULL, diff --git a/include/hw/boards.h b/include/hw/boards.h index 62f160e0aa..156b16f7a6 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -197,6 +197,7 @@ struct MachineClass { bool ignore_memory_transaction_failures; int numa_mem_align_shift; const char **valid_cpu_types; + bool auto_enable_numa_with_memhp; void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes, int nb_nodes, ram_addr_t size); diff --git a/numa.c b/numa.c index 8d78d959f6..7151b24d1c 100644 --- a/numa.c +++ b/numa.c @@ -216,6 +216,7 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, } numa_info[nodenr].present = true; max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1); + nb_numa_nodes++; } static void parse_numa_distance(NumaDistOptions *dist, Error **errp) @@ -282,7 +283,6 @@ static int parse_numa(void *opaque, QemuOpts *opts, Error **errp) if (err) { goto end; } - nb_numa_nodes++; break; case NUMA_OPTIONS_TYPE_DIST: parse_numa_distance(&object->u.dist, &err); @@ -433,6 +433,25 @@ void parse_numa_opts(MachineState *ms) exit(1); } + /* + * If memory hotplug is enabled (slots > 0) but without '-numa' + * options explicitly on CLI, guestes will break. + * + * Windows: won't enable memory hotplug without SRAT table at all + * + * Linux: if QEMU is started with initial memory all below 4Gb + * and no SRAT table present, guest kernel will use nommu DMA ops, + * which breaks 32bit hw drivers when memory is hotplugged and + * guest tries to use it with that drivers. + * + * Enable NUMA implicitly by adding a new NUMA node automatically. + */ + if (ms->ram_slots > 0 && nb_numa_nodes == 0 && + mc->auto_enable_numa_with_memhp) { + NumaNodeOptions node = { }; + parse_numa_node(ms, &node, NULL); + } + assert(max_numa_nodeid <= MAX_NODES); /* No support for sparse NUMA node IDs yet: */ diff --git a/vl.c b/vl.c index 7372424fa7..1ad1c04637 100644 --- a/vl.c +++ b/vl.c @@ -4690,8 +4690,6 @@ int main(int argc, char **argv, char **envp) default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS); default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS); - parse_numa_opts(current_machine); - if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, NULL)) { exit(1); @@ -4741,6 +4739,7 @@ int main(int argc, char **argv, char **envp) current_machine->boot_order = boot_order; current_machine->cpu_model = cpu_model; + parse_numa_opts(current_machine); /* parse features once if machine provides default cpu_type */ if (machine_class->default_cpu_type) { From b948bb55dac527ae6b0c5e6dc69d00866a3a6fee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Mon, 6 Nov 2017 12:50:32 +0100 Subject: [PATCH 7/9] vmcoreinfo: put it in the 'misc' device category MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/misc/vmcoreinfo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/misc/vmcoreinfo.c b/hw/misc/vmcoreinfo.c index a618e12677..31db57ab44 100644 --- a/hw/misc/vmcoreinfo.c +++ b/hw/misc/vmcoreinfo.c @@ -79,6 +79,7 @@ static void vmcoreinfo_device_class_init(ObjectClass *klass, void *data) dc->vmsd = &vmstate_vmcoreinfo; dc->realize = vmcoreinfo_realize; dc->hotpluggable = false; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); } static const TypeInfo vmcoreinfo_device_info = { From f865da7c369fa00b2ccaf6bce158ad2701b2a27c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Mon, 6 Nov 2017 12:53:47 +0100 Subject: [PATCH 8/9] build-sys: restrict vmcoreinfo to fw_cfg+dma capable targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vmcoreinfo is built for all targets. However, it requires fw_cfg with DMA operations support (write operation). Restrict vmcoreinfo exposure to architectures that are supporting FW_CFG_DMA, that is arm-virt and x86 only atm. Signed-off-by: Marc-André Lureau Reviewed-by: Thomas Huth Reviewed-by: Daniel Henrique Barboza Tested-by: Daniel Henrique Barboza Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- default-configs/arm-softmmu.mak | 2 ++ default-configs/i386-softmmu.mak | 1 + default-configs/x86_64-softmmu.mak | 1 + hw/misc/Makefile.objs | 2 +- 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak index 5059d134c8..d37edc4312 100644 --- a/default-configs/arm-softmmu.mak +++ b/default-configs/arm-softmmu.mak @@ -130,3 +130,5 @@ CONFIG_SMBIOS=y CONFIG_ASPEED_SOC=y CONFIG_GPIO_KEY=y CONFIG_MSF2=y + +CONFIG_FW_CFG_DMA=y diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index a685c439e7..95ac4b464a 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -60,3 +60,4 @@ CONFIG_SMBIOS=y CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM) CONFIG_PXB=y CONFIG_ACPI_VMGENID=y +CONFIG_FW_CFG_DMA=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index ea69e8289e..0221236825 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -60,3 +60,4 @@ CONFIG_SMBIOS=y CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM) CONFIG_PXB=y CONFIG_ACPI_VMGENID=y +CONFIG_FW_CFG_DMA=y diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs index 19202d90cf..10c88a84b4 100644 --- a/hw/misc/Makefile.objs +++ b/hw/misc/Makefile.objs @@ -9,7 +9,7 @@ common-obj-$(CONFIG_PCI_TESTDEV) += pci-testdev.o common-obj-$(CONFIG_EDU) += edu.o common-obj-y += unimp.o -common-obj-y += vmcoreinfo.o +common-obj-$(CONFIG_FW_CFG_DMA) += vmcoreinfo.o obj-$(CONFIG_VMPORT) += vmport.o From 3831c07b89ab1f7aa1427bc56e9cdf70f5367933 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 16 Nov 2017 13:17:02 +0100 Subject: [PATCH 9/9] tests/bios-tables-test: Fix endianess problems when passing data to iasl The bios-tables-test was writing out files that we pass to iasl in with the wrong endianness in the header when running on a big endian host. So instead of storing mixed endian information in our structures, let's keep everything in little endian and byte-swap it only when we need a value in the code. Reported-by: Daniel P. Berrange Buglink: https://bugs.launchpad.net/qemu/+bug/1724570 Suggested-by: Michael S. Tsirkin Signed-off-by: Thomas Huth Tested-by: "Daniel P. Berrange" Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/acpi-utils.h | 27 +++++--------------------- tests/bios-tables-test.c | 42 +++++++++++++++++++++------------------- tests/vmgenid-test.c | 22 +++++++++++---------- 3 files changed, 39 insertions(+), 52 deletions(-) diff --git a/tests/acpi-utils.h b/tests/acpi-utils.h index f8d87236c6..d5ca5b6238 100644 --- a/tests/acpi-utils.h +++ b/tests/acpi-utils.h @@ -28,24 +28,9 @@ typedef struct { bool tmp_files_retain; /* do not delete the temp asl/aml */ } AcpiSdtTable; -#define ACPI_READ_FIELD(field, addr) \ - do { \ - switch (sizeof(field)) { \ - case 1: \ - field = readb(addr); \ - break; \ - case 2: \ - field = readw(addr); \ - break; \ - case 4: \ - field = readl(addr); \ - break; \ - case 8: \ - field = readq(addr); \ - break; \ - default: \ - g_assert(false); \ - } \ +#define ACPI_READ_FIELD(field, addr) \ + do { \ + memread(addr, &field, sizeof(field)); \ addr += sizeof(field); \ } while (0); @@ -74,16 +59,14 @@ typedef struct { } while (0); #define ACPI_ASSERT_CMP(actual, expected) do { \ - uint32_t ACPI_ASSERT_CMP_le = cpu_to_le32(actual); \ char ACPI_ASSERT_CMP_str[5] = {}; \ - memcpy(ACPI_ASSERT_CMP_str, &ACPI_ASSERT_CMP_le, 4); \ + memcpy(ACPI_ASSERT_CMP_str, &actual, 4); \ g_assert_cmpstr(ACPI_ASSERT_CMP_str, ==, expected); \ } while (0) #define ACPI_ASSERT_CMP64(actual, expected) do { \ - uint64_t ACPI_ASSERT_CMP_le = cpu_to_le64(actual); \ char ACPI_ASSERT_CMP_str[9] = {}; \ - memcpy(ACPI_ASSERT_CMP_str, &ACPI_ASSERT_CMP_le, 8); \ + memcpy(ACPI_ASSERT_CMP_str, &actual, 8); \ g_assert_cmpstr(ACPI_ASSERT_CMP_str, ==, expected); \ } while (0) diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c index 564da45f65..e28e0c98cf 100644 --- a/tests/bios-tables-test.c +++ b/tests/bios-tables-test.c @@ -96,17 +96,20 @@ static void test_acpi_rsdp_table(test_data *data) static void test_acpi_rsdt_table(test_data *data) { AcpiRsdtDescriptorRev1 *rsdt_table = &data->rsdt_table; - uint32_t addr = data->rsdp_table.rsdt_physical_address; + uint32_t addr = le32_to_cpu(data->rsdp_table.rsdt_physical_address); uint32_t *tables; int tables_nr; uint8_t checksum; + uint32_t rsdt_table_length; /* read the header */ ACPI_READ_TABLE_HEADER(rsdt_table, addr); ACPI_ASSERT_CMP(rsdt_table->signature, "RSDT"); + rsdt_table_length = le32_to_cpu(rsdt_table->length); + /* compute the table entries in rsdt */ - tables_nr = (rsdt_table->length - sizeof(AcpiRsdtDescriptorRev1)) / + tables_nr = (rsdt_table_length - sizeof(AcpiRsdtDescriptorRev1)) / sizeof(uint32_t); g_assert(tables_nr > 0); @@ -114,7 +117,7 @@ static void test_acpi_rsdt_table(test_data *data) tables = g_new0(uint32_t, tables_nr); ACPI_READ_ARRAY_PTR(tables, tables_nr, addr); - checksum = acpi_calc_checksum((uint8_t *)rsdt_table, rsdt_table->length) + + checksum = acpi_calc_checksum((uint8_t *)rsdt_table, rsdt_table_length) + acpi_calc_checksum((uint8_t *)tables, tables_nr * sizeof(uint32_t)); g_assert(!checksum); @@ -130,7 +133,7 @@ static void test_acpi_fadt_table(test_data *data) uint32_t addr; /* FADT table comes first */ - addr = data->rsdt_tables_addr[0]; + addr = le32_to_cpu(data->rsdt_tables_addr[0]); ACPI_READ_TABLE_HEADER(fadt_table, addr); ACPI_READ_FIELD(fadt_table->firmware_ctrl, addr); @@ -187,13 +190,14 @@ static void test_acpi_fadt_table(test_data *data) ACPI_READ_GENERIC_ADDRESS(fadt_table->xgpe1_block, addr); ACPI_ASSERT_CMP(fadt_table->signature, "FACP"); - g_assert(!acpi_calc_checksum((uint8_t *)fadt_table, fadt_table->length)); + g_assert(!acpi_calc_checksum((uint8_t *)fadt_table, + le32_to_cpu(fadt_table->length))); } static void test_acpi_facs_table(test_data *data) { AcpiFacsDescriptorRev1 *facs_table = &data->facs_table; - uint32_t addr = data->fadt_table.firmware_ctrl; + uint32_t addr = le32_to_cpu(data->fadt_table.firmware_ctrl); ACPI_READ_FIELD(facs_table->signature, addr); ACPI_READ_FIELD(facs_table->length, addr); @@ -212,7 +216,8 @@ static void test_dst_table(AcpiSdtTable *sdt_table, uint32_t addr) ACPI_READ_TABLE_HEADER(&sdt_table->header, addr); - sdt_table->aml_len = sdt_table->header.length - sizeof(AcpiTableHeader); + sdt_table->aml_len = le32_to_cpu(sdt_table->header.length) + - sizeof(AcpiTableHeader); sdt_table->aml = g_malloc0(sdt_table->aml_len); ACPI_READ_ARRAY_PTR(sdt_table->aml, sdt_table->aml_len, addr); @@ -226,7 +231,7 @@ static void test_dst_table(AcpiSdtTable *sdt_table, uint32_t addr) static void test_acpi_dsdt_table(test_data *data) { AcpiSdtTable dsdt_table; - uint32_t addr = data->fadt_table.dsdt; + uint32_t addr = le32_to_cpu(data->fadt_table.dsdt); memset(&dsdt_table, 0, sizeof(dsdt_table)); data->tables = g_array_new(false, true, sizeof(AcpiSdtTable)); @@ -245,9 +250,10 @@ static void test_acpi_tables(test_data *data) for (i = 0; i < tables_nr; i++) { AcpiSdtTable ssdt_table; + uint32_t addr; memset(&ssdt_table, 0, sizeof(ssdt_table)); - uint32_t addr = data->rsdt_tables_addr[i + 1]; /* fadt is first */ + addr = le32_to_cpu(data->rsdt_tables_addr[i + 1]); /* fadt is first */ test_dst_table(&ssdt_table, addr); g_array_append_val(data->tables, ssdt_table); } @@ -268,9 +274,8 @@ static void dump_aml_files(test_data *data, bool rebuild) g_assert(sdt->aml); if (rebuild) { - uint32_t signature = cpu_to_le32(sdt->header.signature); aml_file = g_strdup_printf("%s/%s/%.4s%s", data_dir, data->machine, - (gchar *)&signature, ext); + (gchar *)&sdt->header.signature, ext); fd = g_open(aml_file, O_WRONLY|O_TRUNC|O_CREAT, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH); } else { @@ -381,7 +386,6 @@ static GArray *load_expected_aml(test_data *data) GArray *exp_tables = g_array_new(false, true, sizeof(AcpiSdtTable)); for (i = 0; i < data->tables->len; ++i) { AcpiSdtTable exp_sdt; - uint32_t signature; gchar *aml_file = NULL; const char *ext = data->variant ? data->variant : ""; @@ -390,11 +394,9 @@ static GArray *load_expected_aml(test_data *data) memset(&exp_sdt, 0, sizeof(exp_sdt)); exp_sdt.header.signature = sdt->header.signature; - signature = cpu_to_le32(sdt->header.signature); - try_again: aml_file = g_strdup_printf("%s/%s/%.4s%s", data_dir, data->machine, - (gchar *)&signature, ext); + (gchar *)&sdt->header.signature, ext); if (getenv("V")) { fprintf(stderr, "\nLooking for expected file '%s'\n", aml_file); } @@ -571,12 +573,12 @@ static void test_smbios_structs(test_data *data) { DECLARE_BITMAP(struct_bitmap, SMBIOS_MAX_TYPE+1) = { 0 }; struct smbios_21_entry_point *ep_table = &data->smbios_ep_table; - uint32_t addr = ep_table->structure_table_address; + uint32_t addr = le32_to_cpu(ep_table->structure_table_address); int i, len, max_len = 0; uint8_t type, prv, crt; /* walk the smbios tables */ - for (i = 0; i < ep_table->number_of_structures; i++) { + for (i = 0; i < le16_to_cpu(ep_table->number_of_structures); i++) { /* grab type and formatted area length from struct header */ type = readb(addr); @@ -608,9 +610,9 @@ static void test_smbios_structs(test_data *data) } /* total table length and max struct size must match entry point values */ - g_assert_cmpuint(ep_table->structure_table_length, ==, - addr - ep_table->structure_table_address); - g_assert_cmpuint(ep_table->max_structure_size, ==, max_len); + g_assert_cmpuint(le16_to_cpu(ep_table->structure_table_length), ==, + addr - le32_to_cpu(ep_table->structure_table_address)); + g_assert_cmpuint(le16_to_cpu(ep_table->max_structure_size), ==, max_len); /* required struct types must all be present */ for (i = 0; i < data->required_struct_types_len; i++) { diff --git a/tests/vmgenid-test.c b/tests/vmgenid-test.c index b6e7b3b086..5a86b40775 100644 --- a/tests/vmgenid-test.c +++ b/tests/vmgenid-test.c @@ -38,7 +38,7 @@ static uint32_t acpi_find_vgia(void) uint32_t rsdp_offset; uint32_t guid_offset = 0; AcpiRsdpDescriptor rsdp_table; - uint32_t rsdt; + uint32_t rsdt, rsdt_table_length; AcpiRsdtDescriptorRev1 rsdt_table; size_t tables_nr; uint32_t *tables; @@ -56,14 +56,15 @@ static uint32_t acpi_find_vgia(void) acpi_parse_rsdp_table(rsdp_offset, &rsdp_table); - rsdt = rsdp_table.rsdt_physical_address; + rsdt = le32_to_cpu(rsdp_table.rsdt_physical_address); /* read the header */ ACPI_READ_TABLE_HEADER(&rsdt_table, rsdt); ACPI_ASSERT_CMP(rsdt_table.signature, "RSDT"); + rsdt_table_length = le32_to_cpu(rsdt_table.length); /* compute the table entries in rsdt */ - g_assert_cmpint(rsdt_table.length, >, sizeof(AcpiRsdtDescriptorRev1)); - tables_nr = (rsdt_table.length - sizeof(AcpiRsdtDescriptorRev1)) / + g_assert_cmpint(rsdt_table_length, >, sizeof(AcpiRsdtDescriptorRev1)); + tables_nr = (rsdt_table_length - sizeof(AcpiRsdtDescriptorRev1)) / sizeof(uint32_t); /* get the addresses of the tables pointed by rsdt */ @@ -71,23 +72,24 @@ static uint32_t acpi_find_vgia(void) ACPI_READ_ARRAY_PTR(tables, tables_nr, rsdt); for (i = 0; i < tables_nr; i++) { - ACPI_READ_TABLE_HEADER(&ssdt_table, tables[i]); + uint32_t addr = le32_to_cpu(tables[i]); + ACPI_READ_TABLE_HEADER(&ssdt_table, addr); if (!strncmp((char *)ssdt_table.oem_table_id, "VMGENID", 7)) { /* the first entry in the table should be VGIA * That's all we need */ - ACPI_READ_FIELD(vgid_table.name_op, tables[i]); + ACPI_READ_FIELD(vgid_table.name_op, addr); g_assert(vgid_table.name_op == 0x08); /* name */ - ACPI_READ_ARRAY(vgid_table.vgia, tables[i]); + ACPI_READ_ARRAY(vgid_table.vgia, addr); g_assert(memcmp(vgid_table.vgia, "VGIA", 4) == 0); - ACPI_READ_FIELD(vgid_table.val_op, tables[i]); + ACPI_READ_FIELD(vgid_table.val_op, addr); g_assert(vgid_table.val_op == 0x0C); /* dword */ - ACPI_READ_FIELD(vgid_table.vgia_val, tables[i]); + ACPI_READ_FIELD(vgid_table.vgia_val, addr); /* The GUID is written at a fixed offset into the fw_cfg file * in order to implement the "OVMF SDT Header probe suppressor" * see docs/specs/vmgenid.txt for more details */ - guid_offset = vgid_table.vgia_val + VMGENID_GUID_OFFSET; + guid_offset = le32_to_cpu(vgid_table.vgia_val) + VMGENID_GUID_OFFSET; break; } }