qemu/hw/i386/pc_q35.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

680 lines
23 KiB
C
Raw Normal View History

/*
* Q35 chipset based pc system emulator
*
* Copyright (c) 2003-2004 Fabrice Bellard
* Copyright (c) 2009, 2010
* Isaku Yamahata <yamahata at valinux co jp>
* VA Linux Systems Japan K.K.
* Copyright (C) 2012 Jason Baron <jbaron@redhat.com>
*
* This is based on pc.c, but heavily modified.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/units.h"
#include "hw/acpi/acpi.h"
#include "hw/char/parallel-isa.h"
#include "hw/loader.h"
#include "hw/i2c/smbus_eeprom.h"
#include "hw/rtc/mc146818rtc.h"
#include "sysemu/tcg.h"
#include "sysemu/kvm.h"
#include "hw/i386/kvm/clock.h"
#include "hw/pci-host/q35.h"
#include "hw/pci/pcie_port.h"
#include "hw/qdev-properties.h"
#include "hw/i386/x86.h"
#include "hw/i386/pc.h"
#include "hw/i386/amd_iommu.h"
#include "hw/i386/intel_iommu.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/display/ramfb.h"
#include "hw/ide/pci.h"
#include "hw/ide/ahci-pci.h"
#include "hw/intc/ioapic.h"
#include "hw/southbridge/ich9.h"
#include "hw/usb.h"
#include "hw/usb/hcd-uhci.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
numa: equally distribute memory on nodes When there are more nodes than available memory to put the minimum allowed memory by node, all the memory is put on the last node. This is because we put (ram_size / nb_numa_nodes) & ~((1 << mc->numa_mem_align_shift) - 1); on each node, and in this case the value is 0. This is particularly true with pseries, as the memory must be aligned to 256MB. To avoid this problem, this patch uses an error diffusion algorithm [1] to distribute equally the memory on nodes. We introduce numa_auto_assign_ram() function in MachineClass to keep compatibility between machine type versions. The legacy function is used with pseries-2.9, pc-q35-2.9 and pc-i440fx-2.9 (and previous), the new one with all others. Example: qemu-system-ppc64 -S -nographic -nodefaults -monitor stdio -m 1G -smp 8 \ -numa node -numa node -numa node \ -numa node -numa node -numa node Before: (qemu) info numa 6 nodes node 0 cpus: 0 6 node 0 size: 0 MB node 1 cpus: 1 7 node 1 size: 0 MB node 2 cpus: 2 node 2 size: 0 MB node 3 cpus: 3 node 3 size: 0 MB node 4 cpus: 4 node 4 size: 0 MB node 5 cpus: 5 node 5 size: 1024 MB After: (qemu) info numa 6 nodes node 0 cpus: 0 6 node 0 size: 0 MB node 1 cpus: 1 7 node 1 size: 256 MB node 2 cpus: 2 node 2 size: 0 MB node 3 cpus: 3 node 3 size: 256 MB node 4 cpus: 4 node 4 size: 256 MB node 5 cpus: 5 node 5 size: 256 MB [1] https://en.wikipedia.org/wiki/Error_diffusion Signed-off-by: Laurent Vivier <lvivier@redhat.com> Message-Id: <20170502162955.1610-2-lvivier@redhat.com> Reviewed-by: Eduardo Habkost <ehabkost@redhat.com> [ehabkost: s/ram_size/size/ at numa_default_auto_assign_ram()] Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-05-02 19:29:55 +03:00
#include "sysemu/numa.h"
#include "hw/hyperv/vmbus-bridge.h"
hw/i386: Include "hw/mem/nvdimm.h" All this files use methods/definitions declared in the NVDIMM device header. Include it. This fixes (when modifying unrelated headers): hw/i386/acpi-build.c:2733:9: error: implicit declaration of function 'nvdimm_build_acpi' is invalid in C99 [-Werror,-Wimplicit-function-declaration] nvdimm_build_acpi(table_offsets, tables_blob, tables->linker, ^ hw/i386/pc.c:1996:61: error: use of undeclared identifier 'TYPE_NVDIMM' const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); ^ hw/i386/pc.c:2032:55: error: use of undeclared identifier 'TYPE_NVDIMM' bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); ^ hw/i386/pc.c:2040:9: error: implicit declaration of function 'nvdimm_plug' is invalid in C99 [-Werror,-Wimplicit-function-declaration] nvdimm_plug(ms->nvdimms_state); ^ hw/i386/pc.c:2040:9: error: this function declaration is not a prototype [-Werror,-Wstrict-prototypes] nvdimm_plug(ms->nvdimms_state); ^ hw/i386/pc.c:2065:42: error: use of undeclared identifier 'TYPE_NVDIMM' if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) { ^ hw/i386/pc_i440fx.c:307:9: error: implicit declaration of function 'nvdimm_init_acpi_state' is invalid in C99 [-Werror,-Wimplicit-function-declaration] nvdimm_init_acpi_state(machine->nvdimms_state, system_io, ^ hw/i386/pc_q35.c:332:9: error: implicit declaration of function 'nvdimm_init_acpi_state' is invalid in C99 [-Werror,-Wimplicit-function-declaration] nvdimm_init_acpi_state(machine->nvdimms_state, system_io, ^ Acked-by: John Snow <jsnow@redhat.com> Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-Id: <20200228114649.12818-17-philmd@redhat.com> Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2020-02-28 14:46:47 +03:00
#include "hw/mem/nvdimm.h"
#include "hw/i386/acpi-build.h"
#include "target/i386/cpu.h"
/* ICH9 AHCI has 6 ports */
#define MAX_SATA_PORTS 6
static GlobalProperty pc_q35_compat_defaults[] = {
{ TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "39" },
};
static const size_t pc_q35_compat_defaults_len =
G_N_ELEMENTS(pc_q35_compat_defaults);
struct ehci_companions {
const char *name;
int func;
int port;
};
static const struct ehci_companions ich9_1d[] = {
{ .name = TYPE_ICH9_USB_UHCI(1), .func = 0, .port = 0 },
{ .name = TYPE_ICH9_USB_UHCI(2), .func = 1, .port = 2 },
{ .name = TYPE_ICH9_USB_UHCI(3), .func = 2, .port = 4 },
};
static const struct ehci_companions ich9_1a[] = {
{ .name = TYPE_ICH9_USB_UHCI(4), .func = 0, .port = 0 },
{ .name = TYPE_ICH9_USB_UHCI(5), .func = 1, .port = 2 },
{ .name = TYPE_ICH9_USB_UHCI(6), .func = 2, .port = 4 },
};
static int ehci_create_ich9_with_companions(PCIBus *bus, int slot)
{
const struct ehci_companions *comp;
PCIDevice *ehci, *uhci;
BusState *usbbus;
const char *name;
int i;
switch (slot) {
case 0x1d:
name = "ich9-usb-ehci1";
comp = ich9_1d;
break;
case 0x1a:
name = "ich9-usb-ehci2";
comp = ich9_1a;
break;
default:
return -1;
}
ehci = pci_new_multifunction(PCI_DEVFN(slot, 7), name);
pci: Convert uses of pci_create() etc. with Coccinelle Replace dev = pci_create(bus, type_name); ... qdev_init_nofail(dev); by dev = pci_new(type_name); ... pci_realize_and_unref(dev, bus, &error_fatal); and similarly for pci_create_multifunction(). Recent commit "qdev: New qdev_new(), qdev_realize(), etc." explains why. Coccinelle script: @@ expression dev, bus, expr; expression list args; @@ - dev = pci_create(bus, args); + dev = pci_new(args); ... when != dev = expr - qdev_init_nofail(&dev->qdev); + pci_realize_and_unref(dev, bus, &error_fatal); @@ expression dev, bus, expr; expression list args; expression d; @@ - dev = pci_create(bus, args); + dev = pci_new(args); ( d = &dev->qdev; | d = DEVICE(dev); ) ... when != dev = expr - qdev_init_nofail(d); + pci_realize_and_unref(dev, bus, &error_fatal); @@ expression dev, bus, expr; expression list args; @@ - dev = pci_create(bus, args); + dev = pci_new(args); ... when != dev = expr - qdev_init_nofail(DEVICE(dev)); + pci_realize_and_unref(dev, bus, &error_fatal); @@ expression dev, bus, expr; expression list args; @@ - dev = DEVICE(pci_create(bus, args)); + PCIDevice *pci_dev; // TODO move + pci_dev = pci_new(args); + dev = DEVICE(pci_dev); ... when != dev = expr - qdev_init_nofail(dev); + pci_realize_and_unref(pci_dev, bus, &error_fatal); @@ expression dev, bus, expr; expression list args; @@ - dev = pci_create_multifunction(bus, args); + dev = pci_new_multifunction(args); ... when != dev = expr - qdev_init_nofail(&dev->qdev); + pci_realize_and_unref(dev, bus, &error_fatal); @@ expression bus, expr; expression list args; identifier dev; @@ - PCIDevice *dev = pci_create_multifunction(bus, args); + PCIDevice *dev = pci_new_multifunction(args); ... when != dev = expr - qdev_init_nofail(&dev->qdev); + pci_realize_and_unref(dev, bus, &error_fatal); @@ expression dev, bus, expr; expression list args; @@ - dev = pci_create_multifunction(bus, args); + dev = pci_new_multifunction(args); ... when != dev = expr - qdev_init_nofail(DEVICE(dev)); + pci_realize_and_unref(dev, bus, &error_fatal); Missing #include "qapi/error.h" added manually, whitespace changes minimized manually, @pci_dev declarations moved manually. Cc: Michael S. Tsirkin <mst@redhat.com> Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> Message-Id: <20200610053247.1583243-16-armbru@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
2020-06-10 08:32:04 +03:00
pci_realize_and_unref(ehci, bus, &error_fatal);
usbbus = QLIST_FIRST(&ehci->qdev.child_bus);
for (i = 0; i < 3; i++) {
uhci = pci_new_multifunction(PCI_DEVFN(slot, comp[i].func),
pci: Convert uses of pci_create() etc. with Coccinelle Replace dev = pci_create(bus, type_name); ... qdev_init_nofail(dev); by dev = pci_new(type_name); ... pci_realize_and_unref(dev, bus, &error_fatal); and similarly for pci_create_multifunction(). Recent commit "qdev: New qdev_new(), qdev_realize(), etc." explains why. Coccinelle script: @@ expression dev, bus, expr; expression list args; @@ - dev = pci_create(bus, args); + dev = pci_new(args); ... when != dev = expr - qdev_init_nofail(&dev->qdev); + pci_realize_and_unref(dev, bus, &error_fatal); @@ expression dev, bus, expr; expression list args; expression d; @@ - dev = pci_create(bus, args); + dev = pci_new(args); ( d = &dev->qdev; | d = DEVICE(dev); ) ... when != dev = expr - qdev_init_nofail(d); + pci_realize_and_unref(dev, bus, &error_fatal); @@ expression dev, bus, expr; expression list args; @@ - dev = pci_create(bus, args); + dev = pci_new(args); ... when != dev = expr - qdev_init_nofail(DEVICE(dev)); + pci_realize_and_unref(dev, bus, &error_fatal); @@ expression dev, bus, expr; expression list args; @@ - dev = DEVICE(pci_create(bus, args)); + PCIDevice *pci_dev; // TODO move + pci_dev = pci_new(args); + dev = DEVICE(pci_dev); ... when != dev = expr - qdev_init_nofail(dev); + pci_realize_and_unref(pci_dev, bus, &error_fatal); @@ expression dev, bus, expr; expression list args; @@ - dev = pci_create_multifunction(bus, args); + dev = pci_new_multifunction(args); ... when != dev = expr - qdev_init_nofail(&dev->qdev); + pci_realize_and_unref(dev, bus, &error_fatal); @@ expression bus, expr; expression list args; identifier dev; @@ - PCIDevice *dev = pci_create_multifunction(bus, args); + PCIDevice *dev = pci_new_multifunction(args); ... when != dev = expr - qdev_init_nofail(&dev->qdev); + pci_realize_and_unref(dev, bus, &error_fatal); @@ expression dev, bus, expr; expression list args; @@ - dev = pci_create_multifunction(bus, args); + dev = pci_new_multifunction(args); ... when != dev = expr - qdev_init_nofail(DEVICE(dev)); + pci_realize_and_unref(dev, bus, &error_fatal); Missing #include "qapi/error.h" added manually, whitespace changes minimized manually, @pci_dev declarations moved manually. Cc: Michael S. Tsirkin <mst@redhat.com> Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> Message-Id: <20200610053247.1583243-16-armbru@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
2020-06-10 08:32:04 +03:00
comp[i].name);
qdev_prop_set_string(&uhci->qdev, "masterbus", usbbus->name);
qdev_prop_set_uint32(&uhci->qdev, "firstport", comp[i].port);
pci: Convert uses of pci_create() etc. with Coccinelle Replace dev = pci_create(bus, type_name); ... qdev_init_nofail(dev); by dev = pci_new(type_name); ... pci_realize_and_unref(dev, bus, &error_fatal); and similarly for pci_create_multifunction(). Recent commit "qdev: New qdev_new(), qdev_realize(), etc." explains why. Coccinelle script: @@ expression dev, bus, expr; expression list args; @@ - dev = pci_create(bus, args); + dev = pci_new(args); ... when != dev = expr - qdev_init_nofail(&dev->qdev); + pci_realize_and_unref(dev, bus, &error_fatal); @@ expression dev, bus, expr; expression list args; expression d; @@ - dev = pci_create(bus, args); + dev = pci_new(args); ( d = &dev->qdev; | d = DEVICE(dev); ) ... when != dev = expr - qdev_init_nofail(d); + pci_realize_and_unref(dev, bus, &error_fatal); @@ expression dev, bus, expr; expression list args; @@ - dev = pci_create(bus, args); + dev = pci_new(args); ... when != dev = expr - qdev_init_nofail(DEVICE(dev)); + pci_realize_and_unref(dev, bus, &error_fatal); @@ expression dev, bus, expr; expression list args; @@ - dev = DEVICE(pci_create(bus, args)); + PCIDevice *pci_dev; // TODO move + pci_dev = pci_new(args); + dev = DEVICE(pci_dev); ... when != dev = expr - qdev_init_nofail(dev); + pci_realize_and_unref(pci_dev, bus, &error_fatal); @@ expression dev, bus, expr; expression list args; @@ - dev = pci_create_multifunction(bus, args); + dev = pci_new_multifunction(args); ... when != dev = expr - qdev_init_nofail(&dev->qdev); + pci_realize_and_unref(dev, bus, &error_fatal); @@ expression bus, expr; expression list args; identifier dev; @@ - PCIDevice *dev = pci_create_multifunction(bus, args); + PCIDevice *dev = pci_new_multifunction(args); ... when != dev = expr - qdev_init_nofail(&dev->qdev); + pci_realize_and_unref(dev, bus, &error_fatal); @@ expression dev, bus, expr; expression list args; @@ - dev = pci_create_multifunction(bus, args); + dev = pci_new_multifunction(args); ... when != dev = expr - qdev_init_nofail(DEVICE(dev)); + pci_realize_and_unref(dev, bus, &error_fatal); Missing #include "qapi/error.h" added manually, whitespace changes minimized manually, @pci_dev declarations moved manually. Cc: Michael S. Tsirkin <mst@redhat.com> Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> Message-Id: <20200610053247.1583243-16-armbru@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
2020-06-10 08:32:04 +03:00
pci_realize_and_unref(uhci, bus, &error_fatal);
}
return 0;
}
/* PC hardware initialisation */
static void pc_q35_init(MachineState *machine)
{
PCMachineState *pcms = PC_MACHINE(machine);
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
X86MachineState *x86ms = X86_MACHINE(machine);
Object *phb;
PCIDevice *lpc;
DeviceState *lpc_dev;
MemoryRegion *system_memory = get_system_memory();
MemoryRegion *system_io = get_system_io();
MemoryRegion *pci_memory = g_new(MemoryRegion, 1);
GSIState *gsi_state;
ISABus *isa_bus;
int i;
ram_addr_t lowmem;
DriveInfo *hd[MAX_SATA_PORTS];
MachineClass *mc = MACHINE_GET_CLASS(machine);
bool acpi_pcihp;
bool keep_pci_slot_hpc;
uint64_t pci_hole64_size = 0;
assert(pcmc->pci_enabled);
/* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
* and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
* also known as MMCFG).
* If it doesn't, we need to split it in chunks below and above 4G.
* In any case, try to make sure that guest addresses aligned at
* 1G boundaries get mapped to host addresses aligned at 1G boundaries.
*/
if (machine->ram_size >= 0xb0000000) {
lowmem = 0x80000000;
} else {
lowmem = 0xb0000000;
}
/* Handle the machine opt max-ram-below-4g. It is basically doing
* min(qemu limit, user limit).
*/
if (!pcms->max_ram_below_4g) {
pcms->max_ram_below_4g = 4 * GiB;
}
if (lowmem > pcms->max_ram_below_4g) {
lowmem = pcms->max_ram_below_4g;
if (machine->ram_size - lowmem > lowmem &&
lowmem & (1 * GiB - 1)) {
warn_report("There is possibly poor performance as the ram size "
" (0x%" PRIx64 ") is more then twice the size of"
" max-ram-below-4g (%"PRIu64") and"
" max-ram-below-4g is not a multiple of 1G.",
(uint64_t)machine->ram_size, pcms->max_ram_below_4g);
}
}
if (machine->ram_size >= lowmem) {
x86ms->above_4g_mem_size = machine->ram_size - lowmem;
x86ms->below_4g_mem_size = lowmem;
} else {
x86ms->above_4g_mem_size = 0;
x86ms->below_4g_mem_size = machine->ram_size;
}
pc_machine_init_sgx_epc(pcms);
x86_cpus_init(x86ms, pcmc->default_cpu_version);
if (kvm_enabled()) {
kvmclock_create(pcmc->kvmclock_create_always);
}
/* create pci host bus */
phb = OBJECT(qdev_new(TYPE_Q35_HOST_DEVICE));
pci_hole64_size = object_property_get_uint(phb,
PCI_HOST_PROP_PCI_HOLE64_SIZE,
&error_abort);
/* allocate ram and load rom/bios */
memory_region_init(pci_memory, NULL, "pci", UINT64_MAX);
pc_memory_init(pcms, system_memory, pci_memory, pci_hole64_size);
object_property_add_child(OBJECT(machine), "q35", phb);
object_property_set_link(phb, PCI_HOST_PROP_RAM_MEM,
OBJECT(machine->ram), NULL);
object_property_set_link(phb, PCI_HOST_PROP_PCI_MEM,
qom: Put name parameter before value / visitor parameter The object_property_set_FOO() setters take property name and value in an unusual order: void object_property_set_FOO(Object *obj, FOO_TYPE value, const char *name, Error **errp) Having to pass value before name feels grating. Swap them. Same for object_property_set(), object_property_get(), and object_property_parse(). Convert callers with this Coccinelle script: @@ identifier fun = { object_property_get, object_property_parse, object_property_set_str, object_property_set_link, object_property_set_bool, object_property_set_int, object_property_set_uint, object_property_set, object_property_set_qobject }; expression obj, v, name, errp; @@ - fun(obj, v, name, errp) + fun(obj, name, v, errp) Chokes on hw/arm/musicpal.c's lcd_refresh() with the unhelpful error message "no position information". Convert that one manually. Fails to convert hw/arm/armsse.c, because Coccinelle gets confused by ARMSSE being used both as typedef and function-like macro there. Convert manually. Fails to convert hw/rx/rx-gdbsim.c, because Coccinelle gets confused by RXCPU being used both as typedef and function-like macro there. Convert manually. The other files using RXCPU that way don't need conversion. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Message-Id: <20200707160613.848843-27-armbru@redhat.com> [Straightforwad conflict with commit 2336172d9b "audio: set default value for pcspk.iobase property" resolved]
2020-07-07 19:05:54 +03:00
OBJECT(pci_memory), NULL);
object_property_set_link(phb, PCI_HOST_PROP_SYSTEM_MEM,
OBJECT(system_memory), NULL);
object_property_set_link(phb, PCI_HOST_PROP_IO_MEM,
qom: Put name parameter before value / visitor parameter The object_property_set_FOO() setters take property name and value in an unusual order: void object_property_set_FOO(Object *obj, FOO_TYPE value, const char *name, Error **errp) Having to pass value before name feels grating. Swap them. Same for object_property_set(), object_property_get(), and object_property_parse(). Convert callers with this Coccinelle script: @@ identifier fun = { object_property_get, object_property_parse, object_property_set_str, object_property_set_link, object_property_set_bool, object_property_set_int, object_property_set_uint, object_property_set, object_property_set_qobject }; expression obj, v, name, errp; @@ - fun(obj, v, name, errp) + fun(obj, name, v, errp) Chokes on hw/arm/musicpal.c's lcd_refresh() with the unhelpful error message "no position information". Convert that one manually. Fails to convert hw/arm/armsse.c, because Coccinelle gets confused by ARMSSE being used both as typedef and function-like macro there. Convert manually. Fails to convert hw/rx/rx-gdbsim.c, because Coccinelle gets confused by RXCPU being used both as typedef and function-like macro there. Convert manually. The other files using RXCPU that way don't need conversion. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Message-Id: <20200707160613.848843-27-armbru@redhat.com> [Straightforwad conflict with commit 2336172d9b "audio: set default value for pcspk.iobase property" resolved]
2020-07-07 19:05:54 +03:00
OBJECT(system_io), NULL);
object_property_set_int(phb, PCI_HOST_BELOW_4G_MEM_SIZE,
qom: Put name parameter before value / visitor parameter The object_property_set_FOO() setters take property name and value in an unusual order: void object_property_set_FOO(Object *obj, FOO_TYPE value, const char *name, Error **errp) Having to pass value before name feels grating. Swap them. Same for object_property_set(), object_property_get(), and object_property_parse(). Convert callers with this Coccinelle script: @@ identifier fun = { object_property_get, object_property_parse, object_property_set_str, object_property_set_link, object_property_set_bool, object_property_set_int, object_property_set_uint, object_property_set, object_property_set_qobject }; expression obj, v, name, errp; @@ - fun(obj, v, name, errp) + fun(obj, name, v, errp) Chokes on hw/arm/musicpal.c's lcd_refresh() with the unhelpful error message "no position information". Convert that one manually. Fails to convert hw/arm/armsse.c, because Coccinelle gets confused by ARMSSE being used both as typedef and function-like macro there. Convert manually. Fails to convert hw/rx/rx-gdbsim.c, because Coccinelle gets confused by RXCPU being used both as typedef and function-like macro there. Convert manually. The other files using RXCPU that way don't need conversion. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Message-Id: <20200707160613.848843-27-armbru@redhat.com> [Straightforwad conflict with commit 2336172d9b "audio: set default value for pcspk.iobase property" resolved]
2020-07-07 19:05:54 +03:00
x86ms->below_4g_mem_size, NULL);
object_property_set_int(phb, PCI_HOST_ABOVE_4G_MEM_SIZE,
qom: Put name parameter before value / visitor parameter The object_property_set_FOO() setters take property name and value in an unusual order: void object_property_set_FOO(Object *obj, FOO_TYPE value, const char *name, Error **errp) Having to pass value before name feels grating. Swap them. Same for object_property_set(), object_property_get(), and object_property_parse(). Convert callers with this Coccinelle script: @@ identifier fun = { object_property_get, object_property_parse, object_property_set_str, object_property_set_link, object_property_set_bool, object_property_set_int, object_property_set_uint, object_property_set, object_property_set_qobject }; expression obj, v, name, errp; @@ - fun(obj, v, name, errp) + fun(obj, name, v, errp) Chokes on hw/arm/musicpal.c's lcd_refresh() with the unhelpful error message "no position information". Convert that one manually. Fails to convert hw/arm/armsse.c, because Coccinelle gets confused by ARMSSE being used both as typedef and function-like macro there. Convert manually. Fails to convert hw/rx/rx-gdbsim.c, because Coccinelle gets confused by RXCPU being used both as typedef and function-like macro there. Convert manually. The other files using RXCPU that way don't need conversion. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Message-Id: <20200707160613.848843-27-armbru@redhat.com> [Straightforwad conflict with commit 2336172d9b "audio: set default value for pcspk.iobase property" resolved]
2020-07-07 19:05:54 +03:00
x86ms->above_4g_mem_size, NULL);
object_property_set_bool(phb, PCI_HOST_BYPASS_IOMMU,
pcms->default_bus_bypass_iommu, NULL);
object_property_set_bool(phb, PCI_HOST_PROP_SMM_RANGES,
x86_machine_is_smm_enabled(x86ms), NULL);
sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal);
/* pci */
pcms->pcibus = PCI_BUS(qdev_get_child_bus(DEVICE(phb), "pcie.0"));
/* irq lines */
gsi_state = pc_gsi_create(&x86ms->gsi, true);
/* create ISA bus */
lpc = pci_new_multifunction(PCI_DEVFN(ICH9_LPC_DEV, ICH9_LPC_FUNC),
TYPE_ICH9_LPC_DEVICE);
lpc_dev = DEVICE(lpc);
qdev_prop_set_bit(lpc_dev, "smm-enabled",
x86_machine_is_smm_enabled(x86ms));
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
qdev_connect_gpio_out_named(lpc_dev, ICH9_GPIO_GSI, i, x86ms->gsi[i]);
}
pci_realize_and_unref(lpc, pcms->pcibus, &error_fatal);
x86ms->rtc = ISA_DEVICE(object_resolve_path_component(OBJECT(lpc), "rtc"));
object_property_add_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP,
TYPE_HOTPLUG_HANDLER,
(Object **)&x86ms->acpi_dev,
object_property_allow_set_link,
qom: Drop parameter @errp of object_property_add() & friends The only way object_property_add() can fail is when a property with the same name already exists. Since our property names are all hardcoded, failure is a programming error, and the appropriate way to handle it is passing &error_abort. Same for its variants, except for object_property_add_child(), which additionally fails when the child already has a parent. Parentage is also under program control, so this is a programming error, too. We have a bit over 500 callers. Almost half of them pass &error_abort, slightly fewer ignore errors, one test case handles errors, and the remaining few callers pass them to their own callers. The previous few commits demonstrated once again that ignoring programming errors is a bad idea. Of the few ones that pass on errors, several violate the Error API. The Error ** argument must be NULL, &error_abort, &error_fatal, or a pointer to a variable containing NULL. Passing an argument of the latter kind twice without clearing it in between is wrong: if the first call sets an error, it no longer points to NULL for the second call. ich9_pm_add_properties(), sparc32_ledma_realize(), sparc32_dma_realize(), xilinx_axidma_realize(), xilinx_enet_realize() are wrong that way. When the one appropriate choice of argument is &error_abort, letting users pick the argument is a bad idea. Drop parameter @errp and assert the preconditions instead. There's one exception to "duplicate property name is a programming error": the way object_property_add() implements the magic (and undocumented) "automatic arrayification". Don't drop @errp there. Instead, rename object_property_add() to object_property_try_add(), and add the obvious wrapper object_property_add(). Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> Message-Id: <20200505152926.18877-15-armbru@redhat.com> [Two semantic rebase conflicts resolved]
2020-05-05 18:29:22 +03:00
OBJ_PROP_LINK_STRONG);
qom: Put name parameter before value / visitor parameter The object_property_set_FOO() setters take property name and value in an unusual order: void object_property_set_FOO(Object *obj, FOO_TYPE value, const char *name, Error **errp) Having to pass value before name feels grating. Swap them. Same for object_property_set(), object_property_get(), and object_property_parse(). Convert callers with this Coccinelle script: @@ identifier fun = { object_property_get, object_property_parse, object_property_set_str, object_property_set_link, object_property_set_bool, object_property_set_int, object_property_set_uint, object_property_set, object_property_set_qobject }; expression obj, v, name, errp; @@ - fun(obj, v, name, errp) + fun(obj, name, v, errp) Chokes on hw/arm/musicpal.c's lcd_refresh() with the unhelpful error message "no position information". Convert that one manually. Fails to convert hw/arm/armsse.c, because Coccinelle gets confused by ARMSSE being used both as typedef and function-like macro there. Convert manually. Fails to convert hw/rx/rx-gdbsim.c, because Coccinelle gets confused by RXCPU being used both as typedef and function-like macro there. Convert manually. The other files using RXCPU that way don't need conversion. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Message-Id: <20200707160613.848843-27-armbru@redhat.com> [Straightforwad conflict with commit 2336172d9b "audio: set default value for pcspk.iobase property" resolved]
2020-07-07 19:05:54 +03:00
object_property_set_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP,
OBJECT(lpc), &error_abort);
acpi_pcihp = object_property_get_bool(OBJECT(lpc),
ACPI_PM_PROP_ACPI_PCIHP_BRIDGE,
NULL);
keep_pci_slot_hpc = object_property_get_bool(OBJECT(lpc),
"x-keep-pci-slot-hpc",
NULL);
if (!keep_pci_slot_hpc && acpi_pcihp) {
object_register_sugar_prop(TYPE_PCIE_SLOT,
"x-do-not-expose-native-hotplug-cap",
"true", true);
}
isa_bus = ISA_BUS(qdev_get_child_bus(lpc_dev, "isa.0"));
if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) {
pc_i8259_create(isa_bus, gsi_state->i8259_irq);
}
ioapic_init_gsi(gsi_state, OBJECT(phb));
if (tcg_enabled()) {
x86_register_ferr_irq(x86ms->gsi[13]);
}
/* init basic PC hardware */
pc_basic_device_init(pcms, isa_bus, x86ms->gsi, x86ms->rtc, !mc->no_floppy,
0xff0104);
if (pcms->sata_enabled) {
PCIDevice *pdev;
AHCIPCIState *ich9;
/* ahci and SATA device, for q35 1 ahci controller is built-in */
pdev = pci_create_simple_multifunction(pcms->pcibus,
PCI_DEVFN(ICH9_SATA1_DEV,
ICH9_SATA1_FUNC),
"ich9-ahci");
ich9 = ICH9_AHCI(pdev);
pcms->idebus[0] = qdev_get_child_bus(DEVICE(pdev), "ide.0");
pcms->idebus[1] = qdev_get_child_bus(DEVICE(pdev), "ide.1");
g_assert(MAX_SATA_PORTS == ich9->ahci.ports);
ide_drive_get(hd, ich9->ahci.ports);
ahci_ide_create_devs(&ich9->ahci, hd);
}
if (machine_usb(machine)) {
/* Should we create 6 UHCI according to ich9 spec? */
ehci_create_ich9_with_companions(pcms->pcibus, 0x1d);
}
if (pcms->smbus_enabled) {
PCIDevice *smb;
/* TODO: Populate SPD eeprom data. */
smb = pci_create_simple_multifunction(pcms->pcibus,
PCI_DEVFN(ICH9_SMB_DEV,
ICH9_SMB_FUNC),
TYPE_ICH9_SMB_DEVICE);
pcms->smbus = I2C_BUS(qdev_get_child_bus(DEVICE(smb), "i2c"));
smbus_eeprom_init(pcms->smbus, 8, NULL, 0);
}
/* the rest devices to which pci devfn is automatically assigned */
pc_vga_init(isa_bus, pcms->pcibus);
pc_nic_init(pcmc, isa_bus, pcms->pcibus);
if (machine->nvdimms_state->is_enabled) {
nvdimm_init_acpi_state(machine->nvdimms_state, system_io,
x86_nvdimm_acpi_dsmio,
x86ms->fw_cfg, OBJECT(pcms));
}
}
#define DEFINE_Q35_MACHINE(major, minor) \
DEFINE_PC_VER_MACHINE(pc_q35, "pc-q35", pc_q35_init, major, minor);
#define DEFINE_Q35_MACHINE_BUGFIX(major, minor, micro) \
DEFINE_PC_VER_MACHINE(pc_q35, "pc-q35", pc_q35_init, major, minor, micro);
pc: Kill the "use flash device for BIOS unless KVM" misfeature Use of a flash memory device for the BIOS was added in series "[PATCH v10 0/8] PC system flash support", commit 4732dca..1b89faf, v1.1. Flash vs. ROM is a guest-visible difference. Thus, flash use had to be suppressed for machine types pc-1.0 and older. This was accomplished by adding a dummy device "pc-sysfw" with property "rom_only": * Non-zero rom_only means "use ROM". Default for pc-1.0 and older. * Zero rom_only means "maybe use flash". Default for newer machines. Not only is the dummy device ugly, it was also retroactively added to the older machine types! Fortunately, it's not guest-visible (thus no immediate guest ABI breakage), and has no vmstate (thus no immediate migration breakage). Breakage occurs only if the user unwisely enables flash by setting rom_only to zero. Patch review FAIL #1. Why "maybe use flash"? Flash didn't (and still doesn't) work with KVM. Therefore, rom_only=0 really means "use flash, except when KVM is enabled, use ROM". This is a Bad Idea, because it makes enabling/ disabling KVM guest-visible. Patch review FAIL #2. Aside: it also precludes migrating between KVM on and off, but that's not possible for other reasons anyway. Fix as follows: 1. Change the meaning of rom_only=0 to mean "use flash, no ifs, buts, or maybes" for pc-i440fx-1.5 and pc-q35-1.5. Don't change anything for older machines (to remain bug-compatible). 2. Change the default value from 0 to 1 for these machines. Necessary, because 0 doesn't work with KVM. Once it does, we can flip the default back to 0. 3. Don't revert the retroactive addition of device "pc-sysfw" to older machine types. Seems not worth the trouble. 4. Add a TODO comment asking for device "pc-sysfw" to be dropped once flash works with KVM. Net effect is that you get a BIOS ROM again even when KVM is disabled, just like for machines predating the introduction of flash. To get flash instead, use "--global pc-sysfw.rom_only=0". Signed-off-by: Markus Armbruster <armbru@redhat.com> Message-id: 1365780303-26398-4-git-send-email-armbru@redhat.com Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
2013-04-12 19:25:03 +04:00
static void pc_q35_machine_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pcmc->pci_root_uid = 0;
pcmc->default_cpu_version = 1;
m->family = "pc_q35";
m->desc = "Standard PC (Q35 + ICH9, 2009)";
m->units_per_default_bus = 1;
m->default_machine_opts = "firmware=bios-256k.bin";
m->default_display = "std";
m->default_nic = "e1000e";
m->default_kernel_irqchip_split = false;
m->no_floppy = 1;
pc: q35: Bump max_cpus to 4096 vcpus Since commit f10a570b093e6 ("KVM: x86: Add CONFIG_KVM_MAX_NR_VCPUS to allow up to 4096 vCPUs") Linux kernel can support upto a maximum number of 4096 vcpus when MAXSMP is enabled in the kernel. At present, QEMU has been tested to correctly boot a linux guest with 4096 vcpus using the current edk2 upstream master branch that has the fixes corresponding to the following two PRs: https://github.com/tianocore/edk2/pull/5410 https://github.com/tianocore/edk2/pull/5418 The changes merged into edk2 with the above PRs will be in the upcoming 2024-05 release. With current seabios firmware, it boots fine with 4096 vcpus already. So bump up the value max_cpus to 4096 for q35 machines versions 9 and newer. Q35 machines versions 8.2 and older continue to support 1024 maximum vcpus as before for compatibility reasons. If KVM is not able to support the specified number of vcpus, QEMU would return the following error messages: $ ./qemu-system-x86_64 -cpu host -accel kvm -machine q35 -smp 1728 qemu-system-x86_64: -accel kvm: warning: Number of SMP cpus requested (1728) exceeds the recommended cpus supported by KVM (12) qemu-system-x86_64: -accel kvm: warning: Number of hotpluggable cpus requested (1728) exceeds the recommended cpus supported by KVM (12) Number of SMP cpus requested (1728) exceeds the maximum cpus supported by KVM (1024) Cc: Daniel P. Berrangé <berrange@redhat.com> Cc: Igor Mammedov <imammedo@redhat.com> Cc: Michael S. Tsirkin <mst@redhat.com> Cc: Julia Suvorova <jusual@redhat.com> Cc: kraxel@redhat.com Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> Reviewed-by: Igor Mammedov <imammedo@redhat.com> Reviewed-by: Gerd Hoffmann <kraxel@redhat.com> Signed-off-by: Ani Sinha <anisinha@redhat.com> Message-Id: <20240228143351.3967-1-anisinha@redhat.com> Reviewed-by: Zhao Liu <zhao1.liu@intel.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2024-02-28 17:33:51 +03:00
m->max_cpus = 4096;
m->no_parallel = !module_object_class_by_name(TYPE_ISA_PARALLEL);
machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE);
machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE);
machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE);
machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE);
compat_props_add(m->compat_props,
pc_q35_compat_defaults, pc_q35_compat_defaults_len);
}
static void pc_q35_machine_9_2_options(MachineClass *m)
{
pc_q35_machine_options(m);
m->alias = "q35";
}
DEFINE_Q35_MACHINE(9, 2);
static void pc_q35_machine_9_1_options(MachineClass *m)
{
pc_q35_machine_9_2_options(m);
m->alias = NULL;
compat_props_add(m->compat_props, hw_compat_9_1, hw_compat_9_1_len);
compat_props_add(m->compat_props, pc_compat_9_1, pc_compat_9_1_len);
}
DEFINE_Q35_MACHINE(9, 1);
static void pc_q35_machine_9_0_options(MachineClass *m)
{
hw/i386/pc_sysfw: Alias rather than copy isa-bios region In the -bios case the "isa-bios" memory region is an alias to the BIOS mapped to the top of the 4G memory boundary. Do the same in the -pflash case, but only for new machine versions for migration compatibility. This establishes common behavior and makes pflash commands work in the "isa-bios" region which some real-world legacy bioses rely on. Note that in the sev_enabled() case, the "isa-bios" memory region in the -pflash case will now also point to encrypted memory, just like it already does in the -bios case. When running `info mtree` before and after this commit with `qemu-system-x86_64 -S -drive \ if=pflash,format=raw,readonly=on,file=/usr/share/qemu/bios-256k.bin` and running `diff -u before.mtree after.mtree` results in the following changes in the memory tree: --- before.mtree +++ after.mtree @@ -71,7 +71,7 @@ 0000000000000000-ffffffffffffffff (prio -1, i/o): pci 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff @@ -108,7 +108,7 @@ 0000000000000000-ffffffffffffffff (prio -1, i/o): pci 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff @@ -131,11 +131,14 @@ memory-region: pc.ram 0000000000000000-0000000007ffffff (prio 0, ram): pc.ram +memory-region: system.flash0 + 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0 + memory-region: pci 0000000000000000-ffffffffffffffff (prio -1, i/o): pci 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff memory-region: smram 00000000000a0000-00000000000bffff (prio 0, ram): alias smram-low @pc.ram 00000000000a0000-00000000000bffff Note that in both cases the "system" memory region contains the entry 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0 but the "system.flash0" memory region only appears standalone when "isa-bios" is an alias. Signed-off-by: Bernhard Beschow <shentey@gmail.com> Message-ID: <20240508175507.22270-7-shentey@gmail.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2024-05-08 20:55:07 +03:00
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_q35_machine_9_1_options(m);
smbios: make memory device size configurable per Machine Currently QEMU describes initial[1] RAM* in SMBIOS as a series of virtual DIMMs (capped at 16Gb max) using type 17 structure entries. Which is fine for the most cases. However when starting guest with terabytes of RAM this leads to too many memory device structures, which eventually upsets linux kernel as it reserves only 64K for these entries and when that border is crossed out it runs out of reserved memory. Instead of partitioning initial RAM on 16Gb DIMMs, use maximum possible chunk size that SMBIOS spec allows[2]. Which lets encode RAM in lower 31 bits of 32bit field (which amounts upto 2047Tb per DIMM). As result initial RAM will generate only one type 17 structure until host/guest reach ability to use more RAM in the future. Compat changes: We can't unconditionally change chunk size as it will break QEMU<->guest ABI (and migration). Thus introduce a new machine class field that would let older versioned machines to use legacy 16Gb chunks, while new(er) machine type[s] use maximum possible chunk size. PS: While it might seem to be risky to rise max entry size this large (much beyond of what current physical RAM modules support), I'd not expect it causing much issues, modulo uncovering bugs in software running within guest. And those should be fixed on guest side to handle SMBIOS spec properly, especially if guest is expected to support so huge RAM configs. In worst case, QEMU can reduce chunk size later if we would care enough about introducing a workaround for some 'unfixable' guest OS, either by fixing up the next machine type or giving users a CLI option to customize it. 1) Initial RAM - is RAM configured with help '-m SIZE' CLI option/ implicitly defined by machine. It doesn't include memory configured with help of '-device' option[s] (pcdimm,nvdimm,...) 2) SMBIOS 3.1.0 7.18.5 Memory Device — Extended Size PS: * tested on 8Tb host with RHEL6 guest, which seems to parse type 17 SMBIOS table entries correctly (according to 'dmidecode'). Signed-off-by: Igor Mammedov <imammedo@redhat.com> Message-Id: <20240715122417.4059293-1-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2024-07-15 15:24:17 +03:00
m->smbios_memory_device_size = 16 * GiB;
compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len);
compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len);
hw/i386/pc_sysfw: Alias rather than copy isa-bios region In the -bios case the "isa-bios" memory region is an alias to the BIOS mapped to the top of the 4G memory boundary. Do the same in the -pflash case, but only for new machine versions for migration compatibility. This establishes common behavior and makes pflash commands work in the "isa-bios" region which some real-world legacy bioses rely on. Note that in the sev_enabled() case, the "isa-bios" memory region in the -pflash case will now also point to encrypted memory, just like it already does in the -bios case. When running `info mtree` before and after this commit with `qemu-system-x86_64 -S -drive \ if=pflash,format=raw,readonly=on,file=/usr/share/qemu/bios-256k.bin` and running `diff -u before.mtree after.mtree` results in the following changes in the memory tree: --- before.mtree +++ after.mtree @@ -71,7 +71,7 @@ 0000000000000000-ffffffffffffffff (prio -1, i/o): pci 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff @@ -108,7 +108,7 @@ 0000000000000000-ffffffffffffffff (prio -1, i/o): pci 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff @@ -131,11 +131,14 @@ memory-region: pc.ram 0000000000000000-0000000007ffffff (prio 0, ram): pc.ram +memory-region: system.flash0 + 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0 + memory-region: pci 0000000000000000-ffffffffffffffff (prio -1, i/o): pci 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff memory-region: smram 00000000000a0000-00000000000bffff (prio 0, ram): alias smram-low @pc.ram 00000000000a0000-00000000000bffff Note that in both cases the "system" memory region contains the entry 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0 but the "system.flash0" memory region only appears standalone when "isa-bios" is an alias. Signed-off-by: Bernhard Beschow <shentey@gmail.com> Message-ID: <20240508175507.22270-7-shentey@gmail.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2024-05-08 20:55:07 +03:00
pcmc->isa_bios_alias = false;
}
DEFINE_Q35_MACHINE(9, 0);
static void pc_q35_machine_8_2_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_q35_machine_9_0_options(m);
pc: q35: Bump max_cpus to 4096 vcpus Since commit f10a570b093e6 ("KVM: x86: Add CONFIG_KVM_MAX_NR_VCPUS to allow up to 4096 vCPUs") Linux kernel can support upto a maximum number of 4096 vcpus when MAXSMP is enabled in the kernel. At present, QEMU has been tested to correctly boot a linux guest with 4096 vcpus using the current edk2 upstream master branch that has the fixes corresponding to the following two PRs: https://github.com/tianocore/edk2/pull/5410 https://github.com/tianocore/edk2/pull/5418 The changes merged into edk2 with the above PRs will be in the upcoming 2024-05 release. With current seabios firmware, it boots fine with 4096 vcpus already. So bump up the value max_cpus to 4096 for q35 machines versions 9 and newer. Q35 machines versions 8.2 and older continue to support 1024 maximum vcpus as before for compatibility reasons. If KVM is not able to support the specified number of vcpus, QEMU would return the following error messages: $ ./qemu-system-x86_64 -cpu host -accel kvm -machine q35 -smp 1728 qemu-system-x86_64: -accel kvm: warning: Number of SMP cpus requested (1728) exceeds the recommended cpus supported by KVM (12) qemu-system-x86_64: -accel kvm: warning: Number of hotpluggable cpus requested (1728) exceeds the recommended cpus supported by KVM (12) Number of SMP cpus requested (1728) exceeds the maximum cpus supported by KVM (1024) Cc: Daniel P. Berrangé <berrange@redhat.com> Cc: Igor Mammedov <imammedo@redhat.com> Cc: Michael S. Tsirkin <mst@redhat.com> Cc: Julia Suvorova <jusual@redhat.com> Cc: kraxel@redhat.com Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> Reviewed-by: Igor Mammedov <imammedo@redhat.com> Reviewed-by: Gerd Hoffmann <kraxel@redhat.com> Signed-off-by: Ani Sinha <anisinha@redhat.com> Message-Id: <20240228143351.3967-1-anisinha@redhat.com> Reviewed-by: Zhao Liu <zhao1.liu@intel.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2024-02-28 17:33:51 +03:00
m->max_cpus = 1024;
compat_props_add(m->compat_props, hw_compat_8_2, hw_compat_8_2_len);
compat_props_add(m->compat_props, pc_compat_8_2, pc_compat_8_2_len);
/* For pc-q35-8.2 and 8.1, use SMBIOS 3.X by default */
pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64;
}
DEFINE_Q35_MACHINE(8, 2);
static void pc_q35_machine_8_1_options(MachineClass *m)
{
hw/i386/pc: improve physical address space bound check for 32-bit x86 systems 32-bit x86 systems do not have a reserved memory for hole64. On those 32-bit systems without PSE36 or PAE CPU features, hotplugging memory devices are not supported by QEMU as QEMU always places hotplugged memory above 4 GiB boundary which is beyond the physical address space of the processor. Linux guests also does not support memory hotplug on those systems. Please see Linux kernel commit b59d02ed08690 ("mm/memory_hotplug: disable the functionality for 32b") for more details. Therefore, the maximum limit of the guest physical address in the absence of additional memory devices effectively coincides with the end of "above 4G memory space" region for 32-bit x86 without PAE/PSE36. When users configure additional memory devices, after properly accounting for the additional device memory region to find the maximum value of the guest physical address, the address will be outside the range of the processor's physical address space. This change adds improvements to take above into consideration. For example, previously this was allowed: $ ./qemu-system-x86_64 -cpu pentium -m size=10G With this change now it is no longer allowed: $ ./qemu-system-x86_64 -cpu pentium -m size=10G qemu-system-x86_64: Address space limit 0xffffffff < 0x2bfffffff phys-bits too low (32) However, the following are allowed since on both cases physical address space of the processor is 36 bits: $ ./qemu-system-x86_64 -cpu pentium2 -m size=10G $ ./qemu-system-x86_64 -cpu pentium,pse36=on -m size=10G For 32-bit, without PAE/PSE36, hotplugging additional memory is no longer allowed. $ ./qemu-system-i386 -m size=1G,maxmem=3G,slots=2 qemu-system-i386: Address space limit 0xffffffff < 0x1ffffffff phys-bits too low (32) $ ./qemu-system-i386 -machine q35 -m size=1G,maxmem=3G,slots=2 qemu-system-i386: Address space limit 0xffffffff < 0x1ffffffff phys-bits too low (32) A new compatibility flag is introduced to make sure pc_max_used_gpa() keeps returning the old value for machines 8.1 and older. Therefore, the above is still allowed for older machine types in order to support compatibility. Hence, the following still works: $ ./qemu-system-i386 -machine pc-i440fx-8.1 -m size=1G,maxmem=3G,slots=2 $ ./qemu-system-i386 -machine pc-q35-8.1 -m size=1G,maxmem=3G,slots=2 Further, following is also allowed as with PSE36, the processor has 36-bit address space: $ ./qemu-system-i386 -cpu 486,pse36=on -m size=1G,maxmem=3G,slots=2 After calling CPUID with EAX=0x80000001, all AMD64 compliant processors have the longmode-capable-bit turned on in the extended feature flags (bit 29) in EDX. The absence of CPUID longmode can be used to differentiate between 32-bit and 64-bit processors and is the recommended approach. QEMU takes this approach elsewhere (for example, please see x86_cpu_realizefn()), With this change, pc_max_used_gpa() also uses the same method to detect 32-bit processors. Unit tests are modified to not run 32-bit x86 tests that use memory hotplug. Suggested-by: David Hildenbrand <david@redhat.com> Signed-off-by: Ani Sinha <anisinha@redhat.com> Reviewed-by: David Hildenbrand <david@redhat.com> Message-Id: <20230922160413.165702-1-anisinha@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-09-22 19:04:13 +03:00
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_q35_machine_8_2_options(m);
hw/i386/pc: improve physical address space bound check for 32-bit x86 systems 32-bit x86 systems do not have a reserved memory for hole64. On those 32-bit systems without PSE36 or PAE CPU features, hotplugging memory devices are not supported by QEMU as QEMU always places hotplugged memory above 4 GiB boundary which is beyond the physical address space of the processor. Linux guests also does not support memory hotplug on those systems. Please see Linux kernel commit b59d02ed08690 ("mm/memory_hotplug: disable the functionality for 32b") for more details. Therefore, the maximum limit of the guest physical address in the absence of additional memory devices effectively coincides with the end of "above 4G memory space" region for 32-bit x86 without PAE/PSE36. When users configure additional memory devices, after properly accounting for the additional device memory region to find the maximum value of the guest physical address, the address will be outside the range of the processor's physical address space. This change adds improvements to take above into consideration. For example, previously this was allowed: $ ./qemu-system-x86_64 -cpu pentium -m size=10G With this change now it is no longer allowed: $ ./qemu-system-x86_64 -cpu pentium -m size=10G qemu-system-x86_64: Address space limit 0xffffffff < 0x2bfffffff phys-bits too low (32) However, the following are allowed since on both cases physical address space of the processor is 36 bits: $ ./qemu-system-x86_64 -cpu pentium2 -m size=10G $ ./qemu-system-x86_64 -cpu pentium,pse36=on -m size=10G For 32-bit, without PAE/PSE36, hotplugging additional memory is no longer allowed. $ ./qemu-system-i386 -m size=1G,maxmem=3G,slots=2 qemu-system-i386: Address space limit 0xffffffff < 0x1ffffffff phys-bits too low (32) $ ./qemu-system-i386 -machine q35 -m size=1G,maxmem=3G,slots=2 qemu-system-i386: Address space limit 0xffffffff < 0x1ffffffff phys-bits too low (32) A new compatibility flag is introduced to make sure pc_max_used_gpa() keeps returning the old value for machines 8.1 and older. Therefore, the above is still allowed for older machine types in order to support compatibility. Hence, the following still works: $ ./qemu-system-i386 -machine pc-i440fx-8.1 -m size=1G,maxmem=3G,slots=2 $ ./qemu-system-i386 -machine pc-q35-8.1 -m size=1G,maxmem=3G,slots=2 Further, following is also allowed as with PSE36, the processor has 36-bit address space: $ ./qemu-system-i386 -cpu 486,pse36=on -m size=1G,maxmem=3G,slots=2 After calling CPUID with EAX=0x80000001, all AMD64 compliant processors have the longmode-capable-bit turned on in the extended feature flags (bit 29) in EDX. The absence of CPUID longmode can be used to differentiate between 32-bit and 64-bit processors and is the recommended approach. QEMU takes this approach elsewhere (for example, please see x86_cpu_realizefn()), With this change, pc_max_used_gpa() also uses the same method to detect 32-bit processors. Unit tests are modified to not run 32-bit x86 tests that use memory hotplug. Suggested-by: David Hildenbrand <david@redhat.com> Signed-off-by: Ani Sinha <anisinha@redhat.com> Reviewed-by: David Hildenbrand <david@redhat.com> Message-Id: <20230922160413.165702-1-anisinha@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-09-22 19:04:13 +03:00
pcmc->broken_32bit_mem_addr_check = true;
compat_props_add(m->compat_props, hw_compat_8_1, hw_compat_8_1_len);
compat_props_add(m->compat_props, pc_compat_8_1, pc_compat_8_1_len);
}
DEFINE_Q35_MACHINE(8, 1);
static void pc_q35_machine_8_0_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_q35_machine_8_1_options(m);
compat_props_add(m->compat_props, hw_compat_8_0, hw_compat_8_0_len);
compat_props_add(m->compat_props, pc_compat_8_0, pc_compat_8_0_len);
/* For pc-q35-8.0 and older, use SMBIOS 2.8 by default */
pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32;
m->max_cpus = 288;
}
DEFINE_Q35_MACHINE(8, 0);
static void pc_q35_machine_7_2_options(MachineClass *m)
{
pc_q35_machine_8_0_options(m);
compat_props_add(m->compat_props, hw_compat_7_2, hw_compat_7_2_len);
compat_props_add(m->compat_props, pc_compat_7_2, pc_compat_7_2_len);
}
DEFINE_Q35_MACHINE(7, 2);
static void pc_q35_machine_7_1_options(MachineClass *m)
{
pc_q35_machine_7_2_options(m);
compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len);
compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len);
}
DEFINE_Q35_MACHINE(7, 1);
static void pc_q35_machine_7_0_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_q35_machine_7_1_options(m);
pcmc->enforce_amd_1tb_hole = false;
compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
}
DEFINE_Q35_MACHINE(7, 0);
static void pc_q35_machine_6_2_options(MachineClass *m)
{
pc_q35_machine_7_0_options(m);
compat_props_add(m->compat_props, hw_compat_6_2, hw_compat_6_2_len);
compat_props_add(m->compat_props, pc_compat_6_2, pc_compat_6_2_len);
}
DEFINE_Q35_MACHINE(6, 2);
static void pc_q35_machine_6_1_options(MachineClass *m)
{
pc_q35_machine_6_2_options(m);
compat_props_add(m->compat_props, hw_compat_6_1, hw_compat_6_1_len);
compat_props_add(m->compat_props, pc_compat_6_1, pc_compat_6_1_len);
m->smp_props.prefer_sockets = true;
}
DEFINE_Q35_MACHINE(6, 1);
static void pc_q35_machine_6_0_options(MachineClass *m)
{
pc_q35_machine_6_1_options(m);
compat_props_add(m->compat_props, hw_compat_6_0, hw_compat_6_0_len);
compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len);
}
DEFINE_Q35_MACHINE(6, 0);
static void pc_q35_machine_5_2_options(MachineClass *m)
{
pc_q35_machine_6_0_options(m);
compat_props_add(m->compat_props, hw_compat_5_2, hw_compat_5_2_len);
compat_props_add(m->compat_props, pc_compat_5_2, pc_compat_5_2_len);
}
DEFINE_Q35_MACHINE(5, 2);
static void pc_q35_machine_5_1_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_q35_machine_5_2_options(m);
compat_props_add(m->compat_props, hw_compat_5_1, hw_compat_5_1_len);
compat_props_add(m->compat_props, pc_compat_5_1, pc_compat_5_1_len);
pcmc->kvmclock_create_always = false;
pcmc->pci_root_uid = 1;
}
DEFINE_Q35_MACHINE(5, 1);
static void pc_q35_machine_5_0_options(MachineClass *m)
{
pc_q35_machine_5_1_options(m);
m->numa_mem_supported = true;
compat_props_add(m->compat_props, hw_compat_5_0, hw_compat_5_0_len);
compat_props_add(m->compat_props, pc_compat_5_0, pc_compat_5_0_len);
m->auto_enable_numa_with_memdev = false;
}
DEFINE_Q35_MACHINE(5, 0);
static void pc_q35_machine_4_2_options(MachineClass *m)
{
pc_q35_machine_5_0_options(m);
compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len);
compat_props_add(m->compat_props, pc_compat_4_2, pc_compat_4_2_len);
}
DEFINE_Q35_MACHINE(4, 2);
static void pc_q35_machine_4_1_options(MachineClass *m)
{
pc_q35_machine_4_2_options(m);
compat_props_add(m->compat_props, hw_compat_4_1, hw_compat_4_1_len);
compat_props_add(m->compat_props, pc_compat_4_1, pc_compat_4_1_len);
}
DEFINE_Q35_MACHINE(4, 1);
static void pc_q35_machine_4_0_1_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_q35_machine_4_1_options(m);
pcmc->default_cpu_version = CPU_VERSION_LEGACY;
hw: Nuke hw_compat_4_0_1 and pc_compat_4_0_1 Commit c87759ce876a fixed a regression affecting pc-q35 machines by introducing a new pc-q35-4.0.1 machine version to be used instead of pc-q35-4.0. The only purpose was to revert the default behaviour of not using split irqchip, but the change also introduced the usual hw_compat and pc_compat bits, and wired them for pc-q35 only. This raises questions when it comes to add new compat properties for 4.0* machine versions of any architecture. Where to add them ? In 4.0, 4.0.1 or both ? Error prone. Another possibility would be to teach all other architectures about 4.0.1. This solution isn't satisfying, especially since this is a pc-q35 specific issue. It turns out that the split irqchip default is handled in the machine option function and doesn't involve compat lists at all. Drop all the 4.0.1 compat lists and use the 4.0 ones instead in the 4.0.1 machine option function. Move the compat props that were added to the 4.0.1 since c87759ce876a to 4.0. Even if only hw_compat_4_0_1 had an impact on other architectures, drop pc_compat_4_0_1 as well for consistency. Fixes: c87759ce876a "q35: Revert to kernel irqchip" Suggested-by: Dr. David Alan Gilbert <dgilbert@redhat.com> Signed-off-by: Greg Kurz <groug@kaod.org> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Message-Id: <156051774276.244890.8660277280145466396.stgit@bahia.lan> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2019-06-14 16:09:02 +03:00
/*
* This is the default machine for the 4.0-stable branch. It is basically
* a 4.0 that doesn't use split irqchip by default. It MUST hence apply the
* 4.0 compat props.
*/
compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len);
compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len);
}
DEFINE_Q35_MACHINE_BUGFIX(4, 0, 1);
static void pc_q35_machine_4_0_options(MachineClass *m)
{
pc_q35_machine_4_0_1_options(m);
m->default_kernel_irqchip_split = true;
hw: Nuke hw_compat_4_0_1 and pc_compat_4_0_1 Commit c87759ce876a fixed a regression affecting pc-q35 machines by introducing a new pc-q35-4.0.1 machine version to be used instead of pc-q35-4.0. The only purpose was to revert the default behaviour of not using split irqchip, but the change also introduced the usual hw_compat and pc_compat bits, and wired them for pc-q35 only. This raises questions when it comes to add new compat properties for 4.0* machine versions of any architecture. Where to add them ? In 4.0, 4.0.1 or both ? Error prone. Another possibility would be to teach all other architectures about 4.0.1. This solution isn't satisfying, especially since this is a pc-q35 specific issue. It turns out that the split irqchip default is handled in the machine option function and doesn't involve compat lists at all. Drop all the 4.0.1 compat lists and use the 4.0 ones instead in the 4.0.1 machine option function. Move the compat props that were added to the 4.0.1 since c87759ce876a to 4.0. Even if only hw_compat_4_0_1 had an impact on other architectures, drop pc_compat_4_0_1 as well for consistency. Fixes: c87759ce876a "q35: Revert to kernel irqchip" Suggested-by: Dr. David Alan Gilbert <dgilbert@redhat.com> Signed-off-by: Greg Kurz <groug@kaod.org> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Message-Id: <156051774276.244890.8660277280145466396.stgit@bahia.lan> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2019-06-14 16:09:02 +03:00
/* Compat props are applied by the 4.0.1 machine */
}
DEFINE_Q35_MACHINE(4, 0);
static void pc_q35_machine_3_1_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_q35_machine_4_0_options(m);
m->default_kernel_irqchip_split = false;
m->smbus_no_migration_support = true;
pcmc->pvh_enabled = false;
compat_props_add(m->compat_props, hw_compat_3_1, hw_compat_3_1_len);
compat_props_add(m->compat_props, pc_compat_3_1, pc_compat_3_1_len);
}
DEFINE_Q35_MACHINE(3, 1);
static void pc_q35_machine_3_0_options(MachineClass *m)
{
pc_q35_machine_3_1_options(m);
compat_props_add(m->compat_props, hw_compat_3_0, hw_compat_3_0_len);
compat_props_add(m->compat_props, pc_compat_3_0, pc_compat_3_0_len);
}
DEFINE_Q35_MACHINE(3, 0);
static void pc_q35_machine_2_12_options(MachineClass *m)
{
pc_q35_machine_3_0_options(m);
compat_props_add(m->compat_props, hw_compat_2_12, hw_compat_2_12_len);
compat_props_add(m->compat_props, pc_compat_2_12, pc_compat_2_12_len);
}
DEFINE_Q35_MACHINE(2, 12);
static void pc_q35_machine_2_11_options(MachineClass *m)
{
pc_q35_machine_2_12_options(m);
m->default_nic = "e1000";
compat_props_add(m->compat_props, hw_compat_2_11, hw_compat_2_11_len);
compat_props_add(m->compat_props, pc_compat_2_11, pc_compat_2_11_len);
}
DEFINE_Q35_MACHINE(2, 11);
static void pc_q35_machine_2_10_options(MachineClass *m)
{
pc_q35_machine_2_11_options(m);
compat_props_add(m->compat_props, hw_compat_2_10, hw_compat_2_10_len);
compat_props_add(m->compat_props, pc_compat_2_10, pc_compat_2_10_len);
NUMA: Enable adding NUMA node implicitly Linux and Windows need ACPI SRAT table to make memory hotplug work properly, however currently QEMU doesn't create SRAT table if numa options aren't present on CLI. Which breaks both linux and windows guests in certain conditions: * Windows: won't enable memory hotplug without SRAT table at all * Linux: if QEMU is started with initial memory all below 4Gb and no SRAT table present, guest kernel will use nommu DMA ops, which breaks 32bit hw drivers when memory is hotplugged and guest tries to use it with that drivers. Fix above issues by automatically creating a numa node when QEMU is started with memory hotplug enabled but without '-numa' options on CLI. (PS: auto-create numa node only for new machine types so not to break migration). Which would provide SRAT table to guests without explicit -numa options on CLI and would allow: * Windows: to enable memory hotplug * Linux: switch to SWIOTLB DMA ops, to bounce DMA transfers to 32bit allocated buffers that legacy drivers/hw can handle. [Rewritten by Igor] Reported-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com> Suggested-by: Igor Mammedov <imammedo@redhat.com> Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Richard Henderson <rth@twiddle.net> Cc: Eduardo Habkost <ehabkost@redhat.com> Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Marcel Apfelbaum <marcel@redhat.com> Cc: Igor Mammedov <imammedo@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Thomas Huth <thuth@redhat.com> Cc: Alistair Francis <alistair23@gmail.com> Cc: Takao Indoh <indou.takao@jp.fujitsu.com> Cc: Izumi Taku <izumi.taku@jp.fujitsu.com> Reviewed-by: Igor Mammedov <imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-11-14 05:34:01 +03:00
m->auto_enable_numa_with_memhp = false;
}
DEFINE_Q35_MACHINE(2, 10);
static void pc_q35_machine_2_9_options(MachineClass *m)
{
pc_q35_machine_2_10_options(m);
compat_props_add(m->compat_props, hw_compat_2_9, hw_compat_2_9_len);
compat_props_add(m->compat_props, pc_compat_2_9, pc_compat_2_9_len);
}
DEFINE_Q35_MACHINE(2, 9);
static void pc_q35_machine_2_8_options(MachineClass *m)
{
pc_q35_machine_2_9_options(m);
compat_props_add(m->compat_props, hw_compat_2_8, hw_compat_2_8_len);
compat_props_add(m->compat_props, pc_compat_2_8, pc_compat_2_8_len);
}
DEFINE_Q35_MACHINE(2, 8);
static void pc_q35_machine_2_7_options(MachineClass *m)
{
pc_q35_machine_2_8_options(m);
m->max_cpus = 255;
compat_props_add(m->compat_props, hw_compat_2_7, hw_compat_2_7_len);
compat_props_add(m->compat_props, pc_compat_2_7, pc_compat_2_7_len);
}
DEFINE_Q35_MACHINE(2, 7);
static void pc_q35_machine_2_6_options(MachineClass *m)
{
X86MachineClass *x86mc = X86_MACHINE_CLASS(m);
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_q35_machine_2_7_options(m);
pcmc->legacy_cpu_hotplug = true;
x86mc->fwcfg_dma_enabled = false;
compat_props_add(m->compat_props, hw_compat_2_6, hw_compat_2_6_len);
compat_props_add(m->compat_props, pc_compat_2_6, pc_compat_2_6_len);
}
DEFINE_Q35_MACHINE(2, 6);
static void pc_q35_machine_2_5_options(MachineClass *m)
{
X86MachineClass *x86mc = X86_MACHINE_CLASS(m);
pc_q35_machine_2_6_options(m);
x86mc->save_tsc_khz = false;
m->legacy_fw_cfg_order = 1;
compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len);
compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len);
}
DEFINE_Q35_MACHINE(2, 5);
static void pc_q35_machine_2_4_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_q35_machine_2_5_options(m);
m->hw_version = "2.4.0";
pcmc->broken_reserved_end = true;
compat_props_add(m->compat_props, hw_compat_2_4, hw_compat_2_4_len);
compat_props_add(m->compat_props, pc_compat_2_4, pc_compat_2_4_len);
}
DEFINE_Q35_MACHINE(2, 4);